author     Tim Deegan <Tim.Deegan@xensource.com>   2006-10-17 11:11:48 +0100
committer  Tim Deegan <Tim.Deegan@xensource.com>   2006-10-17 11:11:48 +0100
commit     f26cc7890337eb60472e9888a547a608f2555916
tree       8b9e726efaa0dbb8d8b71688a077e8b6d421b05d
parent     3cc943dc88804bb61719d991267cc4b86f0fe198
[XEN] Don't keep shadows of PAE guest l3 tables.
Holding pages read-only because they contain guest PAE l3 tables costs performance, and is a potential bug if the guest puts other data structures on the same page as an l3 table.

Instead of shadowing them, treat PAE guests as if they had four CR3 registers, and load all four top-level entries when we handle a CR3 write. This also cuts about 500 lines of special-case refcounting and re-copying code.

Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
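In outline, the new CR3 handling for a PAE guest looks roughly like the sketch below. This is a condensed paraphrase of the sh_update_cr3() changes in this patch, not a literal excerpt; sh_set_toplevel_shadow(), shadow_table[] and shadow.l3table[] are all introduced further down in the diff.

    /* Sketch: on a guest CR3 write, shadow each of the four guest l3es as a
     * separate top-level shadow, then rebuild the per-vcpu PAE l3 table that
     * the hardware CR3 will actually point at. */
    guest_l3e_t *gl3e = (guest_l3e_t *)v->arch.guest_vtable;
    for ( i = 0; i < 4; i++ )
    {
        if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
        {
            mfn_t gl2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(gl3e[i]));
            if ( valid_mfn(gl2mfn) )
                sh_set_toplevel_shadow(v, i, gl2mfn,
                                       (i == 3) ? PGC_SH_l2h_shadow
                                                : PGC_SH_l2_shadow);
        }
        /* Slot i of the per-vcpu l3 points at the shadow of guest l3e i. */
        smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
        v->arch.shadow.l3table[i] = (mfn_x(smfn) == 0)
            ? shadow_l3e_empty()
            : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
    }
    /* HVM guests then run directly on this fabricated l3 table. */
    v->arch.hvm_vcpu.hw_cr3 = virt_to_maddr(&v->arch.shadow.l3table);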
-rw-r--r--xen/arch/x86/hvm/svm/svm.c17
-rw-r--r--xen/arch/x86/hvm/svm/vmcb.c9
-rw-r--r--xen/arch/x86/mm/shadow/common.c102
-rw-r--r--xen/arch/x86/mm/shadow/multi.c775
-rw-r--r--xen/arch/x86/mm/shadow/multi.h4
-rw-r--r--xen/arch/x86/mm/shadow/private.h4
-rw-r--r--xen/arch/x86/mm/shadow/types.h123
-rw-r--r--xen/include/asm-x86/domain.h15
-rw-r--r--xen/include/asm-x86/hvm/vcpu.h5
-rw-r--r--xen/include/asm-x86/mm.h17
-rw-r--r--xen/include/asm-x86/shadow.h3
11 files changed, 254 insertions, 820 deletions
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index ffefba24fc..b11432b3a5 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1739,9 +1739,6 @@ static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
if (old_base_mfn)
put_page(mfn_to_page(old_base_mfn));
- /*
- * arch.shadow_table should now hold the next CR3 for shadow
- */
v->arch.hvm_svm.cpu_cr3 = value;
update_cr3(v);
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
@@ -1788,10 +1785,6 @@ static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
- /*
- * arch->shadow_table should hold the next CR3 for shadow
- */
-
HVM_DBG_LOG(DBG_LEVEL_VMMU,
"Update CR3 value = %lx, mfn = %lx",
v->arch.hvm_svm.cpu_cr3, mfn);
@@ -2355,7 +2348,7 @@ void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
{
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
+ unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
printf("%s: guest registers from %s:\n", __func__, from);
#if defined (__x86_64__)
@@ -2681,11 +2674,11 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
if (do_debug)
{
printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
- "shadow_table = 0x%08x\n",
+ "hw_cr3 = 0x%16lx\n",
__func__,
(int) v->arch.guest_table.pfn,
(int) v->arch.monitor_table.pfn,
- (int) v->arch.shadow_table.pfn);
+ (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
svm_dump_vmcb(__func__, vmcb);
svm_dump_regs(__func__, regs);
@@ -2913,10 +2906,10 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
if (do_debug)
{
printk("vmexit_handler():- guest_table = 0x%08x, "
- "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
+ "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
(int)v->arch.guest_table.pfn,
(int)v->arch.monitor_table.pfn,
- (int)v->arch.shadow_table.pfn);
+ (int)v->arch.hvm_vcpu.hw_cr3);
printk("svm_vmexit_handler: Returning\n");
}
#endif
diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c
index b3e3cd13c9..93fced8753 100644
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -372,8 +372,8 @@ void svm_do_launch(struct vcpu *v)
if (svm_dbg_on)
{
unsigned long pt;
- pt = pagetable_get_paddr(v->arch.shadow_table);
- printk("%s: shadow_table = %lx\n", __func__, pt);
+ printk("%s: hw_cr3 = %llx\n", __func__,
+ (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
pt = pagetable_get_paddr(v->arch.guest_table);
printk("%s: guest_table = %lx\n", __func__, pt);
pt = pagetable_get_paddr(v->domain->arch.phys_table);
@@ -387,8 +387,9 @@ void svm_do_launch(struct vcpu *v)
{
printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
printk("init_guest_table: guest_table = 0x%08x, monitor_table = 0x%08x,"
- " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn,
- (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
+ " hw_cr3 = 0x%16llx\n", (int)v->arch.guest_table.pfn,
+ (int)v->arch.monitor_table.pfn,
+ (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
}
v->arch.schedule_tail = arch_svm_do_resume;
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 662d49daea..238a454c2c 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -283,11 +283,8 @@ __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
if ( page->shadow_flags & SHF_L2H_PAE )
result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
(v, gmfn, entry, size);
- if ( page->shadow_flags & SHF_L3_PAE )
- result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
- (v, gmfn, entry, size);
#else /* 32-bit non-PAE hypervisor does not support PAE guests */
- ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
+ ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
#endif
#if CONFIG_PAGING_LEVELS >= 4
@@ -427,22 +424,16 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
/* Allocating shadow pages
* -----------------------
*
- * Most shadow pages are allocated singly, but there are two cases where we
- * need to allocate multiple pages together.
- *
- * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
- * A 32-bit guest l1 table covers 4MB of virtual address space,
- * and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
- * of virtual address space each). Similarly, a 32-bit guest l2 table
- * (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va
- * each). These multi-page shadows are contiguous and aligned;
- * functions for handling offsets into them are defined in shadow.c
- * (shadow_l1_index() etc.)
+ * Most shadow pages are allocated singly, but there is one case where
+ * we need to allocate multiple pages together: shadowing 32-bit guest
+ * tables on PAE or 64-bit shadows. A 32-bit guest l1 table covers 4MB
+ * of virtual address space, and needs to be shadowed by two PAE/64-bit
+ * l1 tables (covering 2MB of virtual address space each). Similarly, a
+ * 32-bit guest l2 table (4GB va) needs to be shadowed by four
+ * PAE/64-bit l2 tables (1GB va each). These multi-page shadows are
+ * contiguous and aligned; functions for handling offsets into them are
+ * defined in shadow.c (shadow_l1_index() etc.)
*
- * 2: Shadowing PAE top-level pages. Each guest page that contains
- * any PAE top-level pages requires two shadow pages to shadow it.
- * They contain alternating l3 tables and pae_l3_bookkeeping structs.
- *
* This table shows the allocation behaviour of the different modes:
*
* Xen paging 32b pae pae 64b 64b 64b
@@ -452,7 +443,7 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
*
* sl1 size 4k 8k 4k 8k 4k 4k
* sl2 size 4k 16k 4k 16k 4k 4k
- * sl3 size - - 8k - 8k 4k
+ * sl3 size - - - - - 4k
* sl4 size - - - - - 4k
*
* We allocate memory from xen in four-page units and break them down
@@ -506,7 +497,6 @@ shadow_order(u32 shadow_type)
0, /* PGC_SH_fl1_pae_shadow */
0, /* PGC_SH_l2_pae_shadow */
0, /* PGC_SH_l2h_pae_shadow */
- 1, /* PGC_SH_l3_pae_shadow */
0, /* PGC_SH_l1_64_shadow */
0, /* PGC_SH_fl1_64_shadow */
0, /* PGC_SH_l2_64_shadow */
@@ -549,7 +539,8 @@ void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
#endif
break;
#if CONFIG_PAGING_LEVELS >= 3
- case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+ case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
+ case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
break;
#endif
@@ -590,18 +581,8 @@ void shadow_prealloc(struct domain *d, unsigned int order)
pg = list_entry(l, struct page_info, list);
smfn = page_to_mfn(pg);
-#if CONFIG_PAGING_LEVELS >= 3
- if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
- {
- /* For PAE, we need to unpin each subshadow on this shadow */
- SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
- }
- else
-#endif /* 32-bit code always takes this branch */
- {
- /* Unpin this top-level shadow */
- sh_unpin(v, smfn);
- }
+ /* Unpin this top-level shadow */
+ sh_unpin(v, smfn);
/* See if that freed up a chunk of appropriate size */
if ( chunk_is_available(d, order) ) return;
@@ -623,8 +604,12 @@ void shadow_prealloc(struct domain *d, unsigned int order)
shadow_unhook_mappings(v, smfn);
/* Need to flush TLB if we've altered our own tables */
- if ( !shadow_mode_external(d)
- && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
+ if ( !shadow_mode_external(d) &&
+ (pagetable_get_pfn(current->arch.shadow_table[0]) == mfn_x(smfn)
+ || pagetable_get_pfn(current->arch.shadow_table[1]) == mfn_x(smfn)
+ || pagetable_get_pfn(current->arch.shadow_table[2]) == mfn_x(smfn)
+ || pagetable_get_pfn(current->arch.shadow_table[3]) == mfn_x(smfn)
+ ) )
local_flush_tlb();
/* See if that freed up a chunk of appropriate size */
@@ -923,9 +908,20 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
#if CONFIG_PAGING_LEVELS == 3
if (type == PGT_l2_page_table)
{
+ struct vcpu *v;
/* We have written to the p2m l3: need to sync the per-vcpu
* copies of it in the monitor tables */
p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
+ /* Also, any vcpus running on shadows of the p2m need to
+ * reload their CR3s so the change propagates to the shadow */
+ ASSERT(shadow_lock_is_acquired(d));
+ for_each_vcpu(d, v)
+ {
+ if ( pagetable_get_pfn(v->arch.guest_table)
+ == pagetable_get_pfn(d->arch.phys_table)
+ && v->arch.shadow.mode != NULL )
+ v->arch.shadow.mode->update_cr3(v);
+ }
}
#endif
/* The P2M can be shadowed: keep the shadows synced */
@@ -1714,9 +1710,6 @@ void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
break;
- case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
- SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
- break;
#endif
#if CONFIG_PAGING_LEVELS >= 4
@@ -1771,7 +1764,6 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn,
#endif
NULL, /* l2_pae */
NULL, /* l2h_pae */
- NULL, /* l3_pae */
#if CONFIG_PAGING_LEVELS >= 4
SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */
SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */
@@ -1935,7 +1927,6 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
#endif
NULL, /* l2_pae */
NULL, /* l2h_pae */
- NULL, /* l3_pae */
#if CONFIG_PAGING_LEVELS >= 4
SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */
SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */
@@ -2008,7 +1999,8 @@ static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
- ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
+ ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_pae_shadow);
+ ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2h_pae_shadow);
ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
if (pg->up == 0) return 0;
@@ -2037,7 +2029,6 @@ static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
case PGC_SH_l1_pae_shadow:
case PGC_SH_l2_pae_shadow:
case PGC_SH_l2h_pae_shadow:
- case PGC_SH_l3_pae_shadow:
SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
break;
#if CONFIG_PAGING_LEVELS >= 4
@@ -2091,11 +2082,9 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
#if CONFIG_PAGING_LEVELS >= 3
SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae */
SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
- SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae */
#else
NULL, /* l2_pae */
NULL, /* l2h_pae */
- NULL, /* l3_pae */
#endif
NULL, /* l1_64 */
NULL, /* fl1_64 */
@@ -2121,9 +2110,8 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
| (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae */
0, /* fl1_pae */
- 1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae */
- 1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae */
- 0, /* l3_pae */
+ 0, /* l2_pae */
+ 0, /* l2h_pae */
1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64 */
0, /* fl1_64 */
1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64 */
@@ -2166,17 +2154,14 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \
if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned ) \
sh_unpin(v, smfn); \
- if ( (_type) == PGC_SH_l3_pae_shadow ) \
- SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \
} while (0)
if ( sh_flags & SHF_L1_32 ) DO_UNSHADOW(PGC_SH_l1_32_shadow);
if ( sh_flags & SHF_L2_32 ) DO_UNPIN(PGC_SH_l2_32_shadow);
#if CONFIG_PAGING_LEVELS >= 3
if ( sh_flags & SHF_L1_PAE ) DO_UNSHADOW(PGC_SH_l1_pae_shadow);
- if ( sh_flags & SHF_L2_PAE ) DO_UNSHADOW(PGC_SH_l2_pae_shadow);
- if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
- if ( sh_flags & SHF_L3_PAE ) DO_UNPIN(PGC_SH_l3_pae_shadow);
+ if ( sh_flags & SHF_L2_PAE ) DO_UNPIN(PGC_SH_l2_pae_shadow);
+ if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(PGC_SH_l2h_pae_shadow);
#if CONFIG_PAGING_LEVELS >= 4
if ( sh_flags & SHF_L1_64 ) DO_UNSHADOW(PGC_SH_l1_64_shadow);
if ( sh_flags & SHF_L2_64 ) DO_UNSHADOW(PGC_SH_l2_64_shadow);
@@ -2188,14 +2173,6 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
#undef DO_UNSHADOW
#undef DO_UNPIN
-
-#if CONFIG_PAGING_LEVELS > 2
- /* We may have caused some PAE l3 entries to change: need to
- * fix up the copies of them in various places */
- if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
- sh_pae_recopy(v->domain);
-#endif
-
/* If that didn't catch the shadows, something is wrong */
if ( !fast && (pg->count_info & PGC_page_table) )
{
@@ -3127,7 +3104,6 @@ void shadow_audit_tables(struct vcpu *v)
SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2_pae */
SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2h_pae */
- SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3), /* l3_pae */
#if CONFIG_PAGING_LEVELS >= 4
SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4), /* l1_64 */
SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64 */
@@ -3152,7 +3128,7 @@ void shadow_audit_tables(struct vcpu *v)
{
case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
- |SHF_L2H_PAE|SHF_L3_PAE); break;
+ |SHF_L2H_PAE); break;
case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64
|SHF_L3_64|SHF_L4_64); break;
default: BUG();
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index de103e1b6c..1d693da1ec 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -21,20 +21,6 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-// DESIGN QUESTIONS:
-// Why use subshadows for PAE guests?
-// - reduces pressure in the hash table
-// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
-// - would need to find space in the page_info to store 7 more bits of
-// backpointer
-// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
-// figure out when to demote the guest page from l3 status
-//
-// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
-// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
-// space for both PV and HVM guests.
-//
-
#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
@@ -118,9 +104,6 @@ static char *fetch_type_names[] = {
#endif
/* XXX forward declarations */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
-#endif
static inline void sh_update_linear_entries(struct vcpu *v);
/**************************************************************************/
@@ -129,8 +112,6 @@ static inline void sh_update_linear_entries(struct vcpu *v);
* Normal case: maps the mfn of a guest page to the mfn of its shadow page.
* FL1's: maps the *gfn* of the start of a superpage to the mfn of a
* shadow L1 which maps its "splinters".
- * PAE CR3s: maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
- * PAE L3 info page for that CR3 value.
*/
static inline mfn_t
@@ -429,18 +410,16 @@ static void sh_audit_gw(struct vcpu *v, walk_t *gw)
if ( !(SHADOW_AUDIT_ENABLE) )
return;
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
if ( valid_mfn(gw->l4mfn)
&& valid_mfn((smfn = get_shadow_status(v, gw->l4mfn,
PGC_SH_l4_shadow))) )
(void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* PAE or 64... */
if ( valid_mfn(gw->l3mfn)
&& valid_mfn((smfn = get_shadow_status(v, gw->l3mfn,
PGC_SH_l3_shadow))) )
(void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* All levels... */
+#endif /* PAE or 64... */
if ( valid_mfn(gw->l2mfn) )
{
if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn,
@@ -498,8 +477,7 @@ static u32 guest_set_ad_bits(struct vcpu *v,
flags = guest_l1e_get_flags(*ep);
/* PAE l3s do not have A and D bits */
- if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
- return flags;
+ ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
/* Need the D bit as well for writes, in L1es and PSE L2es. */
if ( ft == ft_demand_write
@@ -646,38 +624,14 @@ shadow_l2_index(mfn_t *smfn, u32 guest_index)
#endif
}
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
static inline u32
shadow_l3_index(mfn_t *smfn, u32 guest_index)
{
-#if GUEST_PAGING_LEVELS == 3
- u32 group_id;
-
- // Because we use twice the space in L3 shadows as was consumed in guest
- // L3s, the number of guest entries per shadow page is
- // SHADOW_L2_PAGETABLE_ENTRIES/2. (Note this is *not*
- // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
- //
- *smfn = _mfn(mfn_x(*smfn) +
- (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
-
- // We store PAE L3 shadows in groups of 4, alternating shadows and
- // pae_l3_bookkeeping structs. So the effective shadow index is
- // the the group_id * 8 + the offset within the group.
- //
- guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
- group_id = guest_index / 4;
- return (group_id * 8) + (guest_index % 4);
-#else
return guest_index;
-#endif
}
-#endif // GUEST_PAGING_LEVELS >= 3
-
-#if GUEST_PAGING_LEVELS >= 4
-
static inline u32
shadow_l4_index(mfn_t *smfn, u32 guest_index)
{
@@ -722,6 +676,9 @@ do { \
u32 pass_thru_flags;
u32 sflags;
+ /* We don't shadow PAE l3s */
+ ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
// XXX -- might want to think about PAT support for HVM guests...
#ifndef NDEBUG
@@ -757,29 +714,16 @@ do { \
if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
- // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
- //
- if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
- pass_thru_flags = _PAGE_PRESENT;
- else
- {
- pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
- _PAGE_RW | _PAGE_PRESENT);
- if ( guest_supports_nx(v) )
- pass_thru_flags |= _PAGE_NX_BIT;
- }
-
- // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
- // L3e's; they are all implied. So we emulate them here.
- //
- if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
- gflags = pass_thru_flags;
// Propagate bits from the guest to the shadow.
// Some of these may be overwritten, below.
// Since we know the guest's PRESENT bit is set, we also set the shadow's
// SHADOW_PRESENT bit.
//
+ pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+ _PAGE_RW | _PAGE_PRESENT);
+ if ( guest_supports_nx(v) )
+ pass_thru_flags |= _PAGE_NX_BIT;
sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
// Copy the guest's RW bit into the SHADOW_RW bit.
@@ -800,8 +744,7 @@ do { \
// If the A or D bit has not yet been set in the guest, then we must
// prevent the corresponding kind of access.
//
- if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
- !(gflags & _PAGE_ACCESSED)) )
+ if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
sflags &= ~_PAGE_PRESENT;
/* D bits exist in L1es and PSE L2es */
@@ -890,9 +833,7 @@ l4e_propagate_from_guest(struct vcpu *v,
fetch_type_names[ft], gl4e->l4, sl4p->l4);
ASSERT(sflags != -1);
}
-#endif // GUEST_PAGING_LEVELS >= 4
-#if GUEST_PAGING_LEVELS >= 3
static void
l3e_propagate_from_guest(struct vcpu *v,
guest_l3e_t *gl3e,
@@ -912,7 +853,7 @@ l3e_propagate_from_guest(struct vcpu *v,
fetch_type_names[ft], gl3e->l3, sl3p->l3);
ASSERT(sflags != -1);
}
-#endif // GUEST_PAGING_LEVELS >= 3
+#endif // GUEST_PAGING_LEVELS >= 4
static void
l2e_propagate_from_guest(struct vcpu *v,
@@ -1081,9 +1022,6 @@ shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
safe_write_entry(dst++, src++);
if ( map != NULL ) sh_unmap_domain_page(map);
-
- /* XXX TODO:
- * Update min/max field in page_info struct of this mfn */
}
static inline int
@@ -1195,9 +1133,7 @@ static int shadow_set_l4e(struct vcpu *v,
}
return flags;
}
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-#if GUEST_PAGING_LEVELS >= 3
static int shadow_set_l3e(struct vcpu *v,
shadow_l3e_t *sl3e,
shadow_l3e_t new_sl3e,
@@ -1224,28 +1160,6 @@ static int shadow_set_l3e(struct vcpu *v,
shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
flags |= SHADOW_SET_CHANGED;
-#if GUEST_PAGING_LEVELS == 3
- /* We wrote a guest l3e in a PAE pagetable. This table is copied in
- * the linear pagetable entries of its l2s, and may also be copied
- * to a low memory location to make it fit in CR3. Report that we
- * need to resync those copies (we can't wait for the guest to flush
- * the TLB because it might be an increase in rights). */
- {
- struct vcpu *vcpu;
-
- struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
- for_each_vcpu(v->domain, vcpu)
- {
- if (info->vcpus & (1 << vcpu->vcpu_id))
- {
- // Remember that this flip/update needs to occur.
- vcpu->arch.shadow.pae_flip_pending = 1;
- flags |= SHADOW_SET_L3PAE_RECOPY;
- }
- }
- }
-#endif
-
if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
{
/* We lost a reference to an old mfn. */
@@ -1260,7 +1174,7 @@ static int shadow_set_l3e(struct vcpu *v,
}
return flags;
}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
+#endif /* GUEST_PAGING_LEVELS >= 4 */
static int shadow_set_l2e(struct vcpu *v,
shadow_l2e_t *sl2e,
@@ -1535,51 +1449,7 @@ do { \
#endif /* different kinds of l2 */
-#if GUEST_PAGING_LEVELS == 3
-
-/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
-#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code) \
-do { \
- int _i; \
- for ( _i = 0; _i < 4; _i++ ) \
- { \
- if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \
- {_code} \
- if ( _done ) break; \
- _sl3e++; \
- increment_ptr_to_guest_entry(_gl3p); \
- } \
-} while (0)
-
-/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
-#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
-do { \
- int _i, _j, _k, __done = 0; \
- ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask) \
- == PGC_SH_l3_pae_shadow); \
- /* The subshadows are split, 64 on each page of the shadow */ \
- for ( _j = 0; _j < 2 && !__done; _j++ ) \
- { \
- void *_sp = sh_map_domain_page(_sl3mfn); \
- for ( _i = 0; _i < 64; _i++ ) \
- { \
- /* Every second 32-byte region is a bookkeeping entry */ \
- _sl3e = (shadow_l3e_t *)(_sp + (64 * _i)); \
- if ( (sl3p_to_info(_sl3e))->refcount > 0 ) \
- SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, \
- ({ __done = (_done); __done; }), \
- _code); \
- else \
- for ( _k = 0 ; _k < 4 ; _k++ ) \
- increment_ptr_to_guest_entry(_gl3p); \
- if ( __done ) break; \
- } \
- sh_unmap_domain_page(_sp); \
- _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1); \
- } \
-} while (0)
-
-#elif GUEST_PAGING_LEVELS == 4
+#if GUEST_PAGING_LEVELS == 4
/* 64-bit l3: touch all entries */
#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
@@ -1711,8 +1581,8 @@ void sh_install_xen_entries_in_l2h(struct vcpu *v,
/* We don't set up a linear mapping here because we can't until this
* l2h is installed in an l3e. sh_update_linear_entries() handles
- * the linear mappings when the l3 is loaded. We zero them here, just as
- * a safety measure.
+ * the linear mappings when CR3 (and so the fourth l3e) is loaded.
+ * We zero them here, just as a safety measure.
*/
for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
@@ -1740,37 +1610,6 @@ void sh_install_xen_entries_in_l2h(struct vcpu *v,
sh_unmap_domain_page(sl2e);
}
-
-void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
-{
- shadow_l3e_t *sl3e;
- guest_l3e_t *gl3e = v->arch.guest_vtable;
- shadow_l3e_t new_sl3e;
- gfn_t l2gfn;
- mfn_t l2gmfn, l2smfn;
- int r;
-
- ASSERT(!shadow_mode_external(v->domain));
- ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
- l2gfn = guest_l3e_get_gfn(gl3e[3]);
- l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
- l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
- if ( !valid_mfn(l2smfn) )
- {
- /* must remove write access to this page before shadowing it */
- // XXX -- should check to see whether this is better with level==0 or
- // level==2...
- if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
- flush_tlb_mask(v->domain->domain_dirty_cpumask);
-
- l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
- }
- l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
- ft_prefetch);
- sl3e = sh_map_domain_page(sl3mfn);
- r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
- sh_unmap_domain_page(sl3e);
-}
#endif
@@ -1827,8 +1666,6 @@ void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
-
-
/**************************************************************************/
/* Create a shadow of a given guest page.
*/
@@ -1839,7 +1676,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
mfn_x(gmfn), shadow_type, mfn_x(smfn));
- if ( shadow_type != PGC_SH_guest_root_type )
+ if ( shadow_type != PGC_SH_l2_32_shadow
+ && shadow_type != PGC_SH_l2_pae_shadow
+ && shadow_type != PGC_SH_l2h_pae_shadow
+ && shadow_type != PGC_SH_l4_64_shadow )
/* Lower-level shadow, not yet linked from a higher level */
mfn_to_page(smfn)->up = 0;
@@ -1853,8 +1693,6 @@ sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
#endif
#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
- case PGC_SH_l3_shadow:
- sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
case PGC_SH_l2h_shadow:
sh_install_xen_entries_in_l2h(v, smfn); break;
#endif
@@ -1988,20 +1826,16 @@ static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v,
mfn_t *sl4mfn)
{
/* There is always a shadow of the top level table. Get it. */
- *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
+ *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
/* Reading the top level table is always valid. */
return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
}
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-#if GUEST_PAGING_LEVELS >= 3
static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
walk_t *gw,
mfn_t *sl3mfn,
fetch_type_t ft)
{
-#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
mfn_t sl4mfn;
shadow_l4e_t *sl4e;
if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
@@ -2032,19 +1866,8 @@ static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
}
/* Now follow it down a level. Guaranteed to succeed. */
return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
-#else /* PAE... */
- /* There is always a shadow of the top level table. Get it. */
- *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
- /* This next line is important: the shadow l3 table is in an 8k
- * shadow and we need to return the right mfn of the pair. This call
- * will set it for us as a side-effect. */
- (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
- ASSERT(v->arch.shadow_vtable);
- return ((shadow_l3e_t *)v->arch.shadow_vtable)
- + shadow_l3_table_offset(gw->va);
-#endif /* GUEST_PAGING_LEVELS >= 4 */
}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
+#endif /* GUEST_PAGING_LEVELS >= 4 */
static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
@@ -2052,7 +1875,7 @@ static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
mfn_t *sl2mfn,
fetch_type_t ft)
{
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
mfn_t sl3mfn = _mfn(INVALID_MFN);
shadow_l3e_t *sl3e;
if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
@@ -2080,17 +1903,22 @@ static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
*sl2mfn, &new_sl3e, ft);
r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
ASSERT((r & SHADOW_SET_FLUSH) == 0);
-#if GUEST_PAGING_LEVELS == 3
- /* Need to sync up the linear maps, as we are about to use them */
- ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
- sh_pae_recopy(v->domain);
-#endif
}
/* Now follow it down a level. Guaranteed to succeed. */
return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#elif GUEST_PAGING_LEVELS == 3 /* PAE... */
+ /* We never demand-shadow PAE l3es: they are only created in
+ * sh_update_cr3(). Check if the relevant sl3e is present. */
+ shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table)
+ + shadow_l3_linear_offset(gw->va);
+ if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
+ return NULL;
+ *sl2mfn = shadow_l3e_get_mfn(*sl3e);
+ ASSERT(valid_mfn(*sl2mfn));
+ return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
#else /* 32bit... */
/* There is always a shadow of the top level table. Get it. */
- *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
+ *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
/* This next line is important: the guest l2 has a 16k
* shadow, we need to return the right mfn of the four. This
* call will set it for us as a side-effect. */
@@ -2213,9 +2041,7 @@ void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
/* Put the memory back in the pool */
shadow_free(v->domain, smfn);
}
-#endif
-#if GUEST_PAGING_LEVELS >= 3
void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
{
shadow_l3e_t *sl3e;
@@ -2230,10 +2056,6 @@ void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 3
- /* Take this shadow off the list of root shadows */
- list_del_init(&mfn_to_page(smfn)->list);
-#endif
/* Decrement refcounts of all the old entries */
sl3mfn = smfn;
@@ -2247,53 +2069,8 @@ void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
/* Put the memory back in the pool */
shadow_free(v->domain, smfn);
}
-#endif
-
-
-#if GUEST_PAGING_LEVELS == 3
-static void sh_destroy_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e)
-/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
-{
- int i;
- mfn_t sl3mfn = _mfn(maddr_from_mapped_domain_page(sl3e) >> PAGE_SHIFT);
- ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0);
- for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ )
- if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT )
- shadow_set_l3e(v, &sl3e[i], shadow_l3e_empty(), sl3mfn);
-}
-#endif
+#endif /* GUEST_PAGING_LEVELS >= 4 */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
-/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
-{
- int i, j;
- struct pae_l3_bookkeeping *bk;
-
- ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask)
- == PGC_SH_l3_pae_shadow);
- /* The subshadows are split, 64 on each page of the shadow */
- for ( i = 0; i < 2; i++ )
- {
- void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
- for ( j = 0; j < 64; j++ )
- {
- /* Every second 32-byte region is a bookkeeping entry */
- bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
- if ( bk->pinned )
- sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
- /* Check whether we've just freed the whole shadow */
- if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 )
- {
- sh_unmap_domain_page(p);
- return;
- }
- }
- sh_unmap_domain_page(p);
- }
-}
-#endif
void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
{
@@ -2311,7 +2088,7 @@ void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 2
+#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3)
/* Take this shadow off the list of root shadows */
list_del_init(&mfn_to_page(smfn)->list);
#endif
@@ -2421,31 +2198,14 @@ void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
#elif GUEST_PAGING_LEVELS == 3
-void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
-/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
+/* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
{
- shadow_l3e_t *sl3e;
- SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
- if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
- mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
- if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask)
- == PGC_SH_l2h_pae_shadow )
- {
- /* High l2: need to pick particular l2es to unhook */
- shadow_l2e_t *sl2e;
- SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
- (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
- });
- }
- else
- {
- /* Normal l2: can safely unhook the whole l3e */
- (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
- }
- }
+ shadow_l2e_t *sl2e;
+ int xen_mappings = !shadow_mode_external(v->domain);
+ SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+ (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
});
- /* We've changed PAE L3 entries: must sync up various copies of them */
- sh_pae_recopy(v->domain);
}
#elif GUEST_PAGING_LEVELS == 4
@@ -2523,9 +2283,8 @@ static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
return result;
}
-#endif // GUEST_PAGING_LEVELS >= 4
-#if GUEST_PAGING_LEVELS >= 3
+
static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
{
shadow_l3e_t new_sl3e;
@@ -2536,16 +2295,6 @@ static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
perfc_incrc(shadow_validate_gl3e_calls);
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- {
- /* If we've updated a subshadow which is unreferenced then
- we don't care what value is being written - bail. */
- struct pae_l3_bookkeeping *info = sl3p_to_info(se);
- if(!info->refcount)
- return result;
- }
-#endif
-
if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
{
gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
@@ -2559,16 +2308,9 @@ static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
sl2mfn, &new_sl3e, ft_prefetch);
result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
-#if GUEST_PAGING_LEVELS == 3
- /* We have changed a PAE l3 entry: need to sync up the possible copies
- * of it */
- if ( result & SHADOW_SET_L3PAE_RECOPY )
- sh_pae_recopy(v->domain);
-#endif
-
return result;
}
-#endif // GUEST_PAGING_LEVELS >= 3
+#endif // GUEST_PAGING_LEVELS >= 4
static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
{
@@ -2755,12 +2497,12 @@ int
sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
void *new_gl3p, u32 size)
{
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
return sh_map_and_validate(v, gl3mfn, new_gl3p, size,
PGC_SH_l3_shadow,
shadow_l3_index,
validate_gl3e);
-#else // ! GUEST_PAGING_LEVELS >= 3
+#else // ! GUEST_PAGING_LEVELS >= 4
SHADOW_PRINTK("called in wrong paging mode!\n");
BUG();
return 0;
@@ -2822,7 +2564,7 @@ static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
{
u32 flags = mfn_to_page(gmfn)->shadow_flags;
mfn_t smfn;
- if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
+ if ( !(flags & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
{
perfc_incrc(shadow_early_unshadow);
sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
@@ -2840,9 +2582,14 @@ static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
shadow_unhook_mappings(v, smfn);
}
- if ( flags & SHF_L3_PAE )
+ if ( flags & SHF_L2_PAE )
{
- smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
+ smfn = get_shadow_status(v, gmfn, PGC_SH_l2_pae_shadow);
+ shadow_unhook_mappings(v, smfn);
+ }
+ if ( flags & SHF_L2H_PAE )
+ {
+ smfn = get_shadow_status(v, gmfn, PGC_SH_l2h_pae_shadow);
shadow_unhook_mappings(v, smfn);
}
if ( flags & SHF_L4_64 )
@@ -3183,8 +2930,7 @@ sh_invlpg(struct vcpu *v, unsigned long va)
return 0;
}
#elif SHADOW_PAGING_LEVELS == 3
- if ( !(shadow_l3e_get_flags(
- ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)])
+ if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)])
& _PAGE_PRESENT) )
// no need to flush anything if there's no SL2...
return 0;
@@ -3249,34 +2995,6 @@ sh_gva_to_gpa(struct vcpu *v, unsigned long va)
}
-// XXX -- should this be in this file?
-// Or should it be moved to shadow-common.c?
-//
-/* returns a lowmem machine address of the copied HVM L3 root table
- * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
- * otherwise blank out any entries with reserved bits in them. */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long
-hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
-{
- int i, f;
- int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
- l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
- memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
- for ( i = 0; i < 4; i++ )
- {
- f = l3e_get_flags(l3tab[i]);
- if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
- new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
- else
- new_l3e = l3e_empty();
- safe_write_entry(&copy[i], &new_l3e);
- }
- return __pa(copy);
-}
-#endif
-
-
static inline void
sh_update_linear_entries(struct vcpu *v)
/* Sync up all the linear mappings for this vcpu's pagetables */
@@ -3330,7 +3048,7 @@ sh_update_linear_entries(struct vcpu *v)
if ( v == current )
{
__linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] =
- l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
__PAGE_HYPERVISOR);
}
else
@@ -3338,7 +3056,7 @@ sh_update_linear_entries(struct vcpu *v)
l4_pgentry_t *ml4e;
ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
- l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
__PAGE_HYPERVISOR);
sh_unmap_domain_page(ml4e);
}
@@ -3379,13 +3097,8 @@ sh_update_linear_entries(struct vcpu *v)
sh_unmap_domain_page(ml4e);
}
-#if GUEST_PAGING_LEVELS == 2
/* Shadow l3 tables are made up by update_cr3 */
- sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
- /* Always safe to use shadow_vtable, because it's globally mapped */
- sl3e = v->arch.shadow_vtable;
-#endif
+ sl3e = v->arch.shadow.l3table;
for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
{
@@ -3424,14 +3137,14 @@ sh_update_linear_entries(struct vcpu *v)
#if GUEST_PAGING_LEVELS == 2
/* Shadow l3 tables were built by update_cr3 */
if ( shadow_mode_external(d) )
- shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+ shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
else
BUG(); /* PV 2-on-3 is not supported yet */
#else /* GUEST_PAGING_LEVELS == 3 */
- /* Always safe to use *_vtable, because they're globally mapped */
- shadow_l3e = v->arch.shadow_vtable;
+ shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
+ /* Always safe to use guest_vtable, because it's globally mapped */
guest_l3e = v->arch.guest_vtable;
#endif /* GUEST_PAGING_LEVELS */
@@ -3510,7 +3223,7 @@ sh_update_linear_entries(struct vcpu *v)
if ( v == current )
{
__linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
__PAGE_HYPERVISOR);
}
else
@@ -3518,7 +3231,7 @@ sh_update_linear_entries(struct vcpu *v)
l2_pgentry_t *ml2e;
ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+ l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
__PAGE_HYPERVISOR);
sh_unmap_domain_page(ml2e);
}
@@ -3530,69 +3243,7 @@ sh_update_linear_entries(struct vcpu *v)
}
-// XXX -- should this be in this file?
-// Or should it be moved to shadow-common.c?
-//
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh_pae_recopy(struct domain *d)
-/* Called whenever we write to the l3 entries of a PAE pagetable which
- * is currently in use. Each vcpu that is using the table needs to
- * resync its copies of the l3s in linear maps and any low-memory
- * copies it might have made for fitting into 32bit CR3.
- * Since linear maps are also resynced when we change CR3, we don't
- * need to worry about changes to PAE l3es that are not currently in use.*/
-{
- struct vcpu *v;
- cpumask_t flush_mask = CPU_MASK_NONE;
- ASSERT(shadow_lock_is_acquired(d));
-
- for_each_vcpu(d, v)
- {
- if ( !v->arch.shadow.pae_flip_pending )
- continue;
-
- cpu_set(v->processor, flush_mask);
-
- SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
-
- /* This vcpu has a copy in its linear maps */
- sh_update_linear_entries(v);
- if ( hvm_guest(v) )
- {
- /* This vcpu has a copy in its HVM PAE l3 */
- v->arch.hvm_vcpu.hw_cr3 =
- hvm_pae_copy_root(v, v->arch.shadow_vtable,
- !shadow_vcpu_mode_translate(v));
- }
-#if CONFIG_PAGING_LEVELS == 3
- else
- {
- /* This vcpu might have copied the l3 to below 4GB */
- if ( v->arch.cr3 >> PAGE_SHIFT
- != pagetable_get_pfn(v->arch.shadow_table) )
- {
- /* Recopy to where that copy is. */
- int i;
- l3_pgentry_t *dst, *src;
- dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
- src = v->arch.shadow_vtable;
- for ( i = 0 ; i < 4 ; i++ )
- safe_write_entry(dst + i, src + i);
- }
- }
-#endif
- v->arch.shadow.pae_flip_pending = 0;
- }
-
- flush_tlb_mask(flush_mask);
-}
-#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
-
-
-/* removes:
- * vcpu->arch.guest_vtable
- * vcpu->arch.shadow_table
- * vcpu->arch.shadow_vtable
+/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[].
* Does all appropriate management/bookkeeping/refcounting/etc...
*/
static void
@@ -3600,6 +3251,7 @@ sh_detach_old_tables(struct vcpu *v)
{
struct domain *d = v->domain;
mfn_t smfn;
+ int i = 0;
////
//// vcpu->arch.guest_vtable
@@ -3620,56 +3272,80 @@ sh_detach_old_tables(struct vcpu *v)
}
////
- //// vcpu->arch.shadow_table
+ //// vcpu->arch.shadow_table[]
////
- smfn = pagetable_get_mfn(v->arch.shadow_table);
- if ( mfn_x(smfn) )
- {
- ASSERT(v->arch.shadow_vtable);
-#if GUEST_PAGING_LEVELS == 3
- // PAE guests do not (necessarily) use an entire page for their
- // 4-entry L3s, so we have to deal with them specially.
- //
- sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
-#else
- sh_put_ref(v, smfn, 0);
-#endif
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- {
- struct pae_l3_bookkeeping *info =
- sl3p_to_info(v->arch.shadow_vtable);
- ASSERT(test_bit(v->vcpu_id, &info->vcpus));
- clear_bit(v->vcpu_id, &info->vcpus);
- }
+#if GUEST_PAGING_LEVELS == 3
+ /* PAE guests have four shadow_table entries */
+ for ( i = 0 ; i < 4 ; i++ )
#endif
- v->arch.shadow_table = pagetable_null();
+ {
+ smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
+ if ( mfn_x(smfn) )
+ sh_put_ref(v, smfn, 0);
+ v->arch.shadow_table[i] = pagetable_null();
}
+}
- ////
- //// vcpu->arch.shadow_vtable
- ////
- if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
- v->arch.shadow_vtable )
+/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
+static void
+sh_set_toplevel_shadow(struct vcpu *v,
+ int slot,
+ mfn_t gmfn,
+ unsigned int root_type)
+{
+ mfn_t smfn = get_shadow_status(v, gmfn, root_type);
+ struct domain *d = v->domain;
+ ASSERT(pagetable_is_null(v->arch.shadow_table[slot]));
+ if ( valid_mfn(smfn) )
+ {
+ /* Pull this root shadow to the front of the list of roots. */
+ list_del(&mfn_to_page(smfn)->list);
+ list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+ }
+ else
{
- // Q: why does this need to use (un)map_domain_page_*global* ?
- /* A: so sh_update_linear_entries can operate on other vcpus */
- sh_unmap_domain_page_global(v->arch.shadow_vtable);
- v->arch.shadow_vtable = NULL;
+ /* This guest MFN is a pagetable. Must revoke write access. */
+ if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) != 0 )
+ flush_tlb_mask(v->domain->domain_dirty_cpumask);
+ /* Make sure there's enough free shadow memory. */
+ shadow_prealloc(d, SHADOW_MAX_ORDER);
+ /* Shadow the page. */
+ smfn = sh_make_shadow(v, gmfn, root_type);
+ list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
}
+ ASSERT(valid_mfn(smfn));
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+ /* Once again OK to unhook entries from this table if we see fork/exit */
+ ASSERT(sh_mfn_is_a_page_table(gmfn));
+ mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
+#endif
+
+ /* Take a ref to this page: it will be released in sh_detach_old_tables. */
+ sh_get_ref(smfn, 0);
+ sh_pin(smfn);
+
+ /* Done. Install it */
+ SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n",
+ GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot,
+ mfn_x(gmfn), mfn_x(smfn));
+ v->arch.shadow_table[slot] = pagetable_from_mfn(smfn);
}
+
static void
sh_update_cr3(struct vcpu *v)
-/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
+/* Updates vcpu->arch.cr3 after the guest has changed CR3.
* Paravirtual guests should set v->arch.guest_table (and guest_table_user,
* if appropriate).
- * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)...
+ * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
+ * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
*/
{
struct domain *d = v->domain;
- mfn_t gmfn, smfn;
+ mfn_t gmfn;
#if GUEST_PAGING_LEVELS == 3
u32 guest_idx=0;
#endif
@@ -3770,159 +3446,93 @@ sh_update_cr3(struct vcpu *v)
#endif
////
- //// vcpu->arch.shadow_table
+ //// vcpu->arch.shadow_table[]
////
- smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
- if ( valid_mfn(smfn) )
- {
- /* Pull this root shadow to the front of the list of roots. */
- list_del(&mfn_to_page(smfn)->list);
- list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
- }
- else
- {
- /* This guest MFN is a pagetable. Must revoke write access. */
- if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0)
- != 0 )
- flush_tlb_mask(d->domain_dirty_cpumask);
- /* Make sure there's enough free shadow memory. */
- shadow_prealloc(d, SHADOW_MAX_ORDER);
- /* Shadow the page. */
- smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
- list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
- }
- ASSERT(valid_mfn(smfn));
- v->arch.shadow_table = pagetable_from_mfn(smfn);
-
-#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
- /* Once again OK to unhook entries from this table if we see fork/exit */
- ASSERT(sh_mfn_is_a_page_table(gmfn));
- mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
-#endif
-
- ////
- //// vcpu->arch.shadow_vtable
- ////
- if ( shadow_mode_external(d) )
+#if GUEST_PAGING_LEVELS == 2
+ sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l2_shadow);
+#elif GUEST_PAGING_LEVELS == 3
+ /* PAE guests have four shadow_table entries, based on the
+ * current values of the guest's four l3es. */
{
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- mfn_t adjusted_smfn = smfn;
- u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
- // Q: why does this need to use (un)map_domain_page_*global* ?
- v->arch.shadow_vtable =
- (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
- shadow_idx;
-#else
- // Q: why does this need to use (un)map_domain_page_*global* ?
- v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
-#endif
+ int i;
+ guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable;
+ for ( i = 0; i < 4; i++ )
+ {
+ ASSERT(pagetable_is_null(v->arch.shadow_table[i]));
+ if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
+ {
+ gfn_t gl2gfn = guest_l3e_get_gfn(gl3e[i]);
+ mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
+ if ( valid_mfn(gl2mfn) )
+ sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
+ ? PGC_SH_l2h_shadow
+ : PGC_SH_l2_shadow);
+ }
+ }
}
- else
- {
-#if SHADOW_PAGING_LEVELS == 4
- v->arch.shadow_vtable = __sh_linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
- // XXX - why does this need a global map?
- v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#elif GUEST_PAGING_LEVELS == 4
+ sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l4_shadow);
#else
- v->arch.shadow_vtable = __sh_linear_l2_table;
+#error This should never happen
#endif
- }
#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- // Now that shadow_vtable is in place, check that the sl3e[3] is properly
- // shadowed and installed in PAE PV guests...
- if ( !shadow_mode_external(d) &&
- !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
- _PAGE_PRESENT) )
- {
- sh_install_xen_entries_in_l3(v, gmfn, smfn);
- }
#endif
- ////
- //// Take a ref to the new shadow table, and pin it.
- ////
- //
- // This ref is logically "held" by v->arch.shadow_table entry itself.
- // Release the old ref.
- //
-#if GUEST_PAGING_LEVELS == 3
- // PAE guests do not (necessarily) use an entire page for their
- // 4-entry L3s, so we have to deal with them specially.
- //
- // XXX - might want to revisit this if/when we do multiple compilation for
- // HVM-vs-PV guests, as PAE PV guests could get away without doing
- // subshadows.
- //
- sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
- sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
+ ///
+ /// v->arch.shadow.l3table
+ ///
+#if SHADOW_PAGING_LEVELS == 3
+ {
+ mfn_t smfn;
+ int i;
+ for ( i = 0; i < 4; i++ )
+ {
+#if GUEST_PAGING_LEVELS == 2
+ /* 2-on-3: make a PAE l3 that points at the four-page l2 */
+ smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[0]) + i);
#else
- sh_get_ref(smfn, 0);
- sh_pin(smfn);
-#endif
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
- // PAE 3-on-3 shadows have to keep track of which vcpu's are using
- // which l3 subshadow, in order handle the SHADOW_SET_L3PAE_RECOPY
- // case from validate_gl3e(). Search for SHADOW_SET_L3PAE_RECOPY
- // in the code for more info.
- //
- {
- struct pae_l3_bookkeeping *info =
- sl3p_to_info(v->arch.shadow_vtable);
- ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
- set_bit(v->vcpu_id, &info->vcpus);
- }
+ /* 3-on-3: make a PAE l3 that points at the four l2 pages */
+ smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
#endif
+ v->arch.shadow.l3table[i] =
+ (mfn_x(smfn) == 0)
+ ? shadow_l3e_empty()
+ : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
+ }
+ }
+#endif /* SHADOW_PAGING_LEVELS == 3 */
- debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
- __func__, gmfn, smfn);
///
- /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
+ /// v->arch.cr3
///
if ( shadow_mode_external(d) )
{
- ASSERT(hvm_guest(v));
make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
-
-#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
-#if SHADOW_PAGING_LEVELS != 3
-#error unexpected combination of GUEST and SHADOW paging levels
-#endif
- /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
- {
- mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
- int i;
-
- ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
- virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
- for (i = 0; i < 4; i++)
- {
- v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
- shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
- }
- }
-#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
- /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
- * If paging is disabled, clear l3e reserved bits; otherwise
- * remove entries that have reserved bits set. */
- v->arch.hvm_vcpu.hw_cr3 =
- hvm_pae_copy_root(v, v->arch.shadow_vtable,
- !shadow_vcpu_mode_translate(v));
-#else
- /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
- v->arch.hvm_vcpu.hw_cr3 =
- pagetable_get_paddr(v->arch.shadow_table);
-#endif
}
else // not shadow_mode_external...
{
/* We don't support PV except guest == shadow == config levels */
BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
- make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
+ make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0]));
+ }
+
+
+ ///
+ /// v->arch.hvm_vcpu.hw_cr3
+ ///
+ if ( shadow_mode_external(d) )
+ {
+ ASSERT(hvm_guest(v));
+#if SHADOW_PAGING_LEVELS == 3
+ /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
+ v->arch.hvm_vcpu.hw_cr3 = virt_to_maddr(&v->arch.shadow.l3table);
+#else
+ /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
+ v->arch.hvm_vcpu.hw_cr3 = pagetable_get_paddr(v->arch.shadow_table[0]);
+#endif
}
/* Fix up the linear pagetable mappings */
@@ -3950,7 +3560,6 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
/* Carefully look in the shadow linear map for the l1e we expect */
- if ( v->arch.shadow_vtable == NULL ) return 0;
#if GUEST_PAGING_LEVELS >= 4
sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
@@ -3959,7 +3568,7 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
return 0;
#elif GUEST_PAGING_LEVELS == 3
- sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable)
+ sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table)
+ shadow_l3_linear_offset(vaddr);
if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
return 0;
@@ -4044,14 +3653,12 @@ void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
case PGC_SH_l2h_shadow:
#endif
shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
case PGC_SH_l3_shadow:
shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 4
case PGC_SH_l4_shadow:
shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
#endif
-#endif
default: BUG(); /* Called with the wrong kind of shadow. */
}
}
@@ -4081,7 +3688,7 @@ int sh_remove_l1_shadow(struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn)
return done;
}
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
/* Remove all mappings of this l2 shadow from this l3 shadow */
{
@@ -4104,7 +3711,6 @@ int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
return done;
}
-#if GUEST_PAGING_LEVELS >= 4
int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
/* Remove all mappings of this l3 shadow from this l4 shadow */
{
@@ -4127,7 +3733,6 @@ int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
return done;
}
#endif /* 64bit guest */
-#endif /* PAE guest */
/**************************************************************************/
/* Handling HVM guest writes to pagetables */
@@ -4448,7 +4053,7 @@ int sh_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x)
return 0;
}
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
{
guest_l3e_t *gl3e, *gp;
@@ -4486,9 +4091,7 @@ int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
sh_unmap_domain_page(gp);
return 0;
}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-#if GUEST_PAGING_LEVELS >= 4
int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
{
guest_l4e_t *gl4e, *gp;
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 732782cd62..2cc61b830f 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -50,10 +50,6 @@ extern void
SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
struct vcpu *v, mfn_t smfn);
-extern void
-SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3)
- (struct vcpu *v, mfn_t smfn);
-
extern void
SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
(struct vcpu *v, mfn_t sl2mfn);
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index a357e189fb..f0daec35f5 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -336,13 +336,9 @@ void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn);
* non-Xen mappings in this top-level shadow mfn */
void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn);
-/* Re-sync copies of PAE shadow L3 tables if they have been changed */
-void sh_pae_recopy(struct domain *d);
-
/* Install the xen mappings in various flavours of shadow */
void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
-void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 0faf217f06..69d077f561 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -215,8 +215,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
})
-// shadow linear L3 and L4 tables only exist in 4 level paging...
-#if SHADOW_PAGING_LEVELS == 4
+#if SHADOW_PAGING_LEVELS >= 4
#define sh_linear_l3_table(v) ({ \
ASSERT(current == (v)); \
((shadow_l3e_t *) \
@@ -386,7 +385,6 @@ static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
#define PGC_SH_fl1_shadow PGC_SH_fl1_pae_shadow
#define PGC_SH_l2_shadow PGC_SH_l2_pae_shadow
#define PGC_SH_l2h_shadow PGC_SH_l2h_pae_shadow
-#define PGC_SH_l3_shadow PGC_SH_l3_pae_shadow
#else
#define PGC_SH_l1_shadow PGC_SH_l1_64_shadow
#define PGC_SH_fl1_shadow PGC_SH_fl1_64_shadow
@@ -405,14 +403,6 @@ valid_gfn(gfn_t m)
return VALID_GFN(gfn_x(m));
}
-#if GUEST_PAGING_LEVELS == 2
-#define PGC_SH_guest_root_type PGC_SH_l2_32_shadow
-#elif GUEST_PAGING_LEVELS == 3
-#define PGC_SH_guest_root_type PGC_SH_l3_pae_shadow
-#else
-#define PGC_SH_guest_root_type PGC_SH_l4_64_shadow
-#endif
-
/* Translation between mfns and gfns */
static inline mfn_t
vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn)
@@ -490,8 +480,6 @@ struct shadow_walk_t
#define sh_map_and_validate_gl1e INTERNAL_NAME(sh_map_and_validate_gl1e)
#define sh_destroy_l4_shadow INTERNAL_NAME(sh_destroy_l4_shadow)
#define sh_destroy_l3_shadow INTERNAL_NAME(sh_destroy_l3_shadow)
-#define sh_destroy_l3_subshadow INTERNAL_NAME(sh_destroy_l3_subshadow)
-#define sh_unpin_all_l3_subshadows INTERNAL_NAME(sh_unpin_all_l3_subshadows)
#define sh_destroy_l2_shadow INTERNAL_NAME(sh_destroy_l2_shadow)
#define sh_destroy_l1_shadow INTERNAL_NAME(sh_destroy_l1_shadow)
#define sh_unhook_32b_mappings INTERNAL_NAME(sh_unhook_32b_mappings)
@@ -533,115 +521,6 @@ struct shadow_walk_t
SHADOW_PAGING_LEVELS)
-#if GUEST_PAGING_LEVELS == 3
-/*
- * Accounting information stored in the shadow of PAE Guest L3 pages.
- * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
- * various refcounts, etc., on the page_info of their page. We provide extra
- * bookkeeping space in the shadow itself, and this is the structure
- * definition for that bookkeeping information.
- */
-struct pae_l3_bookkeeping {
- u32 vcpus; /* bitmap of which vcpus are currently storing
- * copies of this 32-byte page */
- u32 refcount; /* refcount for this 32-byte page */
- u8 pinned; /* is this 32-byte page pinned or not? */
-};
-
-// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
-#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *) \
- (((unsigned long)(_ptr) & ~31) + 32))
-
-static void sh_destroy_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e);
-
-/* Increment a subshadow ref
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
- /* First ref to the subshadow takes a ref to the full shadow */
- if ( bk->refcount == 0 )
- sh_get_ref(smfn, 0);
- if ( unlikely(++(bk->refcount) == 0) )
- {
- SHADOW_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH_PRI_mfn " sh=%p\n",
- mfn_x(smfn), sl3e);
- domain_crash_synchronous();
- }
-}
-
-/* Decrement a subshadow ref.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. Calling this may cause the
- * entire shadow to disappear, so the caller must immediately unmap
- * the pointer after calling. */
-static inline void sh_put_ref_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e,
- mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk;
-
- bk = sl3p_to_info(sl3e);
-
- ASSERT(bk->refcount > 0);
- if ( --(bk->refcount) == 0 )
- {
- /* Need to destroy this subshadow */
- sh_destroy_l3_subshadow(v, sl3e);
- /* Last ref to the subshadow had a ref to the full shadow */
- sh_put_ref(v, smfn, 0);
- }
-}
-
-/* Pin a subshadow
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
- debugtrace_printk("%s smfn=%05lx offset=%ld\n",
- __func__, mfn_x(smfn),
- ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
- if ( !bk->pinned )
- {
- bk->pinned = 1;
- sh_get_ref_l3_subshadow(sl3e, smfn);
- }
-}
-
-/* Unpin a sub-shadow.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. Calling this may cause the
- * entire shadow to disappear, so the caller must immediately unmap
- * the pointer after calling. */
-static inline void sh_unpin_l3_subshadow(struct vcpu *v,
- shadow_l3e_t *sl3e,
- mfn_t smfn)
-{
- struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
- debugtrace_printk("%s smfn=%05lx offset=%ld\n",
- __func__, mfn_x(smfn),
- ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
- if ( bk->pinned )
- {
- bk->pinned = 0;
- sh_put_ref_l3_subshadow(v, sl3e, smfn);
- }
-}
-
-#endif /* GUEST_PAGING_LEVELS == 3 */
-
#if SHADOW_PAGING_LEVELS == 3
#define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
#endif
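The MFN_FITS_IN_HVM_CR3 check kept above encodes the constraint that a PAE hardware CR3 is a 32-bit physical address: with 4KiB frames (a 12-bit page offset) the frame number must fit in 32 - 12 = 20 bits. A stand-alone check of the same arithmetic, with the macro re-declared locally for illustration:

/* Mirrors MFN_FITS_IN_HVM_CR3 above: frame numbers at or above 2^20 would
 * put the page beyond the 4GiB a 32-bit PAE CR3 can address. */
#include <assert.h>

#define FITS_IN_PAE_CR3(mfn) (!((mfn) >> 20))

int main(void)
{
    assert(FITS_IN_PAE_CR3(0xFFFFFUL));    /* page at 0xFFFFF000: OK      */
    assert(!FITS_IN_PAE_CR3(0x100000UL));  /* page at 4GiB: does not fit  */
    return 0;
}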
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 3486fb234c..7e200d5c68 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -134,6 +134,10 @@ struct pae_l3_cache { };
#endif
struct shadow_vcpu {
+#if CONFIG_PAGING_LEVELS >= 3
+ /* PAE guests: per-vcpu shadow top-level table */
+ l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
+#endif
/* Pointers to mode-specific entry points. */
struct shadow_paging_mode *mode;
/* Last MFN that we emulated a write to. */
@@ -142,10 +146,6 @@ struct shadow_vcpu {
unsigned int translate_enabled:1;
/* Emulated fault needs to be propagated to guest? */
unsigned int propagate_fault:1;
-#if CONFIG_PAGING_LEVELS >= 3
- /* Shadow update requires this PAE cpu to recopy/install its L3 table. */
- unsigned int pae_flip_pending:1;
-#endif
};
struct arch_vcpu
@@ -190,13 +190,12 @@ struct arch_vcpu
pagetable_t guest_table; /* (MFN) guest notion of cr3 */
/* guest_table holds a ref to the page, and also a type-count unless
* shadow refcounts are in use */
- pagetable_t shadow_table; /* (MFN) shadow of guest */
+ pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */
pagetable_t monitor_table; /* (MFN) hypervisor PT (for HVM) */
unsigned long cr3; /* (MA) value to install in HW CR3 */
- void *guest_vtable; /* virtual address of pagetable */
- void *shadow_vtable; /* virtual address of shadow_table */
- root_pgentry_t *monitor_vtable; /* virtual address of monitor_table */
+ void *guest_vtable; /* virtual addr of pagetable */
+ root_pgentry_t *monitor_vtable; /* virtual addr of monitor_table */
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
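The new per-vcpu l3table added above is four 8-byte PAE entries (32 bytes in total) and carries a 32-byte alignment attribute, matching the alignment the hardware requires of the page-directory-pointer table that a PAE CR3 references. A small sketch confirming that layout, using stand-in types rather than Xen's:

/* Stand-in for the shadow_vcpu.l3table field above: 4 x 8-byte entries,
 * 32-byte aligned so its machine address is valid in a PAE CR3. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t fake_l3_pgentry_t;

struct fake_shadow_vcpu {
    fake_l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
};

int main(void)
{
    printf("sizeof(l3table) = %zu, struct alignment = %zu\n",
           sizeof(((struct fake_shadow_vcpu *)0)->l3table),
           _Alignof(struct fake_shadow_vcpu));   /* expect 32 and 32 */
    return 0;
}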
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index b607a4578b..f613ae6a09 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -41,11 +41,6 @@ struct hvm_vcpu {
int xen_port;
-#if CONFIG_PAGING_LEVELS >= 3
- l3_pgentry_t hvm_lowmem_l3tab[4]
- __attribute__((__aligned__(32)));
-#endif
-
/* Flags */
int flag_dr_dirty;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index dd85519c12..389366a7ba 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -114,15 +114,14 @@ struct page_info
#define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
#define PGC_SH_l2_pae_shadow (6U<<28) /* shadowing a pae L2-low page */
#define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
-#define PGC_SH_l3_pae_shadow (8U<<28) /* shadowing a pae L3 page */
-#define PGC_SH_l1_64_shadow (9U<<28) /* shadowing a 64-bit L1 page */
-#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
-#define PGC_SH_l2_64_shadow (11U<<28) /* shadowing a 64-bit L2 page */
-#define PGC_SH_l3_64_shadow (12U<<28) /* shadowing a 64-bit L3 page */
-#define PGC_SH_l4_64_shadow (13U<<28) /* shadowing a 64-bit L4 page */
-#define PGC_SH_max_shadow (13U<<28)
-#define PGC_SH_p2m_table (14U<<28) /* in use as the p2m table */
-#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH_l1_64_shadow (8U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH_fl1_64_shadow (9U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH_l2_64_shadow (10U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH_l3_64_shadow (11U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH_l4_64_shadow (12U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH_max_shadow (12U<<28)
+#define PGC_SH_p2m_table (13U<<28) /* in use as the p2m table */
+#define PGC_SH_monitor_table (14U<<28) /* in use as a monitor table */
#define PGC_SH_unused (15U<<28)
#define PGC_SH_type_mask (15U<<28)
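With PGC_SH_l3_pae_shadow gone, the 64-bit shadow types above shift down by one so the 4-bit type field stays contiguous and PGC_SH_max_shadow drops from 13 to 12. The type is packed into bits 28-31 of the PGC_ flags, and PGC_SH_type_to_index (used by the SHF_ masks in shadow.h below) recovers the small index again. A toy sketch of that encoding, with values re-declared locally for illustration:

/* Illustrative only: pack/unpack a 4-bit shadow type in bits 28-31,
 * the same scheme as the PGC_SH_* definitions above. */
#include <stdio.h>

#define SH_TYPE_SHIFT   28
#define SH_TYPE_MASK    (15U << SH_TYPE_SHIFT)
#define SH_L1_64_SHADOW (8U << SH_TYPE_SHIFT)    /* value after this patch */

static unsigned int sh_type_index(unsigned long flags)
{
    return (unsigned int)(flags & SH_TYPE_MASK) >> SH_TYPE_SHIFT;
}

int main(void)
{
    unsigned long flags = 0;
    flags |= SH_L1_64_SHADOW;                    /* tag as a 64-bit L1 shadow */
    printf("type index = %u\n", sh_type_index(flags));   /* prints 8 */
    return 0;
}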
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 9301022abd..27cf6843eb 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -72,7 +72,6 @@
#define SHADOW_SET_CHANGED 0x1
#define SHADOW_SET_FLUSH 0x2
#define SHADOW_SET_ERROR 0x4
-#define SHADOW_SET_L3PAE_RECOPY 0x8
// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
#ifdef __x86_64__
@@ -406,7 +405,6 @@ shadow_update_cr3(struct vcpu *v)
* for HVM guests, arch.monitor_table and hvm's guest CR3.
*
* Update ref counts to shadow tables appropriately.
- * For PAE, relocate L3 entries, if necessary, into low memory.
*/
static inline void update_cr3(struct vcpu *v)
{
@@ -587,7 +585,6 @@ shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
#define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
#define SHF_L2_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
#define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
-#define SHF_L3_PAE (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
#define SHF_L1_64 (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
#define SHF_FL1_64 (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
#define SHF_L2_64 (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))