-rw-r--r--  xen/arch/x86/hvm/hvm.c                |   3
-rw-r--r--  xen/arch/x86/hvm/svm/svm.c            | 198
-rw-r--r--  xen/arch/x86/hvm/svm/vmcb.c           |  10
-rw-r--r--  xen/arch/x86/mm/Makefile              |   1
-rw-r--r--  xen/arch/x86/mm/hap/Makefile          |   2
-rw-r--r--  xen/arch/x86/mm/hap/hap.c             | 708
-rw-r--r--  xen/arch/x86/mm/hap/private.h         | 112
-rw-r--r--  xen/arch/x86/mm/hap/support.c         | 334
-rw-r--r--  xen/arch/x86/mm/page-guest32.h (renamed from xen/arch/x86/mm/shadow/page-guest32.h) | 0
-rw-r--r--  xen/arch/x86/mm/paging.c              |  34
-rw-r--r--  xen/arch/x86/mm/shadow/types.h        |   2
-rw-r--r--  xen/include/asm-x86/domain.h          |  16
-rw-r--r--  xen/include/asm-x86/hap.h             | 122
13 files changed, 1525 insertions, 17 deletions
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index bda506b182..f48bb6f49a 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -161,7 +161,8 @@ int hvm_domain_initialise(struct domain *d)
spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
spin_lock_init(&d->arch.hvm_domain.irq_lock);
- rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external);
+ /* paging support will be determined inside paging.c */
+ rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
if ( rc != 0 )
return rc;
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 40910395e0..61380c606a 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -49,6 +49,7 @@
#include <public/sched.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
+#include <asm/hap.h>
#define SVM_EXTRA_DEBUG
@@ -76,6 +77,10 @@ static void *root_vmcb[NR_CPUS] __read_mostly;
/* physical address of above for host VMSAVE/VMLOAD */
u64 root_vmcb_pa[NR_CPUS] __read_mostly;
+/* hardware assisted paging bits */
+extern int opt_hap_enabled;
+extern int hap_capable_system;
+
static inline void svm_inject_exception(struct vcpu *v, int trap,
int ev, int error_code)
{
@@ -240,7 +245,9 @@ static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
/*
* Check for EFER.LME transitions from 0->1 or 1->0. Do the
* sanity checks and then make sure that both EFER.LME and
- * EFER.LMA are cleared.
+ * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb
+ * until the guest also sets CR0.PG, since even if the guest has
+ * paging "disabled", the vmcb's CR0 always has PG set.)
*/
if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
{
@@ -269,10 +276,12 @@ static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
vmcb->efer &= ~(EFER_LME | EFER_LMA);
}
+
#endif /* __x86_64__ */
/* update the guest EFER's shadow with the intended value */
v->arch.hvm_svm.cpu_shadow_efer = msr_content;
+
break;
#ifdef __x86_64__
@@ -902,6 +911,10 @@ static void arch_svm_do_launch(struct vcpu *v)
{
svm_do_launch(v);
+ if ( paging_mode_hap(v->domain) ) {
+ v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+ }
+
if ( v->vcpu_id != 0 )
{
cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
@@ -1008,6 +1021,21 @@ static struct hvm_function_table svm_function_table = {
.event_injection_faulted = svm_event_injection_faulted
};
+void svm_npt_detect(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ /* check CPUID for nested paging support */
+ cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
+ if ( edx & 0x01 ) { /* nested paging */
+ hap_capable_system = 1;
+ }
+ else if ( opt_hap_enabled ) {
+ printk(" nested paging is not supported by this CPU.\n");
+ hap_capable_system = 0; /* no nested paging, we disable flag. */
+ }
+}
+
int start_svm(void)
{
u32 eax, ecx, edx;
@@ -1038,6 +1066,8 @@ int start_svm(void)
wrmsr(MSR_EFER, eax, edx);
printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
+ svm_npt_detect();
+
/* Initialize the HSA for this core */
phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
phys_hsa_lo = (u32) phys_hsa;
@@ -1074,6 +1104,18 @@ void arch_svm_do_resume(struct vcpu *v)
}
}
+static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+ if (mmio_space(gpa)) {
+ handle_mmio(gpa);
+ return 1;
+ }
+
+ /* We should not reach here. Otherwise, the P2M table is not correct. */
+ return 0;
+}
+
+
static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
{
HVM_DBG_LOG(DBG_LEVEL_VMMU,
@@ -1700,6 +1742,52 @@ static void svm_io_instruction(struct vcpu *v)
}
}
+static int npt_set_cr0(unsigned long value)
+{
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ ASSERT(vmcb);
+
+ /* ET is reserved and should always be 1 */
+ value |= X86_CR0_ET;
+
+ /* Check whether the guest is about to turn on long mode.
+ * If it is, set EFER.LME and EFER.LMA. Update the shadow EFER.LMA
+ * bit too, so svm_long_mode_enabled() will work.
+ */
+ if ( (value & X86_CR0_PG) && svm_lme_is_set(v) &&
+ (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) )
+ {
+ v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
+ vmcb->efer |= EFER_LMA | EFER_LME;
+ }
+
+ /* Whenever CR0.PG is cleared under long mode, LMA will be cleared
+ * immediately. We emulate this process for svm_long_mode_enabled().
+ */
+ if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
+ {
+ if ( svm_long_mode_enabled(v) )
+ {
+ v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
+ }
+ }
+
+ vmcb->cr0 = value | X86_CR0_WP;
+ v->arch.hvm_svm.cpu_shadow_cr0 = value;
+
+ /* TS cleared? Then initialise FPU now. */
+ if ( !(value & X86_CR0_TS) ) {
+ setup_fpu(v);
+ vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
+ }
+
+ paging_update_paging_modes(v);
+
+ return 1;
+}
+
static int svm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
@@ -1797,6 +1885,85 @@ static int svm_set_cr0(unsigned long value)
return 1;
}
+//
+// nested paging functions
+//
+
+static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
+{
+ unsigned long value;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ ASSERT(vmcb);
+
+ value = get_reg(gpreg, regs, vmcb);
+
+ switch (cr) {
+ case 0:
+ return npt_set_cr0(value);
+
+ case 3:
+ vmcb->cr3 = value;
+ v->arch.hvm_svm.cpu_cr3 = value;
+ break;
+
+ case 4: /* CR4 */
+ vmcb->cr4 = value;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ paging_update_paging_modes(v);
+ break;
+
+ case 8:
+ vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+ vmcb->vintr.fields.tpr = value & 0x0F;
+ break;
+
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+ domain_crash(v->domain);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+ unsigned long value = 0;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ vmcb = v->arch.hvm_svm.vmcb;
+ ASSERT(vmcb);
+
+ switch(cr) {
+ case 0:
+ value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0;
+ break;
+ case 2:
+ value = vmcb->cr2;
+ break;
+ case 3:
+ value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
+ break;
+ case 4:
+ value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
+ break;
+ case 8:
+ value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+ value = (value & 0xF0) >> 4;
+ break;
+ default:
+ domain_crash(v->domain);
+ return;
+ }
+
+ set_reg(gp, value, regs, vmcb);
+}
+
/*
* Read from control registers. CR0 and CR4 are read from the shadow.
*/
@@ -2043,12 +2210,18 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
{
case INSTR_MOV2CR:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- result = mov_to_cr(gpreg, cr, regs);
+ if ( paging_mode_hap(v->domain) )
+ result = npt_mov_to_cr(gpreg, cr, regs);
+ else
+ result = mov_to_cr(gpreg, cr, regs);
break;
case INSTR_MOVCR2:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- mov_from_cr(cr, gpreg, regs);
+ if ( paging_mode_hap(v->domain) )
+ npt_mov_from_cr(cr, gpreg, regs);
+ else
+ mov_from_cr(cr, gpreg, regs);
break;
case INSTR_CLTS:
@@ -2075,7 +2248,10 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
if (svm_dbg_on)
printk("CR0-LMSW CR0 - New value=%lx\n", value);
- result = svm_set_cr0(value);
+ if ( paging_mode_hap(v->domain) )
+ result = npt_set_cr0(value);
+ else
+ result = svm_set_cr0(value);
break;
case INSTR_SMSW:
@@ -2359,6 +2535,11 @@ static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
vmcb->cr4 = SVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = 0;
+ if ( paging_mode_hap(v->domain) ) {
+ vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ }
+
/* This will jump to ROMBIOS */
vmcb->rip = 0xFFF0;
@@ -3042,6 +3223,15 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
hvm_triple_fault();
break;
+ case VMEXIT_NPF:
+ {
+ regs->error_code = vmcb->exitinfo1;
+ if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) ) {
+ domain_crash(v->domain);
+ }
+ break;
+ }
+
default:
exit_and_crash:
gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
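For reference, the CPUID probe that svm_npt_detect() performs above (leaf 0x8000000A, EDX bit 0 = nested paging) can be reproduced from user space. A minimal standalone sketch, assuming GCC/Clang on x86; it is not part of the patch:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* Leaf 0x8000000A is the SVM feature leaf; EDX bit 0 reports NPT. */
    if ( !__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx) )
    {
        printf("CPUID leaf 0x8000000A not available\n");
        return 1;
    }
    printf("nested paging (NPT): %s\n", (edx & 1) ? "supported" : "not supported");
    return 0;
}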
diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c
index 25b30f8e58..f2a220011e 100644
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -201,6 +201,13 @@ static int construct_vmcb(struct vcpu *v)
arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
+ if ( paging_mode_hap(v->domain) ) {
+ vmcb->cr0 = arch_svm->cpu_shadow_cr0;
+ vmcb->np_enable = 1; /* enable nested paging */
+ vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
+ vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG;
+ }
+
return 0;
}
@@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
(unsigned long long) vmcb->kerngsbase,
(unsigned long long) vmcb->g_pat);
-
+ printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
/* print out all the selectors */
svm_dump_sel("CS", &vmcb->cs);
svm_dump_sel("DS", &vmcb->ds);
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index a58211d91a..79b25962ac 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -1,4 +1,5 @@
subdir-y += shadow
+subdir-y += hap
obj-y += paging.o
obj-y += p2m.o
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
new file mode 100644
index 0000000000..8833ea5c75
--- /dev/null
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -0,0 +1,2 @@
+obj-y += hap.o
+obj-y += support.o
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
new file mode 100644
index 0000000000..57a2ee4fd4
--- /dev/null
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -0,0 +1,708 @@
+/******************************************************************************
+ * arch/x86/mm/hap/hap.c
+ *
+ * hardware assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 by XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <asm/event.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/shared.h>
+#include <asm/hap.h>
+#include <asm/paging.h>
+#include <asm/domain.h>
+
+#include "private.h"
+
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_to_page
+#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#undef page_to_mfn
+#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+
+/************************************************/
+/* HAP SUPPORT FUNCTIONS */
+/************************************************/
+mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
+{
+ struct page_info *sp = NULL;
+ void *p;
+
+ ASSERT(hap_locked_by_me(d));
+
+ sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list);
+ list_del(&sp->list);
+ d->arch.paging.hap.free_pages -= 1;
+
+ /* Now safe to clear the page for reuse */
+ p = hap_map_domain_page(page_to_mfn(sp));
+ ASSERT(p != NULL);
+ clear_page(p);
+ hap_unmap_domain_page(p);
+
+ return page_to_mfn(sp);
+}
+
+void hap_free(struct domain *d, mfn_t smfn)
+{
+ struct page_info *sp = mfn_to_page(smfn);
+
+ ASSERT(hap_locked_by_me(d));
+
+ d->arch.paging.hap.free_pages += 1;
+ list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+}
+
+static int hap_alloc_p2m_pages(struct domain *d)
+{
+ struct page_info *pg;
+
+ ASSERT(hap_locked_by_me(d));
+
+ pg = mfn_to_page(hap_alloc(d, 0));
+ d->arch.paging.hap.p2m_pages += 1;
+ d->arch.paging.hap.total_pages -= 1;
+
+ page_set_owner(pg, d);
+ pg->count_info = 1;
+ list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
+
+ return 1;
+}
+
+struct page_info * hap_alloc_p2m_page(struct domain *d)
+{
+ struct list_head *entry;
+ struct page_info *pg;
+ mfn_t mfn;
+ void *p;
+
+ hap_lock(d);
+
+ if ( list_empty(&d->arch.paging.hap.p2m_freelist) &&
+ !hap_alloc_p2m_pages(d) ) {
+ hap_unlock(d);
+ return NULL;
+ }
+ entry = d->arch.paging.hap.p2m_freelist.next;
+ list_del(entry);
+
+ hap_unlock(d);
+
+ pg = list_entry(entry, struct page_info, list);
+ mfn = page_to_mfn(pg);
+ p = hap_map_domain_page(mfn);
+ clear_page(p);
+ hap_unmap_domain_page(p);
+
+ return pg;
+}
+
+void hap_free_p2m_page(struct domain *d, struct page_info *pg)
+{
+ ASSERT(page_get_owner(pg) == d);
+ /* Should have just the one ref we gave it in alloc_p2m_page() */
+ if ( (pg->count_info & PGC_count_mask) != 1 ) {
+ HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ pg->count_info, pg->u.inuse.type_info);
+ }
+ /* Free should not decrement domain's total allocation, since
+ * these pages were allocated without an owner. */
+ page_set_owner(pg, NULL);
+ free_domheap_pages(pg, 0);
+ d->arch.paging.hap.p2m_pages--;
+}
+
+/* Return the size of the pool, rounded up to the nearest MB */
+static unsigned int
+hap_get_allocation(struct domain *d)
+{
+ unsigned int pg = d->arch.paging.hap.total_pages;
+
+ HERE_I_AM;
+ return ((pg >> (20 - PAGE_SHIFT))
+ + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
+
+/* Set the pool of pages to the required number of pages.
+ * Returns 0 for success, non-zero for failure. */
+static unsigned int
+hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
+{
+ struct page_info *sp;
+
+ ASSERT(hap_locked_by_me(d));
+
+ while ( d->arch.paging.hap.total_pages != pages ) {
+ if ( d->arch.paging.hap.total_pages < pages ) {
+ /* Need to allocate more memory from domheap */
+ sp = alloc_domheap_pages(NULL, 0, 0);
+ if ( sp == NULL ) {
+ HAP_PRINTK("failed to allocate hap pages.\n");
+ return -ENOMEM;
+ }
+ d->arch.paging.hap.free_pages += 1;
+ d->arch.paging.hap.total_pages += 1;
+ list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+ }
+ else if ( d->arch.paging.hap.total_pages > pages ) {
+ /* Need to return memory to domheap */
+ ASSERT(!list_empty(&d->arch.paging.hap.freelists));
+ sp = list_entry(d->arch.paging.hap.freelists.next,
+ struct page_info, list);
+ list_del(&sp->list);
+ d->arch.paging.hap.free_pages -= 1;
+ d->arch.paging.hap.total_pages -= 1;
+ free_domheap_pages(sp, 0);
+ }
+
+ /* Check to see if we need to yield and try again */
+ if ( preempted && hypercall_preempt_check() ) {
+ *preempted = 1;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+#if CONFIG_PAGING_LEVELS == 4
+void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+ struct domain *d = v->domain;
+ l4_pgentry_t *sl4e;
+
+ sl4e = hap_map_domain_page(sl4mfn);
+ ASSERT(sl4e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
+ __PAGE_HYPERVISOR);
+
+ sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
+ l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR);
+
+ /* install domain-specific P2M table */
+ sl4e[l4_table_offset(RO_MPT_VIRT_START)] =
+ l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+ __PAGE_HYPERVISOR);
+
+ hap_unmap_domain_page(sl4e);
+}
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+#if CONFIG_PAGING_LEVELS == 3
+void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
+{
+ struct domain *d = v->domain;
+ l2_pgentry_t *sl2e;
+
+ int i;
+
+ sl2e = hap_map_domain_page(sl2hmfn);
+ ASSERT(sl2e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+ &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_pfn(
+ mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+ __PAGE_HYPERVISOR);
+
+ for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ )
+ sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ l2e_empty();
+
+ if ( paging_mode_translate(d) )
+ {
+ /* Install the domain-specific p2m table */
+ l3_pgentry_t *p2m;
+ ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+ p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+ for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+ {
+ sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
+ (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))),
+ __PAGE_HYPERVISOR)
+ : l2e_empty();
+ }
+ hap_unmap_domain_page(p2m);
+ }
+
+ hap_unmap_domain_page(sl2e);
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
+{
+ struct domain *d = v->domain;
+ l2_pgentry_t *sl2e;
+ int i;
+
+ sl2e = hap_map_domain_page(sl2mfn);
+ ASSERT(sl2e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_pfn(
+ mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+ __PAGE_HYPERVISOR);
+
+
+ sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR);
+
+ /* install domain-specific P2M table */
+ sl2e[l2_table_offset(RO_MPT_VIRT_START)] =
+ l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+ __PAGE_HYPERVISOR);
+
+ hap_unmap_domain_page(sl2e);
+}
+#endif
+
+mfn_t hap_make_monitor_table(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+
+ ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+
+#if CONFIG_PAGING_LEVELS == 4
+ {
+ mfn_t m4mfn;
+ m4mfn = hap_alloc(d, 0);
+ hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
+ return m4mfn;
+ }
+#elif CONFIG_PAGING_LEVELS == 3
+ {
+ mfn_t m3mfn, m2mfn;
+ l3_pgentry_t *l3e;
+ l2_pgentry_t *l2e;
+ int i;
+
+ m3mfn = hap_alloc(d, 0);
+
+ /* Install a monitor l2 table in slot 3 of the l3 table.
+ * This is used for all Xen entries, including linear maps
+ */
+ m2mfn = hap_alloc(d, 0);
+ l3e = hap_map_domain_page(m3mfn);
+ l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+ hap_install_xen_entries_in_l2h(v, m2mfn);
+ /* Install the monitor's own linear map */
+ l2e = hap_map_domain_page(m2mfn);
+ for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
+ : l2e_empty();
+ hap_unmap_domain_page(l2e);
+ hap_unmap_domain_page(l3e);
+
+ HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+ return m3mfn;
+ }
+#else
+ {
+ mfn_t m2mfn;
+
+ m2mfn = hap_alloc(d, 0);
+ hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+
+ return m2mfn;
+ }
+#endif
+}
+
+void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
+{
+ struct domain *d = v->domain;
+
+#if CONFIG_PAGING_LEVELS == 4
+ /* Need to destroy the l3 monitor page in slot 0 too */
+ {
+ mfn_t m3mfn;
+ l4_pgentry_t *l4e = hap_map_domain_page(mmfn);
+ ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+ m3mfn = _mfn(l4e_get_pfn(l4e[0]));
+ hap_free(d, m3mfn);
+ hap_unmap_domain_page(l4e);
+ }
+#elif CONFIG_PAGING_LEVELS == 3
+ /* Need to destroy the l2 monitor page in slot 3 too */
+ {
+ l3_pgentry_t *l3e = hap_map_domain_page(mmfn);
+ ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+ hap_free(d, _mfn(l3e_get_pfn(l3e[3])));
+ hap_unmap_domain_page(l3e);
+ }
+#endif
+
+ /* Put the memory back in the pool */
+ hap_free(d, mmfn);
+}
+
+/************************************************/
+/* HAP DOMAIN LEVEL FUNCTIONS */
+/************************************************/
+void hap_domain_init(struct domain *d)
+{
+ hap_lock_init(d);
+ INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
+ INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
+}
+
+/* return 0 for success, -errno for failure */
+int hap_enable(struct domain *d, u32 mode)
+{
+ unsigned int old_pages;
+ int rv = 0;
+
+ HERE_I_AM;
+
+ domain_pause(d);
+ /* error check */
+ if ( (d == current->domain) ) {
+ rv = -EINVAL;
+ goto out;
+ }
+
+ old_pages = d->arch.paging.hap.total_pages;
+ if ( old_pages == 0 ) {
+ unsigned int r;
+ hap_lock(d);
+ r = hap_set_allocation(d, 256, NULL);
+ hap_unlock(d);
+ if ( r != 0 ) {
+ hap_set_allocation(d, 0, NULL);
+ rv = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* allocate the P2M table */
+ if ( mode & PG_translate ) {
+ rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
+ if ( rv != 0 )
+ goto out;
+ }
+
+ d->arch.paging.mode = mode | PG_SH_enable;
+
+ out:
+ domain_unpause(d);
+ return rv;
+}
+
+void hap_final_teardown(struct domain *d)
+{
+ HERE_I_AM;
+
+ if ( d->arch.paging.hap.total_pages != 0 )
+ hap_teardown(d);
+
+ p2m_teardown(d);
+}
+
+void hap_teardown(struct domain *d)
+{
+ struct vcpu *v;
+ mfn_t mfn;
+ HERE_I_AM;
+
+ ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
+ ASSERT(d != current->domain);
+
+ if ( !hap_locked_by_me(d) )
+ hap_lock(d); /* Keep various asserts happy */
+
+ if ( paging_mode_enabled(d) ) {
+ /* release the monitor table held by each vcpu */
+ for_each_vcpu(d, v) {
+ if ( v->arch.paging.mode && paging_mode_external(d) ) {
+ mfn = pagetable_get_mfn(v->arch.monitor_table);
+ if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
+ hap_destroy_monitor_table(v, mfn);
+ v->arch.monitor_table = pagetable_null();
+ }
+ }
+ }
+
+ if ( d->arch.paging.hap.total_pages != 0 ) {
+ HAP_PRINTK("teardown of domain %u starts."
+ " pages total = %u, free = %u, p2m=%u\n",
+ d->domain_id,
+ d->arch.paging.hap.total_pages,
+ d->arch.paging.hap.free_pages,
+ d->arch.paging.hap.p2m_pages);
+ hap_set_allocation(d, 0, NULL);
+ HAP_PRINTK("teardown done."
+ " pages total = %u, free = %u, p2m=%u\n",
+ d->arch.paging.hap.total_pages,
+ d->arch.paging.hap.free_pages,
+ d->arch.paging.hap.p2m_pages);
+ ASSERT(d->arch.paging.hap.total_pages == 0);
+ }
+
+ d->arch.paging.mode &= ~PG_log_dirty;
+
+ hap_unlock(d);
+}
+
+int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(void) u_domctl)
+{
+ int rc, preempted = 0;
+
+ HERE_I_AM;
+
+ if ( unlikely(d == current->domain) ) {
+ gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n");
+ return -EINVAL;
+ }
+
+ switch ( sc->op ) {
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
+ case XEN_DOMCTL_SHADOW_OP_CLEAN:
+ case XEN_DOMCTL_SHADOW_OP_PEEK:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE:
+ HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+ domain_crash(d);
+ return -EINVAL;
+ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+ hap_lock(d);
+ rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
+ hap_unlock(d);
+ if ( preempted )
+ /* Not finished. Set up to re-run the call. */
+ rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
+ u_domctl);
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = hap_get_allocation(d);
+ return rc;
+ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+ sc->mb = hap_get_allocation(d);
+ return 0;
+ default:
+ HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+ return -EINVAL;
+ }
+}
+
+void hap_vcpu_init(struct vcpu *v)
+{
+ v->arch.paging.mode = &hap_paging_real_mode;
+}
+/************************************************/
+/* HAP PAGING MODE FUNCTIONS */
+/************************************************/
+/* In theory, hap should not intercept guest page faults. This function can
+ * be recycled to handle host/nested page faults, if needed.
+ */
+int hap_page_fault(struct vcpu *v, unsigned long va,
+ struct cpu_user_regs *regs)
+{
+ HERE_I_AM;
+ domain_crash(v->domain);
+ return 0;
+}
+
+/* Called when the guest issues an invlpg request.
+ * Return 1 if a page invalidation needs to be issued on the CPU; return 0
+ * if it does not.
+ */
+int hap_invlpg(struct vcpu *v, unsigned long va)
+{
+ HERE_I_AM;
+ return 0;
+}
+
+void hap_update_cr3(struct vcpu *v, int do_locking)
+{
+ struct domain *d = v->domain;
+ mfn_t gmfn;
+
+ HERE_I_AM;
+ /* Don't do anything on an uninitialised vcpu */
+ if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) {
+ ASSERT(v->arch.cr3 == 0);
+ return;
+ }
+
+ if ( do_locking )
+ hap_lock(v->domain);
+
+ ASSERT(hap_locked_by_me(v->domain));
+ ASSERT(v->arch.paging.mode);
+
+ gmfn = pagetable_get_mfn(v->arch.guest_table);
+
+ make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
+
+ hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table));
+
+ HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n",
+ d->domain_id, v->vcpu_id,
+ (unsigned long)pagetable_get_pfn(v->arch.guest_table),
+ (unsigned long)pagetable_get_pfn(v->arch.monitor_table));
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+
+ if ( do_locking )
+ hap_unlock(v->domain);
+}
+
+void hap_update_paging_modes(struct vcpu *v)
+{
+ struct domain *d;
+
+ HERE_I_AM;
+
+ d = v->domain;
+ hap_lock(d);
+
+ /* Update the guest paging mode. Note that we rely on hvm functions to
+ * detect the guest's paging mode, so make sure the shadow registers
+ * (CR0, CR4, EFER) reflect the guest's state correctly.
+ */
+ if ( hvm_paging_enabled(v) ) {
+ if ( hvm_long_mode_enabled(v) )
+ v->arch.paging.mode = &hap_paging_long_mode;
+ else if ( hvm_pae_enabled(v) )
+ v->arch.paging.mode = &hap_paging_pae_mode;
+ else
+ v->arch.paging.mode = &hap_paging_protected_mode;
+ }
+ else {
+ v->arch.paging.mode = &hap_paging_real_mode;
+ }
+
+ v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
+
+ /* use p2m map */
+ v->arch.guest_table =
+ pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+
+ if ( pagetable_is_null(v->arch.monitor_table) ) {
+ mfn_t mmfn = hap_make_monitor_table(v);
+ v->arch.monitor_table = pagetable_from_mfn(mmfn);
+ make_cr3(v, mfn_x(mmfn));
+ }
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ hap_unlock(d);
+}
+
+void
+hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
+ l1_pgentry_t new, unsigned int level)
+{
+ hap_lock(v->domain);
+ safe_write_pte(p, new);
+ hap_unlock(v->domain);
+}
+
+/* Entry points into this mode of the hap code. */
+struct paging_mode hap_paging_real_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_real_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 1
+};
+
+struct paging_mode hap_paging_protected_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_protected_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 2
+};
+
+struct paging_mode hap_paging_pae_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_pae_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 3
+};
+
+struct paging_mode hap_paging_long_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_long_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 4
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
+
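hap_enable() seeds the pool with 256 pages, and hap_get_allocation() reports the pool rounded up to whole megabytes. A quick standalone check of that rounding, assuming 4KB pages (PAGE_SHIFT == 12); not part of the patch:

#include <assert.h>

#define PAGE_SHIFT 12

static unsigned int pages_to_mb(unsigned int pg)
{
    /* Same arithmetic as hap_get_allocation(): whole MBs, rounded up. */
    return (pg >> (20 - PAGE_SHIFT))
        + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0);
}

int main(void)
{
    assert(pages_to_mb(0)   == 0);
    assert(pages_to_mb(1)   == 1);   /* any partial MB rounds up */
    assert(pages_to_mb(256) == 1);   /* 256 x 4KB pages == 1MB exactly */
    assert(pages_to_mb(257) == 2);
    return 0;
}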
diff --git a/xen/arch/x86/mm/hap/private.h b/xen/arch/x86/mm/hap/private.h
new file mode 100644
index 0000000000..aa5100c271
--- /dev/null
+++ b/xen/arch/x86/mm/hap/private.h
@@ -0,0 +1,112 @@
+/*
+ * arch/x86/mm/hap/private.h
+ *
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __HAP_PRIVATE_H__
+#define __HAP_PRIVATE_H__
+
+#include <asm/flushtlb.h>
+#include <asm/hvm/support.h>
+
+/********************************************/
+/* GUEST TRANSLATION FUNCS */
+/********************************************/
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva);
+/********************************************/
+/* MISC DEFINITIONS */
+/********************************************/
+
+/* PT_SHIFT describes the amount by which a virtual address is shifted right
+ * to right justify the portion to be used for indexing into a page
+ * table, given the guest memory model (i.e. number of levels) and the level
+ * of the page table being accessed. The idea is from Virtual Iron's code.
+ */
+static const int PT_SHIFT[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 12, 22, 0, 0}, /* 2 */
+ { 0, 12, 21, 30, 0}, /* 3 */
+ { 0, 12, 21, 30, 39} /* 4 */
+ };
+
+/* PT_ENTRIES describes the number of entries in a page table, given the
+ * memory model (i.e. number of levels) and the level of the page table
+ * being considered. This idea is from Virtual Iron's shadow code. */
+static const int PT_ENTRIES[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 1024, 1024, 0, 0}, /* 2 */
+ { 0, 512, 512, 4, 0}, /* 3 */
+ { 0, 512, 512, 512, 512} /* 4 */
+ };
+
+/********************************************/
+/* PAGING DEFINITION FOR GUEST */
+/********************************************/
+#define PHYSICAL_PAGE_4K_SIZE (1UL << 12)
+#define PHYSICAL_PAGE_2M_SIZE (1UL << 21)
+#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
+#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) )
+#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) )
+#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )
+
+/* long mode physical address mask */
+#define PHYSICAL_ADDR_BITS_LM 52
+#define PHYSICAL_ADDR_MASK_LM ((1UL << PHYSICAL_ADDR_BITS_LM)-1)
+#define PHYSICAL_ADDR_2M_MASK_LM (PHYSICAL_PAGE_2M_MASK & PHYSICAL_ADDR_MASK_LM)
+#define PHYSICAL_ADDR_4K_MASK_LM (PHYSICAL_PAGE_4K_MASK & PHYSICAL_ADDR_MASK_LM)
+
+#define PAGE_NX_BIT (1ULL << 63)
+/************************************************/
+/* PAGETABLE RELATED VARIABLES */
+/************************************************/
+#if CONFIG_PAGING_LEVELS == 2
+#define HAP_L1_PAGETABLE_ENTRIES 1024
+#define HAP_L2_PAGETABLE_ENTRIES 1024
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 22
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3
+#define HAP_L1_PAGETABLE_ENTRIES 512
+#define HAP_L2_PAGETABLE_ENTRIES 512
+#define HAP_L3_PAGETABLE_ENTRIES 4
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 21
+#define HAP_L3_PAGETABLE_SHIFT 30
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+#define HAP_L1_PAGETABLE_ENTRIES 512
+#define HAP_L2_PAGETABLE_ENTRIES 512
+#define HAP_L3_PAGETABLE_ENTRIES 512
+#define HAP_L4_PAGETABLE_ENTRIES 512
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 21
+#define HAP_L3_PAGETABLE_SHIFT 30
+#define HAP_L4_PAGETABLE_SHIFT 39
+#endif
+
+#endif /* __HAP_PRIVATE_H__ */
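The PT_SHIFT/PT_ENTRIES tables above drive the index calculation used by every walker in support.c: index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1). A worked standalone example for a two-level (non-PAE) guest, with only the two-level rows reproduced; for illustration only:

#include <stdio.h>

static const int PT_SHIFT[][5]   = { {0}, {0}, { 0, 12, 22, 0, 0 } };
static const int PT_ENTRIES[][5] = { {0}, {0}, { 0, 1024, 1024, 0, 0 } };

int main(void)
{
    unsigned long gva = 0xc0201234UL;
    int mode = 2;                /* two-level guest */
    int lev;

    for ( lev = mode; lev >= 1; lev-- )
    {
        int index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev] - 1);
        printf("level %d index = %d\n", lev, index);
    }
    /* Prints: level 2 index = 768, level 1 index = 513. */
    return 0;
}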
diff --git a/xen/arch/x86/mm/hap/support.c b/xen/arch/x86/mm/hap/support.c
new file mode 100644
index 0000000000..af5b7ec72f
--- /dev/null
+++ b/xen/arch/x86/mm/hap/support.c
@@ -0,0 +1,334 @@
+/*
+ * arch/x86/mm/hap/support.c
+ *
+ * guest page table walker
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <asm/hap.h>
+
+#include "private.h"
+#include "../page-guest32.h"
+
+/*******************************************/
+/* Platform Specific Functions */
+/*******************************************/
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a real-mode guest.
+ */
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva)
+{
+ HERE_I_AM;
+ return ((paddr_t)gva >> PAGE_SHIFT);
+}
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a protected-mode guest.
+ */
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva)
+{
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 2; /* two-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */
+ l1_pgentry_32_t *l1e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ HAP_PRINTK("l2 page table entry is %ulx at index = %d\n",
+ l2e[index].l2, index);
+ if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /*[13:20] */
+ printk("guest physical memory size is too large!\n");
+ domain_crash(v->domain);
+ }
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) +
+ (gva & ~PHYSICAL_PAGE_4M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, return from here */
+ }
+ else {
+ gpfn = l2e_get_pfn( l2e[index] );
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ HAP_PRINTK("l1 page table entry is %ulx at index = %d\n",
+ l1e[index].l1, index);
+ if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( !success ) /* error happened, jump out */
+ break;
+ }
+
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success ) /* error happened */
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a PAE-mode guest.
+ */
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS >= 3
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 3; /* three-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l1_pgentry_t *l1e;
+ l2_pgentry_t *l2e;
+ l3_pgentry_t *l3e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
+ index += ( ((gcr3 >> 5 ) & 127 ) * 4 );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) +
+ (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( success != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success )
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+ HERE_I_AM;
+ printk("guest paging level (3) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
+#endif
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a long-mode guest.
+ */
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS == 4
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 4; /* four-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l4_pgentry_t *l4e;
+ l3_pgentry_t *l3e;
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 4 ) {
+ l4e = map_domain_page( mfn );
+ if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 4 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l4e_get_pfn( l4e[index] );
+ unmap_domain_page(l4e);
+ }
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_ADDR_2M_MASK_LM)
+ + (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_ADDR_4K_MASK_LM) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( success != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success )
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+ HERE_I_AM;
+ printk("guest paging level (4) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
+#endif
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
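In the two-level walker above, the 0x001FE000 test rejects PSE-36 style mappings (bits 13-20 of a PSE l2e carry high physical-address bits that this walker does not handle); otherwise a 4MB mapping splits the address at bit 22. A standalone sketch of that split, using a hypothetical l2e value; not part of the patch:

#include <stdio.h>
#include <stdint.h>

#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )

int main(void)
{
    uint32_t l2e = 0x00c001e3;       /* hypothetical PSE l2e: frame 0x00c00000, PS set */
    unsigned long gva = 0x00345678UL;
    unsigned long gpa;

    if ( l2e & 0x001FE000UL )        /* PSE-36 bits set: the walker gives up */
        return 1;

    gpa = (l2e & PHYSICAL_PAGE_4M_MASK) + (gva & ~PHYSICAL_PAGE_4M_MASK);
    printf("gpa = 0x%lx\n", gpa);    /* 0xf45678 */
    return 0;
}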
diff --git a/xen/arch/x86/mm/shadow/page-guest32.h b/xen/arch/x86/mm/page-guest32.h
index 5d333bd91b..5d333bd91b 100644
--- a/xen/arch/x86/mm/shadow/page-guest32.h
+++ b/xen/arch/x86/mm/page-guest32.h
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 4605dd5e24..18805c92e5 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -24,10 +24,12 @@
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
+#include <asm/hap.h>
/* Xen command-line option to enable hardware-assisted paging */
int opt_hap_enabled = 0;
boolean_param("hap", opt_hap_enabled);
+int hap_capable_system = 0;
/* Printouts */
#define PAGING_PRINTK(_f, _a...) \
@@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d)
{
p2m_init(d);
shadow_domain_init(d);
+
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_domain_init(d);
}
/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
- shadow_vcpu_init(v);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) )
+ hap_vcpu_init(v);
+ else
+ shadow_vcpu_init(v);
}
@@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
XEN_GUEST_HANDLE(void) u_domctl)
{
/* Here, dispatch domctl to the appropriate paging code */
- return shadow_domctl(d, sc, u_domctl);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ return hap_domctl(d, sc, u_domctl);
+ else
+ return shadow_domctl(d, sc, u_domctl);
}
/* Call when destroying a domain */
void paging_teardown(struct domain *d)
{
- shadow_teardown(d);
- /* Call other modes' teardown code here */
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_teardown(d);
+ else
+ shadow_teardown(d);
}
/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
- shadow_teardown(d);
- /* Call other modes' final teardown code here */
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_final_teardown(d);
+ else
+ shadow_final_teardown(d);
}
/* Enable an arbitrary paging-assistance mode. Call once at domain
* creation. */
int paging_enable(struct domain *d, u32 mode)
{
- if ( mode & PG_SH_enable )
- return shadow_enable(d, mode);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ return hap_enable(d, mode | PG_HAP_enable);
else
- /* No other modes supported yet */
- return -EINVAL;
+ return shadow_enable(d, mode | PG_SH_enable);
}
/* Print paging-assistance info to the console */
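Every dispatch point in paging.c above repeats the same three-part test. A hypothetical helper (hap_enabled_for() is not part of the patch) shows the predicate in one place; the stubs merely stand in for the real Xen declarations:

/* Hypothetical helper, not part of the patch: the predicate paging.c
 * repeats to choose the hap path over the shadow path. */
struct domain;                           /* opaque stand-in */
extern int opt_hap_enabled;              /* "hap" boot parameter */
extern int hap_capable_system;           /* set by svm_npt_detect() */
static int is_hvm_domain(struct domain *d) { (void)d; return 1; } /* stub */

static int hap_enabled_for(struct domain *d)
{
    return opt_hap_enabled && hap_capable_system && is_hvm_domain(d);
}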
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 9aaf92cf10..400514f241 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
#if GUEST_PAGING_LEVELS == 2
-#include "page-guest32.h"
+#include "../page-guest32.h"
#define GUEST_L1_PAGETABLE_ENTRIES 1024
#define GUEST_L2_PAGETABLE_ENTRIES 1024
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index c7618fbf3a..94791f7a80 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -104,6 +104,21 @@ struct shadow_vcpu {
};
/************************************************/
+/* hardware assisted paging */
+/************************************************/
+struct hap_domain {
+ spinlock_t lock;
+ int locker;
+ const char *locker_function;
+
+ struct list_head freelists;
+ struct list_head p2m_freelist;
+ unsigned int total_pages; /* number of pages allocated */
+ unsigned int free_pages; /* number of pages on freelists */
+ unsigned int p2m_pages; /* number of pages allocated to p2m */
+};
+
+/************************************************/
/* p2m handling */
/************************************************/
@@ -135,6 +150,7 @@ struct paging_domain {
struct shadow_domain shadow;
/* Other paging assistance code will have structs here */
+ struct hap_domain hap;
};
struct paging_vcpu {
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
new file mode 100644
index 0000000000..9c070f6fa8
--- /dev/null
+++ b/xen/include/asm-x86/hap.h
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * include/asm-x86/hap.h
+ *
+ * hardware-assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_HAP_H
+#define _XEN_HAP_H
+
+#define HERE_I_AM \
+ debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__)
+#define HAP_PRINTK(_f, _a...) \
+ debugtrace_printk("hap: %s(): " _f, __func__, ##_a)
+#define HAP_ERROR(_f, _a...) \
+ printk("hap error: %s(): " _f, __func__, ##_a)
+
+/************************************************/
+/* hap domain page mapping */
+/************************************************/
+static inline void *
+hap_map_domain_page(mfn_t mfn)
+{
+ return map_domain_page(mfn_x(mfn));
+}
+
+static inline void
+hap_unmap_domain_page(void *p)
+{
+ unmap_domain_page(p);
+}
+
+static inline void *
+hap_map_domain_page_global(mfn_t mfn)
+{
+ return map_domain_page_global(mfn_x(mfn));
+}
+
+static inline void
+hap_unmap_domain_page_global(void *p)
+{
+ unmap_domain_page_global(p);
+}
+
+/************************************************/
+/* locking for hap code */
+/************************************************/
+#define hap_lock_init(_d) \
+ do { \
+ spin_lock_init(&(_d)->arch.paging.hap.lock); \
+ (_d)->arch.paging.hap.locker = -1; \
+ (_d)->arch.paging.hap.locker_function = "nobody"; \
+ } while (0)
+
+#define hap_locked_by_me(_d) \
+ (current->processor == (_d)->arch.paging.hap.locker)
+
+#define hap_lock(_d) \
+ do { \
+ if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\
+ { \
+ printk("Error: hap lock held by %s\n", \
+ (_d)->arch.paging.hap.locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_d)->arch.paging.hap.lock); \
+ ASSERT((_d)->arch.paging.hap.locker == -1); \
+ (_d)->arch.paging.hap.locker = current->processor; \
+ (_d)->arch.paging.hap.locker_function = __func__; \
+ } while (0)
+
+#define hap_unlock(_d) \
+ do { \
+ ASSERT((_d)->arch.paging.hap.locker == current->processor); \
+ (_d)->arch.paging.hap.locker = -1; \
+ (_d)->arch.paging.hap.locker_function = "nobody"; \
+ spin_unlock(&(_d)->arch.paging.hap.lock); \
+ } while (0)
+
+/************************************************/
+/* hap domain level functions */
+/************************************************/
+void hap_domain_init(struct domain *d);
+int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(void) u_domctl);
+int hap_enable(struct domain *d, u32 mode);
+void hap_final_teardown(struct domain *d);
+void hap_teardown(struct domain *d);
+void hap_vcpu_init(struct vcpu *v);
+
+extern struct paging_mode hap_paging_real_mode;
+extern struct paging_mode hap_paging_protected_mode;
+extern struct paging_mode hap_paging_pae_mode;
+extern struct paging_mode hap_paging_long_mode;
+#endif /* _XEN_HAP_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */