-rw-r--r--  xen/arch/x86/hvm/hvm.c                |   3
-rw-r--r--  xen/arch/x86/hvm/svm/svm.c            | 198
-rw-r--r--  xen/arch/x86/hvm/svm/vmcb.c           |  10
-rw-r--r--  xen/arch/x86/mm/Makefile              |   1
-rw-r--r--  xen/arch/x86/mm/hap/Makefile          |   2
-rw-r--r--  xen/arch/x86/mm/hap/hap.c             | 708
-rw-r--r--  xen/arch/x86/mm/hap/private.h         | 112
-rw-r--r--  xen/arch/x86/mm/hap/support.c         | 334
-rw-r--r--  xen/arch/x86/mm/page-guest32.h (renamed from xen/arch/x86/mm/shadow/page-guest32.h) | 0
-rw-r--r--  xen/arch/x86/mm/paging.c              |  34
-rw-r--r--  xen/arch/x86/mm/shadow/types.h        |   2
-rw-r--r--  xen/include/asm-x86/domain.h          |  16
-rw-r--r--  xen/include/asm-x86/hap.h             | 122
13 files changed, 1525 insertions, 17 deletions
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index bda506b182..f48bb6f49a 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -161,7 +161,8 @@ int hvm_domain_initialise(struct domain *d)
spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
spin_lock_init(&d->arch.hvm_domain.irq_lock);
- rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external);
+ /* paging support will be determined inside paging.c */
+ rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
if ( rc != 0 )
return rc;
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 40910395e0..61380c606a 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -49,6 +49,7 @@
#include <public/sched.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
+#include <asm/hap.h>
#define SVM_EXTRA_DEBUG
@@ -76,6 +77,10 @@ static void *root_vmcb[NR_CPUS] __read_mostly;
/* physical address of above for host VMSAVE/VMLOAD */
u64 root_vmcb_pa[NR_CPUS] __read_mostly;
+/* hardware assisted paging bits */
+extern int opt_hap_enabled;
+extern int hap_capable_system;
+
static inline void svm_inject_exception(struct vcpu *v, int trap,
int ev, int error_code)
{
@@ -240,7 +245,9 @@ static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
/*
* Check for EFER.LME transitions from 0->1 or 1->0. Do the
* sanity checks and then make sure that both EFER.LME and
- * EFER.LMA are cleared.
+ * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb
+ * until the guest also sets CR0.PG, since even if the guest has
+ * paging "disabled", the vmcb's CR0 always has PG set.)
*/
if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
{
@@ -269,10 +276,12 @@ static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
vmcb->efer &= ~(EFER_LME | EFER_LMA);
}
+
#endif /* __x86_64__ */
/* update the guest EFER's shadow with the intended value */
v->arch.hvm_svm.cpu_shadow_efer = msr_content;
+
break;
#ifdef __x86_64__
@@ -902,6 +911,10 @@ static void arch_svm_do_launch(struct vcpu *v)
{
svm_do_launch(v);
+ if ( paging_mode_hap(v->domain) ) {
+ v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+ }
+
if ( v->vcpu_id != 0 )
{
cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
@@ -1008,6 +1021,21 @@ static struct hvm_function_table svm_function_table = {
.event_injection_faulted = svm_event_injection_faulted
};
+void svm_npt_detect(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ /* check CPUID for nested paging support */
+ cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
+ if ( edx & 0x01 ) { /* nested paging */
+ hap_capable_system = 1;
+ }
+ else if ( opt_hap_enabled ) {
+ printk(" nested paging is not supported by this CPU.\n");
+ hap_capable_system = 0; /* no nested paging, we disable flag. */
+ }
+}
+
int start_svm(void)
{
u32 eax, ecx, edx;
@@ -1038,6 +1066,8 @@ int start_svm(void)
wrmsr(MSR_EFER, eax, edx);
printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
+ svm_npt_detect();
+
/* Initialize the HSA for this core */
phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
phys_hsa_lo = (u32) phys_hsa;
@@ -1074,6 +1104,18 @@ void arch_svm_do_resume(struct vcpu *v)
}
}
+static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+ if (mmio_space(gpa)) {
+ handle_mmio(gpa);
+ return 1;
+ }
+
+ /* We should not reach here. Otherwise, the P2M table is not correct. */
+ return 0;
+}
+
+
static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
{
HVM_DBG_LOG(DBG_LEVEL_VMMU,
@@ -1700,6 +1742,52 @@ static void svm_io_instruction(struct vcpu *v)
}
}
+static int npt_set_cr0(unsigned long value)
+{
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ ASSERT(vmcb);
+
+ /* ET is reserved and should always be 1 */
+ value |= X86_CR0_ET;
+
+ /* Check whether the guest is about to turn on long mode.
+ * If it is, set EFER.LME and EFER.LMA. Update the shadow EFER.LMA
+ * bit too, so svm_long_mode_enabled() will work.
+ */
+ if ( (value & X86_CR0_PG) && svm_lme_is_set(v) &&
+ (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) )
+ {
+ v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
+ vmcb->efer |= EFER_LMA | EFER_LME;
+ }
+
+ /* Whenever CR0.PG is cleared under long mode, LMA will be cleared
+ * immediately. We emulate this process for svm_long_mode_enabled().
+ */
+ if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
+ {
+ if ( svm_long_mode_enabled(v) )
+ {
+ v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
+ }
+ }
+
+ vmcb->cr0 = value | X86_CR0_WP;
+ v->arch.hvm_svm.cpu_shadow_cr0 = value;
+
+ /* TS cleared? Then initialise FPU now. */
+ if ( !(value & X86_CR0_TS) ) {
+ setup_fpu(v);
+ vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
+ }
+
+ paging_update_paging_modes(v);
+
+ return 1;
+}
+
static int svm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
@@ -1797,6 +1885,85 @@ static int svm_set_cr0(unsigned long value)
return 1;
}
+//
+// nested paging functions
+//
+
+static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
+{
+ unsigned long value;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ ASSERT(vmcb);
+
+ value = get_reg(gpreg, regs, vmcb);
+
+ switch (cr) {
+ case 0:
+ return npt_set_cr0(value);
+
+ case 3:
+ vmcb->cr3 = value;
+ v->arch.hvm_svm.cpu_cr3 = value;
+ break;
+
+ case 4: /* CR4 */
+ vmcb->cr4 = value;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ paging_update_paging_modes(v);
+ break;
+
+ case 8:
+ vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+ vmcb->vintr.fields.tpr = value & 0x0F;
+ break;
+
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+ domain_crash(v->domain);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+ unsigned long value = 0;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ vmcb = v->arch.hvm_svm.vmcb;
+ ASSERT(vmcb);
+
+ switch(cr) {
+ case 0:
+ value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0;
+ break;
+ case 2:
+ value = vmcb->cr2;
+ break;
+ case 3:
+ value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
+ break;
+ case 4:
+ value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
+ break;
+ case 8:
+ value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+ value = (value & 0xF0) >> 4;
+ break;
+ default:
+ domain_crash(v->domain);
+ return;
+ }
+
+ set_reg(gp, value, regs, vmcb);
+}
+
/*
* Read from control registers. CR0 and CR4 are read from the shadow.
*/
@@ -2043,12 +2210,18 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
{
case INSTR_MOV2CR:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- result = mov_to_cr(gpreg, cr, regs);
+ if ( paging_mode_hap(v->domain) )
+ result = npt_mov_to_cr(gpreg, cr, regs);
+ else
+ result = mov_to_cr(gpreg, cr, regs);
break;
case INSTR_MOVCR2:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- mov_from_cr(cr, gpreg, regs);
+ if ( paging_mode_hap(v->domain) )
+ npt_mov_from_cr(cr, gpreg, regs);
+ else
+ mov_from_cr(cr, gpreg, regs);
break;
case INSTR_CLTS:
@@ -2075,7 +2248,10 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
if (svm_dbg_on)
printk("CR0-LMSW CR0 - New value=%lx\n", value);
- result = svm_set_cr0(value);
+ if ( paging_mode_hap(v->domain) )
+ result = npt_set_cr0(value);
+ else
+ result = svm_set_cr0(value);
break;
case INSTR_SMSW:
@@ -2359,6 +2535,11 @@ static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
vmcb->cr4 = SVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = 0;
+ if ( paging_mode_hap(v->domain) ) {
+ vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ }
+
/* This will jump to ROMBIOS */
vmcb->rip = 0xFFF0;
@@ -3042,6 +3223,15 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
hvm_triple_fault();
break;
+ case VMEXIT_NPF:
+ {
+ regs->error_code = vmcb->exitinfo1;
+ if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) ) {
+ domain_crash(v->domain);
+ }
+ break;
+ }
+
default:
exit_and_crash:
gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
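For reference, the CPUID probe that svm_npt_detect() performs above (leaf 0x8000000A, EDX bit 0 = nested paging) can be reproduced from user space. A minimal standalone sketch, assuming GCC/Clang on x86; it is not part of the patch:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* Leaf 0x8000000A is the SVM feature leaf; EDX bit 0 reports NPT. */
    if ( !__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx) )
    {
        printf("CPUID leaf 0x8000000A not available\n");
        return 1;
    }
    printf("nested paging (NPT): %s\n", (edx & 1) ? "supported" : "not supported");
    return 0;
}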
diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c
index 25b30f8e58..f2a220011e 100644
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -201,6 +201,13 @@ static int construct_vmcb(struct vcpu *v)
arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
+ if ( paging_mode_hap(v->domain) ) {
+ vmcb->cr0 = arch_svm->cpu_shadow_cr0;
+ vmcb->np_enable = 1; /* enable nested paging */
+ vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
+ vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG;
+ }
+
return 0;
}
@@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
(unsigned long long) vmcb->kerngsbase,
(unsigned long long) vmcb->g_pat);
-
+ printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
/* print out all the selectors */
svm_dump_sel("CS", &vmcb->cs);
svm_dump_sel("DS", &vmcb->ds);
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index a58211d91a..79b25962ac 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -1,4 +1,5 @@
subdir-y += shadow
+subdir-y += hap
obj-y += paging.o
obj-y += p2m.o
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
new file mode 100644
index 0000000000..8833ea5c75
--- /dev/null
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -0,0 +1,2 @@
+obj-y += hap.o
+obj-y += support.o
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
new file mode 100644
index 0000000000..57a2ee4fd4
--- /dev/null
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -0,0 +1,708 @@
+/******************************************************************************
+ * arch/x86/mm/hap/hap.c
+ *
+ * hardware assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 by XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <asm/event.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/shared.h>
+#include <asm/hap.h>
+#include <asm/paging.h>
+#include <asm/domain.h>
+
+#include "private.h"
+
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_to_page
+#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#undef page_to_mfn
+#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+
+/************************************************/
+/* HAP SUPPORT FUNCTIONS */
+/************************************************/
+mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
+{
+ struct page_info *sp = NULL;
+ void *p;
+
+ ASSERT(hap_locked_by_me(d));
+
+ sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list);
+ list_del(&sp->list);
+ d->arch.paging.hap.free_pages -= 1;
+
+ /* Now safe to clear the page for reuse */
+ p = hap_map_domain_page(page_to_mfn(sp));
+ ASSERT(p != NULL);
+ clear_page(p);
+ hap_unmap_domain_page(p);
+
+ return page_to_mfn(sp);
+}
+
+void hap_free(struct domain *d, mfn_t smfn)
+{
+ struct page_info *sp = mfn_to_page(smfn);
+
+ ASSERT(hap_locked_by_me(d));
+
+ d->arch.paging.hap.free_pages += 1;
+ list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+}
+
+static int hap_alloc_p2m_pages(struct domain *d)
+{
+ struct page_info *pg;
+
+ ASSERT(hap_locked_by_me(d));
+
+ pg = mfn_to_page(hap_alloc(d, 0));
+ d->arch.paging.hap.p2m_pages += 1;
+ d->arch.paging.hap.total_pages -= 1;
+
+ page_set_owner(pg, d);
+ pg->count_info = 1;
+ list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
+
+ return 1;
+}
+
+struct page_info * hap_alloc_p2m_page(struct domain *d)
+{
+ struct list_head *entry;
+ struct page_info *pg;
+ mfn_t mfn;
+ void *p;
+
+ hap_lock(d);
+
+ if ( list_empty(&d->arch.paging.hap.p2m_freelist) &&
+ !hap_alloc_p2m_pages(d) ) {
+ hap_unlock(d);
+ return NULL;
+ }
+ entry = d->arch.paging.hap.p2m_freelist.next;
+ list_del(entry);
+
+ hap_unlock(d);
+
+ pg = list_entry(entry, struct page_info, list);
+ mfn = page_to_mfn(pg);
+ p = hap_map_domain_page(mfn);
+ clear_page(p);
+ hap_unmap_domain_page(p);
+
+ return pg;
+}
+
+void hap_free_p2m_page(struct domain *d, struct page_info *pg)
+{
+ ASSERT(page_get_owner(pg) == d);
+ /* Should have just the one ref we gave it in alloc_p2m_page() */
+ if ( (pg->count_info & PGC_count_mask) != 1 ) {
+ HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ pg->count_info, pg->u.inuse.type_info);
+ }
+ /* Free should not decrement domain's total allocation, since
+ * these pages were allocated without an owner. */
+ page_set_owner(pg, NULL);
+ free_domheap_pages(pg, 0);
+ d->arch.paging.hap.p2m_pages--;
+}
+
+/* Return the size of the pool, rounded up to the nearest MB */
+static unsigned int
+hap_get_allocation(struct domain *d)
+{
+ unsigned int pg = d->arch.paging.hap.total_pages;
+
+ HERE_I_AM;
+ return ((pg >> (20 - PAGE_SHIFT))
+ + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
+
+/* Set the pool of pages to the required number of pages.
+ * Returns 0 for success, non-zero for failure. */
+static unsigned int
+hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
+{
+ struct page_info *sp;
+
+ ASSERT(hap_locked_by_me(d));
+
+ while ( d->arch.paging.hap.total_pages != pages ) {
+ if ( d->arch.paging.hap.total_pages < pages ) {
+ /* Need to allocate more memory from domheap */
+ sp = alloc_domheap_pages(NULL, 0, 0);
+ if ( sp == NULL ) {
+ HAP_PRINTK("failed to allocate hap pages.\n");
+ return -ENOMEM;
+ }
+ d->arch.paging.hap.free_pages += 1;
+ d->arch.paging.hap.total_pages += 1;
+ list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+ }
+ else if ( d->arch.paging.hap.total_pages > pages ) {
+ /* Need to return memory to domheap */
+ ASSERT(!list_empty(&d->arch.paging.hap.freelists));
+ sp = list_entry(d->arch.paging.hap.freelists.next,
+ struct page_info, list);
+ list_del(&sp->list);
+ d->arch.paging.hap.free_pages -= 1;
+ d->arch.paging.hap.total_pages -= 1;
+ free_domheap_pages(sp, 0);
+ }
+
+ /* Check to see if we need to yield and try again */
+ if ( preempted && hypercall_preempt_check() ) {
+ *preempted = 1;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+#if CONFIG_PAGING_LEVELS == 4
+void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+ struct domain *d = v->domain;
+ l4_pgentry_t *sl4e;
+
+ sl4e = hap_map_domain_page(sl4mfn);
+ ASSERT(sl4e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
+ __PAGE_HYPERVISOR);
+
+ sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
+ l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR);
+
+ /* install domain-specific P2M table */
+ sl4e[l4_table_offset(RO_MPT_VIRT_START)] =
+ l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+ __PAGE_HYPERVISOR);
+
+ hap_unmap_domain_page(sl4e);
+}
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+#if CONFIG_PAGING_LEVELS == 3
+void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
+{
+ struct domain *d = v->domain;
+ l2_pgentry_t *sl2e;
+
+ int i;
+
+ sl2e = hap_map_domain_page(sl2hmfn);
+ ASSERT(sl2e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+ &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_pfn(
+ mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+ __PAGE_HYPERVISOR);
+
+ for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ )
+ sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ l2e_empty();
+
+ if ( paging_mode_translate(d) )
+ {
+ /* Install the domain-specific p2m table */
+ l3_pgentry_t *p2m;
+ ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+ p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+ for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+ {
+ sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
+ (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))),
+ __PAGE_HYPERVISOR)
+ : l2e_empty();
+ }
+ hap_unmap_domain_page(p2m);
+ }
+
+ hap_unmap_domain_page(sl2e);
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
+{
+ struct domain *d = v->domain;
+ l2_pgentry_t *sl2e;
+ int i;
+
+ sl2e = hap_map_domain_page(sl2mfn);
+ ASSERT(sl2e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_pfn(
+ mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+ __PAGE_HYPERVISOR);
+
+
+ sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR);
+
+ /* install domain-specific P2M table */
+ sl2e[l2_table_offset(RO_MPT_VIRT_START)] =
+ l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+ __PAGE_HYPERVISOR);
+
+ hap_unmap_domain_page(sl2e);
+}
+#endif
+
+mfn_t hap_make_monitor_table(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+
+ ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+
+#if CONFIG_PAGING_LEVELS == 4
+ {
+ mfn_t m4mfn;
+ m4mfn = hap_alloc(d, 0);
+ hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
+ return m4mfn;
+ }
+#elif CONFIG_PAGING_LEVELS == 3
+ {
+ mfn_t m3mfn, m2mfn;
+ l3_pgentry_t *l3e;
+ l2_pgentry_t *l2e;
+ int i;
+
+ m3mfn = hap_alloc(d, 0);
+
+ /* Install a monitor l2 table in slot 3 of the l3 table.
+ * This is used for all Xen entries, including linear maps
+ */
+ m2mfn = hap_alloc(d, 0);
+ l3e = hap_map_domain_page(m3mfn);
+ l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+ hap_install_xen_entries_in_l2h(v, m2mfn);
+ /* Install the monitor's own linear map */
+ l2e = hap_map_domain_page(m2mfn);
+ for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
+ : l2e_empty();
+ hap_unmap_domain_page(l2e);
+ hap_unmap_domain_page(l3e);
+
+ HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+ return m3mfn;
+ }
+#else
+ {
+ mfn_t m2mfn;
+
+ m2mfn = hap_alloc(d, 0);
+ hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+
+ return m2mfn;
+ }
+#endif
+}
+
+void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
+{
+ struct domain *d = v->domain;
+
+#if CONFIG_PAGING_LEVELS == 4
+ /* Need to destroy the l3 monitor page in slot 0 too */
+ {
+ mfn_t m3mfn;
+ l4_pgentry_t *l4e = hap_map_domain_page(mmfn);
+ ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+ m3mfn = _mfn(l4e_get_pfn(l4e[0]));
+ hap_free(d, m3mfn);
+ hap_unmap_domain_page(l4e);
+ }
+#elif CONFIG_PAGING_LEVELS == 3
+ /* Need to destroy the l2 monitor page in slot 3 too */
+ {
+ l3_pgentry_t *l3e = hap_map_domain_page(mmfn);
+ ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+ hap_free(d, _mfn(l3e_get_pfn(l3e[3])));
+ hap_unmap_domain_page(l3e);
+ }
+#endif
+
+ /* Put the memory back in the pool */
+ hap_free(d, mmfn);
+}
+
+/************************************************/
+/* HAP DOMAIN LEVEL FUNCTIONS */
+/************************************************/
+void hap_domain_init(struct domain *d)
+{
+ hap_lock_init(d);
+ INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
+ INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
+}
+
+/* return 0 for success, -errno for failure */
+int hap_enable(struct domain *d, u32 mode)
+{
+ unsigned int old_pages;
+ int rv = 0;
+
+ HERE_I_AM;
+
+ domain_pause(d);
+ /* error check */
+ if ( (d == current->domain) ) {
+ rv = -EINVAL;
+ goto out;
+ }
+
+ old_pages = d->arch.paging.hap.total_pages;
+ if ( old_pages == 0 ) {
+ unsigned int r;
+ hap_lock(d);
+ r = hap_set_allocation(d, 256, NULL);
+ hap_unlock(d);
+ if ( r != 0 ) {
+ hap_set_allocation(d, 0, NULL);
+ rv = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* allocate the P2M table */
+ if ( mode & PG_translate ) {
+ rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
+ if ( rv != 0 )
+ goto out;
+ }
+
+ d->arch.paging.mode = mode | PG_SH_enable;
+
+ out:
+ domain_unpause(d);
+ return rv;
+}
+
+void hap_final_teardown(struct domain *d)
+{
+ HERE_I_AM;
+
+ if ( d->arch.paging.hap.total_pages != 0 )
+ hap_teardown(d);
+
+ p2m_teardown(d);
+}
+
+void hap_teardown(struct domain *d)
+{
+ struct vcpu *v;
+ mfn_t mfn;
+ HERE_I_AM;
+
+ ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
+ ASSERT(d != current->domain);
+
+ if ( !hap_locked_by_me(d) )
+ hap_lock(d); /* Keep various asserts happy */
+
+ if ( paging_mode_enabled(d) ) {
+ /* release the monitor table held by each vcpu */
+ for_each_vcpu(d, v) {
+ if ( v->arch.paging.mode && paging_mode_external(d) ) {
+ mfn = pagetable_get_mfn(v->arch.monitor_table);
+ if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
+ hap_destroy_monitor_table(v, mfn);
+ v->arch.monitor_table = pagetable_null();
+ }
+ }
+ }
+
+ if ( d->arch.paging.hap.total_pages != 0 ) {
+ HAP_PRINTK("teardown of domain %u starts."
+ " pages total = %u, free = %u, p2m=%u\n",
+ d->domain_id,
+ d->arch.paging.hap.total_pages,
+ d->arch.paging.hap.free_pages,
+ d->arch.paging.hap.p2m_pages);
+ hap_set_allocation(d, 0, NULL);
+ HAP_PRINTK("teardown done."
+ " pages total = %u, free = %u, p2m=%u\n",
+ d->arch.paging.hap.total_pages,
+ d->arch.paging.hap.free_pages,
+ d->arch.paging.hap.p2m_pages);
+ ASSERT(d->arch.paging.hap.total_pages == 0);
+ }
+
+ d->arch.paging.mode &= ~PG_log_dirty;
+
+ hap_unlock(d);
+}
+
+int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(void) u_domctl)
+{
+ int rc, preempted = 0;
+
+ HERE_I_AM;
+
+ if ( unlikely(d == current->domain) ) {
+ gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n");
+ return -EINVAL;
+ }
+
+ switch ( sc->op ) {
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
+ case XEN_DOMCTL_SHADOW_OP_CLEAN:
+ case XEN_DOMCTL_SHADOW_OP_PEEK:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE:
+ HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+ domain_crash(d);
+ return -EINVAL;
+ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+ hap_lock(d);
+ rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
+ hap_unlock(d);
+ if ( preempted )
+ /* Not finished. Set up to re-run the call. */
+ rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
+ u_domctl);
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = hap_get_allocation(d);
+ return rc;
+ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+ sc->mb = hap_get_allocation(d);
+ return 0;
+ default:
+ HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+ return -EINVAL;
+ }
+}
+
+void hap_vcpu_init(struct vcpu *v)
+{
+ v->arch.paging.mode = &hap_paging_real_mode;
+}
+/************************************************/
+/* HAP PAGING MODE FUNCTIONS */
+/************************************************/
+/* In theory, hap should not intercept guest page faults. This function can
+ * be recycled to handle host/nested page faults, if needed.
+ */
+int hap_page_fault(struct vcpu *v, unsigned long va,
+ struct cpu_user_regs *regs)
+{
+ HERE_I_AM;
+ domain_crash(v->domain);
+ return 0;
+}
+
+/* Called when the guest issues an invlpg request.
+ * Return 1 if a page invalidation needs to be issued on the CPU; return 0
+ * if it does not.
+ */
+int hap_invlpg(struct vcpu *v, unsigned long va)
+{
+ HERE_I_AM;
+ return 0;
+}
+
+void hap_update_cr3(struct vcpu *v, int do_locking)
+{
+ struct domain *d = v->domain;
+ mfn_t gmfn;
+
+ HERE_I_AM;
+ /* Don't do anything on an uninitialised vcpu */
+ if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) {
+ ASSERT(v->arch.cr3 == 0);
+ return;
+ }
+
+ if ( do_locking )
+ hap_lock(v->domain);
+
+ ASSERT(hap_locked_by_me(v->domain));
+ ASSERT(v->arch.paging.mode);
+
+ gmfn = pagetable_get_mfn(v->arch.guest_table);
+
+ make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
+
+ hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table));
+
+ HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n",
+ d->domain_id, v->vcpu_id,
+ (unsigned long)pagetable_get_pfn(v->arch.guest_table),
+ (unsigned long)pagetable_get_pfn(v->arch.monitor_table));
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+
+ if ( do_locking )
+ hap_unlock(v->domain);
+}
+
+void hap_update_paging_modes(struct vcpu *v)
+{
+ struct domain *d;
+
+ HERE_I_AM;
+
+ d = v->domain;
+ hap_lock(d);
+
+ /* Update the guest paging mode. Note that we rely on hvm functions to
+ * detect the guest's paging mode, so make sure the shadow registers
+ * (CR0, CR4, EFER) reflect the guest's state correctly.
+ */
+ if ( hvm_paging_enabled(v) ) {
+ if ( hvm_long_mode_enabled(v) )
+ v->arch.paging.mode = &hap_paging_long_mode;
+ else if ( hvm_pae_enabled(v) )
+ v->arch.paging.mode = &hap_paging_pae_mode;
+ else
+ v->arch.paging.mode = &hap_paging_protected_mode;
+ }
+ else {
+ v->arch.paging.mode = &hap_paging_real_mode;
+ }
+
+ v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
+
+ /* use p2m map */
+ v->arch.guest_table =
+ pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+
+ if ( pagetable_is_null(v->arch.monitor_table) ) {
+ mfn_t mmfn = hap_make_monitor_table(v);
+ v->arch.monitor_table = pagetable_from_mfn(mmfn);
+ make_cr3(v, mfn_x(mmfn));
+ }
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ hap_unlock(d);
+}
+
+void
+hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
+ l1_pgentry_t new, unsigned int level)
+{
+ hap_lock(v->domain);
+ safe_write_pte(p, new);
+ hap_unlock(v->domain);
+}
+
+/* Entry points into this mode of the hap code. */
+struct paging_mode hap_paging_real_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_real_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 1
+};
+
+struct paging_mode hap_paging_protected_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_protected_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 2
+};
+
+struct paging_mode hap_paging_pae_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_pae_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 3
+};
+
+struct paging_mode hap_paging_long_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_long_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 4
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
+
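hap_enable() seeds the pool with 256 pages, and hap_get_allocation() reports the pool rounded up to whole megabytes. A quick standalone check of that rounding, assuming 4KB pages (PAGE_SHIFT == 12); not part of the patch:

#include <assert.h>

#define PAGE_SHIFT 12

static unsigned int pages_to_mb(unsigned int pg)
{
    /* Same arithmetic as hap_get_allocation(): whole MBs, rounded up. */
    return (pg >> (20 - PAGE_SHIFT))
        + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0);
}

int main(void)
{
    assert(pages_to_mb(0)   == 0);
    assert(pages_to_mb(1)   == 1);   /* any partial MB rounds up */
    assert(pages_to_mb(256) == 1);   /* 256 x 4KB pages == 1MB exactly */
    assert(pages_to_mb(257) == 2);
    return 0;
}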
diff --git a/xen/arch/x86/mm/hap/private.h b/xen/arch/x86/mm/hap/private.h
new file mode 100644
index 0000000000..aa5100c271
--- /dev/null
+++ b/xen/arch/x86/mm/hap/private.h
@@ -0,0 +1,112 @@
+/*
+ * arch/x86/mm/hap/private.h
+ *
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __HAP_PRIVATE_H__
+#define __HAP_PRIVATE_H__
+
+#include <asm/flushtlb.h>
+#include <asm/hvm/support.h>
+
+/********************************************/
+/* GUEST TRANSLATION FUNCS */
+/********************************************/
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva);
+/********************************************/
+/* MISC DEFINITIONS */
+/********************************************/
+
+/* PT_SHIFT describes the amount by which a virtual address is shifted right
+ * to right justify the portion to be used for indexing into a page
+ * table, given the guest memory model (i.e. number of levels) and the level
+ * of the page table being accessed. The idea is from Virtual Iron's code.
+ */
+static const int PT_SHIFT[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 12, 22, 0, 0}, /* 2 */
+ { 0, 12, 21, 30, 0}, /* 3 */
+ { 0, 12, 21, 30, 39} /* 4 */
+ };
+
+/* PT_ENTRIES describes the number of entries in a page table, given the
+ * memory model (i.e. number of levels) and the level of the page table
+ * being considered. This idea is from Virtual Iron's shadow code. */
+static const int PT_ENTRIES[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 1024, 1024, 0, 0}, /* 2 */
+ { 0, 512, 512, 4, 0}, /* 3 */
+ { 0, 512, 512, 512, 512} /* 4 */
+ };
+
+/********************************************/
+/* PAGING DEFINITION FOR GUEST */
+/********************************************/
+#define PHYSICAL_PAGE_4K_SIZE (1UL << 12)
+#define PHYSICAL_PAGE_2M_SIZE (1UL << 21)
+#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
+#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) )
+#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) )
+#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )
+
+/* long mode physical address mask */
+#define PHYSICAL_ADDR_BITS_LM 52
+#define PHYSICAL_ADDR_MASK_LM ((1UL << PHYSICAL_ADDR_BITS_LM)-1)
+#define PHYSICAL_ADDR_2M_MASK_LM (PHYSICAL_PAGE_2M_MASK & PHYSICAL_ADDR_MASK_LM)
+#define PHYSICAL_ADDR_4K_MASK_LM (PHYSICAL_PAGE_4K_MASK & PHYSICAL_ADDR_MASK_LM)
+
+#define PAGE_NX_BIT (1ULL << 63)
+/************************************************/
+/* PAGETABLE RELATED VARIABLES */
+/************************************************/
+#if CONFIG_PAGING_LEVELS == 2
+#define HAP_L1_PAGETABLE_ENTRIES 1024
+#define HAP_L2_PAGETABLE_ENTRIES 1024
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 22
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3
+#define HAP_L1_PAGETABLE_ENTRIES 512
+#define HAP_L2_PAGETABLE_ENTRIES 512
+#define HAP_L3_PAGETABLE_ENTRIES 4
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 21
+#define HAP_L3_PAGETABLE_SHIFT 30
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+#define HAP_L1_PAGETABLE_ENTRIES 512
+#define HAP_L2_PAGETABLE_ENTRIES 512
+#define HAP_L3_PAGETABLE_ENTRIES 512
+#define HAP_L4_PAGETABLE_ENTRIES 512
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 21
+#define HAP_L3_PAGETABLE_SHIFT 30
+#define HAP_L4_PAGETABLE_SHIFT 39
+#endif
+
+#endif /* __HAP_PRIVATE_H__ */
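The PT_SHIFT/PT_ENTRIES tables above drive the index calculation used by every walker in support.c: index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1). A worked standalone example for a two-level (non-PAE) guest, with only the two-level rows reproduced; for illustration only:

#include <stdio.h>

static const int PT_SHIFT[][5]   = { {0}, {0}, { 0, 12, 22, 0, 0 } };
static const int PT_ENTRIES[][5] = { {0}, {0}, { 0, 1024, 1024, 0, 0 } };

int main(void)
{
    unsigned long gva = 0xc0201234UL;
    int mode = 2;                /* two-level guest */
    int lev;

    for ( lev = mode; lev >= 1; lev-- )
    {
        int index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev] - 1);
        printf("level %d index = %d\n", lev, index);
    }
    /* Prints: level 2 index = 768, level 1 index = 513. */
    return 0;
}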
diff --git a/xen/arch/x86/mm/hap/support.c b/xen/arch/x86/mm/hap/support.c
new file mode 100644
index 0000000000..af5b7ec72f
--- /dev/null
+++ b/xen/arch/x86/mm/hap/support.c
@@ -0,0 +1,334 @@
+/*
+ * arch/x86/mm/hap/support.c
+ *
+ * guest page table walker
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <asm/hap.h>
+
+#include "private.h"
+#include "../page-guest32.h"
+
+/*******************************************/
+/* Platform Specific Functions */
+/*******************************************/
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a real-mode guest.
+ */
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva)
+{
+ HERE_I_AM;
+ return ((paddr_t)gva >> PAGE_SHIFT);
+}
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a protected-mode guest.
+ */
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva)
+{
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 2; /* two-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */
+ l1_pgentry_32_t *l1e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ HAP_PRINTK("l2 page table entry is %ulx at index = %d\n",
+ l2e[index].l2, index);
+ if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /*[13:20] */
+ printk("guest physical memory size is too large!\n");
+ domain_crash(v->domain);
+ }
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) +
+ (gva & ~PHYSICAL_PAGE_4M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, return from here */
+ }
+ else {
+ gpfn = l2e_get_pfn( l2e[index] );
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ HAP_PRINTK("l1 page table entry is %ulx at index = %d\n",
+ l1e[index].l1, index);
+ if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( !success ) /* error happened, jump out */
+ break;
+ }
+
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success ) /* error happened */
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a PAE-mode guest.
+ */
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS >= 3
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 3; /* three-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l1_pgentry_t *l1e;
+ l2_pgentry_t *l2e;
+ l3_pgentry_t *l3e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
+ index += ( ((gcr3 >> 5 ) & 127 ) * 4 );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) +
+ (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( success != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success )
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+ HERE_I_AM;
+ printk("guest paging level (3) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
+#endif
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for a long-mode guest.
+ */
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS == 4
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 4; /* four-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l4_pgentry_t *l4e;
+ l3_pgentry_t *l3e;
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 4 ) {
+ l4e = map_domain_page( mfn );
+ if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 4 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l4e_get_pfn( l4e[index] );
+ unmap_domain_page(l4e);
+ }
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_ADDR_2M_MASK_LM)
+ + (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_ADDR_4K_MASK_LM) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( success != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success )
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+ HERE_I_AM;
+ printk("guest paging level (4) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
+#endif
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
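In the two-level walker above, the 0x001FE000 test rejects PSE-36 style mappings (bits 13-20 of a PSE l2e carry high physical-address bits that this walker does not handle); otherwise a 4MB mapping splits the address at bit 22. A standalone sketch of that split, using a hypothetical l2e value; not part of the patch:

#include <stdio.h>
#include <stdint.h>

#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )

int main(void)
{
    uint32_t l2e = 0x00c001e3;       /* hypothetical PSE l2e: frame 0x00c00000, PS set */
    unsigned long gva = 0x00345678UL;
    unsigned long gpa;

    if ( l2e & 0x001FE000UL )        /* PSE-36 bits set: the walker gives up */
        return 1;

    gpa = (l2e & PHYSICAL_PAGE_4M_MASK) + (gva & ~PHYSICAL_PAGE_4M_MASK);
    printf("gpa = 0x%lx\n", gpa);    /* 0xf45678 */
    return 0;
}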
diff --git a/xen/arch/x86/mm/shadow/page-guest32.h b/xen/arch/x86/mm/page-guest32.h
index 5d333bd91b..5d333bd91b 100644
--- a/xen/arch/x86/mm/shadow/page-guest32.h
+++ b/xen/arch/x86/mm/page-guest32.h
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 4605dd5e24..18805c92e5 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -24,10 +24,12 @@
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
+#include <asm/hap.h>
/* Xen command-line option to enable hardware-assisted paging */
int opt_hap_enabled = 0;
boolean_param("hap", opt_hap_enabled);
+int hap_capable_system = 0;
/* Printouts */
#define PAGING_PRINTK(_f, _a...) \
@@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d)
{
p2m_init(d);
shadow_domain_init(d);
+
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_domain_init(d);
}
/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
- shadow_vcpu_init(v);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) )
+ hap_vcpu_init(v);
+ else
+ shadow_vcpu_init(v);
}
@@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
XEN_GUEST_HANDLE(void) u_domctl)
{
/* Here, dispatch domctl to the appropriate paging code */
- return shadow_domctl(d, sc, u_domctl);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ return hap_domctl(d, sc, u_domctl);
+ else
+ return shadow_domctl(d, sc, u_domctl);
}
/* Call when destroying a domain */
void paging_teardown(struct domain *d)
{
- shadow_teardown(d);
- /* Call other modes' teardown code here */
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_teardown(d);
+ else
+ shadow_teardown(d);
}
/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
- shadow_teardown(d);
- /* Call other modes' final teardown code here */
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_final_teardown(d);
+ else
+ shadow_final_teardown(d);
}
/* Enable an arbitrary paging-assistance mode. Call once at domain
* creation. */
int paging_enable(struct domain *d, u32 mode)
{
- if ( mode & PG_SH_enable )
- return shadow_enable(d, mode);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ return hap_enable(d, mode | PG_HAP_enable);
else
- /* No other modes supported yet */
- return -EINVAL;
+ return shadow_enable(d, mode | PG_SH_enable);
}
/* Print paging-assistance info to the console */
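Every dispatch point in paging.c above repeats the same three-part test. A hypothetical helper (hap_enabled_for() is not part of the patch) shows the predicate in one place; the stubs merely stand in for the real Xen declarations:

/* Hypothetical helper, not part of the patch: the predicate paging.c
 * repeats to choose the hap path over the shadow path. */
struct domain;                           /* opaque stand-in */
extern int opt_hap_enabled;              /* "hap" boot parameter */
extern int hap_capable_system;           /* set by svm_npt_detect() */
static int is_hvm_domain(struct domain *d) { (void)d; return 1; } /* stub */

static int hap_enabled_for(struct domain *d)
{
    return opt_hap_enabled && hap_capable_system && is_hvm_domain(d);
}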
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 9aaf92cf10..400514f241 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
#if GUEST_PAGING_LEVELS == 2
-#include "page-guest32.h"
+#include "../page-guest32.h"
#define GUEST_L1_PAGETABLE_ENTRIES 1024
#define GUEST_L2_PAGETABLE_ENTRIES 1024
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index c7618fbf3a..94791f7a80 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -104,6 +104,21 @@ struct shadow_vcpu {
};
/************************************************/
+/* hardware assisted paging */
+/************************************************/
+struct hap_domain {
+ spinlock_t lock;
+ int locker;
+ const char *locker_function;
+
+ struct list_head freelists;
+ struct list_head p2m_freelist;
+ unsigned int total_pages; /* number of pages allocated */
+ unsigned int free_pages; /* number of pages on freelists */
+ unsigned int p2m_pages; /* number of pages allocated to p2m */
+};
+
+/************************************************/
/* p2m handling */
/************************************************/
@@ -135,6 +150,7 @@ struct paging_domain {
struct shadow_domain shadow;
/* Other paging assistance code will have structs here */
+ struct hap_domain hap;
};
struct paging_vcpu {
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
new file mode 100644
index 0000000000..9c070f6fa8
--- /dev/null
+++ b/xen/include/asm-x86/hap.h
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * include/asm-x86/hap.h
+ *
+ * hardware-assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_HAP_H
+#define _XEN_HAP_H
+
+#define HERE_I_AM \
+ debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__)
+#define HAP_PRINTK(_f, _a...) \
+ debugtrace_printk("hap: %s(): " _f, __func__, ##_a)
+#define HAP_ERROR(_f, _a...) \
+ printk("hap error: %s(): " _f, __func__, ##_a)
+
+/************************************************/
+/* hap domain page mapping */
+/************************************************/
+static inline void *
+hap_map_domain_page(mfn_t mfn)
+{
+ return map_domain_page(mfn_x(mfn));
+}
+
+static inline void
+hap_unmap_domain_page(void *p)
+{
+ unmap_domain_page(p);
+}
+
+static inline void *
+hap_map_domain_page_global(mfn_t mfn)
+{
+ return map_domain_page_global(mfn_x(mfn));
+}
+
+static inline void
+hap_unmap_domain_page_global(void *p)
+{
+ unmap_domain_page_global(p);
+}
+
+/************************************************/
+/* locking for hap code */
+/************************************************/
+#define hap_lock_init(_d) \
+ do { \
+ spin_lock_init(&(_d)->arch.paging.hap.lock); \
+ (_d)->arch.paging.hap.locker = -1; \
+ (_d)->arch.paging.hap.locker_function = "nobody"; \
+ } while (0)
+
+#define hap_locked_by_me(_d) \
+ (current->processor == (_d)->arch.paging.hap.locker)
+
+#define hap_lock(_d) \
+ do { \
+ if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\
+ { \
+ printk("Error: hap lock held by %s\n", \
+ (_d)->arch.paging.hap.locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_d)->arch.paging.hap.lock); \
+ ASSERT((_d)->arch.paging.hap.locker == -1); \
+ (_d)->arch.paging.hap.locker = current->processor; \
+ (_d)->arch.paging.hap.locker_function = __func__; \
+ } while (0)
+
+#define hap_unlock(_d) \
+ do { \
+ ASSERT((_d)->arch.paging.hap.locker == current->processor); \
+ (_d)->arch.paging.hap.locker = -1; \
+ (_d)->arch.paging.hap.locker_function = "nobody"; \
+ spin_unlock(&(_d)->arch.paging.hap.lock); \
+ } while (0)
+
+/************************************************/
+/* hap domain level functions */
+/************************************************/
+void hap_domain_init(struct domain *d);
+int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(void) u_domctl);
+int hap_enable(struct domain *d, u32 mode);
+void hap_final_teardown(struct domain *d);
+void hap_teardown(struct domain *d);
+void hap_vcpu_init(struct vcpu *v);
+
+extern struct paging_mode hap_paging_real_mode;
+extern struct paging_mode hap_paging_protected_mode;
+extern struct paging_mode hap_paging_pae_mode;
+extern struct paging_mode hap_paging_long_mode;
+#endif /* _XEN_HAP_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */