author     tdeegan@york.uk.xensource.com <tdeegan@york.uk.xensource.com>   2006-08-16 17:02:35 +0100
committer  tdeegan@york.uk.xensource.com <tdeegan@york.uk.xensource.com>   2006-08-16 17:02:35 +0100
commit     49f7c7364e0acbc44604e0315599782096eee522 (patch)
tree       b9dcdab90c9598f12d5559edc96efdaf7afd0da3 /xen/include
parent     e3e351919cc62f3cdabd8cef9b3a6be9ab4f13dd (diff)
Replace shadow pagetable code with shadow2.
Diffstat (limited to 'xen/include')
-rw-r--r--  xen/include/asm-x86/bitops.h              |   18
-rw-r--r--  xen/include/asm-x86/config.h              |   20
-rw-r--r--  xen/include/asm-x86/domain.h              |   93
-rw-r--r--  xen/include/asm-x86/grant_table.h         |    2
-rw-r--r--  xen/include/asm-x86/hvm/hvm.h             |   25
-rw-r--r--  xen/include/asm-x86/hvm/support.h         |   11
-rw-r--r--  xen/include/asm-x86/hvm/vcpu.h            |    6
-rw-r--r--  xen/include/asm-x86/hvm/vmx/vmcs.h        |    1
-rw-r--r--  xen/include/asm-x86/hvm/vmx/vmx.h         |   49
-rw-r--r--  xen/include/asm-x86/mm.h                  |  136
-rw-r--r--  xen/include/asm-x86/msr.h                 |    4
-rw-r--r--  xen/include/asm-x86/page-guest32.h        |    7
-rw-r--r--  xen/include/asm-x86/page.h                |   37
-rw-r--r--  xen/include/asm-x86/perfc_defn.h          |   53
-rw-r--r--  xen/include/asm-x86/processor.h           |    1
-rw-r--r--  xen/include/asm-x86/shadow.h              | 1791
-rw-r--r--  xen/include/asm-x86/shadow2-multi.h       |  116
-rw-r--r--  xen/include/asm-x86/shadow2-private.h     |  612
-rw-r--r--  xen/include/asm-x86/shadow2-types.h       |  705
-rw-r--r--  xen/include/asm-x86/shadow2.h             |  627
-rw-r--r--  xen/include/asm-x86/shadow_64.h           |  587
-rw-r--r--  xen/include/asm-x86/shadow_ops.h          |  138
-rw-r--r--  xen/include/asm-x86/shadow_public.h       |   61
-rw-r--r--  xen/include/asm-x86/x86_32/page-2level.h  |    1
-rw-r--r--  xen/include/asm-x86/x86_32/page-3level.h  |    3
-rw-r--r--  xen/include/asm-x86/x86_64/page.h         |    5
-rw-r--r--  xen/include/public/dom0_ops.h             |   16
-rw-r--r--  xen/include/xen/domain_page.h             |   13
-rw-r--r--  xen/include/xen/lib.h                     |    4
-rw-r--r--  xen/include/xen/list.h                    |   10
-rw-r--r--  xen/include/xen/sched.h                   |    5
31 files changed, 2486 insertions, 2671 deletions
diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h
index b2ee953361..b9fd2557d0 100644
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -75,6 +75,24 @@ static __inline__ void clear_bit(int nr, volatile void * addr)
:"=m" (ADDR)
:"dIr" (nr));
}
+
+/**
+ * __clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * Unlike clear_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __clear_bit(int nr, volatile void * addr)
+{
+ __asm__(
+ "btrl %1,%0"
+ :"=m" (ADDR)
+ :"dIr" (nr));
+}
+
#define smp_mb__before_clear_bit() barrier()
#define smp_mb__after_clear_bit() barrier()
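The new __clear_bit() is the non-atomic counterpart of clear_bit(): it drops the LOCK prefix, so it is only safe when the caller already serialises access to the bitmap, for example under a spinlock. A minimal usage sketch; the lock and bitmap below are illustrative, not part of this patch:

static spinlock_t evt_lock = SPIN_LOCK_UNLOCKED;   /* illustrative lock   */
static unsigned long evt_pending[4];               /* illustrative bitmap */

static void ack_event(int nr)
{
    spin_lock(&evt_lock);
    if ( test_bit(nr, evt_pending) )
        __clear_bit(nr, evt_pending);   /* the lock already orders accesses */
    spin_unlock(&evt_lock);
}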
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 99c74cf5ad..74a123de6f 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -79,9 +79,14 @@
#ifndef __ASSEMBLY__
extern unsigned long _end; /* standard ELF symbol */
-#endif /* __ASSEMBLY__ */
-#define FORCE_CRASH() __asm__ __volatile__ ( "ud2" )
+static inline void FORCE_CRASH(void) __attribute__((noreturn,always_inline));
+static inline void FORCE_CRASH(void)
+{
+ __asm__ __volatile__ ( "ud2" );
+ while(1);
+}
+#endif /* __ASSEMBLY__ */
#if defined(__x86_64__)
@@ -149,9 +154,14 @@ extern unsigned long _end; /* standard ELF symbol */
/* Slot 256: read-only guest-accessible machine-to-phys translation table. */
#define RO_MPT_VIRT_START (PML4_ADDR(256))
#define RO_MPT_VIRT_END (RO_MPT_VIRT_START + PML4_ENTRY_BYTES/2)
+

+// currently unused?
+#if 0
/* Slot 257: read-only guest-accessible linear page table. */
#define RO_LINEAR_PT_VIRT_START (PML4_ADDR(257))
#define RO_LINEAR_PT_VIRT_END (RO_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
+#endif
+
/* Slot 258: linear page table (guest table). */
#define LINEAR_PT_VIRT_START (PML4_ADDR(258))
#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
@@ -175,7 +185,7 @@ extern unsigned long _end; /* standard ELF symbol */
#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
-#define PGT_base_page_table PGT_l4_page_table
+#define PGT_base_page_table PGT_l4_page_table
#define __HYPERVISOR_CS64 0xe010
#define __HYPERVISOR_CS32 0xe008
@@ -274,9 +284,9 @@ extern unsigned long _end; /* standard ELF symbol */
(L2_PAGETABLE_LAST_XEN_SLOT - L2_PAGETABLE_FIRST_XEN_SLOT + 1)
#ifdef CONFIG_X86_PAE
-# define PGT_base_page_table PGT_l3_page_table
+# define PGT_base_page_table PGT_l3_page_table
#else
-# define PGT_base_page_table PGT_l2_page_table
+# define PGT_base_page_table PGT_l2_page_table
#endif
#define __HYPERVISOR_CS 0xe008
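Turning FORCE_CRASH() from a bare asm statement into a noreturn, always_inline function tells the compiler that control never continues past it, which documents the intent and avoids spurious "control reaches end of non-void function" warnings. An illustrative caller only, not part of the patch:

static int frob_or_die(int x)
{
    if ( x >= 0 )
        return x * 2;
    FORCE_CRASH();   /* noreturn: no dummy return value needed below */
}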
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index a0efe89f0a..2ef0775795 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -73,42 +73,42 @@ struct arch_domain
/* I/O-port admin-specified access capabilities. */
struct rangeset *ioport_caps;
- /* Shadow mode status and controls. */
- struct shadow_ops *ops;
- unsigned int shadow_mode; /* flags to control shadow table operation */
- unsigned int shadow_nest; /* Recursive depth of shadow_lock() nesting */
-
- /* shadow hashtable */
- struct shadow_status *shadow_ht;
- struct shadow_status *shadow_ht_free;
- struct shadow_status *shadow_ht_extras; /* extra allocation units */
- unsigned int shadow_extras_count;
-
- /* shadow dirty bitmap */
+ /* HVM stuff */
+ struct hvm_domain hvm_domain;
+
+ /* Shadow-translated guest: Pseudophys base address of reserved area. */
+ unsigned long first_reserved_pfn;
+
+ /* Shadow2 stuff */
+ u32 shadow2_mode; /* flags to control shadow operation */
+ spinlock_t shadow2_lock; /* shadow2 domain lock */
+ int shadow2_locker; /* processor which holds the lock */
+ const char *shadow2_locker_function; /* Func that took it */
+ struct list_head shadow2_freelists[SHADOW2_MAX_ORDER + 1];
+ struct list_head shadow2_p2m_freelist;
+ struct list_head shadow2_p2m_inuse;
+ struct list_head shadow2_toplevel_shadows;
+ unsigned int shadow2_total_pages; /* number of pages allocated */
+ unsigned int shadow2_free_pages; /* number of pages on freelists */
+ unsigned int shadow2_p2m_pages; /* number of pages in p2m map */
+
+ /* Shadow2 hashtable */
+ struct shadow2_hash_entry *shadow2_hash_table;
+ struct shadow2_hash_entry *shadow2_hash_freelist;
+ struct shadow2_hash_entry *shadow2_hash_allocations;
+ int shadow2_hash_walking; /* Some function is walking the hash table */
+
+ /* Shadow log-dirty bitmap */
unsigned long *shadow_dirty_bitmap;
unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */
- /* shadow mode stats */
- unsigned int shadow_page_count;
- unsigned int hl2_page_count;
- unsigned int snapshot_page_count;
-
+ /* Shadow log-dirty mode stats */
unsigned int shadow_fault_count;
unsigned int shadow_dirty_count;
- /* full shadow mode */
- struct out_of_sync_entry *out_of_sync; /* list of out-of-sync pages */
- struct out_of_sync_entry *out_of_sync_free;
- struct out_of_sync_entry *out_of_sync_extras;
- unsigned int out_of_sync_extras_count;
+ /* Shadow translated domain: P2M mapping */
+ pagetable_t phys_table;
- struct list_head free_shadow_frames;
-
- pagetable_t phys_table; /* guest 1:1 pagetable */
- struct hvm_domain hvm_domain;
-
- /* Shadow-translated guest: Pseudophys base address of reserved area. */
- unsigned long first_reserved_pfn;
} __cacheline_aligned;
#ifdef CONFIG_X86_PAE
@@ -166,25 +166,34 @@ struct arch_vcpu
*/
l1_pgentry_t *perdomain_ptes;
- pagetable_t guest_table_user; /* x86/64: user-space pagetable. */
- pagetable_t guest_table; /* (MA) guest notion of cr3 */
- pagetable_t shadow_table; /* (MA) shadow of guest */
- pagetable_t monitor_table; /* (MA) used in hypervisor */
-
- l2_pgentry_t *guest_vtable; /* virtual address of pagetable */
- l2_pgentry_t *shadow_vtable; /* virtual address of shadow_table */
- l2_pgentry_t *monitor_vtable; /* virtual address of monitor_table */
- l1_pgentry_t *hl2_vtable; /* virtual address of hl2_table */
-
#ifdef CONFIG_X86_64
- l3_pgentry_t *guest_vl3table;
- l4_pgentry_t *guest_vl4table;
+ pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */
#endif
+ pagetable_t guest_table; /* (MFN) guest notion of cr3 */
+ /* guest_table holds a ref to the page, and also a type-count unless
+ * shadow refcounts are in use */
+ pagetable_t shadow_table; /* (MFN) shadow of guest */
+ pagetable_t monitor_table; /* (MFN) hypervisor PT (for HVM) */
+ unsigned long cr3; /* (MA) value to install in HW CR3 */
- unsigned long monitor_shadow_ref;
+ void *guest_vtable; /* virtual address of pagetable */
+ void *shadow_vtable; /* virtual address of shadow_table */
+ root_pgentry_t *monitor_vtable; /* virtual address of monitor_table */
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
+
+ /* Shadow2 stuff */
+ /* -- pointers to mode-specific entry points */
+ struct shadow2_entry_points *shadow2;
+ unsigned long last_emulated_mfn; /* last mfn we emulated a write to */
+ u8 shadow2_propagate_fault; /* emulated fault needs to be */
+ /* propagated to guest */
+#if CONFIG_PAGING_LEVELS >= 3
+ u8 shadow2_pae_flip_pending; /* shadow update requires this PAE cpu
+ * to recopy/install its L3 table.
+ */
+#endif
} __cacheline_aligned;
/* shorthands to improve code legibility */
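The shadow2_lock / shadow2_locker / shadow2_locker_function trio replaces the old BIGLOCK-based shadow_nest scheme with a dedicated spinlock that records which CPU took it and from where, making recursive-locking bugs loud instead of silent. A hedged sketch of how such macros typically look; the real shadow2_lock()/shadow2_unlock() definitions live in the new shadow2-private.h and may differ in detail:

#define example_shadow2_lock(d)                                     \
    do {                                                            \
        ASSERT((d)->arch.shadow2_locker != current->processor);     \
        spin_lock(&(d)->arch.shadow2_lock);                         \
        (d)->arch.shadow2_locker = current->processor;              \
        (d)->arch.shadow2_locker_function = __func__;               \
    } while (0)

#define example_shadow2_unlock(d)                                   \
    do {                                                            \
        ASSERT((d)->arch.shadow2_locker == current->processor);     \
        (d)->arch.shadow2_locker = -1;                              \
        (d)->arch.shadow2_locker_function = "nobody";               \
        spin_unlock(&(d)->arch.shadow2_lock);                       \
    } while (0)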
diff --git a/xen/include/asm-x86/grant_table.h b/xen/include/asm-x86/grant_table.h
index 5c6600ac7e..277b93ca0c 100644
--- a/xen/include/asm-x86/grant_table.h
+++ b/xen/include/asm-x86/grant_table.h
@@ -31,7 +31,7 @@ int destroy_grant_host_mapping(
#define gnttab_shared_gmfn(d, t, i) \
(mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
-#define gnttab_log_dirty(d, f) mark_dirty((d), (f))
+#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr)
{
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 73f3b31275..cb573e5d9c 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -56,9 +56,16 @@ struct hvm_function_table {
*/
int (*realmode)(struct vcpu *v);
int (*paging_enabled)(struct vcpu *v);
+ int (*long_mode_enabled)(struct vcpu *v);
+ int (*guest_x86_mode)(struct vcpu *v);
int (*instruction_length)(struct vcpu *v);
unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
+ /*
+ * Re-set the value of CR3 that Xen runs on when handling VM exits
+ */
+ void (*update_host_cr3)(struct vcpu *v);
+
/*
* Update specifics of the guest state:
* 1) TS bit in guest cr0
@@ -134,11 +141,29 @@ hvm_paging_enabled(struct vcpu *v)
}
static inline int
+hvm_long_mode_enabled(struct vcpu *v)
+{
+ return hvm_funcs.long_mode_enabled(v);
+}
+
+static inline int
+hvm_guest_x86_mode(struct vcpu *v)
+{
+ return hvm_funcs.guest_x86_mode(v);
+}
+
+static inline int
hvm_instruction_length(struct vcpu *v)
{
return hvm_funcs.instruction_length(v);
}
+static inline void
+hvm_update_host_cr3(struct vcpu *v)
+{
+ hvm_funcs.update_host_cr3(v);
+}
+
void hvm_hypercall_page_initialise(struct domain *d,
void *hypercall_page);
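Each HVM implementation supplies the new long_mode_enabled, guest_x86_mode and update_host_cr3 hooks through its hvm_function_table; the VMX variants appear later in this patch (vmx.h). Roughly how the wiring would look, shown only as an illustration since the real table is populated in vmx.c, outside this diff:

struct hvm_function_table hvm_funcs = {
    .realmode          = vmx_realmode,
    .paging_enabled    = vmx_paging_enabled,
    .long_mode_enabled = vmx_long_mode_enabled,   /* new in this patch */
    .guest_x86_mode    = vmx_guest_x86_mode,      /* new in this patch */
    .update_host_cr3   = vmx_update_host_cr3,     /* new in this patch */
    /* ... remaining hooks as before ... */
};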
diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
index 35a0bfe464..6ccfdee678 100644
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -116,10 +116,13 @@ enum hval_bitmaps {
#define DBG_LEVEL_IOAPIC (1 << 9)
extern unsigned int opt_hvm_debug_level;
-#define HVM_DBG_LOG(level, _f, _a...) \
- if ( (level) & opt_hvm_debug_level ) \
- printk("[HVM:%d.%d] <%s> " _f "\n", \
- current->domain->domain_id, current->vcpu_id, __func__, ## _a)
+#define HVM_DBG_LOG(level, _f, _a...) \
+ do { \
+ if ( (level) & opt_hvm_debug_level ) \
+ printk("[HVM:%d.%d] <%s> " _f "\n", \
+ current->domain->domain_id, current->vcpu_id, __func__, \
+ ## _a); \
+ } while (0)
#else
#define HVM_DBG_LOG(level, _f, _a...)
#endif
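Wrapping HVM_DBG_LOG() in do { ... } while (0) makes the macro expand to a single statement, so it composes safely with an if/else in the caller; with the old bare-if form, the 'else' below would silently bind to the if inside the macro. Illustrative fragment (rc is an assumed local):

if ( rc != 0 )
    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic write failed: %d", rc);
else
    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic write ok");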
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index f89b6ad787..b607a4578b 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -29,6 +29,7 @@
#define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1
struct hvm_vcpu {
+ unsigned long hw_cr3; /* value we give to HW to use */
unsigned long ioflags;
struct hvm_io_op io_op;
struct vlapic *vlapic;
@@ -40,6 +41,11 @@ struct hvm_vcpu {
int xen_port;
+#if CONFIG_PAGING_LEVELS >= 3
+ l3_pgentry_t hvm_lowmem_l3tab[4]
+ __attribute__((__aligned__(32)));
+#endif
+
/* Flags */
int flag_dr_dirty;
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 85ee7046fd..524411be34 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -87,6 +87,7 @@ struct arch_vmx_struct {
unsigned long cpu_cr0; /* copy of guest CR0 */
unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
+ unsigned long cpu_shadow_cr4; /* copy of guest read shadow CR4 */
unsigned long cpu_cr2; /* save CR2 */
unsigned long cpu_cr3;
unsigned long cpu_state;
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 38ae0e3b0f..38e447259c 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -298,6 +298,9 @@ static always_inline void __vmwrite_vcpu(
case GUEST_CR0:
v->arch.hvm_vmx.cpu_cr0 = value;
break;
+ case CR4_READ_SHADOW:
+ v->arch.hvm_vmx.cpu_shadow_cr4 = value;
+ break;
case CPU_BASED_VM_EXEC_CONTROL:
v->arch.hvm_vmx.cpu_based_exec_control = value;
break;
@@ -317,11 +320,14 @@ static always_inline void __vmread_vcpu(
case GUEST_CR0:
*value = v->arch.hvm_vmx.cpu_cr0;
break;
+ case CR4_READ_SHADOW:
+ *value = v->arch.hvm_vmx.cpu_shadow_cr4;
+ break;
case CPU_BASED_VM_EXEC_CONTROL:
*value = v->arch.hvm_vmx.cpu_based_exec_control;
break;
default:
- printk("__vmread_cpu: invalid field %lx\n", field);
+ printk("__vmread_vcpu: invalid field %lx\n", field);
break;
}
}
@@ -342,6 +348,7 @@ static inline int __vmwrite(unsigned long field, unsigned long value)
switch ( field ) {
case CR0_READ_SHADOW:
case GUEST_CR0:
+ case CR4_READ_SHADOW:
case CPU_BASED_VM_EXEC_CONTROL:
__vmwrite_vcpu(v, field, value);
break;
@@ -404,6 +411,46 @@ static inline int vmx_paging_enabled(struct vcpu *v)
return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
}
+/* Works only for vcpu == current */
+static inline int vmx_long_mode_enabled(struct vcpu *v)
+{
+ ASSERT(v == current);
+ return VMX_LONG_GUEST(current);
+}
+
+/* Works only for vcpu == current */
+static inline int vmx_realmode(struct vcpu *v)
+{
+ unsigned long rflags;
+ ASSERT(v == current);
+
+ __vmread(GUEST_RFLAGS, &rflags);
+ return rflags & X86_EFLAGS_VM;
+}
+
+/* Works only for vcpu == current */
+static inline void vmx_update_host_cr3(struct vcpu *v)
+{
+ ASSERT(v == current);
+ __vmwrite(HOST_CR3, v->arch.cr3);
+}
+
+static inline int vmx_guest_x86_mode(struct vcpu *v)
+{
+ unsigned long cs_ar_bytes;
+ ASSERT(v == current);
+
+ if ( vmx_long_mode_enabled(v) )
+ {
+ __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes);
+ return (cs_ar_bytes & (1u<<13)) ? 8 : 4;
+ }
+ if ( vmx_realmode(v) )
+ return 2;
+ __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes);
+ return (cs_ar_bytes & (1u<<14)) ? 4 : 2;
+}
+
static inline int vmx_pgbit_test(struct vcpu *v)
{
unsigned long cr0;
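vmx_guest_x86_mode() decodes the VMCS CS access-rights field: bit 13 is the 'L' (64-bit code segment) bit, only meaningful in long mode, and bit 14 is the 'D/B' default operand-size bit, so the return values 2, 4 and 8 stand for 16-bit (real/vm86), 32-bit and 64-bit execution respectively. An illustrative caller; the handle_insn_*() names are hypothetical:

switch ( hvm_guest_x86_mode(current) )
{
case 8:  handle_insn_64(regs); break;   /* long mode, CS.L = 1        */
case 4:  handle_insn_32(regs); break;   /* protected mode, CS.D/B = 1 */
default: handle_insn_16(regs); break;   /* real/vm86 or 16-bit CS     */
}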
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 06ea598754..0b19fbe7ec 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -20,7 +20,11 @@
struct page_info
{
/* Each frame can be threaded onto a doubly-linked list. */
- struct list_head list;
+ union {
+ struct list_head list;
+ /* Shadow2 uses this field as an up-pointer in lower-level shadows */
+ paddr_t up;
+ };
/* Reference count and various PGC_xxx flags and fields. */
u32 count_info;
@@ -46,8 +50,20 @@ struct page_info
} u;
- /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
- u32 tlbflush_timestamp;
+ union {
+ /* Timestamp from 'TLB clock', used to reduce need for safety
+ * flushes. Only valid on a) free pages, and b) guest pages with a
+ * zero type count. */
+ u32 tlbflush_timestamp;
+
+ /* Only used on guest pages with a shadow.
+ * Guest pages with a shadow must have a non-zero type count, so this
+ * does not conflict with the tlbflush timestamp. */
+ u32 shadow2_flags;
+
+ // XXX -- we expect to add another field here, to be used for min/max
+ // purposes, which is only used for shadow pages.
+ };
};
/* The following page types are MUTUALLY EXCLUSIVE. */
@@ -60,6 +76,7 @@ struct page_info
#define PGT_ldt_page (6U<<29) /* using this page in an LDT? */
#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */
+#ifndef SHADOW2
#define PGT_l1_shadow PGT_l1_page_table
#define PGT_l2_shadow PGT_l2_page_table
#define PGT_l3_shadow PGT_l3_page_table
@@ -69,14 +86,16 @@ struct page_info
#define PGT_writable_pred (7U<<29) /* predicted gpfn with writable ref */
#define PGT_fl1_shadow (5U<<29)
+#endif
+
#define PGT_type_mask (7U<<29) /* Bits 29-31. */
- /* Has this page been validated for use as its current type? */
-#define _PGT_validated 28
-#define PGT_validated (1U<<_PGT_validated)
/* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned 27
+#define _PGT_pinned 28
#define PGT_pinned (1U<<_PGT_pinned)
+ /* Has this page been validated for use as its current type? */
+#define _PGT_validated 27
+#define PGT_validated (1U<<_PGT_validated)
#if defined(__i386__)
/* The 11 most significant bits of virt address if this is a page table. */
#define PGT_va_shift 16
@@ -98,6 +117,7 @@ struct page_info
/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1U<<16)-1)
+#ifndef SHADOW2
#ifdef __x86_64__
#define PGT_high_mfn_shift 52
#define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift)
@@ -112,19 +132,53 @@ struct page_info
#define PGT_score_shift 23
#define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift)
#endif
+#endif /* SHADOW2 */
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
#define PGC_allocated (1U<<_PGC_allocated)
- /* Set when fullshadow mode marks a page out-of-sync */
+ /* Set on a *guest* page to mark it out-of-sync with its shadow */
#define _PGC_out_of_sync 30
#define PGC_out_of_sync (1U<<_PGC_out_of_sync)
- /* Set when fullshadow mode is using a page as a page table */
+ /* Set when a page is in use as a page table */
#define _PGC_page_table 29
#define PGC_page_table (1U<<_PGC_page_table)
/* 29-bit count of references to this frame. */
#define PGC_count_mask ((1U<<29)-1)
+/* shadow2 uses the count_info on shadow pages somewhat differently */
+/* NB: please coordinate any changes here with the SH2F's in shadow2.h */
+#define PGC_SH2_none (0U<<28) /* on the shadow2 free list */
+#define PGC_SH2_min_shadow (1U<<28)
+#define PGC_SH2_l1_32_shadow (1U<<28) /* shadowing a 32-bit L1 guest page */
+#define PGC_SH2_fl1_32_shadow (2U<<28) /* L1 shadow for a 32b 4M superpage */
+#define PGC_SH2_l2_32_shadow (3U<<28) /* shadowing a 32-bit L2 guest page */
+#define PGC_SH2_l1_pae_shadow (4U<<28) /* shadowing a pae L1 page */
+#define PGC_SH2_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
+#define PGC_SH2_l2_pae_shadow (6U<<28) /* shadowing a pae L2-low page */
+#define PGC_SH2_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
+#define PGC_SH2_l3_pae_shadow (8U<<28) /* shadowing a pae L3 page */
+#define PGC_SH2_l1_64_shadow (9U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH2_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH2_l2_64_shadow (11U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH2_l3_64_shadow (12U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH2_l4_64_shadow (13U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH2_max_shadow (13U<<28)
+#define PGC_SH2_p2m_table (14U<<28) /* in use as the p2m table */
+#define PGC_SH2_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH2_unused (15U<<28)
+
+#define PGC_SH2_type_mask (15U<<28)
+#define PGC_SH2_type_shift 28
+
+#define PGC_SH2_pinned (1U<<27)
+
+#define _PGC_SH2_log_dirty 26
+#define PGC_SH2_log_dirty (1U<<26)
+
+/* 26 bit ref count for shadow pages */
+#define PGC_SH2_count_mask ((1U<<26) - 1)
+
/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page) (1)
#define PageSetSlab(page) ((void)0)
@@ -134,16 +188,24 @@ struct page_info
#if defined(__i386__)
#define pickle_domptr(_d) ((u32)(unsigned long)(_d))
-#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
+static inline struct domain *unpickle_domptr(u32 _domain)
+{ return (_domain & 1) ? NULL : (void *)_domain; }
#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
-{ return (_domain == 0) ? NULL : __va(_domain); }
+{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#define PRtype_info "016lx"/* should only be used for printk's */
#endif
+/* The order of the largest allocation unit we use for shadow pages */
+#if CONFIG_PAGING_LEVELS == 2
+#define SHADOW2_MAX_ORDER 0 /* Only ever need 4k allocations */
+#else
+#define SHADOW2_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+#endif
+
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
@@ -165,7 +227,7 @@ extern void invalidate_shadow_ldt(struct vcpu *d);
extern int shadow_remove_all_write_access(
struct domain *d, unsigned long gmfn, unsigned long mfn);
extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
-extern int _shadow_mode_refcounts(struct domain *d);
+extern int _shadow2_mode_refcounts(struct domain *d);
static inline void put_page(struct page_info *page)
{
@@ -197,8 +259,8 @@ static inline int get_page(struct page_info *page,
unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
unlikely(d != _domain) ) /* Wrong owner? */
{
- if ( !_shadow_mode_refcounts(domain) )
- DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
+ if ( !_shadow2_mode_refcounts(domain) )
+ DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
PRtype_info "\n",
page_to_mfn(page), domain, unpickle_domptr(d),
x, page->u.inuse.type_info);
@@ -254,6 +316,16 @@ static inline int page_is_removable(struct page_info *page)
ASSERT(((_p)->count_info & PGC_count_mask) != 0); \
ASSERT(page_get_owner(_p) == (_d))
+// Quick test for whether a given page can be represented directly in CR3.
+//
+#if CONFIG_PAGING_LEVELS == 3
+#define MFN_FITS_IN_CR3(_MFN) !(mfn_x(_MFN) >> 20)
+
+/* returns a lowmem machine address of the copied L3 root table */
+unsigned long
+pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab);
+#endif /* CONFIG_PAGING_LEVELS == 3 */
+
int check_descriptor(struct desc_struct *d);
/*
@@ -271,29 +343,44 @@ int check_descriptor(struct desc_struct *d);
#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)])
+
+#define mfn_to_gmfn(_d, mfn) \
+ ( (shadow2_mode_translate(_d)) \
+ ? get_gpfn_from_mfn(mfn) \
+ : (mfn) )
+
+#define gmfn_to_mfn(_d, gpfn) mfn_x(sh2_gfn_to_mfn(_d, gpfn))
+
+
/*
* The phys_to_machine_mapping is the reversed mapping of MPT for full
* virtualization. It is only used by shadow_mode_translate()==true
* guests, so we steal the address space that would have normally
* been used by the read-only MPT map.
*/
-#define phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
-#define NR_P2M_TABLE_ENTRIES ((unsigned long *)RO_MPT_VIRT_END \
- - phys_to_machine_mapping)
+#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
#define INVALID_MFN (~0UL)
#define VALID_MFN(_mfn) (!((_mfn) & (1U<<31)))
-#define set_mfn_from_gpfn(pfn, mfn) (phys_to_machine_mapping[(pfn)] = (mfn))
static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
{
- unsigned long mfn;
+ l1_pgentry_t l1e = l1e_empty();
+ int ret;
+
+#if CONFIG_PAGING_LEVELS > 2
+ if ( pfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t) )
+ /* This pfn is higher than the p2m map can hold */
+ return INVALID_MFN;
+#endif
+
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[pfn],
+ sizeof(l1e));
- if ( unlikely(pfn >= NR_P2M_TABLE_ENTRIES) ||
- unlikely(__copy_from_user(&mfn, &phys_to_machine_mapping[pfn],
- sizeof(mfn))) )
- mfn = INVALID_MFN;
+ if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
+ return l1e_get_pfn(l1e);
- return mfn;
+ return INVALID_MFN;
}
#ifdef MEMORY_GUARD
@@ -333,6 +420,7 @@ void audit_domains(void);
#endif
int new_guest_cr3(unsigned long pfn);
+void make_cr3(struct vcpu *v, unsigned long mfn);
void propagate_page_fault(unsigned long addr, u16 error_code);
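get_mfn_from_gpfn() now reads the p2m table as an array of l1_pgentry_t and returns INVALID_MFN unless the entry is present, so callers can no longer treat the raw array contents as MFNs. A hedged usage sketch; map_guest_frame() is a hypothetical helper, not part of this patch:

static void *map_guest_frame(unsigned long gpfn)
{
    unsigned long mfn = get_mfn_from_gpfn(gpfn);

    if ( mfn == INVALID_MFN )
        return NULL;                 /* no machine frame behind this gpfn */

    return map_domain_page(mfn);     /* caller must unmap_domain_page()   */
}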
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index f1c08cf500..07c09b2ae2 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -112,6 +112,10 @@ static inline void wrmsrl(unsigned int msr, __u64 val)
#define MSR_IA32_VMX_EXIT_CTLS_MSR 0x483
#define MSR_IA32_VMX_ENTRY_CTLS_MSR 0x484
#define MSR_IA32_VMX_MISC_MSR 0x485
+#define MSR_IA32_VMX_CR0_FIXED0 0x486
+#define MSR_IA32_VMX_CR0_FIXED1 0x487
+#define MSR_IA32_VMX_CR4_FIXED0 0x488
+#define MSR_IA32_VMX_CR4_FIXED1 0x489
#define IA32_FEATURE_CONTROL_MSR 0x3a
#define IA32_FEATURE_CONTROL_MSR_LOCK 0x1
#define IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON 0x4
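The CR0/CR4 FIXED0/FIXED1 MSRs report which control-register bits must be 1 (FIXED0) and which are allowed to be 1 (FIXED1) while VMX is active; a proposed value is clamped to the legal range with (val | fixed0) & fixed1. A hedged sketch; vmx_clamp_cr4() is an illustrative helper, not part of this patch:

static unsigned long vmx_clamp_cr4(unsigned long cr4)
{
    u64 fixed0, fixed1;

    rdmsrl(MSR_IA32_VMX_CR4_FIXED0, fixed0);   /* bits that must be set */
    rdmsrl(MSR_IA32_VMX_CR4_FIXED1, fixed1);   /* bits that may be set  */

    return (cr4 | fixed0) & fixed1;
}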
diff --git a/xen/include/asm-x86/page-guest32.h b/xen/include/asm-x86/page-guest32.h
index cf5595b078..e93206169a 100644
--- a/xen/include/asm-x86/page-guest32.h
+++ b/xen/include/asm-x86/page-guest32.h
@@ -89,15 +89,8 @@ static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
#define linear_l1_table_32 \
((l1_pgentry_32_t *)(LINEAR_PT_VIRT_START))
-#define __linear_l2_table_32 \
- ((l2_pgentry_32_t *)(LINEAR_PT_VIRT_START + \
- (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0))))
#define linear_pg_table_32 linear_l1_table_32
-#define linear_l2_table_32(_ed) ((_ed)->arch.guest_vtable)
-
-#define va_to_l1mfn_32(_ed, _va) \
- (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
#endif /* __X86_PAGE_GUEST_H__ */
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 6432402066..94158c7f3d 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -233,26 +233,18 @@ typedef struct { u64 pfn; } pagetable_t;
+ DOMAIN_ENTRIES_PER_L4_PAGETABLE)
#endif
-#define LINEAR_PT_OFFSET (LINEAR_PT_VIRT_START & VADDR_MASK)
-#define linear_l1_table \
- ((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
-#define __linear_l2_table \
- ((l2_pgentry_t *)(LINEAR_PT_VIRT_START + \
- (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0))))
-#define __linear_l3_table \
- ((l3_pgentry_t *)(LINEAR_PT_VIRT_START + \
- (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) + \
- (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1))))
-#define __linear_l4_table \
- ((l4_pgentry_t *)(LINEAR_PT_VIRT_START + \
- (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) + \
- (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1)) + \
- (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<2))))
-
+/* Where to find each level of the linear mapping */
+#define __linear_l1_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
+#define __linear_l2_table \
+ ((l2_pgentry_t *)(__linear_l1_table + l1_linear_offset(LINEAR_PT_VIRT_START)))
+#define __linear_l3_table \
+ ((l3_pgentry_t *)(__linear_l2_table + l2_linear_offset(LINEAR_PT_VIRT_START)))
+#define __linear_l4_table \
+ ((l4_pgentry_t *)(__linear_l3_table + l3_linear_offset(LINEAR_PT_VIRT_START)))
+
+#define linear_l1_table __linear_l1_table
#define linear_pg_table linear_l1_table
-#define linear_l2_table(v) ((v)->arch.guest_vtable)
-#define linear_l3_table(v) ((v)->arch.guest_vl3table)
-#define linear_l4_table(v) ((v)->arch.guest_vl4table)
+#define linear_l2_table(v) ((l2_pgentry_t *)(v)->arch.guest_vtable)
#ifndef __ASSEMBLY__
#if CONFIG_PAGING_LEVELS == 3
@@ -294,6 +286,7 @@ extern void paging_init(void);
#define _PAGE_AVAIL1 0x400U
#define _PAGE_AVAIL2 0x800U
#define _PAGE_AVAIL 0xE00U
+#define _PAGE_PSE_PAT 0x1000U
/*
* Debug option: Ensure that granted mappings are not implicitly unmapped.
@@ -307,9 +300,9 @@ extern void paging_init(void);
#endif
/*
- * Disallow unused flag bits plus PAT, PSE and GLOBAL. Also disallow GNTTAB
- * if we are using it for grant-table debugging. Permit the NX bit if the
- * hardware supports it.
+ * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Also disallow GNTTAB if we are using it for grant-table debugging.
+ * Permit the NX bit if the hardware supports it.
*/
#define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX)
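The rewritten linear-mapping macros build each level on top of the previous one with lN_linear_offset(), so walking the current address space becomes a pair of array lookups. A hedged sketch; va_is_mapped() is a hypothetical helper that checks the L2 entry before touching the L1 slot (the L1 slot is only readable through the linear map when an L1 table is actually present) and ignores superpage mappings for brevity:

static int va_is_mapped(unsigned long va)
{
    l2_pgentry_t l2e = __linear_l2_table[l2_linear_offset(va)];

    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
        return 0;                     /* no L1 table covering this va */

    return !!(l1e_get_flags(__linear_l1_table[l1_linear_offset(va)]) &
              _PAGE_PRESENT);
}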
diff --git a/xen/include/asm-x86/perfc_defn.h b/xen/include/asm-x86/perfc_defn.h
index 54bc01ea7c..d6e24b207d 100644
--- a/xen/include/asm-x86/perfc_defn.h
+++ b/xen/include/asm-x86/perfc_defn.h
@@ -144,4 +144,57 @@ PERFCOUNTER_CPU(remove_write_predicted, "remove_write predict hit&exit")
PERFCOUNTER_CPU(remove_write_bad_prediction, "remove_write bad prediction")
PERFCOUNTER_CPU(update_hl2e_invlpg, "update_hl2e calls invlpg")
+/* Shadow2 counters */
+PERFCOUNTER_CPU(shadow2_alloc, "calls to shadow2_alloc")
+PERFCOUNTER_CPU(shadow2_alloc_tlbflush, "shadow2_alloc flushed TLBs")
+PERFSTATUS(shadow2_alloc_count, "number of shadow pages in use")
+PERFCOUNTER_CPU(shadow2_free, "calls to shadow2_free")
+PERFCOUNTER_CPU(shadow2_prealloc_1, "shadow2 recycles old shadows")
+PERFCOUNTER_CPU(shadow2_prealloc_2, "shadow2 recycles in-use shadows")
+PERFCOUNTER_CPU(shadow2_linear_map_failed, "shadow2 hit read-only linear map")
+PERFCOUNTER_CPU(shadow2_a_update, "shadow2 A bit update")
+PERFCOUNTER_CPU(shadow2_ad_update, "shadow2 A&D bit update")
+PERFCOUNTER_CPU(shadow2_fault, "calls to shadow2_fault")
+PERFCOUNTER_CPU(shadow2_fault_bail_bad_gfn, "shadow2_fault guest bad gfn")
+PERFCOUNTER_CPU(shadow2_fault_bail_not_present,
+ "shadow2_fault guest not-present")
+PERFCOUNTER_CPU(shadow2_fault_bail_nx, "shadow2_fault guest NX fault")
+PERFCOUNTER_CPU(shadow2_fault_bail_ro_mapping, "shadow2_fault guest R/W fault")
+PERFCOUNTER_CPU(shadow2_fault_bail_user_supervisor,
+ "shadow2_fault guest U/S fault")
+PERFCOUNTER_CPU(shadow2_fault_emulate_read, "shadow2_fault emulates a read")
+PERFCOUNTER_CPU(shadow2_fault_emulate_write, "shadow2_fault emulates a write")
+PERFCOUNTER_CPU(shadow2_fault_emulate_failed, "shadow2_fault emulator fails")
+PERFCOUNTER_CPU(shadow2_fault_mmio, "shadow2_fault handled as mmio")
+PERFCOUNTER_CPU(shadow2_fault_fixed, "shadow2_fault fixed fault")
+PERFCOUNTER_CPU(shadow2_ptwr_emulate, "shadow2 causes ptwr to emulate")
+PERFCOUNTER_CPU(shadow2_validate_gl1e_calls, "calls to shadow2_validate_gl1e")
+PERFCOUNTER_CPU(shadow2_validate_gl2e_calls, "calls to shadow2_validate_gl2e")
+PERFCOUNTER_CPU(shadow2_validate_gl3e_calls, "calls to shadow2_validate_gl3e")
+PERFCOUNTER_CPU(shadow2_validate_gl4e_calls, "calls to shadow2_validate_gl4e")
+PERFCOUNTER_CPU(shadow2_hash_lookups, "calls to shadow2_hash_lookup")
+PERFCOUNTER_CPU(shadow2_hash_lookup_head, "shadow2 hash hit in bucket head")
+PERFCOUNTER_CPU(shadow2_hash_lookup_miss, "shadow2 hash misses")
+PERFCOUNTER_CPU(shadow2_get_shadow_status, "calls to get_shadow_status")
+PERFCOUNTER_CPU(shadow2_hash_inserts, "calls to shadow2_hash_insert")
+PERFCOUNTER_CPU(shadow2_hash_deletes, "calls to shadow2_hash_delete")
+PERFCOUNTER_CPU(shadow2_writeable, "shadow2 removes write access")
+PERFCOUNTER_CPU(shadow2_writeable_h_1, "shadow2 writeable: 32b w2k3")
+PERFCOUNTER_CPU(shadow2_writeable_h_2, "shadow2 writeable: 32pae w2k3")
+PERFCOUNTER_CPU(shadow2_writeable_h_3, "shadow2 writeable: 64b w2k3")
+PERFCOUNTER_CPU(shadow2_writeable_h_4, "shadow2 writeable: 32b linux low")
+PERFCOUNTER_CPU(shadow2_writeable_bf, "shadow2 writeable brute-force")
+PERFCOUNTER_CPU(shadow2_mappings, "shadow2 removes all mappings")
+PERFCOUNTER_CPU(shadow2_mappings_bf, "shadow2 rm-mappings brute-force")
+PERFCOUNTER_CPU(shadow2_early_unshadow, "shadow2 unshadows for fork/exit")
+PERFCOUNTER_CPU(shadow2_early_unshadow_top, "shadow2 unhooks for fork/exit")
+PERFCOUNTER_CPU(shadow2_unshadow, "shadow2 unshadows a page")
+PERFCOUNTER_CPU(shadow2_up_pointer, "shadow2 unshadow by up-pointer")
+PERFCOUNTER_CPU(shadow2_unshadow_bf, "shadow2 unshadow brute-force")
+PERFCOUNTER_CPU(shadow2_get_page_fail, "shadow2_get_page_from_l1e failed")
+PERFCOUNTER_CPU(shadow2_guest_walk, "shadow2 walks guest tables")
+PERFCOUNTER_CPU(shadow2_walk_cache_hit, "shadow2 walk-cache hits")
+PERFCOUNTER_CPU(shadow2_walk_cache_miss, "shadow2 walk-cache misses")
+
+
/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index d460544d3e..81c8757f8e 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -545,6 +545,7 @@ extern always_inline void prefetchw(const void *x)
#endif
void show_stack(struct cpu_user_regs *regs);
+void show_xen_trace(void);
void show_stack_overflow(unsigned long esp);
void show_registers(struct cpu_user_regs *regs);
void show_execution_state(struct cpu_user_regs *regs);
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 7144b24d8b..efade3021c 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -1,8 +1,7 @@
/******************************************************************************
* include/asm-x86/shadow.h
*
- * Copyright (c) 2005 Michael A Fetterman
- * Based on an earlier implementation by Ian Pratt et al
+ * Copyright (c) 2006 by XenSource Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,1782 +21,28 @@
#ifndef _XEN_SHADOW_H
#define _XEN_SHADOW_H
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/perfc.h>
-#include <xen/sched.h>
-#include <xen/mm.h>
-#include <xen/domain_page.h>
-#include <asm/current.h>
-#include <asm/flushtlb.h>
-#include <asm/processor.h>
-#include <asm/hvm/hvm.h>
-#include <asm/hvm/support.h>
-#include <asm/regs.h>
-#include <public/dom0_ops.h>
-#include <asm/shadow_public.h>
-#include <asm/page-guest32.h>
-#include <asm/shadow_ops.h>
+/* This file is just a wrapper around the new Shadow2 header,
+ * providing names that must be defined in any shadow implementation. */
-/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
+#include <asm/shadow2.h>
-#define SHM_enable (1<<0) /* we're in one of the shadow modes */
-#define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of
- guest tables */
-#define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
- regardless of pte write permissions */
-#define SHM_log_dirty (1<<3) /* enable log dirty mode */
-#define SHM_translate (1<<4) /* Xen does p2m translation, not guest */
-#define SHM_external (1<<5) /* Xen does not steal address space from the
- domain for its own booking; requires VT or
- similar mechanisms */
-#define SHM_wr_pt_pte (1<<6) /* guest allowed to set PAGE_RW bit in PTEs which
- point to page table pages. */
+/* How to make sure a page is not referred to in a shadow PT */
+/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */
+#define shadow_drop_references(_d, _p) \
+ shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
+#define shadow_sync_and_drop_references(_d, _p) \
+ shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-#define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
-#define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
-#define shadow_mode_write_l1(_d) (VM_ASSIST(_d, VMASST_TYPE_writable_pagetables))
-#define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
-#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
-#define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
-#define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
-#define shadow_mode_wr_pt_pte(_d) ((_d)->arch.shadow_mode & SHM_wr_pt_pte)
+/* Whether we are translating the domain's frame numbers for it */
+#define shadow_mode_translate(d) shadow2_mode_translate(d)
-#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
-#define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
- (SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
-#define shadow_linear_l2_table(_v) ((_v)->arch.shadow_vtable)
+/* ...and if so, how to add and remove entries in the mapping */
+#define guest_physmap_add_page(_d, _p, _m) \
+ shadow2_guest_physmap_add_page((_d), (_p), (_m))
+#define guest_physmap_remove_page(_d, _p, _m ) \
+ shadow2_guest_physmap_remove_page((_d), (_p), (_m))
-// easy access to the hl2 table (for translated but not external modes only)
-#define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \
- (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
-
-/*
- * For now we use the per-domain BIGLOCK rather than a shadow-specific lock.
- * We usually have the BIGLOCK already acquired anyway, so this is unlikely
- * to cause much unnecessary extra serialisation. Also it's a recursive
- * lock, and there are some code paths containing nested shadow_lock().
- * The #if0'ed code below is therefore broken until such nesting is removed.
- */
-#if 0
-#define shadow_lock_init(_d) \
- spin_lock_init(&(_d)->arch.shadow_lock)
-#define shadow_lock_is_acquired(_d) \
- spin_is_locked(&(_d)->arch.shadow_lock)
-#define shadow_lock(_d) \
-do { \
- ASSERT(!shadow_lock_is_acquired(_d)); \
- spin_lock(&(_d)->arch.shadow_lock); \
-} while (0)
-#define shadow_unlock(_d) \
-do { \
- ASSERT(!shadow_lock_is_acquired(_d)); \
- spin_unlock(&(_d)->arch.shadow_lock); \
-} while (0)
-#else
-#define shadow_lock_init(_d) \
- ((_d)->arch.shadow_nest = 0)
-#define shadow_lock_is_acquired(_d) \
- (spin_is_locked(&(_d)->big_lock) && ((_d)->arch.shadow_nest != 0))
-#define shadow_lock(_d) \
-do { \
- LOCK_BIGLOCK(_d); \
- (_d)->arch.shadow_nest++; \
-} while (0)
-#define shadow_unlock(_d) \
-do { \
- ASSERT(shadow_lock_is_acquired(_d)); \
- (_d)->arch.shadow_nest--; \
- UNLOCK_BIGLOCK(_d); \
-} while (0)
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 3
-static inline u64 get_cr3_idxval(struct vcpu *v)
-{
- u64 pae_cr3;
-
- if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 &&
- !shadow_mode_log_dirty(v->domain) )
- {
- pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
- return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
- }
- else
- return 0;
-}
-
-#define shadow_key_t u64
-#define index_to_key(x) ((x) << 32)
-#else
-#define get_cr3_idxval(v) (0)
-#define shadow_key_t unsigned long
-#define index_to_key(x) (0)
-#endif
-
-
-#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
-#define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
-#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
-extern void shadow_direct_map_clean(struct domain *d);
-extern int shadow_direct_map_init(struct domain *d);
-extern int shadow_direct_map_fault(
- unsigned long vpa, struct cpu_user_regs *regs);
-extern void shadow_mode_init(void);
-extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
-extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs);
-extern int shadow_mode_enable(struct domain *p, unsigned int mode);
-extern void shadow_invlpg(struct vcpu *, unsigned long);
-extern struct out_of_sync_entry *shadow_mark_mfn_out_of_sync(
- struct vcpu *v, unsigned long gpfn, unsigned long mfn);
-extern void free_monitor_pagetable(struct vcpu *v);
-extern void __shadow_sync_all(struct domain *d);
-extern int __shadow_out_of_sync(struct vcpu *v, unsigned long va);
-extern int set_p2m_entry(
- struct domain *d, unsigned long pfn, unsigned long mfn,
- struct domain_mmap_cache *l2cache,
- struct domain_mmap_cache *l1cache);
-extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype);
-
-extern void free_shadow_page(unsigned long smfn);
-
-extern void shadow_l1_normal_pt_update(struct domain *d,
- paddr_t pa, l1_pgentry_t l1e,
- struct domain_mmap_cache *cache);
-extern void shadow_l2_normal_pt_update(struct domain *d,
- paddr_t pa, l2_pgentry_t l2e,
- struct domain_mmap_cache *cache);
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/page-guest32.h>
-/*
- * va_mask cannot be used because it's used by the shadow hash.
- * Use the score area for for now.
- */
-#define is_xen_l2_slot(t,s) \
- ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) && \
- ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
-
-extern unsigned long gva_to_gpa(unsigned long gva);
-extern void shadow_l3_normal_pt_update(struct domain *d,
- paddr_t pa, l3_pgentry_t l3e,
- struct domain_mmap_cache *cache);
-#endif
-#if CONFIG_PAGING_LEVELS >= 4
-extern void shadow_l4_normal_pt_update(struct domain *d,
- paddr_t pa, l4_pgentry_t l4e,
- struct domain_mmap_cache *cache);
-#endif
-extern int shadow_do_update_va_mapping(unsigned long va,
- l1_pgentry_t val,
- struct vcpu *v);
-
-
-static inline unsigned long __shadow_status(
- struct domain *d, unsigned long gpfn, unsigned long stype);
-
-#if CONFIG_PAGING_LEVELS <= 2
-static inline void update_hl2e(struct vcpu *v, unsigned long va);
-#endif
-
-static inline int page_is_page_table(struct page_info *page)
-{
- struct domain *owner = page_get_owner(page);
- u32 type_info;
-
- if ( owner && shadow_mode_refcounts(owner) )
- return page->count_info & PGC_page_table;
-
- type_info = page->u.inuse.type_info & PGT_type_mask;
- return type_info && (type_info <= PGT_l4_page_table);
-}
-
-static inline int mfn_is_page_table(unsigned long mfn)
-{
- if ( !mfn_valid(mfn) )
- return 0;
-
- return page_is_page_table(mfn_to_page(mfn));
-}
-
-static inline int page_out_of_sync(struct page_info *page)
-{
- return page->count_info & PGC_out_of_sync;
-}
-
-static inline int mfn_out_of_sync(unsigned long mfn)
-{
- if ( !mfn_valid(mfn) )
- return 0;
-
- return page_out_of_sync(mfn_to_page(mfn));
-}
-
-
-/************************************************************************/
-
-static void inline
-__shadow_sync_mfn(struct domain *d, unsigned long mfn)
-{
- if ( d->arch.out_of_sync )
- {
- // XXX - could be smarter
- //
- __shadow_sync_all(d);
- }
-}
-
-static void inline
-__shadow_sync_va(struct vcpu *v, unsigned long va)
-{
- struct domain *d = v->domain;
-
- if ( d->arch.out_of_sync && __shadow_out_of_sync(v, va) )
- {
- perfc_incrc(shadow_sync_va);
-
- // XXX - could be smarter
- //
- __shadow_sync_all(v->domain);
- }
-#if CONFIG_PAGING_LEVELS <= 2
- // Also make sure the HL2 is up-to-date for this address.
- //
- if ( unlikely(shadow_mode_translate(v->domain)) )
- update_hl2e(v, va);
-#endif
-}
-
-static void inline
-shadow_sync_all(struct domain *d)
-{
- if ( unlikely(shadow_mode_enabled(d)) )
- {
- shadow_lock(d);
-
- if ( d->arch.out_of_sync )
- __shadow_sync_all(d);
-
- ASSERT(d->arch.out_of_sync == NULL);
-
- shadow_unlock(d);
- }
-}
-
-// SMP BUG: This routine can't ever be used properly in an SMP context.
-// It should be something like get_shadow_and_sync_va().
-// This probably shouldn't exist.
-//
-static void inline
-shadow_sync_va(struct vcpu *v, unsigned long gva)
-{
- struct domain *d = v->domain;
- if ( unlikely(shadow_mode_enabled(d)) )
- {
- shadow_lock(d);
- __shadow_sync_va(v, gva);
- shadow_unlock(d);
- }
-}
-
-extern void __shadow_mode_disable(struct domain *d);
-static inline void shadow_mode_disable(struct domain *d)
-{
- if ( unlikely(shadow_mode_enabled(d)) )
- {
- shadow_lock(d);
- __shadow_mode_disable(d);
- shadow_unlock(d);
- }
-}
-
-/************************************************************************/
-
-#define mfn_to_gmfn(_d, mfn) \
- ( (shadow_mode_translate(_d)) \
- ? get_gpfn_from_mfn(mfn) \
- : (mfn) )
-
-#define gmfn_to_mfn(_d, gpfn) \
- ({ \
- unlikely(shadow_mode_translate(_d)) \
- ? (likely(current->domain == (_d)) \
- ? get_mfn_from_gpfn(gpfn) \
- : get_mfn_from_gpfn_foreign(_d, gpfn)) \
- : (gpfn); \
- })
-
-extern unsigned long get_mfn_from_gpfn_foreign(
- struct domain *d, unsigned long gpfn);
-
-/************************************************************************/
-
-struct shadow_status {
- struct shadow_status *next; /* Pull-to-front list per hash bucket. */
- shadow_key_t gpfn_and_flags; /* Guest pfn plus flags. */
- unsigned long smfn; /* Shadow mfn. */
-};
-
-#define shadow_ht_extra_size 128
-#define shadow_ht_buckets 256
-
-struct out_of_sync_entry {
- struct out_of_sync_entry *next;
- struct vcpu *v;
- unsigned long gpfn; /* why is this here? */
- unsigned long gmfn;
- unsigned long snapshot_mfn;
- paddr_t writable_pl1e; /* NB: this is a machine address */
- unsigned long va;
-};
-
-#define out_of_sync_extra_size 127
-
-#define SHADOW_SNAPSHOT_ELSEWHERE (-1L)
-
-/************************************************************************/
-#define SHADOW_DEBUG 0
-#define SHADOW_VERBOSE_DEBUG 0
-#define SHADOW_VVERBOSE_DEBUG 0
-#define SHADOW_VVVERBOSE_DEBUG 0
-#define SHADOW_HASH_DEBUG 0
-#define FULLSHADOW_DEBUG 0
-
-#if SHADOW_DEBUG
-extern int shadow_status_noswap;
-#define SHADOW_REFLECTS_SNAPSHOT _PAGE_AVAIL0
-#endif
-
-#if SHADOW_VERBOSE_DEBUG
-#define SH_LOG(_f, _a...) \
- printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \
- current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a )
-#define SH_VLOG(_f, _a...) \
- printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \
- current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
-#else
-#define SH_LOG(_f, _a...) ((void)0)
-#define SH_VLOG(_f, _a...) ((void)0)
-#endif
-
-#if SHADOW_VVERBOSE_DEBUG
-#define SH_VVLOG(_f, _a...) \
- printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \
- current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
-#else
-#define SH_VVLOG(_f, _a...) ((void)0)
-#endif
-
-#if SHADOW_VVVERBOSE_DEBUG
-#define SH_VVVLOG(_f, _a...) \
- printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \
- current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
-#else
-#define SH_VVVLOG(_f, _a...) ((void)0)
-#endif
-
-#if FULLSHADOW_DEBUG
-#define FSH_LOG(_f, _a...) \
- printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \
- current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
-#else
-#define FSH_LOG(_f, _a...) ((void)0)
-#endif
-
-
-/************************************************************************/
-
-static inline int
-shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
-{
- l1_pgentry_t nl1e;
- int res;
- unsigned long mfn;
- struct domain *owner;
-
- ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
-
- if ( !shadow_mode_refcounts(d) )
- return 1;
-
- nl1e = l1e;
- l1e_remove_flags(nl1e, _PAGE_GLOBAL);
-
- if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
- return 0;
-
- res = get_page_from_l1e(nl1e, d);
-
- if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
- !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) &&
- (mfn = l1e_get_pfn(nl1e)) &&
- mfn_valid(mfn) &&
- (owner = page_get_owner(mfn_to_page(mfn))) &&
- (d != owner) )
- {
- res = get_page_from_l1e(nl1e, owner);
- printk("tried to map mfn %lx from domain %d into shadow page tables "
- "of domain %d; %s\n",
- mfn, owner->domain_id, d->domain_id,
- res ? "success" : "failed");
- }
-
- if ( unlikely(!res) )
- {
- perfc_incrc(shadow_get_page_fail);
- FSH_LOG("%s failed to get ref l1e=%" PRIpte "\n",
- __func__, l1e_get_intpte(l1e));
- }
-
- return res;
-}
-
-static inline void
-shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
-{
- if ( !shadow_mode_refcounts(d) )
- return;
-
- put_page_from_l1e(l1e, d);
-}
-
-static inline void
-shadow_put_page_type(struct domain *d, struct page_info *page)
-{
- if ( !shadow_mode_refcounts(d) )
- return;
-
- put_page_type(page);
-}
-
-static inline int shadow_get_page(struct domain *d,
- struct page_info *page,
- struct domain *owner)
-{
- if ( !shadow_mode_refcounts(d) )
- return 1;
- return get_page(page, owner);
-}
-
-static inline void shadow_put_page(struct domain *d,
- struct page_info *page)
-{
- if ( !shadow_mode_refcounts(d) )
- return;
- put_page(page);
-}
-
-/************************************************************************/
-
-static inline void __mark_dirty(struct domain *d, unsigned long mfn)
-{
- unsigned long pfn;
-
- ASSERT(shadow_lock_is_acquired(d));
-
- if ( likely(!shadow_mode_log_dirty(d)) || !VALID_MFN(mfn) )
- return;
-
- ASSERT(d->arch.shadow_dirty_bitmap != NULL);
-
- /* We /really/ mean PFN here, even for non-translated guests. */
- pfn = get_gpfn_from_mfn(mfn);
-
- /*
- * Values with the MSB set denote MFNs that aren't really part of the
- * domain's pseudo-physical memory map (e.g., the shared info frame).
- * Nothing to do here...
- */
- if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
- return;
-
- /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
- if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) &&
- !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
- {
- d->arch.shadow_dirty_count++;
- }
-#ifndef NDEBUG
- else if ( mfn_valid(mfn) )
- {
- SH_VLOG("mark_dirty OOR! mfn=%lx pfn=%lx max=%x (dom %p)",
- mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
- SH_VLOG("dom=%p caf=%08x taf=%" PRtype_info,
- page_get_owner(mfn_to_page(mfn)),
- mfn_to_page(mfn)->count_info,
- mfn_to_page(mfn)->u.inuse.type_info );
- }
-#endif
-}
-
-
-static inline void mark_dirty(struct domain *d, unsigned int mfn)
-{
- if ( unlikely(shadow_mode_log_dirty(d)) )
- {
- shadow_lock(d);
- __mark_dirty(d, mfn);
- shadow_unlock(d);
- }
-}
-
-
-/************************************************************************/
-#if CONFIG_PAGING_LEVELS <= 2
-static inline void
-__shadow_get_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
-{
- ASSERT(shadow_mode_enabled(v->domain));
-
- *psl2e = v->arch.shadow_vtable[l2_table_offset(va)];
-}
-
-static inline void
-__shadow_set_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t value)
-{
- ASSERT(shadow_mode_enabled(v->domain));
-
- v->arch.shadow_vtable[l2_table_offset(va)] = value;
-}
-
-static inline void
-__guest_get_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e)
-{
- *pl2e = v->arch.guest_vtable[l2_table_offset(va)];
-}
-
-static inline void
-__guest_set_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t value)
-{
- struct domain *d = v->domain;
-
- v->arch.guest_vtable[l2_table_offset(va)] = value;
-
- if ( unlikely(shadow_mode_translate(d)) )
- update_hl2e(v, va);
-
- __mark_dirty(d, pagetable_get_pfn(v->arch.guest_table));
-}
-
-static inline void
-__direct_get_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
-{
- l2_pgentry_t *phys_vtable;
-
- ASSERT(shadow_mode_enabled(v->domain));
-
- phys_vtable = map_domain_page(
- pagetable_get_pfn(v->domain->arch.phys_table));
-
- *psl2e = phys_vtable[l2_table_offset(va)];
-
- unmap_domain_page(phys_vtable);
-}
-
-static inline void
-__direct_set_l2e(
- struct vcpu *v, unsigned long va, l2_pgentry_t value)
-{
- l2_pgentry_t *phys_vtable;
-
- ASSERT(shadow_mode_enabled(v->domain));
-
- phys_vtable = map_domain_page(
- pagetable_get_pfn(v->domain->arch.phys_table));
-
- phys_vtable[l2_table_offset(va)] = value;
-
- unmap_domain_page(phys_vtable);
-}
-
-static inline void
-update_hl2e(struct vcpu *v, unsigned long va)
-{
- int index = l2_table_offset(va);
- unsigned long mfn;
- l2_pgentry_t gl2e = v->arch.guest_vtable[index];
- l1_pgentry_t old_hl2e, new_hl2e;
- int need_flush = 0;
-
- ASSERT(shadow_mode_translate(v->domain));
-
- old_hl2e = v->arch.hl2_vtable[index];
-
- if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) &&
- VALID_MFN(mfn = get_mfn_from_gpfn(l2e_get_pfn(gl2e))) )
- new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
- else
- new_hl2e = l1e_empty();
-
- // only do the ref counting if something has changed.
- //
- if ( (l1e_has_changed(old_hl2e, new_hl2e, PAGE_FLAG_MASK)) )
- {
- if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
- !shadow_get_page(v->domain, mfn_to_page(l1e_get_pfn(new_hl2e)),
- v->domain) )
- new_hl2e = l1e_empty();
- if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
- {
- shadow_put_page(v->domain, mfn_to_page(l1e_get_pfn(old_hl2e)));
- need_flush = 1;
- }
-
- v->arch.hl2_vtable[l2_table_offset(va)] = new_hl2e;
-
- if ( need_flush )
- {
- perfc_incrc(update_hl2e_invlpg);
- flush_tlb_one_mask(v->domain->domain_dirty_cpumask,
- &linear_pg_table[l1_linear_offset(va)]);
- }
- }
-}
-
-static inline void shadow_drop_references(
- struct domain *d, struct page_info *page)
-{
- if ( likely(!shadow_mode_refcounts(d)) ||
- ((page->u.inuse.type_info & PGT_count_mask) == 0) )
- return;
-
- /* XXX This needs more thought... */
- printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n",
- __func__, page_to_mfn(page));
- printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
- page->count_info, page->u.inuse.type_info);
-
- shadow_lock(d);
- shadow_remove_all_access(d, page_to_mfn(page));
- shadow_unlock(d);
-
- printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
- page->count_info, page->u.inuse.type_info);
-}
-
-/* XXX Needs more thought. Neither pretty nor fast: a place holder. */
-static inline void shadow_sync_and_drop_references(
- struct domain *d, struct page_info *page)
-{
- if ( likely(!shadow_mode_refcounts(d)) )
- return;
-
- if ( page_out_of_sync(page) )
- __shadow_sync_mfn(d, page_to_mfn(page));
-
- shadow_remove_all_access(d, page_to_mfn(page));
-}
-#endif
-
-/************************************************************************/
-
-/*
- * Add another shadow reference to smfn.
- */
-static inline int
-get_shadow_ref(unsigned long smfn)
-{
- u32 x, nx;
-
- ASSERT(mfn_valid(smfn));
-
- x = mfn_to_page(smfn)->count_info;
- nx = x + 1;
-
- if ( unlikely(nx == 0) )
- {
- printk("get_shadow_ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
- mfn_to_page(smfn)->u.inuse.type_info & PGT_mfn_mask,
- smfn);
- BUG();
- }
-
- // Guarded by the shadow lock...
- //
- mfn_to_page(smfn)->count_info = nx;
-
- return 1;
-}
-
-/*
- * Drop a shadow reference to smfn.
- */
-static inline void
-put_shadow_ref(unsigned long smfn)
-{
- u32 x, nx;
-
- ASSERT(mfn_valid(smfn));
-
- x = mfn_to_page(smfn)->count_info;
- nx = x - 1;
-
- if ( unlikely(x == 0) )
- {
- printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%"
- PRtype_info "\n",
- smfn,
- mfn_to_page(smfn)->count_info,
- mfn_to_page(smfn)->u.inuse.type_info);
- BUG();
- }
-
- // Guarded by the shadow lock...
- //
- mfn_to_page(smfn)->count_info = nx;
-
- if ( unlikely(nx == 0) )
- {
- free_shadow_page(smfn);
- }
-}
-
-static inline void
-shadow_pin(unsigned long smfn)
-{
- ASSERT( !(mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) );
-
- mfn_to_page(smfn)->u.inuse.type_info |= PGT_pinned;
- if ( unlikely(!get_shadow_ref(smfn)) )
- BUG();
-}
-
-static inline void
-shadow_unpin(unsigned long smfn)
-{
- ASSERT( (mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) );
-
- mfn_to_page(smfn)->u.inuse.type_info &= ~PGT_pinned;
- put_shadow_ref(smfn);
-}
-
-/*
- * SMP issue. The following code assumes the shadow lock is held. Re-visit
- * when working on finer-gained locks for shadow.
- */
-static inline void set_guest_back_ptr(
- struct domain *d, l1_pgentry_t spte,
- unsigned long smfn, unsigned int index)
-{
- struct page_info *gpage;
-
- ASSERT(shadow_lock_is_acquired(d));
-
- if ( !shadow_mode_external(d) ||
- ((l1e_get_flags(spte) & (_PAGE_PRESENT|_PAGE_RW)) !=
- (_PAGE_PRESENT|_PAGE_RW)) )
- return;
-
- gpage = l1e_get_page(spte);
-
- ASSERT(smfn != 0);
- ASSERT(page_to_mfn(gpage) != 0);
-
- gpage->tlbflush_timestamp = smfn;
- gpage->u.inuse.type_info &= ~PGT_va_mask;
- gpage->u.inuse.type_info |= (unsigned long)index << PGT_va_shift;
-}
-
-/************************************************************************/
-#if CONFIG_PAGING_LEVELS <= 2
-extern void shadow_mark_va_out_of_sync(
- struct vcpu *v, unsigned long gpfn, unsigned long mfn,
- unsigned long va);
-
-static inline int l1pte_write_fault(
- struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
- unsigned long va)
-{
- struct domain *d = v->domain;
- l1_pgentry_t gpte = *gpte_p;
- l1_pgentry_t spte;
- unsigned long gpfn = l1e_get_pfn(gpte);
- unsigned long gmfn = gmfn_to_mfn(d, gpfn);
-
- //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
-
- if ( unlikely(!VALID_MFN(gmfn)) )
- {
- SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
- *spte_p = l1e_empty();
- return 0;
- }
-
- ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
- l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
- spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
-
- SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
- l1e_get_intpte(spte), l1e_get_intpte(gpte));
-
- __mark_dirty(d, gmfn);
-
- if ( mfn_is_page_table(gmfn) )
- shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
-
- *gpte_p = gpte;
- *spte_p = spte;
-
- return 1;
-}
-
-static inline int l1pte_read_fault(
- struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
-{
- l1_pgentry_t gpte = *gpte_p;
- l1_pgentry_t spte = *spte_p;
- unsigned long pfn = l1e_get_pfn(gpte);
- unsigned long mfn = gmfn_to_mfn(d, pfn);
-
- if ( unlikely(!VALID_MFN(mfn)) )
- {
- SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
- *spte_p = l1e_empty();
- return 0;
- }
-
- l1e_add_flags(gpte, _PAGE_ACCESSED);
- spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
-
- if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
- mfn_is_page_table(mfn) )
- {
- l1e_remove_flags(spte, _PAGE_RW);
- }
-
- SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
- l1e_get_intpte(spte), l1e_get_intpte(gpte));
- *gpte_p = gpte;
- *spte_p = spte;
-
- return 1;
-}
-#endif
-
-static inline void l1pte_propagate_from_guest(
- struct domain *d, guest_l1_pgentry_t gpte, l1_pgentry_t *spte_p)
-{
- unsigned long mfn;
- l1_pgentry_t spte;
-
- spte = l1e_empty();
-
- if ( ((guest_l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
- VALID_MFN(mfn = gmfn_to_mfn(d, l1e_get_pfn(gpte))) )
- {
- spte = l1e_from_pfn(
- mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL));
-
- if ( shadow_mode_log_dirty(d) ||
- !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) ||
- mfn_is_page_table(mfn) )
- {
- l1e_remove_flags(spte, _PAGE_RW);
- }
- }
-
- if ( l1e_get_intpte(spte) || l1e_get_intpte(gpte) )
- SH_VVVLOG("%s: gpte=%" PRIpte ", new spte=%" PRIpte,
- __func__, l1e_get_intpte(gpte), l1e_get_intpte(spte));
-
- *spte_p = spte;
-}
-
-static inline void hl2e_propagate_from_guest(
- struct domain *d, l2_pgentry_t gpde, l1_pgentry_t *hl2e_p)
-{
- unsigned long pfn = l2e_get_pfn(gpde);
- unsigned long mfn;
- l1_pgentry_t hl2e;
-
- hl2e = l1e_empty();
-
- if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
- {
- mfn = gmfn_to_mfn(d, pfn);
- if ( VALID_MFN(mfn) && mfn_valid(mfn) )
- hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
- }
-
- if ( l1e_get_intpte(hl2e) || l2e_get_intpte(gpde) )
- SH_VVLOG("%s: gpde=%" PRIpte " hl2e=%" PRIpte, __func__,
- l2e_get_intpte(gpde), l1e_get_intpte(hl2e));
-
- *hl2e_p = hl2e;
-}
-
-static inline void l2pde_general(
- struct domain *d,
- guest_l2_pgentry_t *gpde_p,
- l2_pgentry_t *spde_p,
- unsigned long sl1mfn)
-{
- guest_l2_pgentry_t gpde = *gpde_p;
- l2_pgentry_t spde;
-
- spde = l2e_empty();
- if ( (guest_l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
- {
- spde = l2e_from_pfn(
- sl1mfn,
- (guest_l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
-
- /* N.B. PDEs do not have a dirty bit. */
- guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
-
- *gpde_p = gpde;
- }
-
- if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
- SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
- l2e_get_intpte(gpde), l2e_get_intpte(spde));
-
- *spde_p = spde;
-}
-
-static inline void l2pde_propagate_from_guest(
- struct domain *d, guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
-{
- guest_l2_pgentry_t gpde = *gpde_p;
- unsigned long sl1mfn = 0;
-
- if ( guest_l2e_get_flags(gpde) & _PAGE_PRESENT )
- sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow);
- l2pde_general(d, gpde_p, spde_p, sl1mfn);
-}
-
-/************************************************************************/
-
-// returns true if a tlb flush is needed
-//
-static int inline
-validate_pte_change(
- struct domain *d,
- guest_l1_pgentry_t new_pte,
- l1_pgentry_t *shadow_pte_p)
-{
- l1_pgentry_t old_spte, new_spte;
- int need_flush = 0;
-
- perfc_incrc(validate_pte_calls);
-
- l1pte_propagate_from_guest(d, new_pte, &new_spte);
-
- if ( shadow_mode_refcounts(d) )
- {
- old_spte = *shadow_pte_p;
-
- if ( l1e_get_intpte(old_spte) == l1e_get_intpte(new_spte) )
- {
- // No accounting required...
- //
- perfc_incrc(validate_pte_changes1);
- }
- else if ( l1e_get_intpte(old_spte) == (l1e_get_intpte(new_spte)|_PAGE_RW) )
- {
- // Fast path for PTEs that have merely been write-protected
- // (e.g., during a Unix fork()). A strict reduction in privilege.
- //
- perfc_incrc(validate_pte_changes2);
- if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) )
- shadow_put_page_type(d, mfn_to_page(l1e_get_pfn(new_spte)));
- }
- else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) &
- _PAGE_PRESENT ) &&
- l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
- {
- // only do the ref counting if something important changed.
- //
- perfc_incrc(validate_pte_changes3);
-
- if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
- {
- shadow_put_page_from_l1e(old_spte, d);
- need_flush = 1;
- }
- if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(new_spte, d) ) {
- new_spte = l1e_empty();
- need_flush = -1; /* need to unshadow the page */
- }
- }
- else
- {
- perfc_incrc(validate_pte_changes4);
- }
- }
-
- *shadow_pte_p = new_spte;
-
- return need_flush;
-}
-
-// returns true if a tlb flush is needed
-//
-static int inline
-validate_hl2e_change(
- struct domain *d,
- l2_pgentry_t new_gpde,
- l1_pgentry_t *shadow_hl2e_p)
-{
- l1_pgentry_t old_hl2e, new_hl2e;
- int need_flush = 0;
-
- perfc_incrc(validate_hl2e_calls);
-
- old_hl2e = *shadow_hl2e_p;
- hl2e_propagate_from_guest(d, new_gpde, &new_hl2e);
-
- // Only do the ref counting if something important changed.
- //
- if ( ((l1e_get_flags(old_hl2e) | l1e_get_flags(new_hl2e)) & _PAGE_PRESENT) &&
- l1e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT) )
- {
- perfc_incrc(validate_hl2e_changes);
-
- if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
- !get_page(mfn_to_page(l1e_get_pfn(new_hl2e)), d) )
- new_hl2e = l1e_empty();
- if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
- {
- put_page(mfn_to_page(l1e_get_pfn(old_hl2e)));
- need_flush = 1;
- }
- }
-
- *shadow_hl2e_p = new_hl2e;
-
- return need_flush;
-}
-
-// returns true if a tlb flush is needed
-//
-static int inline
-validate_pde_change(
- struct domain *d,
- guest_l2_pgentry_t new_gpde,
- l2_pgentry_t *shadow_pde_p)
-{
- l2_pgentry_t old_spde, new_spde;
- int need_flush = 0;
-
- perfc_incrc(validate_pde_calls);
-
- old_spde = *shadow_pde_p;
- l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
-
- // Only do the ref counting if something important changed.
- //
- if ( ((l2e_get_intpte(old_spde) | l2e_get_intpte(new_spde)) & _PAGE_PRESENT) &&
- l2e_has_changed(old_spde, new_spde, _PAGE_PRESENT) )
- {
- perfc_incrc(validate_pde_changes);
-
- if ( (l2e_get_flags(new_spde) & _PAGE_PRESENT) &&
- !get_shadow_ref(l2e_get_pfn(new_spde)) )
- BUG();
- if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
- {
- put_shadow_ref(l2e_get_pfn(old_spde));
- need_flush = 1;
- }
- }
-
- *shadow_pde_p = new_spde;
-
- return need_flush;
-}
-
-/*********************************************************************/
-
-#if SHADOW_HASH_DEBUG
-
-static void shadow_audit(struct domain *d, int print)
-{
- int live = 0, free = 0, j = 0, abs;
- struct shadow_status *a;
-
- for ( j = 0; j < shadow_ht_buckets; j++ )
- {
- a = &d->arch.shadow_ht[j];
- if ( a->gpfn_and_flags )
- {
- live++;
- ASSERT(a->smfn);
- }
- else
- ASSERT(!a->next);
-
- a = a->next;
- while ( a && (live < 9999) )
- {
- live++;
- if ( (a->gpfn_and_flags == 0) || (a->smfn == 0) )
- {
- printk("XXX live=%d gpfn+flags=%lx sp=%lx next=%p\n",
- live, a->gpfn_and_flags, a->smfn, a->next);
- BUG();
- }
- ASSERT(a->smfn);
- a = a->next;
- }
- ASSERT(live < 9999);
- }
-
- for ( a = d->arch.shadow_ht_free; a != NULL; a = a->next )
- free++;
-
- if ( print )
- printk("Xlive=%d free=%d\n", live, free);
-
- // BUG: this only works if there's only a single domain which is
- // using shadow tables.
- //
- abs = (
- perfc_value(shadow_l1_pages) +
- perfc_value(shadow_l2_pages) +
- perfc_value(hl2_table_pages) +
- perfc_value(snapshot_pages) +
- perfc_value(writable_pte_predictions)
- ) - live;
-#ifdef PERF_COUNTERS
- if ( (abs < -1) || (abs > 1) )
- {
- printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
- live, free,
- perfc_value(shadow_l1_pages),
- perfc_value(shadow_l2_pages),
- perfc_value(hl2_table_pages),
- perfc_value(snapshot_pages),
- perfc_value(writable_pte_predictions));
- BUG();
- }
-#endif
-
- // XXX ought to add some code to audit the out-of-sync entries, too.
- //
-}
-#else
-#define shadow_audit(p, print) ((void)0)
-#endif
-
-
-static inline struct shadow_status *hash_bucket(
- struct domain *d, unsigned int gpfn)
-{
- return &d->arch.shadow_ht[gpfn % shadow_ht_buckets];
-}
-
-
-/*
- * N.B. This takes a guest pfn (i.e. a pfn in the guest's namespace,
- * which, depending on full shadow mode, may or may not equal
- * its mfn).
- * It returns the shadow's mfn, or zero if it doesn't exist.
- */
-static inline unsigned long __shadow_status(
- struct domain *d, unsigned long gpfn, unsigned long stype)
-{
- struct shadow_status *p, *x, *head;
- shadow_key_t key;
-#if CONFIG_PAGING_LEVELS >= 3
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 && stype == PGT_l4_shadow )
- key = gpfn | stype | index_to_key(get_cr3_idxval(current));
- else
-#endif
- key = gpfn | stype;
-
- ASSERT(shadow_lock_is_acquired(d));
- ASSERT(gpfn == (gpfn & PGT_mfn_mask));
- ASSERT(stype && !(stype & ~PGT_type_mask));
-
- perfc_incrc(shadow_status_calls);
-
- x = head = hash_bucket(d, gpfn);
- p = NULL;
-
- shadow_audit(d, 0);
-
- do
- {
- ASSERT(x->gpfn_and_flags || ((x == head) && (x->next == NULL)));
-
- if ( x->gpfn_and_flags == key )
- {
-#if SHADOW_DEBUG
- if ( unlikely(shadow_status_noswap) )
- return x->smfn;
-#endif
- /* Pull-to-front if 'x' isn't already the head item. */
- if ( unlikely(x != head) )
- {
- /* Delete 'x' from list and reinsert immediately after head. */
- p->next = x->next;
- x->next = head->next;
- head->next = x;
-
- /* Swap 'x' contents with head contents. */
- SWAP(head->gpfn_and_flags, x->gpfn_and_flags);
- SWAP(head->smfn, x->smfn);
- }
- else
- {
- perfc_incrc(shadow_status_hit_head);
- }
-
- return head->smfn;
- }
-
- p = x;
- x = x->next;
- }
- while ( x != NULL );
-
- perfc_incrc(shadow_status_miss);
- return 0;
-}
-
-/*
- * Not clear if pull-to-front is worth while for this or not,
- * as it generally needs to scan the entire bucket anyway.
- * Much simpler without.
- *
- * Either returns PGT_none, or PGT_l{1,2,3,4}_page_table.
- */
-static inline u32
-shadow_max_pgtable_type(struct domain *d, unsigned long gpfn,
- unsigned long *smfn)
-{
- struct shadow_status *x;
- u32 pttype = PGT_none, type;
-
- ASSERT(shadow_lock_is_acquired(d));
- ASSERT(gpfn == (gpfn & PGT_mfn_mask));
-
- perfc_incrc(shadow_max_type);
-
- x = hash_bucket(d, gpfn);
-
- while ( x && x->gpfn_and_flags )
- {
- if ( (x->gpfn_and_flags & PGT_mfn_mask) == gpfn )
- {
- type = x->gpfn_and_flags & PGT_type_mask;
-
- switch ( type )
- {
- case PGT_hl2_shadow:
- // Treat an HL2 as if it's an L1
- //
- type = PGT_l1_shadow;
- break;
- case PGT_snapshot:
- case PGT_writable_pred:
- // Ignore snapshots -- they don't in and of themselves constitute
- // treating a page as a page table
- //
- goto next;
- case PGT_base_page_table:
- // Early exit if we found the max possible value
- //
- return type;
- default:
- break;
- }
-
- if ( type > pttype )
- {
- pttype = type;
- if ( smfn )
- *smfn = x->smfn;
- }
- }
- next:
- x = x->next;
- }
-
- return pttype;
-}
-
-static inline void delete_shadow_status(
- struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype, u64 index)
-{
- struct shadow_status *p, *x, *n, *head;
-
- shadow_key_t key = gpfn | stype | index_to_key(index);
-
- ASSERT(shadow_lock_is_acquired(d));
- ASSERT(!(gpfn & ~PGT_mfn_mask));
- ASSERT(stype && !(stype & ~PGT_type_mask));
-
- head = hash_bucket(d, gpfn);
-
- SH_VLOG("delete gpfn=%lx t=%08x bucket=%p", gpfn, stype, head);
- shadow_audit(d, 0);
-
- /* Match on head item? */
- if ( head->gpfn_and_flags == key )
- {
- if ( (n = head->next) != NULL )
- {
- /* Overwrite head with contents of following node. */
- head->gpfn_and_flags = n->gpfn_and_flags;
- head->smfn = n->smfn;
-
- /* Delete following node. */
- head->next = n->next;
-
- /* Add deleted node to the free list. */
- n->gpfn_and_flags = 0;
- n->smfn = 0;
- n->next = d->arch.shadow_ht_free;
- d->arch.shadow_ht_free = n;
- }
- else
- {
- /* This bucket is now empty. Initialise the head node. */
- head->gpfn_and_flags = 0;
- head->smfn = 0;
- }
-
- goto found;
- }
-
- p = head;
- x = head->next;
-
- do
- {
- if ( x->gpfn_and_flags == key )
- {
- /* Delete matching node. */
- p->next = x->next;
-
- /* Add deleted node to the free list. */
- x->gpfn_and_flags = 0;
- x->smfn = 0;
- x->next = d->arch.shadow_ht_free;
- d->arch.shadow_ht_free = x;
-
- goto found;
- }
-
- p = x;
- x = x->next;
- }
- while ( x != NULL );
-
- /* If we got here, it wasn't in the list! */
- BUG();
-
- found:
- // release ref to page
- if ( stype != PGT_writable_pred )
- put_page(mfn_to_page(gmfn));
-
- shadow_audit(d, 0);
-}
-
-static inline void set_shadow_status(
- struct domain *d, unsigned long gpfn, unsigned long gmfn,
- unsigned long smfn, unsigned long stype, u64 index)
-{
- struct shadow_status *x, *head, *extra;
- int i;
-
- shadow_key_t key = gpfn | stype | index_to_key(index);
-
- SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
-
- ASSERT(shadow_lock_is_acquired(d));
-
- ASSERT(shadow_mode_translate(d) || gpfn);
- ASSERT(!(gpfn & ~PGT_mfn_mask));
-
- // XXX - need to be more graceful.
- ASSERT(VALID_MFN(gmfn));
-
- ASSERT(stype && !(stype & ~PGT_type_mask));
-
- x = head = hash_bucket(d, gpfn);
-
- SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
- gpfn, smfn, stype, x, x->next);
- shadow_audit(d, 0);
-
- // grab a reference to the guest page to represent the entry in the shadow
- // hash table
- //
- // XXX - Should PGT_writable_pred grab a page ref?
- // - Who/how are these hash table entry refs flushed if/when a page
- // is given away by the domain?
- //
- if ( stype != PGT_writable_pred )
- get_page(mfn_to_page(gmfn), d);
-
- /*
- * STEP 1. If page is already in the table, update it in place.
- */
- do
- {
- if ( unlikely(x->gpfn_and_flags == key) )
- {
- if ( stype != PGT_writable_pred )
- BUG(); // we should never replace entries into the hash table
- x->smfn = smfn;
- if ( stype != PGT_writable_pred )
- put_page(mfn_to_page(gmfn)); // already had a ref...
- goto done;
- }
-
- x = x->next;
- }
- while ( x != NULL );
-
- /*
- * STEP 2. The page must be inserted into the table.
- */
-
- /* If the bucket is empty then insert the new page as the head item. */
- if ( head->gpfn_and_flags == 0 )
- {
- head->gpfn_and_flags = key;
- head->smfn = smfn;
- ASSERT(head->next == NULL);
- goto done;
- }
-
- /* We need to allocate a new node. Ensure the quicklist is non-empty. */
- if ( unlikely(d->arch.shadow_ht_free == NULL) )
- {
- SH_VLOG("Allocate more shadow hashtable blocks.");
-
- extra = xmalloc_bytes(
- sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
-
- /* XXX Should be more graceful here. */
- if ( extra == NULL )
- BUG();
-
- memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
-
- /* Record the allocation block so it can be correctly freed later. */
- d->arch.shadow_extras_count++;
- *((struct shadow_status **)&extra[shadow_ht_extra_size]) =
- d->arch.shadow_ht_extras;
- d->arch.shadow_ht_extras = &extra[0];
-
- /* Thread a free chain through the newly-allocated nodes. */
- for ( i = 0; i < (shadow_ht_extra_size - 1); i++ )
- extra[i].next = &extra[i+1];
- extra[i].next = NULL;
-
- /* Add the new nodes to the free list. */
- d->arch.shadow_ht_free = &extra[0];
- }
-
- /* Allocate a new node from the quicklist. */
- x = d->arch.shadow_ht_free;
- d->arch.shadow_ht_free = x->next;
-
- /* Initialise the new node and insert directly after the head item. */
- x->gpfn_and_flags = key;
- x->smfn = smfn;
- x->next = head->next;
- head->next = x;
-
- done:
- shadow_audit(d, 0);
-
- if ( stype <= PGT_l4_shadow )
- {
- // add to front of list of pages to check when removing write
- // permissions for a page...
- //
- }
-}
-
-/************************************************************************/
-
-static inline void guest_physmap_add_page(
- struct domain *d, unsigned long gpfn, unsigned long mfn)
-{
- struct domain_mmap_cache c1, c2;
-
- if ( likely(!shadow_mode_translate(d)) )
- return;
-
- domain_mmap_cache_init(&c1);
- domain_mmap_cache_init(&c2);
- shadow_lock(d);
- shadow_sync_and_drop_references(d, mfn_to_page(mfn));
- set_p2m_entry(d, gpfn, mfn, &c1, &c2);
- set_gpfn_from_mfn(mfn, gpfn);
- shadow_unlock(d);
- domain_mmap_cache_destroy(&c1);
- domain_mmap_cache_destroy(&c2);
-}
-
-static inline void guest_physmap_remove_page(
- struct domain *d, unsigned long gpfn, unsigned long mfn)
-{
- struct domain_mmap_cache c1, c2;
- unsigned long type;
-
- if ( likely(!shadow_mode_translate(d)) )
- return;
-
- domain_mmap_cache_init(&c1);
- domain_mmap_cache_init(&c2);
- shadow_lock(d);
- shadow_sync_and_drop_references(d, mfn_to_page(mfn));
- while ( (type = shadow_max_pgtable_type(d, gpfn, NULL)) != PGT_none )
- free_shadow_page(__shadow_status(d, gpfn, type));
- set_p2m_entry(d, gpfn, -1, &c1, &c2);
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
- shadow_unlock(d);
- domain_mmap_cache_destroy(&c1);
- domain_mmap_cache_destroy(&c2);
-}
-
-/************************************************************************/
-
-void static inline
-shadow_update_min_max(unsigned long smfn, int index)
-{
- struct page_info *sl1page = mfn_to_page(smfn);
- u32 min_max = sl1page->tlbflush_timestamp;
- int min = SHADOW_MIN(min_max);
- int max = SHADOW_MAX(min_max);
- int update = 0;
-
- if ( index < min )
- {
- min = index;
- update = 1;
- }
- if ( index > max )
- {
- max = index;
- update = 1;
- }
- if ( update )
- sl1page->tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);
-}
-
-#if CONFIG_PAGING_LEVELS <= 2
-extern void shadow_map_l1_into_current_l2(unsigned long va);
-
-void static inline
-shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- l2_pgentry_t sl2e = {0};
-
- __shadow_get_l2e(v, va, &sl2e);
- if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
- {
- /*
- * Either the L1 is not shadowed, or the shadow isn't linked into
- * the current shadow L2.
- */
- if ( create_l1_shadow )
- {
- perfc_incrc(shadow_set_l1e_force_map);
- shadow_map_l1_into_current_l2(va);
- }
- else /* check to see if it exists; if so, link it in */
- {
- l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
- unsigned long gl1pfn = l2e_get_pfn(gpde);
- unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
-
- ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
-
- if ( sl1mfn )
- {
- perfc_incrc(shadow_set_l1e_unlinked);
- if ( !get_shadow_ref(sl1mfn) )
- BUG();
- l2pde_general(d, &gpde, &sl2e, sl1mfn);
- __guest_set_l2e(v, va, gpde);
- __shadow_set_l2e(v, va, sl2e);
- }
- else
- {
- // no shadow exists, so there's nothing to do.
- perfc_incrc(shadow_set_l1e_fail);
- return;
- }
- }
- }
-
- __shadow_get_l2e(v, va, &sl2e);
-
- if ( shadow_mode_refcounts(d) )
- {
- l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
-
- // only do the ref counting if something important changed.
- //
- if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
- {
- if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(new_spte, d) )
- new_spte = l1e_empty();
- if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
- shadow_put_page_from_l1e(old_spte, d);
- }
-
- }
-
- set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
- shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
- shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
-}
-#endif
-/************************************************************************/
-
-static inline int
-shadow_mode_page_writable(unsigned long va, struct cpu_user_regs *regs, unsigned long gpfn)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- unsigned long mfn = gmfn_to_mfn(d, gpfn);
- u32 type = mfn_to_page(mfn)->u.inuse.type_info & PGT_type_mask;
-
- if ( shadow_mode_refcounts(d) &&
- (type == PGT_writable_page) )
- type = shadow_max_pgtable_type(d, gpfn, NULL);
-
- // Strange but true: writable page tables allow kernel-mode access
- // to L1 page table pages via write-protected PTEs... Similarly, write
- // access to all page table pages is granted for shadow_mode_write_all
- // clients.
- //
- if ( ((shadow_mode_write_l1(d) && (type == PGT_l1_page_table)) ||
- (shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table))) &&
- ((va < HYPERVISOR_VIRT_START)
-#if defined(__x86_64__)
- || (va >= HYPERVISOR_VIRT_END)
-#endif
- ) &&
- guest_kernel_mode(v, regs) )
- return 1;
-
- return 0;
-}
-
-#if CONFIG_PAGING_LEVELS <= 2
-static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
-{
- l2_pgentry_t gpde;
- l1_pgentry_t gpte;
- struct vcpu *v = current;
-
- ASSERT( shadow_mode_translate(current->domain) );
-
- __guest_get_l2e(v, gva, &gpde);
- if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
- return l1e_empty();;
-
- // This is actually overkill - we only need to make sure the hl2
- // is in-sync.
- //
- shadow_sync_va(v, gva);
-
- if ( unlikely(__copy_from_user(&gpte,
- &linear_pg_table[gva >> PAGE_SHIFT],
- sizeof(gpte))) )
- {
- FSH_LOG("gva_to_gpte got a fault on gva=%lx", gva);
- return l1e_empty();
- }
-
- return gpte;
-}
-
-static inline unsigned long gva_to_gpa(unsigned long gva)
-{
- l1_pgentry_t gpte;
-
- gpte = gva_to_gpte(gva);
- if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
- return 0;
-
- return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK);
-}
-#endif
-
-static inline unsigned long gva_to_mfn(unsigned long gva)
-{
- unsigned long gpa = gva_to_gpa(gva);
- return get_mfn_from_gpfn(gpa >> PAGE_SHIFT);
-}
-
-/************************************************************************/
-
-extern void __update_pagetables(struct vcpu *v);
-static inline void update_pagetables(struct vcpu *v)
-{
- struct domain *d = v->domain;
- int paging_enabled;
-
- if ( hvm_guest(v) )
- paging_enabled = hvm_paging_enabled(v);
- else
- // HACK ALERT: there's currently no easy way to figure out if a domU
- // has set its arch.guest_table to zero, vs not yet initialized it.
- //
- paging_enabled = !!pagetable_get_paddr(v->arch.guest_table);
-
- /*
- * We don't call __update_pagetables() when hvm guest paging is
- * disabled as we want the linear_pg_table to be inaccessible so that
- * we bail out early of shadow_fault() if the hvm guest tries illegal
- * accesses while it thinks paging is turned off.
- */
- if ( unlikely(shadow_mode_enabled(d)) && paging_enabled )
- {
- shadow_lock(d);
- __update_pagetables(v);
- shadow_unlock(d);
- }
-
- if ( likely(!shadow_mode_external(d)) )
- {
- if ( shadow_mode_enabled(d) )
- v->arch.monitor_table = v->arch.shadow_table;
- else
-#if CONFIG_PAGING_LEVELS == 4
- if ( !(v->arch.flags & TF_kernel_mode) )
- v->arch.monitor_table = v->arch.guest_table_user;
- else
-#endif
- v->arch.monitor_table = v->arch.guest_table;
- }
-}
-
-void clear_all_shadow_status(struct domain *d);
-
-#if SHADOW_DEBUG
-extern int _check_pagetable(struct vcpu *v, char *s);
-extern int _check_all_pagetables(struct vcpu *v, char *s);
-
-#define check_pagetable(_v, _s) _check_pagetable(_v, _s)
-//#define check_pagetable(_v, _s) _check_all_pagetables(_v, _s)
-
-#else
-#define check_pagetable(_v, _s) ((void)0)
-#endif
-
-#endif /* XEN_SHADOW_H */
+#endif /* _XEN_SHADOW_H */
/*
* Local variables:
diff --git a/xen/include/asm-x86/shadow2-multi.h b/xen/include/asm-x86/shadow2-multi.h
new file mode 100644
index 0000000000..3b23a2f198
--- /dev/null
+++ b/xen/include/asm-x86/shadow2-multi.h
@@ -0,0 +1,116 @@
+/******************************************************************************
+ * include/asm-x86/shadow2-multi.h
+ *
+ * Shadow2 declarations which will be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+extern int
+SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl1e, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t gl1mfn, void *new_gl1p, u32 size);
+extern int
+SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2e, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
+extern int
+SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2he, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
+extern int
+SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl3e, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t gl3mfn, void *new_gl3p, u32 size);
+extern int
+SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl4e, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t gl4mfn, void *new_gl4p, u32 size);
+
+extern void
+SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t smfn);
+extern void
+SHADOW2_INTERNAL_NAME(sh2_destroy_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t smfn);
+extern void
+SHADOW2_INTERNAL_NAME(sh2_destroy_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t smfn);
+extern void
+SHADOW2_INTERNAL_NAME(sh2_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+ struct vcpu *v, mfn_t smfn);
+
+extern void
+SHADOW2_INTERNAL_NAME(sh2_unpin_all_l3_subshadows, 3, 3)
+ (struct vcpu *v, mfn_t smfn);
+
+extern void
+SHADOW2_INTERNAL_NAME(sh2_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl2mfn);
+extern void
+SHADOW2_INTERNAL_NAME(sh2_unhook_pae_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl3mfn);
+extern void
+SHADOW2_INTERNAL_NAME(sh2_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl4mfn);
+
+extern int
+SHADOW2_INTERNAL_NAME(sh2_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn);
+extern int
+SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
+
+extern void
+SHADOW2_INTERNAL_NAME(sh2_clear_shadow_entry, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, void *ep, mfn_t smfn);
+
+extern int
+SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn);
+extern int
+SHADOW2_INTERNAL_NAME(sh2_remove_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn);
+extern int
+SHADOW2_INTERNAL_NAME(sh2_remove_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn);
+
+#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES
+int
+SHADOW2_INTERNAL_NAME(sh2_audit_l1_table, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
+int
+SHADOW2_INTERNAL_NAME(sh2_audit_fl1_table, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
+int
+SHADOW2_INTERNAL_NAME(sh2_audit_l2_table, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl2mfn, mfn_t x);
+int
+SHADOW2_INTERNAL_NAME(sh2_audit_l3_table, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl3mfn, mfn_t x);
+int
+SHADOW2_INTERNAL_NAME(sh2_audit_l4_table, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t sl4mfn, mfn_t x);
+#endif
+
+#if SHADOW_LEVELS == GUEST_LEVELS
+extern mfn_t
+SHADOW2_INTERNAL_NAME(sh2_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v);
+extern void
+SHADOW2_INTERNAL_NAME(sh2_destroy_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
+ (struct vcpu *v, mfn_t mmfn);
+#endif
+
+extern struct shadow2_entry_points
+SHADOW2_INTERNAL_NAME(shadow2_entry, SHADOW_LEVELS, GUEST_LEVELS);
diff --git a/xen/include/asm-x86/shadow2-private.h b/xen/include/asm-x86/shadow2-private.h
new file mode 100644
index 0000000000..7b2ac57572
--- /dev/null
+++ b/xen/include/asm-x86/shadow2-private.h
@@ -0,0 +1,612 @@
+/******************************************************************************
+ * include/asm-x86/shadow2-private.h
+ *
+ * Shadow2 code that is private, and does not need to be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_SHADOW2_PRIVATE_H
+#define _XEN_SHADOW2_PRIVATE_H
+
+// In order to override the definition of mfn_to_page, we make sure page.h has
+// been included...
+#include <asm/page.h>
+#include <xen/domain_page.h>
+#include <asm/x86_emulate.h>
+#include <asm/hvm/support.h>
+
+
+/******************************************************************************
+ * Definitions for the use of the "available" bits in the shadow PTEs.
+ *
+ * Review of the low 12 bits of a shadow page table entry:
+ *
+ *          in a guest:      in a shadow:
+ * Bit 11:  _PAGE_AVAIL2, aka _PAGE_GNTTAB
+ * Bit 10:  _PAGE_AVAIL1     _PAGE_SHADOW_RW ("SW" below)
+ * Bit 9:   _PAGE_AVAIL0     _PAGE_SHADOW_PRESENT ("SP" below)
+ * Bit 8:   _PAGE_GLOBAL     _PAGE_SHADOW_MMIO ("MMIO" below),
+ *                           aka _PAGE_SHADOW_GUEST_NOT_PRESENT
+ * Bit 7: _PAGE_PSE, aka _PAGE_PAT
+ * Bit 6: _PAGE_DIRTY
+ * Bit 5: _PAGE_ACCESSED
+ * Bit 4: _PAGE_PCD
+ * Bit 3: _PAGE_PWT
+ * Bit 2: _PAGE_USER
+ * Bit 1: _PAGE_RW ("GW" below)
+ * Bit 0: _PAGE_PRESENT ("GP" below)
+ *
+ * Given a guest entry, as shown below, we can expect the following in the
+ * corresponding shadow entry:
+ *
+ * Guest entry      Shadow entry      Commentary
+ * -----------      ----------------  ---------------------------------------------
+ *       Maps
+ * GP GW IO         GP SP GW SW MMIO
+ * -- -- ----       -- -- -- -- ----
+ * -  -  -          0  0  0  0  0     The guest entry has not yet been shadowed.
+ * 0  -  -          0  0  0  0  1     The guest entry is marked not-present.
+ * 1  1  no         ?  1  ?  1  0     Writable entry in the guest.
+ * 1  0  no         ?  1  0  0  0     Read-only entry in the guest.
+ * 1  1  yes        0  1  ?  1  1     Writable MMIO mapping in the guest.
+ * 1  0  yes        0  1  0  0  1     Read-only MMIO mapping in the guest.
+ *
+ * Normally, we would expect GP=1 in the guest to imply GP=1 in the
+ * shadow, and similarly for GW=1. However, various functionality that may be
+ * implemented via the shadow can cause GP or GW to be cleared in such cases.
+ * A & D bit emulation is a prime example of such functionality.
+ *
+ * If _PAGE_SHADOW_PRESENT is zero, then the _PAGE_PRESENT bit in that same
+ * entry will always be zero, too.
+ *
+ * Bit 11 is used in debug builds as the _PAGE_GNTTAB bit in PV guests. It is
+ * currently available for random (ab)use in shadow entries.
+ *
+ * Bit 8 (the global bit) could be propagated from an HVM guest to the shadow,
+ * but currently there is no benefit, as the guest's TLB is flushed on every
+ * transition of CR3 anyway due to the HVM exit/re-entry.
+ *
+ * In shadow entries in which the _PAGE_SHADOW_PRESENT bit is set, bit 8 is used
+ * as the _PAGE_SHADOW_MMIO bit. In such entries, if _PAGE_SHADOW_MMIO is
+ * set, then the entry contains the *gfn* directly from the corresponding
+ * guest entry (not an mfn!!).
+ *
+ * Bit 7 is set in a guest L2 to signify a superpage entry. The current
+ * shadow code splinters superpage mappings into 512 or 1024 4K mappings; the
+ * resulting shadow L1 table is called an FL1. Note that there is no guest
+ * page that corresponds to an FL1.
+ *
+ * Bit 7 in a guest L1 is the PAT2 bit. Currently we do not support PAT in
+ * this shadow code.
+ *
+ * Bit 6 is the dirty bit.
+ *
+ * Bit 5 is the accessed bit.
+ *
+ * Bit 4 is the cache disable bit. If set in a guest, the hardware is
+ * supposed to refuse to cache anything found via this entry. It can be set
+ * in an L4e, L3e, L2e, or L1e. This shadow code currently does not support
+ * cache disable bits. They are silently ignored.
+ *
+ * Bit 4 in a guest L1 is also the PAT1 bit. Currently we do not support PAT
+ * in this shadow code.
+ *
+ * Bit 3 is the cache write-thru bit. If set in a guest, the hardware is
+ * supposed to use write-thru instead of write-back caching for anything found
+ * via this entry. It can be set in an L4e, L3e, L2e, or L1e. This shadow
+ * code currently does not support cache write-thru bits. They are silently
+ * ignored.
+ *
+ * Bit 3 in a guest L1 is also the PAT0 bit. Currently we do not support PAT
+ * in this shadow code.
+ *
+ * Bit 2 is the user bit.
+ *
+ * Bit 1 is the read-write bit.
+ *
+ * Bit 0 is the present bit.
+ */
+
+// Copy of the _PAGE_RW bit from the guest's PTE, cleared where the shadow
+// rules above require it.
+#define _PAGE_SHADOW_RW _PAGE_AVAIL1
+
+// Copy of the _PAGE_PRESENT bit from the guest's PTE
+#define _PAGE_SHADOW_PRESENT _PAGE_AVAIL0
+
+// The matching guest entry maps MMIO space
+#define _PAGE_SHADOW_MMIO _PAGE_GLOBAL
+
+// Shadow flags value used when the guest is not present
+#define _PAGE_SHADOW_GUEST_NOT_PRESENT _PAGE_GLOBAL
+
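As an illustration of the table above, here is a minimal standalone sketch (not part of this patch) of how the bookkeeping bits just defined relate to a guest entry's flags. The bit positions mirror the x86 layout described in the comment; the real propagation logic is in the multiply-compiled shadow2 code and additionally handles A/D-bit emulation, log-dirty mode, PAT and superpages.

/* Illustrative model only -- not the patch's propagation code. */
#include <stdint.h>

#define GPTE_PRESENT        (1u << 0)   /* "GP" */
#define GPTE_RW             (1u << 1)   /* "GW" */

#define SPTE_SHADOW_MMIO    (1u << 8)   /* _PAGE_GLOBAL, reused ("MMIO") */
#define SPTE_SHADOW_PRESENT (1u << 9)   /* _PAGE_AVAIL0, reused ("SP")   */
#define SPTE_SHADOW_RW      (1u << 10)  /* _PAGE_AVAIL1, reused ("SW")   */

/* Predict the shadow bookkeeping bits for a shadowed guest entry.  The
 * hardware GP/GW bits of the shadow are chosen separately (they may be
 * cleared, e.g. for A/D-bit emulation), so only SP/SW/MMIO are modelled. */
static uint32_t model_shadow_bits(uint32_t gflags, int maps_mmio)
{
    if ( !(gflags & GPTE_PRESENT) )
        return SPTE_SHADOW_MMIO;    /* == _PAGE_SHADOW_GUEST_NOT_PRESENT */

    return SPTE_SHADOW_PRESENT
         | ((gflags & GPTE_RW) ? SPTE_SHADOW_RW : 0)
         | (maps_mmio ? SPTE_SHADOW_MMIO : 0);
}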
+
+/******************************************************************************
+ * Debug and error-message output
+ */
+#define SHADOW2_PRINTK(_f, _a...) \
+ debugtrace_printk("sh2: %s(): " _f, __func__, ##_a)
+#define SHADOW2_ERROR(_f, _a...) \
+ printk("sh2 error: %s(): " _f, __func__, ##_a)
+#define SHADOW2_DEBUG(flag, _f, _a...) \
+ do { \
+ if (SHADOW2_DEBUG_ ## flag) \
+ debugtrace_printk("sh2debug: %s(): " _f, __func__, ##_a); \
+ } while (0)
+
+// The flags for use with SHADOW2_DEBUG:
+#define SHADOW2_DEBUG_PROPAGATE 0
+#define SHADOW2_DEBUG_MAKE_SHADOW 0
+#define SHADOW2_DEBUG_DESTROY_SHADOW 0
+#define SHADOW2_DEBUG_P2M 0
+#define SHADOW2_DEBUG_A_AND_D 0
+#define SHADOW2_DEBUG_EMULATE 0
+#define SHADOW2_DEBUG_LOGDIRTY 1
+
+
+/******************************************************************************
+ * Auditing routines
+ */
+
+#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_FULL
+extern void shadow2_audit_tables(struct vcpu *v);
+#else
+#define shadow2_audit_tables(_v) do {} while(0)
+#endif
+
+#if SHADOW2_AUDIT & SHADOW2_AUDIT_P2M
+extern void shadow2_audit_p2m(struct domain *d);
+#else
+#define shadow2_audit_p2m(_d) do {} while(0)
+#endif
+
+
+/******************************************************************************
+ * Mechanism for double-checking the optimized pagefault path: this
+ * structure contains a record of actions taken by the fault handling
+ * code. In paranoid mode, the fast-path code fills out one of these
+ * structures (but doesn't take any actual action) and then the normal
+ * path fills in another. When the fault handler finishes, the
+ * two are compared */
+
+#ifdef SHADOW2_OPTIMIZATION_PARANOIA
+
+typedef struct shadow2_action_log sh2_log_t;
+struct shadow2_action_log {
+ paddr_t ad[CONFIG_PAGING_LEVELS]; /* A & D bits propagated here */
+ paddr_t mmio; /* Address of an mmio operation */
+ int rv; /* Result of the fault handler */
+};
+
+/* There are two logs, one for the fast path, one for the normal path */
+enum sh2_log_type { log_slow = 0, log_fast = 1 };
+
+/* Alloc and zero the logs */
+static inline void sh2_init_log(struct vcpu *v)
+{
+ if ( unlikely(!v->arch.shadow2_action_log) )
+ v->arch.shadow2_action_log = xmalloc_array(sh2_log_t, 2);
+ ASSERT(v->arch.shadow2_action_log);
+ memset(v->arch.shadow2_action_log, 0, 2 * sizeof (sh2_log_t));
+}
+
+/* Log an A&D-bit update */
+static inline void sh2_log_ad(struct vcpu *v, paddr_t e, unsigned int level)
+{
+ v->arch.shadow2_action_log[v->arch.shadow2_action_index].ad[level] = e;
+}
+
+/* Log an MMIO address */
+static inline void sh2_log_mmio(struct vcpu *v, paddr_t m)
+{
+ v->arch.shadow2_action_log[v->arch.shadow2_action_index].mmio = m;
+}
+
+/* Log the result */
+static inline void sh2_log_rv(struct vcpu *v, int rv)
+{
+ v->arch.shadow2_action_log[v->arch.shadow2_action_index].rv = rv;
+}
+
+/* Set which mode we're in */
+static inline void sh2_set_log_mode(struct vcpu *v, enum sh2_log_type t)
+{
+ v->arch.shadow2_action_index = t;
+}
+
+/* Whether to skip taking action, because we're only checking the mechanism */
+static inline int sh2_take_no_action(struct vcpu *v)
+{
+ return (v->arch.shadow2_action_index == log_fast);
+}
+
+#else /* Non-paranoid mode: these logs do not exist */
+
+#define sh2_init_log(_v) do { (void)(_v); } while(0)
+#define sh2_set_log_mode(_v,_t) do { (void)(_v); } while(0)
+#define sh2_log_ad(_v,_e,_l) do { (void)(_v),(void)(_e),(void)(_l); } while (0)
+#define sh2_log_mmio(_v,_m) do { (void)(_v),(void)(_m); } while (0)
+#define sh2_log_rv(_v,_r) do { (void)(_v),(void)(_r); } while (0)
+#define sh2_take_no_action(_v) (((void)(_v)), 0)
+
+#endif /* SHADOW2_OPTIMIZATION_PARANOIA */
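The two logs are meant to be filled in back to back by the fault handler and then compared. A minimal sketch of that flow follows; the handler shape and the names sh2_fast_fault_check() and sh2_fault_slow_path() are hypothetical and not part of this patch.

/* Hypothetical sketch -- the two fault-path helpers named below do not
 * exist in this patch; only the logging helpers above are real. */
static int sh2_paranoid_fault(struct vcpu *v, unsigned long va, int error_code)
{
    sh2_log_t *logs;
    int rv;

    sh2_init_log(v);

    /* Dry-run the optimized path: it records what it would do but, because
     * sh2_take_no_action() returns true, takes no real action. */
    sh2_set_log_mode(v, log_fast);
    sh2_log_rv(v, sh2_fast_fault_check(v, va, error_code));

    /* The normal path does the real work, recording the same events. */
    sh2_set_log_mode(v, log_slow);
    rv = sh2_fault_slow_path(v, va, error_code);
    sh2_log_rv(v, rv);

    /* If the fast path is correct, the two records agree. */
    logs = v->arch.shadow2_action_log;
    ASSERT(logs[log_fast].rv == logs[log_slow].rv);
    ASSERT(logs[log_fast].mmio == logs[log_slow].mmio);

    return rv;
}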
+
+
+/******************************************************************************
+ * Macro for dealing with the naming of the internal names of the
+ * shadow code's external entry points.
+ */
+#define SHADOW2_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels) \
+ name ## __shadow_ ## shadow_levels ## _guest_ ## guest_levels
+#define SHADOW2_INTERNAL_NAME(name, shadow_levels, guest_levels) \
+ SHADOW2_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels)
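For example, building with SHADOW_LEVELS defined as 3 and GUEST_LEVELS as 2, a declaration from shadow2-multi.h resolves as shown below; the extra _HIDDEN level exists so that SHADOW_LEVELS and GUEST_LEVELS are expanded to their numeric values before the token paste happens.

SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)
    /* -> SHADOW2_INTERNAL_NAME_HIDDEN(sh2_destroy_l1_shadow, 3, 2) */
    /* -> sh2_destroy_l1_shadow__shadow_3_guest_2                   */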
+
+#if CONFIG_PAGING_LEVELS == 2
+#define GUEST_LEVELS 2
+#define SHADOW_LEVELS 2
+#include <asm/shadow2-multi.h>
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+#endif /* CONFIG_PAGING_LEVELS == 2 */
+
+#if CONFIG_PAGING_LEVELS == 3
+#define GUEST_LEVELS 2
+#define SHADOW_LEVELS 3
+#include <asm/shadow2-multi.h>
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 3
+#define SHADOW_LEVELS 3
+#include <asm/shadow2-multi.h>
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+#endif /* CONFIG_PAGING_LEVELS == 3 */
+
+#if CONFIG_PAGING_LEVELS == 4
+#define GUEST_LEVELS 2
+#define SHADOW_LEVELS 3
+#include <asm/shadow2-multi.h>
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 3
+#define SHADOW_LEVELS 3
+#include <asm/shadow2-multi.h>
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 3
+#define SHADOW_LEVELS 4
+#include <asm/shadow2-multi.h>
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+
+#define GUEST_LEVELS 4
+#define SHADOW_LEVELS 4
+#include <asm/shadow2-multi.h>
+#undef GUEST_LEVELS
+#undef SHADOW_LEVELS
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+
+/******************************************************************************
+ * Various function declarations
+ */
+
+/* x86 emulator support */
+extern struct x86_emulate_ops shadow2_emulator_ops;
+
+/* Hash table functions */
+mfn_t shadow2_hash_lookup(struct vcpu *v, unsigned long n, u8 t);
+void shadow2_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn);
+void shadow2_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn);
+
+/* shadow promotion */
+void shadow2_promote(struct vcpu *v, mfn_t gmfn, u32 type);
+void shadow2_demote(struct vcpu *v, mfn_t gmfn, u32 type);
+
+/* Shadow page allocation functions */
+void shadow2_prealloc(struct domain *d, unsigned int order);
+mfn_t shadow2_alloc(struct domain *d,
+ u32 shadow_type,
+ unsigned long backpointer);
+void shadow2_free(struct domain *d, mfn_t smfn);
+
+/* Function to convert a shadow to log-dirty */
+void shadow2_convert_to_log_dirty(struct vcpu *v, mfn_t smfn);
+
+/* Dispatcher function: call the per-mode function that will unhook the
+ * non-Xen mappings in this top-level shadow mfn */
+void shadow2_unhook_mappings(struct vcpu *v, mfn_t smfn);
+
+/* Re-sync copies of PAE shadow L3 tables if they have been changed */
+void sh2_pae_recopy(struct domain *d);
+
+/* Install the xen mappings in various flavours of shadow */
+void sh2_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
+void sh2_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
+void sh2_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
+void sh2_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
+
+
+/******************************************************************************
+ * MFN/page-info handling
+ */
+
+// Override mfn_to_page from asm/page.h, which was #include'd above,
+// in order to make it work with our mfn type.
+#undef mfn_to_page
+#define mfn_to_page(_mfn) (frame_table + mfn_x(_mfn))
+
+// Override page_to_mfn from asm/page.h, which was #include'd above,
+// in order to make it work with our mfn type.
+#undef page_to_mfn
+#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+
+// Override mfn_valid from asm/page.h, which was #include'd above,
+// in order to make it work with our mfn type.
+#undef mfn_valid
+#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+
+// Provide mfn_t-aware versions of common xen functions
+static inline void *
+sh2_map_domain_page(mfn_t mfn)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ return map_domain_page(mfn_x(mfn));
+}
+
+static inline void
+sh2_unmap_domain_page(void *p)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ unmap_domain_page(p);
+}
+
+static inline void *
+sh2_map_domain_page_global(mfn_t mfn)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ return map_domain_page_global(mfn_x(mfn));
+}
+
+static inline void
+sh2_unmap_domain_page_global(void *p)
+{
+ /* XXX Using the monitor-table as a map will happen here */
+ unmap_domain_page_global(p);
+}
+
+static inline int
+sh2_mfn_is_dirty(struct domain *d, mfn_t gmfn)
+/* Is this guest page dirty? Call only in log-dirty mode. */
+{
+ unsigned long pfn;
+ ASSERT(shadow2_mode_log_dirty(d));
+ ASSERT(d->arch.shadow_dirty_bitmap != NULL);
+
+ /* We /really/ mean PFN here, even for non-translated guests. */
+ pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+ if ( likely(VALID_M2P(pfn))
+ && likely(pfn < d->arch.shadow_dirty_bitmap_size)
+ && test_bit(pfn, d->arch.shadow_dirty_bitmap) )
+ return 1;
+
+ return 0;
+}
+
+static inline int
+sh2_mfn_is_a_page_table(mfn_t gmfn)
+{
+ struct page_info *page = mfn_to_page(gmfn);
+ struct domain *owner;
+ unsigned long type_info;
+
+ if ( !valid_mfn(gmfn) )
+ return 0;
+
+ owner = page_get_owner(page);
+ if ( owner && shadow2_mode_refcounts(owner)
+ && (page->count_info & PGC_page_table) )
+ return 1;
+
+ type_info = page->u.inuse.type_info & PGT_type_mask;
+ return type_info && (type_info <= PGT_l4_page_table);
+}
+
+
+/**************************************************************************/
+/* Shadow-page refcounting. See comment in shadow2-common.c about the
+ * use of struct page_info fields for shadow pages */
+
+void sh2_destroy_shadow(struct vcpu *v, mfn_t smfn);
+
+/* Increase the refcount of a shadow page. Arguments are the mfn to refcount,
+ * and the physical address of the shadow entry that holds the ref (or zero
+ * if the ref is held by something else) */
+static inline void sh2_get_ref(mfn_t smfn, paddr_t entry_pa)
+{
+ u32 x, nx;
+ struct page_info *page = mfn_to_page(smfn);
+
+ ASSERT(mfn_valid(smfn));
+
+ x = page->count_info & PGC_SH2_count_mask;
+ nx = x + 1;
+
+ if ( unlikely(nx & ~PGC_SH2_count_mask) )
+ {
+ SHADOW2_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
+ page->u.inuse.type_info, mfn_x(smfn));
+ domain_crash_synchronous();
+ }
+
+ /* Guarded by the shadow lock, so no need for atomic update */
+ page->count_info &= ~PGC_SH2_count_mask;
+ page->count_info |= nx;
+
+ /* We remember the first shadow entry that points to each shadow. */
+ if ( entry_pa != 0 && page->up == 0 )
+ page->up = entry_pa;
+}
+
+
+/* Decrease the refcount of a shadow page. As for get_ref, takes the
+ * physical address of the shadow entry that held this reference. */
+static inline void sh2_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
+{
+ u32 x, nx;
+ struct page_info *page = mfn_to_page(smfn);
+
+ ASSERT(mfn_valid(smfn));
+ ASSERT(page_get_owner(page) == NULL);
+
+ /* If this is the entry in the up-pointer, remove it */
+ if ( entry_pa != 0 && page->up == entry_pa )
+ page->up = 0;
+
+ x = page->count_info & PGC_SH2_count_mask;
+ nx = x - 1;
+
+ if ( unlikely(x == 0) )
+ {
+ SHADOW2_PRINTK("shadow ref underflow, smfn=%lx oc=%08x t=%"
+ PRtype_info "\n",
+ mfn_x(smfn),
+ page->count_info & PGC_SH2_count_mask,
+ page->u.inuse.type_info);
+ domain_crash_synchronous();
+ }
+
+ /* Guarded by the shadow lock, so no need for atomic update */
+ page->count_info &= ~PGC_SH2_count_mask;
+ page->count_info |= nx;
+
+ if ( unlikely(nx == 0) )
+ sh2_destroy_shadow(v, smfn);
+}
+
+
+/* Pin a shadow page: take an extra refcount and set the pin bit. */
+static inline void sh2_pin(mfn_t smfn)
+{
+ struct page_info *page;
+
+ ASSERT(mfn_valid(smfn));
+ page = mfn_to_page(smfn);
+ if ( !(page->count_info & PGC_SH2_pinned) )
+ {
+ sh2_get_ref(smfn, 0);
+ page->count_info |= PGC_SH2_pinned;
+ }
+}
+
+/* Unpin a shadow page: unset the pin bit and release the extra ref. */
+static inline void sh2_unpin(struct vcpu *v, mfn_t smfn)
+{
+ struct page_info *page;
+
+ ASSERT(mfn_valid(smfn));
+ page = mfn_to_page(smfn);
+ if ( page->count_info & PGC_SH2_pinned )
+ {
+ page->count_info &= ~PGC_SH2_pinned;
+ sh2_put_ref(v, smfn, 0);
+ }
+}
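In other words, the pin is just an extra reference recorded with entry_pa == 0, so a pinned top-level shadow stays allocated even when no shadow entry points at it. A small hypothetical illustration (not code from this patch):

/* Hypothetical illustration of the pin reference-counting semantics. */
static void example_pin_cycle(struct vcpu *v, mfn_t smfn)
{
    sh2_pin(smfn);        /* refcount +1, PGC_SH2_pinned set              */
    sh2_pin(smfn);        /* no-op: the pin bit is already set            */
    sh2_unpin(v, smfn);   /* pin bit cleared, refcount -1; if that was the
                           * last reference, sh2_put_ref() ends up calling
                           * sh2_destroy_shadow(v, smfn).                  */
}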
+
+/**************************************************************************/
+/* CPU feature support querying */
+
+static inline int
+guest_supports_superpages(struct vcpu *v)
+{
+ return hvm_guest(v) && (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE);
+}
+
+static inline int
+guest_supports_nx(struct vcpu *v)
+{
+ if ( !hvm_guest(v) )
+ return cpu_has_nx;
+
+ // XXX - fix this!
+ return 1;
+}
+
+/**************************************************************************/
+/* Guest physmap (p2m) support */
+
+/* Read our own P2M table, checking in the linear pagetables first to be
+ * sure that we will succeed. Call this function if you expect it to
+ * fail often, as it avoids page faults. If you expect to succeed, use
+ * vcpu_gfn_to_mfn, which copy_from_user()s the entry */
+static inline mfn_t
+vcpu_gfn_to_mfn_nofault(struct vcpu *v, unsigned long gfn)
+{
+ unsigned long entry_addr = (unsigned long) &phys_to_machine_mapping[gfn];
+#if CONFIG_PAGING_LEVELS >= 4
+ l4_pgentry_t *l4e;
+ l3_pgentry_t *l3e;
+#endif
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ ASSERT(current == v);
+ if ( !shadow2_vcpu_mode_translate(v) )
+ return _mfn(gfn);
+
+#if CONFIG_PAGING_LEVELS > 2
+ if ( gfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) )
+ /* This gfn is higher than the p2m map can hold */
+ return _mfn(INVALID_MFN);
+#endif
+
+ /* Walk the linear pagetables. Note that this is *not* the same as
+ * the walk in sh2_gfn_to_mfn_foreign, which is walking the p2m map */
+#if CONFIG_PAGING_LEVELS >= 4
+ l4e = __linear_l4_table + l4_linear_offset(entry_addr);
+ if ( !(l4e_get_flags(*l4e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+ l3e = __linear_l3_table + l3_linear_offset(entry_addr);
+ if ( !(l3e_get_flags(*l3e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+#endif
+ l2e = __linear_l2_table + l2_linear_offset(entry_addr);
+ if ( !(l2e_get_flags(*l2e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+ l1e = __linear_l1_table + l1_linear_offset(entry_addr);
+ if ( !(l1e_get_flags(*l1e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+
+ /* Safe to look at this part of the table */
+ if ( l1e_get_flags(phys_to_machine_mapping[gfn]) & _PAGE_PRESENT )
+ return _mfn(l1e_get_pfn(phys_to_machine_mapping[gfn]));
+
+ return _mfn(INVALID_MFN);
+}
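A sketch of the intended usage (the caller shape is illustrative, not from this patch): probe the p2m on a path where misses are expected and cheap, and fall back to the faulting lookup otherwise.

    /* Illustrative caller: cheap, non-faulting probe of the p2m. */
    mfn_t mfn = vcpu_gfn_to_mfn_nofault(v, gfn);
    if ( mfn_x(mfn) == INVALID_MFN )
        return 0;          /* give up, or retry with vcpu_gfn_to_mfn() */
    /* ... use mfn ... */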
+
+
+#endif /* _XEN_SHADOW2_PRIVATE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/shadow2-types.h b/xen/include/asm-x86/shadow2-types.h
new file mode 100644
index 0000000000..f593c97822
--- /dev/null
+++ b/xen/include/asm-x86/shadow2-types.h
@@ -0,0 +1,705 @@
+/******************************************************************************
+ * include/asm-x86/shadow2-types.h
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_SHADOW2_TYPES_H
+#define _XEN_SHADOW2_TYPES_H
+
+// Map a shadow page
+static inline void *
+map_shadow_page(mfn_t smfn)
+{
+ // XXX -- Possible optimization/measurement question for 32-bit and PAE
+ // hypervisors:
+ // How often is this smfn already available in the shadow linear
+ // table? Might it be worth checking that table first,
+ // presumably using the reverse map hint in the page_info of this
+ // smfn, rather than calling map_domain_page()?
+ //
+ return sh2_map_domain_page(smfn);
+}
+
+// matching unmap for map_shadow_page()
+static inline void
+unmap_shadow_page(void *p)
+{
+ sh2_unmap_domain_page(p);
+}
+
+/*
+ * Define various types for handling pagetables, based on these options:
+ * SHADOW_PAGING_LEVELS : Number of levels of shadow pagetables
+ * GUEST_PAGING_LEVELS : Number of levels of guest pagetables
+ */
+
+#if (CONFIG_PAGING_LEVELS < SHADOW_PAGING_LEVELS)
+#error Cannot have more levels of shadow pagetables than host pagetables
+#endif
+
+#if (SHADOW_PAGING_LEVELS < GUEST_PAGING_LEVELS)
+#error Cannot have more levels of guest pagetables than shadow pagetables
+#endif
+
+#if SHADOW_PAGING_LEVELS == 2
+#define SHADOW_L1_PAGETABLE_ENTRIES 1024
+#define SHADOW_L2_PAGETABLE_ENTRIES 1024
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 22
+#endif
+
+#if SHADOW_PAGING_LEVELS == 3
+#define SHADOW_L1_PAGETABLE_ENTRIES 512
+#define SHADOW_L2_PAGETABLE_ENTRIES 512
+#define SHADOW_L3_PAGETABLE_ENTRIES 4
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 21
+#define SHADOW_L3_PAGETABLE_SHIFT 30
+#endif
+
+#if SHADOW_PAGING_LEVELS == 4
+#define SHADOW_L1_PAGETABLE_ENTRIES 512
+#define SHADOW_L2_PAGETABLE_ENTRIES 512
+#define SHADOW_L3_PAGETABLE_ENTRIES 512
+#define SHADOW_L4_PAGETABLE_ENTRIES 512
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 21
+#define SHADOW_L3_PAGETABLE_SHIFT 30
+#define SHADOW_L4_PAGETABLE_SHIFT 39
+#endif
+
+/* Types of the shadow page tables */
+typedef l1_pgentry_t shadow_l1e_t;
+typedef l2_pgentry_t shadow_l2e_t;
+#if SHADOW_PAGING_LEVELS >= 3
+typedef l3_pgentry_t shadow_l3e_t;
+#if SHADOW_PAGING_LEVELS >= 4
+typedef l4_pgentry_t shadow_l4e_t;
+#endif
+#endif
+
+/* Access functions for them */
+static inline paddr_t shadow_l1e_get_paddr(shadow_l1e_t sl1e)
+{ return l1e_get_paddr(sl1e); }
+static inline paddr_t shadow_l2e_get_paddr(shadow_l2e_t sl2e)
+{ return l2e_get_paddr(sl2e); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline paddr_t shadow_l3e_get_paddr(shadow_l3e_t sl3e)
+{ return l3e_get_paddr(sl3e); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline paddr_t shadow_l4e_get_paddr(shadow_l4e_t sl4e)
+{ return l4e_get_paddr(sl4e); }
+#endif
+#endif
+
+static inline mfn_t shadow_l1e_get_mfn(shadow_l1e_t sl1e)
+{ return _mfn(l1e_get_pfn(sl1e)); }
+static inline mfn_t shadow_l2e_get_mfn(shadow_l2e_t sl2e)
+{ return _mfn(l2e_get_pfn(sl2e)); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline mfn_t shadow_l3e_get_mfn(shadow_l3e_t sl3e)
+{ return _mfn(l3e_get_pfn(sl3e)); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline mfn_t shadow_l4e_get_mfn(shadow_l4e_t sl4e)
+{ return _mfn(l4e_get_pfn(sl4e)); }
+#endif
+#endif
+
+static inline u32 shadow_l1e_get_flags(shadow_l1e_t sl1e)
+{ return l1e_get_flags(sl1e); }
+static inline u32 shadow_l2e_get_flags(shadow_l2e_t sl2e)
+{ return l2e_get_flags(sl2e); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline u32 shadow_l3e_get_flags(shadow_l3e_t sl3e)
+{ return l3e_get_flags(sl3e); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline u32 shadow_l4e_get_flags(shadow_l4e_t sl4e)
+{ return l4e_get_flags(sl4e); }
+#endif
+#endif
+
+static inline shadow_l1e_t
+shadow_l1e_remove_flags(shadow_l1e_t sl1e, u32 flags)
+{ l1e_remove_flags(sl1e, flags); return sl1e; }
+
+static inline shadow_l1e_t shadow_l1e_empty(void)
+{ return l1e_empty(); }
+static inline shadow_l2e_t shadow_l2e_empty(void)
+{ return l2e_empty(); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline shadow_l3e_t shadow_l3e_empty(void)
+{ return l3e_empty(); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline shadow_l4e_t shadow_l4e_empty(void)
+{ return l4e_empty(); }
+#endif
+#endif
+
+static inline shadow_l1e_t shadow_l1e_from_mfn(mfn_t mfn, u32 flags)
+{ return l1e_from_pfn(mfn_x(mfn), flags); }
+static inline shadow_l2e_t shadow_l2e_from_mfn(mfn_t mfn, u32 flags)
+{ return l2e_from_pfn(mfn_x(mfn), flags); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline shadow_l3e_t shadow_l3e_from_mfn(mfn_t mfn, u32 flags)
+{ return l3e_from_pfn(mfn_x(mfn), flags); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
+{ return l4e_from_pfn(mfn_x(mfn), flags); }
+#endif
+#endif
+
+#define shadow_l1_table_offset(a) l1_table_offset(a)
+#define shadow_l2_table_offset(a) l2_table_offset(a)
+#define shadow_l3_table_offset(a) l3_table_offset(a)
+#define shadow_l4_table_offset(a) l4_table_offset(a)
+
+/**************************************************************************/
+/* Access to the linear mapping of shadow page tables. */
+
+/* Offsets into each level of the linear mapping for a virtual address. */
+#define shadow_l1_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L1_PAGETABLE_SHIFT)
+#define shadow_l2_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L2_PAGETABLE_SHIFT)
+#define shadow_l3_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L3_PAGETABLE_SHIFT)
+#define shadow_l4_linear_offset(_a) \
+ (((_a) & VADDR_MASK) >> SHADOW_L4_PAGETABLE_SHIFT)
+
+/* Where to find each level of the linear mapping. For PV guests, we use
+ * the shadow linear-map self-entry as many times as we need. For HVM
+ * guests, the shadow doesn't have a linear-map self-entry so we must use
+ * the monitor-table's linear-map entry N-1 times and then the shadow-map
+ * entry once. */
+#define __sh2_linear_l1_table ((shadow_l1e_t *)(SH_LINEAR_PT_VIRT_START))
+#define __sh2_linear_l2_table ((shadow_l2e_t *) \
+ (__sh2_linear_l1_table + shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)))
+
+// shadow linear L3 and L4 tables only exist in 4 level paging...
+#if SHADOW_PAGING_LEVELS == 4
+#define __sh2_linear_l3_table ((shadow_l3e_t *) \
+ (__sh2_linear_l2_table + shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START)))
+#define __sh2_linear_l4_table ((shadow_l4e_t *) \
+ (__sh2_linear_l3_table + shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START)))
+#endif
+
+#define sh2_linear_l1_table(v) ({ \
+ ASSERT(current == (v)); \
+ __sh2_linear_l1_table; \
+})
+
+#define sh2_linear_l2_table(v) ({ \
+ ASSERT(current == (v)); \
+ ((shadow_l2e_t *) \
+ (hvm_guest(v) ? __linear_l1_table : __sh2_linear_l1_table) + \
+ shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
+})
+
+// shadow linear L3 and L4 tables only exist in 4 level paging...
+#if SHADOW_PAGING_LEVELS == 4
+#define sh2_linear_l3_table(v) ({ \
+ ASSERT(current == (v)); \
+ ((shadow_l3e_t *) \
+ (hvm_guest(v) ? __linear_l2_table : __sh2_linear_l2_table) + \
+ shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START)); \
+})
+
+// we use l4_pgentry_t instead of shadow_l4e_t below because shadow_l4e_t is
+// not defined for when xen_levels==4 & shadow_levels==3...
+#define sh2_linear_l4_table(v) ({ \
+ ASSERT(current == (v)); \
+ ((l4_pgentry_t *) \
+ (hvm_guest(v) ? __linear_l3_table : __sh2_linear_l3_table) + \
+ shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START)); \
+})
+#endif
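+
+/* For illustration: with the linear maps above, the shadow l1e covering a
+ * virtual address can be read without an explicit table walk, roughly
+ *     shadow_l1e_t sl1e = sh2_linear_l1_table(v)[shadow_l1_linear_offset(va)];
+ * (a sketch only: it assumes the relevant shadow entries are present and
+ * that this vcpu's tables are the ones currently loaded). */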
+
+#if GUEST_PAGING_LEVELS == 2
+
+#include <asm/page-guest32.h>
+
+#define GUEST_L1_PAGETABLE_ENTRIES 1024
+#define GUEST_L2_PAGETABLE_ENTRIES 1024
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 22
+
+/* Type of the guest's frame numbers */
+TYPE_SAFE(u32,gfn)
+#define INVALID_GFN ((u32)(-1u))
+#define SH2_PRI_gfn "05x"
+
+/* Types of the guest's page tables */
+typedef l1_pgentry_32_t guest_l1e_t;
+typedef l2_pgentry_32_t guest_l2e_t;
+
+/* Access functions for them */
+static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
+{ return l1e_get_paddr_32(gl1e); }
+static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
+{ return l2e_get_paddr_32(gl2e); }
+
+static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
+{ return _gfn(l1e_get_paddr_32(gl1e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
+{ return _gfn(l2e_get_paddr_32(gl2e) >> PAGE_SHIFT); }
+
+static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
+{ return l1e_get_flags_32(gl1e); }
+static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
+{ return l2e_get_flags_32(gl2e); }
+
+static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
+{ l1e_add_flags_32(gl1e, flags); return gl1e; }
+static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
+{ l2e_add_flags_32(gl2e, flags); return gl2e; }
+
+static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
+{ return l1e_from_pfn_32(gfn_x(gfn), flags); }
+static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
+{ return l2e_from_pfn_32(gfn_x(gfn), flags); }
+
+#define guest_l1_table_offset(a) l1_table_offset_32(a)
+#define guest_l2_table_offset(a) l2_table_offset_32(a)
+
+/* The shadow types needed for the various levels. */
+#define PGC_SH2_l1_shadow PGC_SH2_l1_32_shadow
+#define PGC_SH2_l2_shadow PGC_SH2_l2_32_shadow
+#define PGC_SH2_fl1_shadow PGC_SH2_fl1_32_shadow
+
+#else /* GUEST_PAGING_LEVELS != 2 */
+
+#if GUEST_PAGING_LEVELS == 3
+#define GUEST_L1_PAGETABLE_ENTRIES 512
+#define GUEST_L2_PAGETABLE_ENTRIES 512
+#define GUEST_L3_PAGETABLE_ENTRIES 4
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 21
+#define GUEST_L3_PAGETABLE_SHIFT 30
+#else /* GUEST_PAGING_LEVELS == 4 */
+#define GUEST_L1_PAGETABLE_ENTRIES 512
+#define GUEST_L2_PAGETABLE_ENTRIES 512
+#define GUEST_L3_PAGETABLE_ENTRIES 512
+#define GUEST_L4_PAGETABLE_ENTRIES 512
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 21
+#define GUEST_L3_PAGETABLE_SHIFT 30
+#define GUEST_L4_PAGETABLE_SHIFT 39
+#endif
+
+/* Type of the guest's frame numbers */
+TYPE_SAFE(unsigned long,gfn)
+#define INVALID_GFN ((unsigned long)(-1ul))
+#define SH2_PRI_gfn "05lx"
+
+/* Types of the guest's page tables */
+typedef l1_pgentry_t guest_l1e_t;
+typedef l2_pgentry_t guest_l2e_t;
+typedef l3_pgentry_t guest_l3e_t;
+#if GUEST_PAGING_LEVELS >= 4
+typedef l4_pgentry_t guest_l4e_t;
+#endif
+
+/* Access functions for them */
+static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
+{ return l1e_get_paddr(gl1e); }
+static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
+{ return l2e_get_paddr(gl2e); }
+static inline paddr_t guest_l3e_get_paddr(guest_l3e_t gl3e)
+{ return l3e_get_paddr(gl3e); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline paddr_t guest_l4e_get_paddr(guest_l4e_t gl4e)
+{ return l4e_get_paddr(gl4e); }
+#endif
+
+static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
+{ return _gfn(l1e_get_paddr(gl1e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
+{ return _gfn(l2e_get_paddr(gl2e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l3e_get_gfn(guest_l3e_t gl3e)
+{ return _gfn(l3e_get_paddr(gl3e) >> PAGE_SHIFT); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline gfn_t guest_l4e_get_gfn(guest_l4e_t gl4e)
+{ return _gfn(l4e_get_paddr(gl4e) >> PAGE_SHIFT); }
+#endif
+
+static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
+{ return l1e_get_flags(gl1e); }
+static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
+{ return l2e_get_flags(gl2e); }
+static inline u32 guest_l3e_get_flags(guest_l3e_t gl3e)
+{ return l3e_get_flags(gl3e); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e)
+{ return l4e_get_flags(gl4e); }
+#endif
+
+static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
+{ l1e_add_flags(gl1e, flags); return gl1e; }
+static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
+{ l2e_add_flags(gl2e, flags); return gl2e; }
+static inline guest_l3e_t guest_l3e_add_flags(guest_l3e_t gl3e, u32 flags)
+{ l3e_add_flags(gl3e, flags); return gl3e; }
+#if GUEST_PAGING_LEVELS >= 4
+static inline guest_l4e_t guest_l4e_add_flags(guest_l4e_t gl4e, u32 flags)
+{ l4e_add_flags(gl4e, flags); return gl4e; }
+#endif
+
+static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
+{ return l1e_from_pfn(gfn_x(gfn), flags); }
+static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
+{ return l2e_from_pfn(gfn_x(gfn), flags); }
+static inline guest_l3e_t guest_l3e_from_gfn(gfn_t gfn, u32 flags)
+{ return l3e_from_pfn(gfn_x(gfn), flags); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
+{ return l4e_from_pfn(gfn_x(gfn), flags); }
+#endif
+
+#define guest_l1_table_offset(a) l1_table_offset(a)
+#define guest_l2_table_offset(a) l2_table_offset(a)
+#define guest_l3_table_offset(a) l3_table_offset(a)
+#define guest_l4_table_offset(a) l4_table_offset(a)
+
+/* The shadow types needed for the various levels. */
+#if GUEST_PAGING_LEVELS == 3
+#define PGC_SH2_l1_shadow PGC_SH2_l1_pae_shadow
+#define PGC_SH2_fl1_shadow PGC_SH2_fl1_pae_shadow
+#define PGC_SH2_l2_shadow PGC_SH2_l2_pae_shadow
+#define PGC_SH2_l2h_shadow PGC_SH2_l2h_pae_shadow
+#define PGC_SH2_l3_shadow PGC_SH2_l3_pae_shadow
+#else
+#define PGC_SH2_l1_shadow PGC_SH2_l1_64_shadow
+#define PGC_SH2_fl1_shadow PGC_SH2_fl1_64_shadow
+#define PGC_SH2_l2_shadow PGC_SH2_l2_64_shadow
+#define PGC_SH2_l3_shadow PGC_SH2_l3_64_shadow
+#define PGC_SH2_l4_shadow PGC_SH2_l4_64_shadow
+#endif
+
+#endif /* GUEST_PAGING_LEVELS != 2 */
+
+#define VALID_GFN(m) ((m) != INVALID_GFN)
+
+static inline int
+valid_gfn(gfn_t m)
+{
+ return VALID_GFN(gfn_x(m));
+}
+
+#if GUEST_PAGING_LEVELS == 2
+#define PGC_SH2_guest_root_type PGC_SH2_l2_32_shadow
+#elif GUEST_PAGING_LEVELS == 3
+#define PGC_SH2_guest_root_type PGC_SH2_l3_pae_shadow
+#else
+#define PGC_SH2_guest_root_type PGC_SH2_l4_64_shadow
+#endif
+
+/* Translation between mfns and gfns */
+static inline mfn_t
+vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn)
+{
+ return sh2_vcpu_gfn_to_mfn(v, gfn_x(gfn));
+}
+
+static inline gfn_t
+mfn_to_gfn(struct domain *d, mfn_t mfn)
+{
+ return _gfn(sh2_mfn_to_gfn(d, mfn));
+}
+
+static inline paddr_t
+gfn_to_paddr(gfn_t gfn)
+{
+ return ((paddr_t)gfn_x(gfn)) << PAGE_SHIFT;
+}
+
+/* Type used for recording a walk through guest pagetables. It is
+ * filled in by the pagetable walk function, and also used as a cache
+ * for later walks.
+ * Any non-null pointer in this structure represents a mapping of guest
+ * memory. We must always call walk_init() before using a walk_t, and
+ * call walk_unmap() when we're done.
+ * The "Effective l1e" field is used when there isn't an l1e to point to,
+ * but we have fabricated an l1e for propagation to the shadow (e.g.,
+ * for splintering guest superpages into many shadow l1 entries). */
+typedef struct shadow2_walk_t walk_t;
+struct shadow2_walk_t
+{
+ unsigned long va; /* Address we were looking for */
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+ guest_l4e_t *l4e; /* Pointer to guest's level 4 entry */
+#endif
+ guest_l3e_t *l3e; /* Pointer to guest's level 3 entry */
+#endif
+ guest_l2e_t *l2e; /* Pointer to guest's level 2 entry */
+ guest_l1e_t *l1e; /* Pointer to guest's level 1 entry */
+ guest_l1e_t eff_l1e; /* Effective level 1 entry */
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+ mfn_t l4mfn; /* MFN that the level 4 entry is in */
+#endif
+ mfn_t l3mfn; /* MFN that the level 3 entry is in */
+#endif
+ mfn_t l2mfn; /* MFN that the level 2 entry is in */
+ mfn_t l1mfn; /* MFN that the level 1 entry is in */
+};
+
+
+/* X86 error code bits:
+ * These bits certainly ought to be defined somewhere other than here,
+ * but until that place is determined, here they sit.
+ *
+ * "PFEC" == "Page Fault Error Code"
+ */
+#define X86_PFEC_PRESENT 1 /* 0 == page was not present */
+#define X86_PFEC_WRITE_FAULT 2 /* 0 == reading, 1 == writing */
+#define X86_PFEC_SUPERVISOR_FAULT 4 /* 0 == supervisor-mode, 1 == user */
+#define X86_PFEC_RESERVED_BIT_FAULT 8 /* 1 == reserved bits set in pte */
+#define X86_PFEC_INSN_FETCH_FAULT 16 /* 0 == normal, 1 == instr'n fetch */
+
+/* Macros for generating the internal names of the shadow code's external
+ * entry points.
+ */
+#define INTERNAL_NAME(name) \
+ SHADOW2_INTERNAL_NAME(name, SHADOW_PAGING_LEVELS, GUEST_PAGING_LEVELS)
+
+/* Macros for renaming the primary entry points, so that they are more
+ * easily distinguished from within a debugger.
+ */
+#define sh2_page_fault INTERNAL_NAME(sh2_page_fault)
+#define sh2_invlpg INTERNAL_NAME(sh2_invlpg)
+#define sh2_gva_to_gpa INTERNAL_NAME(sh2_gva_to_gpa)
+#define sh2_gva_to_gfn INTERNAL_NAME(sh2_gva_to_gfn)
+#define sh2_update_cr3 INTERNAL_NAME(sh2_update_cr3)
+#define sh2_remove_write_access INTERNAL_NAME(sh2_remove_write_access)
+#define sh2_remove_all_mappings INTERNAL_NAME(sh2_remove_all_mappings)
+#define sh2_remove_l1_shadow INTERNAL_NAME(sh2_remove_l1_shadow)
+#define sh2_remove_l2_shadow INTERNAL_NAME(sh2_remove_l2_shadow)
+#define sh2_remove_l3_shadow INTERNAL_NAME(sh2_remove_l3_shadow)
+#define sh2_map_and_validate_gl4e INTERNAL_NAME(sh2_map_and_validate_gl4e)
+#define sh2_map_and_validate_gl3e INTERNAL_NAME(sh2_map_and_validate_gl3e)
+#define sh2_map_and_validate_gl2e INTERNAL_NAME(sh2_map_and_validate_gl2e)
+#define sh2_map_and_validate_gl2he INTERNAL_NAME(sh2_map_and_validate_gl2he)
+#define sh2_map_and_validate_gl1e INTERNAL_NAME(sh2_map_and_validate_gl1e)
+#define sh2_destroy_l4_shadow INTERNAL_NAME(sh2_destroy_l4_shadow)
+#define sh2_destroy_l3_shadow INTERNAL_NAME(sh2_destroy_l3_shadow)
+#define sh2_destroy_l3_subshadow INTERNAL_NAME(sh2_destroy_l3_subshadow)
+#define sh2_unpin_all_l3_subshadows INTERNAL_NAME(sh2_unpin_all_l3_subshadows)
+#define sh2_destroy_l2_shadow INTERNAL_NAME(sh2_destroy_l2_shadow)
+#define sh2_destroy_l1_shadow INTERNAL_NAME(sh2_destroy_l1_shadow)
+#define sh2_unhook_32b_mappings INTERNAL_NAME(sh2_unhook_32b_mappings)
+#define sh2_unhook_pae_mappings INTERNAL_NAME(sh2_unhook_pae_mappings)
+#define sh2_unhook_64b_mappings INTERNAL_NAME(sh2_unhook_64b_mappings)
+#define shadow2_entry INTERNAL_NAME(shadow2_entry)
+#define sh2_detach_old_tables INTERNAL_NAME(sh2_detach_old_tables)
+#define sh2_x86_emulate_write INTERNAL_NAME(sh2_x86_emulate_write)
+#define sh2_x86_emulate_cmpxchg INTERNAL_NAME(sh2_x86_emulate_cmpxchg)
+#define sh2_x86_emulate_cmpxchg8b INTERNAL_NAME(sh2_x86_emulate_cmpxchg8b)
+#define sh2_audit_l1_table INTERNAL_NAME(sh2_audit_l1_table)
+#define sh2_audit_fl1_table INTERNAL_NAME(sh2_audit_fl1_table)
+#define sh2_audit_l2_table INTERNAL_NAME(sh2_audit_l2_table)
+#define sh2_audit_l3_table INTERNAL_NAME(sh2_audit_l3_table)
+#define sh2_audit_l4_table INTERNAL_NAME(sh2_audit_l4_table)
+#define sh2_guess_wrmap INTERNAL_NAME(sh2_guess_wrmap)
+#define sh2_clear_shadow_entry INTERNAL_NAME(sh2_clear_shadow_entry)
+
+/* sh2_make_monitor_table only depends on the number of shadow levels */
+#define sh2_make_monitor_table \
+ SHADOW2_INTERNAL_NAME(sh2_make_monitor_table, \
+ SHADOW_PAGING_LEVELS, \
+ SHADOW_PAGING_LEVELS)
+#define sh2_destroy_monitor_table \
+ SHADOW2_INTERNAL_NAME(sh2_destroy_monitor_table, \
+ SHADOW_PAGING_LEVELS, \
+ SHADOW_PAGING_LEVELS)
+
+
+#if GUEST_PAGING_LEVELS == 3
+/*
+ * Accounting information stored in the shadow of PAE Guest L3 pages.
+ * Because these "L3 pages" are only 32 bytes long, it is inconvenient to keep
+ * various refcounts, etc., on the page_info of their page. We provide extra
+ * bookkeeping space in the shadow itself, and this is the structure
+ * definition for that bookkeeping information.
+ */
+struct pae_l3_bookkeeping {
+ u32 vcpus; /* bitmap of which vcpus are currently storing
+ * copies of this 32-byte page */
+ u32 refcount; /* refcount for this 32-byte page */
+ u8 pinned; /* is this 32-byte page pinned or not? */
+};
+
+// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
+#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *) \
+ (((unsigned long)(_ptr) & ~31) + 32))
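+/* (Each 32-byte subshadow is followed in the shadow page by a 32-byte
+ * bookkeeping slot: round the entry pointer down to a 32-byte boundary
+ * and step over the subshadow itself to reach it.) */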
+
+static void sh2_destroy_l3_subshadow(struct vcpu *v,
+ shadow_l3e_t *sl3e);
+
+/* Increment a subshadow ref
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. */
+static inline void sh2_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+ /* First ref to the subshadow takes a ref to the full shadow */
+ if ( bk->refcount == 0 )
+ sh2_get_ref(smfn, 0);
+ if ( unlikely(++(bk->refcount) == 0) )
+ {
+ SHADOW2_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH2_PRI_mfn " sh=%p\n",
+ mfn_x(smfn), sl3e);
+ domain_crash_synchronous();
+ }
+}
+
+/* Decrement a subshadow ref.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. Calling this may cause the
+ * entire shadow to disappear, so the caller must immediately unmap
+ * the pointer after calling. */
+static inline void sh2_put_ref_l3_subshadow(struct vcpu *v,
+ shadow_l3e_t *sl3e,
+ mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk;
+
+ bk = sl3p_to_info(sl3e);
+
+ ASSERT(bk->refcount > 0);
+ if ( --(bk->refcount) == 0 )
+ {
+ /* Need to destroy this subshadow */
+ sh2_destroy_l3_subshadow(v, sl3e);
+ /* Last ref to the subshadow had a ref to the full shadow */
+ sh2_put_ref(v, smfn, 0);
+ }
+}
+
+/* Pin a subshadow
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. */
+static inline void sh2_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+#if 0
+ debugtrace_printk("%s smfn=%05lx offset=%ld\n",
+ __func__, mfn_x(smfn),
+ ((unsigned long)sl3e & ~PAGE_MASK) / 64);
+#endif
+
+ if ( !bk->pinned )
+ {
+ bk->pinned = 1;
+ sh2_get_ref_l3_subshadow(sl3e, smfn);
+ }
+}
+
+/* Unpin a sub-shadow.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. Calling this may cause the
+ * entire shadow to disappear, so the caller must immediately unmap
+ * the pointer after calling. */
+static inline void sh2_unpin_l3_subshadow(struct vcpu *v,
+ shadow_l3e_t *sl3e,
+ mfn_t smfn)
+{
+ struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+#if 0
+ debugtrace_printk("%s smfn=%05lx offset=%ld\n",
+ __func__, mfn_x(smfn),
+ ((unsigned long)sl3e & ~PAGE_MASK) / 64);
+#endif
+
+ if ( bk->pinned )
+ {
+ bk->pinned = 0;
+ sh2_put_ref_l3_subshadow(v, sl3e, smfn);
+ }
+}
+
+#endif /* GUEST_PAGING_LEVELS == 3 */
+
+#if SHADOW_PAGING_LEVELS == 3
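+/* A PAE CR3 value is only 32 bits wide, so (with 4kB frames) an mfn can
+ * only be installed there if it lies below 2^20, i.e. under 4GB. */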
+#define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
+#endif
+
+#if SHADOW_PAGING_LEVELS == 2
+#define SH2_PRI_pte "08x"
+#else /* SHADOW_PAGING_LEVELS >= 3 */
+#ifndef __x86_64__
+#define SH2_PRI_pte "016llx"
+#else
+#define SH2_PRI_pte "016lx"
+#endif
+#endif /* SHADOW_PAGING_LEVELS >= 3 */
+
+#if GUEST_PAGING_LEVELS == 2
+#define SH2_PRI_gpte "08x"
+#else /* GUEST_PAGING_LEVELS >= 3 */
+#ifndef __x86_64__
+#define SH2_PRI_gpte "016llx"
+#else
+#define SH2_PRI_gpte "016lx"
+#endif
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+static inline u32
+accumulate_guest_flags(walk_t *gw)
+{
+ u32 accumulated_flags;
+
+ // We accumulate the permission flags with bitwise ANDing.
+ // This works for the PRESENT bit, RW bit, and USER bit.
+ // For the NX bit, however, the polarity is wrong, so we accumulate the
+ // inverse of the NX bit.
+ //
+ accumulated_flags = guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
+ accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
+
+ // Note that PAE guests do not have USER or RW or NX bits in their L3s.
+ //
+#if GUEST_PAGING_LEVELS == 3
+ accumulated_flags &=
+ ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
+#elif GUEST_PAGING_LEVELS >= 4
+ accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
+ accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
+#endif
+
+ // Finally, revert the NX bit back to its original polarity
+ accumulated_flags ^= _PAGE_NX_BIT;
+
+ return accumulated_flags;
+}
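+
+/* Worked example of the NX handling above: if the l1e is executable (NX
+ * clear) but the l2e has NX set, XORing each with _PAGE_NX_BIT makes the l1
+ * contribute 1 and the l2 contribute 0; ANDing gives 0, and XORing back
+ * yields NX set -- the most restrictive setting wins, just as plain ANDing
+ * already does for the PRESENT, RW and USER bits. */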
+
+#endif /* _XEN_SHADOW2_TYPES_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/shadow2.h b/xen/include/asm-x86/shadow2.h
new file mode 100644
index 0000000000..94de7781f8
--- /dev/null
+++ b/xen/include/asm-x86/shadow2.h
@@ -0,0 +1,627 @@
+/******************************************************************************
+ * include/asm-x86/shadow2.h
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_SHADOW2_H
+#define _XEN_SHADOW2_H
+
+#include <public/dom0_ops.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/flushtlb.h>
+
+/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
+
+#define SHM2_shift 10
+/* We're in one of the shadow modes */
+#define SHM2_enable (DOM0_SHADOW2_CONTROL_FLAG_ENABLE << SHM2_shift)
+/* Refcounts based on shadow tables instead of guest tables */
+#define SHM2_refcounts (DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT << SHM2_shift)
+/* Enable log dirty mode */
+#define SHM2_log_dirty (DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY << SHM2_shift)
+/* Xen does p2m translation, not guest */
+#define SHM2_translate (DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE << SHM2_shift)
+/* Xen does not steal address space from the domain for its own bookkeeping;
+ * requires VT or similar mechanisms */
+#define SHM2_external (DOM0_SHADOW2_CONTROL_FLAG_EXTERNAL << SHM2_shift)
+
+#define shadow2_mode_enabled(_d) ((_d)->arch.shadow2_mode)
+#define shadow2_mode_refcounts(_d) ((_d)->arch.shadow2_mode & SHM2_refcounts)
+#define shadow2_mode_log_dirty(_d) ((_d)->arch.shadow2_mode & SHM2_log_dirty)
+#define shadow2_mode_translate(_d) ((_d)->arch.shadow2_mode & SHM2_translate)
+#define shadow2_mode_external(_d) ((_d)->arch.shadow2_mode & SHM2_external)
+
+/* Xen traps & emulates all reads of all page table pages:
+ * not yet supported
+ */
+#define shadow2_mode_trap_reads(_d) ({ (void)(_d); 0; })
+
+// flags used in the return value of the shadow_set_lXe() functions...
+#define SHADOW2_SET_CHANGED 0x1
+#define SHADOW2_SET_FLUSH 0x2
+#define SHADOW2_SET_ERROR 0x4
+#define SHADOW2_SET_L3PAE_RECOPY 0x8
+
+// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
+#ifdef __x86_64__
+#define pv_32bit_guest(_v) 0 // not yet supported
+#else
+#define pv_32bit_guest(_v) (!hvm_guest(_v))
+#endif
+
+/* The shadow2 lock.
+ *
+ * This lock is per-domain. It is intended to allow us to make atomic
+ * updates to the software TLB that the shadow tables provide.
+ *
+ * Specifically, it protects:
+ * - all changes to shadow page table pages
+ * - the shadow hash table
+ * - the shadow page allocator
+ * - all changes to guest page table pages; if/when the notion of
+ * out-of-sync pages is added to this code, then the shadow lock is
+ * protecting all guest page table pages which are not currently listed
+ * as both guest-writable and out-of-sync...
+ * XXX -- need to think about this relative to writable page tables.
+ * - all changes to the page_info->tlbflush_timestamp
+ * - the page_info->count fields on shadow pages
+ * - the shadow dirty bit array and count
+ * - XXX
+ */
+#ifndef CONFIG_SMP
+#error shadow2.h currently requires CONFIG_SMP
+#endif
+
+#define shadow2_lock_init(_d) \
+ do { \
+ spin_lock_init(&(_d)->arch.shadow2_lock); \
+ (_d)->arch.shadow2_locker = -1; \
+ (_d)->arch.shadow2_locker_function = "nobody"; \
+ } while (0)
+
+#define shadow2_lock_is_acquired(_d) \
+ (current->processor == (_d)->arch.shadow2_locker)
+
+#define shadow2_lock(_d) \
+ do { \
+ if ( unlikely((_d)->arch.shadow2_locker == current->processor) ) \
+ { \
+ printk("Error: shadow2 lock held by %s\n", \
+ (_d)->arch.shadow2_locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_d)->arch.shadow2_lock); \
+ ASSERT((_d)->arch.shadow2_locker == -1); \
+ (_d)->arch.shadow2_locker = current->processor; \
+ (_d)->arch.shadow2_locker_function = __func__; \
+ } while (0)
+
+#define shadow2_unlock(_d) \
+ do { \
+ ASSERT((_d)->arch.shadow2_locker == current->processor); \
+ (_d)->arch.shadow2_locker = -1; \
+ (_d)->arch.shadow2_locker_function = "nobody"; \
+ spin_unlock(&(_d)->arch.shadow2_lock); \
+ } while (0)
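+
+/* Typical usage (a sketch): callers bracket shadow updates with the lock,
+ *     shadow2_lock(d);
+ *     ... update shadow entries, the hash table or the allocator ...
+ *     shadow2_unlock(d);
+ * and internal helpers that rely on it assert shadow2_lock_is_acquired(d). */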
+
+/*
+ * Levels of self-test and paranoia
+ * XXX should go in config files somewhere?
+ */
+#define SHADOW2_AUDIT_HASH 0x01 /* Check current hash bucket */
+#define SHADOW2_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */
+#define SHADOW2_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */
+#define SHADOW2_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */
+#define SHADOW2_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */
+#define SHADOW2_AUDIT_P2M 0x20 /* Check the p2m table */
+
+#ifdef NDEBUG
+#define SHADOW2_AUDIT 0
+#define SHADOW2_AUDIT_ENABLE 0
+#else
+#define SHADOW2_AUDIT 0x15 /* Basic audit of all except p2m. */
+#define SHADOW2_AUDIT_ENABLE shadow2_audit_enable
+extern int shadow2_audit_enable;
+#endif
+
+/*
+ * Levels of optimization
+ * XXX should go in config files somewhere?
+ */
+#define SH2OPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */
+#define SH2OPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */
+
+#define SHADOW2_OPTIMIZATIONS 0x03
+
+
+/* With shadow pagetables, the different kinds of address start
+ * to get confusing.
+ *
+ * Virtual addresses are what they usually are: the addresses that are used
+ * to access memory while the guest is running. The MMU translates from
+ * virtual addresses to machine addresses.
+ *
+ * (Pseudo-)physical addresses are the abstraction of physical memory the
+ * guest uses for allocation and so forth. For the purposes of this code,
+ * we can largely ignore them.
+ *
+ * Guest frame numbers (gfns) are the entries that the guest puts in its
+ * pagetables. For normal paravirtual guests, they are actual frame numbers,
+ * with the translation done by the guest.
+ *
+ * Machine frame numbers (mfns) are the entries that the hypervisor puts
+ * in the shadow page tables.
+ *
+ * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
+ * to a "machine frame number, from the guest's perspective", or in other
+ * words, pseudo-physical frame numbers. However, in the shadow code, the
+ * term "gmfn" means "the mfn of a guest page"; this combines naturally with
+ * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
+ * guest L2 page), etc...
+ */
+
+/* With this defined, we do some ugly things to force the compiler to
+ * give us type safety between mfns and gfns and other integers.
+ * TYPE_SAFE(int, foo) defines a foo_t, and _foo() and foo_x() functions
+ * that translate between int and foo_t.
+ *
+ * It does have some performance cost because the types now have
+ * a different storage attribute, so we may not want it on all the time. */
+#ifndef NDEBUG
+#define TYPE_SAFETY 1
+#endif
+
+#ifdef TYPE_SAFETY
+#define TYPE_SAFE(_type,_name) \
+typedef struct { _type _name; } _name##_t; \
+static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
+static inline _type _name##_x(_name##_t n) { return n._name; }
+#else
+#define TYPE_SAFE(_type,_name) \
+typedef _type _name##_t; \
+static inline _name##_t _##_name(_type n) { return n; } \
+static inline _type _name##_x(_name##_t n) { return n; }
+#endif
+
+TYPE_SAFE(unsigned long,mfn)
+#define SH2_PRI_mfn "05lx"
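+/* With TYPE_SAFETY enabled, conversions must be explicit, e.g. (a sketch,
+ * where raw_frame_number is any unsigned long frame number):
+ *     mfn_t smfn = _mfn(raw_frame_number);
+ *     unsigned long raw = mfn_x(smfn);
+ * so code that mixes mfns with gfns or plain integers fails to compile. */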
+
+static inline int
+valid_mfn(mfn_t m)
+{
+ return VALID_MFN(mfn_x(m));
+}
+
+static inline mfn_t
+pagetable_get_mfn(pagetable_t pt)
+{
+ return _mfn(pagetable_get_pfn(pt));
+}
+
+static inline pagetable_t
+pagetable_from_mfn(mfn_t mfn)
+{
+ return pagetable_from_pfn(mfn_x(mfn));
+}
+
+static inline int
+shadow2_vcpu_mode_translate(struct vcpu *v)
+{
+ // Returns true if this VCPU needs to be using the P2M table to translate
+ // between GFNs and MFNs.
+ //
+ // This is true of translated HVM domains on a vcpu which has paging
+ // enabled. (HVM vcpus with paging disabled use the p2m table as their
+ // paging table, so no translation occurs in this case.)
+ //
+ return v->vcpu_flags & VCPUF_shadow2_translate;
+}
+
+
+/**************************************************************************/
+/* Mode-specific entry points into the shadow code */
+
+struct x86_emulate_ctxt;
+struct shadow2_entry_points {
+ int (*page_fault )(struct vcpu *v, unsigned long va,
+ struct cpu_user_regs *regs);
+ int (*invlpg )(struct vcpu *v, unsigned long va);
+ unsigned long (*gva_to_gpa )(struct vcpu *v, unsigned long va);
+ unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va);
+ void (*update_cr3 )(struct vcpu *v);
+ int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ int (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry, u32 size);
+ void (*detach_old_tables )(struct vcpu *v);
+ int (*x86_emulate_write )(struct vcpu *v, unsigned long va,
+ void *src, u32 bytes,
+ struct x86_emulate_ctxt *ctxt);
+ int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va,
+ unsigned long old,
+ unsigned long new,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+ int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
+ unsigned long old_lo,
+ unsigned long old_hi,
+ unsigned long new_lo,
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt);
+ mfn_t (*make_monitor_table )(struct vcpu *v);
+ void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
+#if SHADOW2_OPTIMIZATIONS & SH2OPT_WRITABLE_HEURISTIC
+ int (*guess_wrmap )(struct vcpu *v,
+ unsigned long vaddr, mfn_t gmfn);
+#endif
+ /* For outsiders to tell what mode we're in */
+ unsigned int shadow_levels;
+ unsigned int guest_levels;
+};
+
+static inline int shadow2_guest_paging_levels(struct vcpu *v)
+{
+ ASSERT(v->arch.shadow2 != NULL);
+ return v->arch.shadow2->guest_levels;
+}
+
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Turning on shadow2 test mode */
+int shadow2_test_enable(struct domain *d);
+
+/* Handler for shadow control ops: enabling and disabling shadow modes,
+ * and log-dirty bitmap ops all happen through here. */
+int shadow2_control_op(struct domain *d,
+ dom0_shadow_control_t *sc,
+ XEN_GUEST_HANDLE(dom0_op_t) u_dom0_op);
+
+/* Call when destroying a domain */
+void shadow2_teardown(struct domain *d);
+
+/* Call once all of the references to the domain have gone away */
+void shadow2_final_teardown(struct domain *d);
+
+
+/* Mark a page as dirty in the bitmap */
+void sh2_do_mark_dirty(struct domain *d, mfn_t gmfn);
+static inline void mark_dirty(struct domain *d, unsigned long gmfn)
+{
+ if ( shadow2_mode_log_dirty(d) )
+ {
+ shadow2_lock(d);
+ sh2_do_mark_dirty(d, _mfn(gmfn));
+ shadow2_unlock(d);
+ }
+}
+
+/* Internal version, for when the shadow lock is already held */
+static inline void sh2_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+ ASSERT(shadow2_lock_is_acquired(d));
+ if ( shadow2_mode_log_dirty(d) )
+ sh2_do_mark_dirty(d, gmfn);
+}
+
+static inline int
+shadow2_fault(unsigned long va, struct cpu_user_regs *regs)
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults. Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
+{
+ struct vcpu *v = current;
+ perfc_incrc(shadow2_fault);
+ return v->arch.shadow2->page_fault(v, va, regs);
+}
+
+static inline int
+shadow2_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg. Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+ return v->arch.shadow2->invlpg(v, va);
+}
+
+static inline unsigned long
+shadow2_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to the guest physical
+ * address that the *guest* pagetables would map it to. */
+{
+ return v->arch.shadow2->gva_to_gpa(v, va);
+}
+
+static inline unsigned long
+shadow2_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to the guest frame number
+ * that the *guest* pagetables would map it to. */
+{
+ return v->arch.shadow2->gva_to_gfn(v, va);
+}
+
+static inline void
+shadow2_update_cr3(struct vcpu *v)
+/* Updates all the things that are derived from the guest's CR3.
+ * Called when the guest changes CR3. */
+{
+ shadow2_lock(v->domain);
+ v->arch.shadow2->update_cr3(v);
+ shadow2_unlock(v->domain);
+}
+
+
+/* Should be called after CR3 is updated.
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+ *
+ * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
+ * shadow_vtable, etc).
+ *
+ * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * for HVM guests, arch.monitor_table and hvm's guest CR3.
+ *
+ * Update ref counts to shadow tables appropriately.
+ * For PAE, relocate L3 entries, if necessary, into low memory.
+ */
+static inline void update_cr3(struct vcpu *v)
+{
+ unsigned long cr3_mfn=0;
+
+ if ( shadow2_mode_enabled(v->domain) )
+ {
+ shadow2_update_cr3(v);
+ return;
+ }
+
+#if CONFIG_PAGING_LEVELS == 4
+ if ( !(v->arch.flags & TF_kernel_mode) )
+ cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
+ else
+#endif
+ cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+
+ /* Update vcpu->arch.cr3 */
+ BUG_ON(cr3_mfn == 0);
+ make_cr3(v, cr3_mfn);
+}
+
+extern void sh2_update_paging_modes(struct vcpu *v);
+
+/* Should be called to initialise paging structures if the paging mode
+ * has changed, and when bringing up a VCPU for the first time. */
+static inline void shadow2_update_paging_modes(struct vcpu *v)
+{
+ ASSERT(shadow2_mode_enabled(v->domain));
+ shadow2_lock(v->domain);
+ sh2_update_paging_modes(v);
+ shadow2_unlock(v->domain);
+}
+
+static inline void
+shadow2_detach_old_tables(struct vcpu *v)
+{
+ v->arch.shadow2->detach_old_tables(v);
+}
+
+static inline mfn_t
+shadow2_make_monitor_table(struct vcpu *v)
+{
+ return v->arch.shadow2->make_monitor_table(v);
+}
+
+static inline void
+shadow2_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+ v->arch.shadow2->destroy_monitor_table(v, mmfn);
+}
+
+/* Validate a pagetable change from the guest and update the shadows. */
+extern int shadow2_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+ void *new_guest_entry);
+
+/* Update the shadows in response to a pagetable write from a HVM guest */
+extern void shadow2_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size);
+
+/* Remove all writeable mappings of a guest frame from the shadows.
+ * Returns non-zero if we need to flush TLBs.
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+extern int shadow2_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
+ unsigned int level,
+ unsigned long fault_addr);
+
+/* Remove all mappings of the guest mfn from the shadows.
+ * Returns non-zero if we need to flush TLBs. */
+extern int shadow2_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
+
+/* This is an HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+void
+shadow2_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
+
+/* Remove all shadows of the guest mfn. */
+extern void sh2_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
+static inline void shadow2_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
+{
+ sh2_remove_shadows(v, gmfn, 1);
+}
+
+/* Add a page to a domain */
+void
+shadow2_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+ unsigned long mfn);
+
+/* Remove a page from a domain */
+void
+shadow2_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+ unsigned long mfn);
+
+/*
+ * Definitions for the shadow2_flags field in page_info.
+ * These flags are stored on *guest* pages...
+ * Bits 1-13 are encodings for the shadow types.
+ */
+#define PGC_SH2_type_to_index(_type) ((_type) >> PGC_SH2_type_shift)
+#define SH2F_page_type_mask \
+ (((1u << PGC_SH2_type_to_index(PGC_SH2_max_shadow + 1u)) - 1u) - \
+ ((1u << PGC_SH2_type_to_index(PGC_SH2_min_shadow)) - 1u))
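+/* (i.e. a mask with one bit set for each shadow type index from
+ *  PGC_SH2_min_shadow up to and including PGC_SH2_max_shadow.) */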
+
+#define SH2F_L1_32 (1u << PGC_SH2_type_to_index(PGC_SH2_l1_32_shadow))
+#define SH2F_FL1_32 (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_32_shadow))
+#define SH2F_L2_32 (1u << PGC_SH2_type_to_index(PGC_SH2_l2_32_shadow))
+#define SH2F_L1_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l1_pae_shadow))
+#define SH2F_FL1_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_pae_shadow))
+#define SH2F_L2_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l2_pae_shadow))
+#define SH2F_L2H_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l2h_pae_shadow))
+#define SH2F_L3_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l3_pae_shadow))
+#define SH2F_L1_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l1_64_shadow))
+#define SH2F_FL1_64 (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_64_shadow))
+#define SH2F_L2_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l2_64_shadow))
+#define SH2F_L3_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l3_64_shadow))
+#define SH2F_L4_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l4_64_shadow))
+
+/* Used for hysteresis when automatically unhooking mappings on fork/exit */
+#define SH2F_unhooked_mappings (1u<<31)
+
+/*
+ * Allocation of shadow pages
+ */
+
+/* Return the minimum acceptable number of shadow pages a domain needs */
+unsigned int shadow2_min_acceptable_pages(struct domain *d);
+
+/* Set the pool of shadow pages to the required number of MB.
+ * Input will be rounded up to at least shadow2_min_acceptable_pages().
+ * Returns 0 for success, 1 for failure. */
+unsigned int shadow2_set_allocation(struct domain *d,
+ unsigned int megabytes,
+ int *preempted);
+
+/* Return the size of the shadow2 pool, rounded up to the nearest MB */
+static inline unsigned int shadow2_get_allocation(struct domain *d)
+{
+ unsigned int pg = d->arch.shadow2_total_pages;
+ return ((pg >> (20 - PAGE_SHIFT))
+ + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
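+/* (With 4kB pages, PAGE_SHIFT is 12, so this is pages/256 rounded up:
+ * e.g. a pool of 300 pages is reported as 2MB.) */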
+
+/*
+ * Linked list for chaining entries in the shadow hash table.
+ */
+struct shadow2_hash_entry {
+ struct shadow2_hash_entry *next;
+ mfn_t smfn; /* MFN of the shadow */
+#ifdef __x86_64__ /* Shorten 'n' so we don't waste a whole word on storing 't' */
+ unsigned long n:56; /* MFN of guest PT or GFN of guest superpage */
+#else
+ unsigned long n; /* MFN of guest PT or GFN of guest superpage */
+#endif
+ unsigned char t; /* shadow type bits, or 0 for empty */
+};
+
+#define SHADOW2_HASH_BUCKETS 251
+/* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
+
+
+#if SHADOW2_OPTIMIZATIONS & SH2OPT_CACHE_WALKS
+/* Optimization: cache the results of guest walks. This helps with MMIO
+ * and emulated writes, which tend to issue very similar walk requests
+ * repeatedly. We keep the results of the last few walks, and blow
+ * away the cache on guest cr3 write, mode change, or page fault. */
+
+#define SH2_WALK_CACHE_ENTRIES 4
+
+/* Rather than cache a guest walk, which would include mapped pointers
+ * to pages, we cache what a TLB would remember about the walk: the
+ * permissions and the l1 gfn */
+struct shadow2_walk_cache {
+ unsigned long va; /* The virtual address (or 0 == unused) */
+ unsigned long gfn; /* The gfn from the effective l1e */
+ u32 permissions; /* The aggregated permission bits */
+};
+#endif
+
+
+/**************************************************************************/
+/* Guest physmap (p2m) support */
+
+/* Walk another domain's P2M table, mapping pages as we go */
+extern mfn_t
+sh2_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+
+
+/* General conversion function from gfn to mfn */
+static inline mfn_t
+sh2_gfn_to_mfn(struct domain *d, unsigned long gfn)
+{
+ if ( !shadow2_mode_translate(d) )
+ return _mfn(gfn);
+ else if ( likely(current->domain == d) )
+ return _mfn(get_mfn_from_gpfn(gfn));
+ else
+ return sh2_gfn_to_mfn_foreign(d, gfn);
+}
+
+// vcpu-specific version of gfn_to_mfn(). This is where we hide the dirty
+// little secret that, for hvm guests with paging disabled, nearly all of the
+// shadow code actually thinks that the guest is running on *untranslated* page
+// tables (which is actually domain->phys_table).
+//
+static inline mfn_t
+sh2_vcpu_gfn_to_mfn(struct vcpu *v, unsigned long gfn)
+{
+ if ( !shadow2_vcpu_mode_translate(v) )
+ return _mfn(gfn);
+ if ( likely(current->domain == v->domain) )
+ return _mfn(get_mfn_from_gpfn(gfn));
+ return sh2_gfn_to_mfn_foreign(v->domain, gfn);
+}
+
+static inline unsigned long
+sh2_mfn_to_gfn(struct domain *d, mfn_t mfn)
+{
+ if ( shadow2_mode_translate(d) )
+ return get_gpfn_from_mfn(mfn_x(mfn));
+ else
+ return mfn_x(mfn);
+}
+
+
+
+#endif /* _XEN_SHADOW2_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/xen/include/asm-x86/shadow_64.h b/xen/include/asm-x86/shadow_64.h
deleted file mode 100644
index d9afbdca18..0000000000
--- a/xen/include/asm-x86/shadow_64.h
+++ /dev/null
@@ -1,587 +0,0 @@
-/******************************************************************************
- * include/asm-x86/shadow_64.h
- *
- * Copyright (c) 2005 Michael A Fetterman
- * Based on an earlier implementation by Ian Pratt et al
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-/*
- * Jun Nakajima <jun.nakajima@intel.com>
- * Chengyuan Li <chengyuan.li@intel.com>
- *
- * Extended to support 64-bit guests.
- */
-#ifndef _XEN_SHADOW_64_H
-#define _XEN_SHADOW_64_H
-#include <asm/shadow.h>
-#include <asm/shadow_ops.h>
-#include <asm/hvm/hvm.h>
-
-/*
- * The naming convention of the shadow_ops:
- * MODE_<pgentry size>_<guest paging levels>_HANDLER
- */
-extern struct shadow_ops MODE_64_2_HANDLER;
-extern struct shadow_ops MODE_64_3_HANDLER;
-extern struct shadow_ops MODE_64_PAE_HANDLER;
-#if CONFIG_PAGING_LEVELS == 4
-extern struct shadow_ops MODE_64_4_HANDLER;
-#endif
-
-#if CONFIG_PAGING_LEVELS == 3
-#define L4_PAGETABLE_SHIFT 39
-#define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
-typedef struct { intpte_t l4; } l4_pgentry_t;
-#define is_guest_l4_slot(_s) (1)
-#endif
-
-#define READ_FAULT 0
-#define WRITE_FAULT 1
-
-#define ERROR_P 1
-#define ERROR_W 2
-#define ERROR_U 4
-#define ERROR_I (1 << 4)
-
-#define X86_64_SHADOW_DEBUG 0
-
-#if X86_64_SHADOW_DEBUG
-#define ESH_LOG(_f, _a...) \
- printk(_f, ##_a)
-#else
-#define ESH_LOG(_f, _a...) ((void)0)
-#endif
-
-#define L_MASK 0xff
-
-#define PAE_PAGING_LEVELS 3
-
-#define ROOT_LEVEL_64 PAGING_L4
-#define ROOT_LEVEL_32 PAGING_L2
-
-#define DIRECT_ENTRY (4UL << 16)
-#define SHADOW_ENTRY (2UL << 16)
-#define GUEST_ENTRY (1UL << 16)
-
-#define GET_ENTRY (2UL << 8)
-#define SET_ENTRY (1UL << 8)
-
-#define PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
-
-/* For 32-bit VMX guest to allocate shadow L1 & L2*/
-#define SL1_ORDER 1
-#define SL2_ORDER 2
-
-typedef struct { intpte_t lo; } pgentry_64_t;
-#define shadow_level_to_type(l) (l << 29)
-#define shadow_type_to_level(t) (t >> 29)
-
-#define entry_get_value(_x) ((_x).lo)
-#define entry_get_pfn(_x) \
- (((_x).lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
-#define entry_get_paddr(_x) (((_x).lo & (PADDR_MASK&PAGE_MASK)))
-#define entry_get_flags(_x) (get_pte_flags((_x).lo))
-
-#define entry_empty() ((pgentry_64_t) { 0 })
-#define entry_from_pfn(pfn, flags) \
- ((pgentry_64_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
-#define entry_from_page(page, flags) (entry_from_pfn(page_to_mfn(page),(flags)))
-#define entry_add_flags(x, flags) ((x).lo |= put_pte_flags(flags))
-#define entry_remove_flags(x, flags) ((x).lo &= ~put_pte_flags(flags))
-#define entry_has_changed(x,y,flags) \
- ( !!(((x).lo ^ (y).lo) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
-
-/******************************************************************************/
-/*
- * The macro and inlines are for 32-bit PAE guest
- */
-#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */
-
-#define PAE_SHADOW_SELF_ENTRY 259
-#define PAE_L3_PAGETABLE_ENTRIES 4
-
-/******************************************************************************/
-static inline int table_offset_64(unsigned long va, int level)
-{
- switch(level) {
- case 1:
- return (((va) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1));
- case 2:
- return (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1));
- case 3:
- return (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1));
-#if CONFIG_PAGING_LEVELS == 3
- case 4:
- return PAE_SHADOW_SELF_ENTRY;
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
-#ifndef GUEST_PGENTRY_32
-#ifndef GUEST_32PAE
- case 4:
- return (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1));
-#else
- case 4:
- return PAE_SHADOW_SELF_ENTRY;
-#endif
-#else
- case 4:
- return PAE_SHADOW_SELF_ENTRY;
-#endif
-#endif
- default:
- return -1;
- }
-}
-
-/*****************************************************************************/
-
-#if defined( GUEST_32PAE )
-static inline int guest_table_offset_64(unsigned long va, int level, unsigned int index)
-{
- switch(level) {
- case 1:
- return (((va) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1));
- case 2:
- return (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1));
- case 3:
- return (index * 4 + ((va) >> L3_PAGETABLE_SHIFT));
-#if CONFIG_PAGING_LEVELS == 3
- case 4:
- return PAE_SHADOW_SELF_ENTRY;
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
-#ifndef GUEST_PGENTRY_32
- case 4:
- return (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1));
-#else
- case 4:
- return PAE_SHADOW_SELF_ENTRY;
-#endif
-#endif
- default:
- return -1;
- }
-}
-
-#define SH_GUEST_32PAE 1
-#else
-#define guest_table_offset_64(va, level, index) \
- table_offset_64((va),(level))
-#define SH_GUEST_32PAE 0
-#endif
-
-/********************************************************************************/
-
-static inline void free_out_of_sync_state(struct domain *d)
-{
- struct out_of_sync_entry *entry;
-
- // NB: Be careful not to call something that manipulates this list
- // while walking it. Remove one item at a time, and always
- // restart from start of list.
- //
- while ( (entry = d->arch.out_of_sync) )
- {
- d->arch.out_of_sync = entry->next;
- release_out_of_sync_entry(d, entry);
-
- entry->next = d->arch.out_of_sync_free;
- d->arch.out_of_sync_free = entry;
- }
-}
-
-static inline int __entry(
- struct vcpu *v, unsigned long va, pgentry_64_t *e_p, u32 flag)
-{
- int i;
- pgentry_64_t *le_e;
- pgentry_64_t *le_p = NULL;
- pgentry_64_t *phys_vtable = NULL;
- unsigned long mfn;
- int index;
- u32 level = flag & L_MASK;
- struct domain *d = v->domain;
- int root_level;
- unsigned int base_idx;
-
- base_idx = get_cr3_idxval(v);
-
- if ( flag & SHADOW_ENTRY )
- {
- root_level = ROOT_LEVEL_64;
- index = table_offset_64(va, root_level);
- le_e = (pgentry_64_t *)&v->arch.shadow_vtable[index];
- }
- else if ( flag & GUEST_ENTRY )
- {
- root_level = v->domain->arch.ops->guest_paging_levels;
- if ( root_level == PAGING_L3 )
- index = guest_table_offset_64(va, PAGING_L3, base_idx);
- else
- index = guest_table_offset_64(va, root_level, base_idx);
- le_e = (pgentry_64_t *)&v->arch.guest_vtable[index];
- }
- else /* direct mode */
- {
- root_level = PAE_PAGING_LEVELS;
- index = table_offset_64(va, root_level);
- phys_vtable = (pgentry_64_t *)map_domain_page(
- pagetable_get_pfn(v->domain->arch.phys_table));
- le_e = &phys_vtable[index];
- }
-
- /*
- * If it's not external mode, then mfn should be machine physical.
- */
- for ( i = root_level - level; i > 0; i-- )
- {
- if ( unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)) )
- {
- if ( le_p )
- unmap_domain_page(le_p);
-
- if ( phys_vtable )
- unmap_domain_page(phys_vtable);
-
- return 0;
- }
-
- mfn = entry_get_pfn(*le_e);
- if ( (flag & GUEST_ENTRY) && shadow_mode_translate(d) )
- mfn = get_mfn_from_gpfn(mfn);
-
- if ( le_p )
- unmap_domain_page(le_p);
- le_p = (pgentry_64_t *)map_domain_page(mfn);
-
- if ( flag & SHADOW_ENTRY )
- index = table_offset_64(va, (level + i - 1));
- else
- index = guest_table_offset_64(va, (level + i - 1), base_idx);
- le_e = &le_p[index];
- }
-
- if ( flag & SET_ENTRY )
- *le_e = *e_p;
- else
- *e_p = *le_e;
-
- if ( le_p )
- unmap_domain_page(le_p);
-
- if ( phys_vtable )
- unmap_domain_page(phys_vtable);
-
- return 1;
-}
-
-static inline int __rw_entry(
- struct vcpu *v, unsigned long va, void *e_p, u32 flag)
-{
- pgentry_64_t *e = (pgentry_64_t *)e_p;
-
- if (e) {
- return __entry(v, va, e, flag);
- }
-
- return 0;
-}
-
-#define __shadow_set_l4e(v, va, value) \
- __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4)
-#define __shadow_get_l4e(v, va, sl4e) \
- __rw_entry(v, va, sl4e, SHADOW_ENTRY | GET_ENTRY | PAGING_L4)
-#define __shadow_set_l3e(v, va, value) \
- __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L3)
-#define __shadow_get_l3e(v, va, sl3e) \
- __rw_entry(v, va, sl3e, SHADOW_ENTRY | GET_ENTRY | PAGING_L3)
-#define __shadow_set_l2e(v, va, value) \
- __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L2)
-#define __shadow_get_l2e(v, va, sl2e) \
- __rw_entry(v, va, sl2e, SHADOW_ENTRY | GET_ENTRY | PAGING_L2)
-#define __shadow_set_l1e(v, va, value) \
- __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L1)
-#define __shadow_get_l1e(v, va, sl1e) \
- __rw_entry(v, va, sl1e, SHADOW_ENTRY | GET_ENTRY | PAGING_L1)
-
-#define __guest_set_l4e(v, va, value) \
- __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L4)
-#define __guest_get_l4e(v, va, gl4e) \
- __rw_entry(v, va, gl4e, GUEST_ENTRY | GET_ENTRY | PAGING_L4)
-#define __guest_set_l3e(v, va, value) \
- __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L3)
-#define __guest_get_l3e(v, va, sl3e) \
- __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3)
-
-#define __direct_set_l3e(v, va, value) \
- __rw_entry(v, va, value, DIRECT_ENTRY | SET_ENTRY | PAGING_L3)
-#define __direct_get_l3e(v, va, sl3e) \
- __rw_entry(v, va, sl3e, DIRECT_ENTRY | GET_ENTRY | PAGING_L3)
-#define __direct_set_l2e(v, va, value) \
- __rw_entry(v, va, value, DIRECT_ENTRY | SET_ENTRY | PAGING_L2)
-#define __direct_get_l2e(v, va, sl2e) \
- __rw_entry(v, va, sl2e, DIRECT_ENTRY | GET_ENTRY | PAGING_L2)
-#define __direct_set_l1e(v, va, value) \
- __rw_entry(v, va, value, DIRECT_ENTRY | SET_ENTRY | PAGING_L1)
-#define __direct_get_l1e(v, va, sl1e) \
- __rw_entry(v, va, sl1e, DIRECT_ENTRY | GET_ENTRY | PAGING_L1)
-
-
-static inline int __guest_set_l2e(
- struct vcpu *v, unsigned long va, void *value, int size)
-{
- switch(size) {
- case 4:
- // 32-bit guest
- {
- l2_pgentry_32_t *l2va;
-
- l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
- if (value)
- l2va[l2_table_offset_32(va)] = *(l2_pgentry_32_t *)value;
- return 1;
- }
- case 8:
- return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L2);
- default:
- BUG();
- return 0;
- }
- return 0;
-}
-
-#define __guest_set_l2e(v, va, value) \
- __guest_set_l2e(v, (unsigned long)va, value, sizeof(*value))
-
-static inline int __guest_get_l2e(
- struct vcpu *v, unsigned long va, void *gl2e, int size)
-{
- switch(size) {
- case 4:
- // 32-bit guest
- {
- l2_pgentry_32_t *l2va;
- l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
- if (gl2e)
- *(l2_pgentry_32_t *)gl2e = l2va[l2_table_offset_32(va)];
- return 1;
- }
- case 8:
- return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | PAGING_L2);
- default:
- BUG();
- return 0;
- }
- return 0;
-}
-
-#define __guest_get_l2e(v, va, gl2e) \
- __guest_get_l2e(v, (unsigned long)va, gl2e, sizeof(*gl2e))
-
-static inline int __guest_set_l1e(
- struct vcpu *v, unsigned long va, void *value, int size)
-{
- switch(size) {
- case 4:
- // 32-bit guest
- {
- l2_pgentry_32_t gl2e;
- l1_pgentry_32_t *l1va;
- unsigned long l1mfn;
-
- if (!__guest_get_l2e(v, va, &gl2e))
- return 0;
- if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
- return 0;
-
- l1mfn = get_mfn_from_gpfn(
- l2e_get_pfn(gl2e));
-
- l1va = (l1_pgentry_32_t *)map_domain_page(l1mfn);
- if (value)
- l1va[l1_table_offset_32(va)] = *(l1_pgentry_32_t *)value;
- unmap_domain_page(l1va);
-
- return 1;
- }
-
- case 8:
- return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L1);
- default:
- BUG();
- return 0;
- }
- return 0;
-}
-
-#define __guest_set_l1e(v, va, value) \
- __guest_set_l1e(v, (unsigned long)va, value, sizeof(*value))
-
-static inline int __guest_get_l1e(
- struct vcpu *v, unsigned long va, void *gl1e, int size)
-{
- switch(size) {
- case 4:
- // 32-bit guest
- {
- l2_pgentry_32_t gl2e;
- l1_pgentry_32_t *l1va;
- unsigned long l1mfn;
-
- if (!(__guest_get_l2e(v, va, &gl2e)))
- return 0;
-
-
- if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
- return 0;
-
-
- l1mfn = get_mfn_from_gpfn(
- l2e_get_pfn(gl2e));
- l1va = (l1_pgentry_32_t *) map_domain_page(l1mfn);
- if (gl1e)
- *(l1_pgentry_32_t *)gl1e = l1va[l1_table_offset_32(va)];
- unmap_domain_page(l1va);
- return 1;
- }
- case 8:
- // 64-bit guest
- return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | PAGING_L1);
- default:
- BUG();
- return 0;
- }
- return 0;
-}
-
-#define __guest_get_l1e(v, va, gl1e) \
- __guest_get_l1e(v, (unsigned long)va, gl1e, sizeof(*gl1e))
-
-static inline void entry_general(
- struct domain *d,
- pgentry_64_t *gle_p,
- pgentry_64_t *sle_p,
- unsigned long smfn, u32 level)
-
-{
- pgentry_64_t gle = *gle_p;
- pgentry_64_t sle;
-
- sle = entry_empty();
- if ( (entry_get_flags(gle) & _PAGE_PRESENT) && (smfn != 0) )
- {
- if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) {
- sle = entry_from_pfn(smfn, entry_get_flags(gle));
- entry_remove_flags(sle, _PAGE_PSE);
-
- if ( shadow_mode_log_dirty(d) ||
- !(entry_get_flags(gle) & _PAGE_DIRTY) )
- {
- pgentry_64_t *l1_p;
- int i;
-
- l1_p =(pgentry_64_t *)map_domain_page(smfn);
- for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
- {
- if ( mfn_is_page_table(entry_get_pfn(l1_p[i])) )
- entry_remove_flags(l1_p[i], _PAGE_RW);
- }
-
- unmap_domain_page(l1_p);
- }
- } else {
- if (d->arch.ops->guest_paging_levels <= PAGING_L3
- && level == PAGING_L3) {
- sle = entry_from_pfn(smfn, entry_get_flags(gle));
- } else {
-
- sle = entry_from_pfn(
- smfn,
- (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
- entry_add_flags(gle, _PAGE_ACCESSED);
- }
- }
- // XXX mafetter: Hmm...
- // Shouldn't the dirty log be checked/updated here?
- // Actually, it needs to be done in this function's callers.
- //
- *gle_p = gle;
- }
-
- if ( entry_get_value(sle) || entry_get_value(gle) )
- SH_VVLOG("%s: gpde=%lx, new spde=%lx", __func__,
- entry_get_value(gle), entry_get_value(sle));
-
- *sle_p = sle;
-}
-
-static inline void entry_propagate_from_guest(
- struct domain *d, pgentry_64_t *gle_p, pgentry_64_t *sle_p, u32 level)
-{
- pgentry_64_t gle = *gle_p;
- unsigned long smfn = 0;
-
- if ( entry_get_flags(gle) & _PAGE_PRESENT ) {
- if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) {
- smfn = __shadow_status(d, entry_get_pfn(gle), PGT_fl1_shadow);
- } else {
- smfn = __shadow_status(d, entry_get_pfn(gle),
- shadow_level_to_type((level -1 )));
- }
- }
- entry_general(d, gle_p, sle_p, smfn, level);
-
-}
-
-static int inline
-validate_entry_change(
- struct domain *d,
- pgentry_64_t *new_gle_p,
- pgentry_64_t *shadow_le_p,
- u32 level)
-{
- pgentry_64_t old_sle, new_sle;
- pgentry_64_t new_gle = *new_gle_p;
-
- old_sle = *shadow_le_p;
- entry_propagate_from_guest(d, &new_gle, &new_sle, level);
-
- ESH_LOG("old_sle: %lx, new_gle: %lx, new_sle: %lx\n",
- entry_get_value(old_sle), entry_get_value(new_gle),
- entry_get_value(new_sle));
-
- if ( ((entry_get_value(old_sle) | entry_get_value(new_sle)) & _PAGE_PRESENT) &&
- entry_has_changed(old_sle, new_sle, _PAGE_PRESENT) )
- {
- perfc_incrc(validate_entry_changes);
-
- if ( (entry_get_flags(new_sle) & _PAGE_PRESENT) &&
- !get_shadow_ref(entry_get_pfn(new_sle)) )
- BUG();
- if ( entry_get_flags(old_sle) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(old_sle));
- }
-
- *shadow_le_p = new_sle;
-
- return 1;
-}
-
-#endif
-
-
diff --git a/xen/include/asm-x86/shadow_ops.h b/xen/include/asm-x86/shadow_ops.h
deleted file mode 100644
index 8765ed8b10..0000000000
--- a/xen/include/asm-x86/shadow_ops.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/******************************************************************************
- * include/asm-x86/shadow_ops.h
- *
- * Copyright (c) 2005 Michael A Fetterman
- * Based on an earlier implementation by Ian Pratt et al
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _XEN_SHADOW_OPS_H
-#define _XEN_SHADOW_OPS_H
-
-#define PAGING_L4 4UL
-#define PAGING_L3 3UL
-#define PAGING_L2 2UL
-#define PAGING_L1 1UL
-
-#define PAE_CR3_ALIGN 5
-#define PAE_CR3_IDX_MASK 0x7f
-
-#if defined( GUEST_PGENTRY_32 )
-
-#define GUEST_L1_PAGETABLE_ENTRIES L1_PAGETABLE_ENTRIES_32
-#define GUEST_L2_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES_32
-#define GUEST_ROOT_PAGETABLE_ENTRIES ROOT_PAGETABLE_ENTRIES_32
-#define GUEST_L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT_32
-
-#define guest_l1_pgentry_t l1_pgentry_32_t
-#define guest_l2_pgentry_t l2_pgentry_32_t
-#define guest_root_pgentry_t l2_pgentry_32_t
-
-#define guest_l1e_get_paddr l1e_get_paddr_32
-#define guest_l2e_get_paddr l2e_get_paddr_32
-
-#define guest_get_pte_flags get_pte_flags_32
-#define guest_put_pte_flags put_pte_flags_32
-
-#define guest_l1e_get_flags l1e_get_flags_32
-#define guest_l2e_get_flags l2e_get_flags_32
-#define guest_root_get_flags l2e_get_flags_32
-#define guest_root_get_intpte l2e_get_intpte
-
-#define guest_l1e_empty l1e_empty_32
-#define guest_l2e_empty l2e_empty_32
-
-#define guest_l1e_from_pfn l1e_from_pfn_32
-#define guest_l2e_from_pfn l2e_from_pfn_32
-
-#define guest_l1e_from_paddr l1e_from_paddr_32
-#define guest_l2e_from_paddr l2e_from_paddr_32
-
-#define guest_l1e_from_page l1e_from_page_32
-#define guest_l2e_from_page l2e_from_page_32
-
-#define guest_l1e_add_flags l1e_add_flags_32
-#define guest_l2e_add_flags l2e_add_flags_32
-
-#define guest_l1e_remove_flag l1e_remove_flags_32
-#define guest_l2e_remove_flag l2e_remove_flags_32
-
-#define guest_l1e_has_changed l1e_has_changed_32
-#define guest_l2e_has_changed l2e_has_changed_32
-#define root_entry_has_changed l2e_has_changed_32
-
-#define guest_l1_table_offset l1_table_offset_32
-#define guest_l2_table_offset l2_table_offset_32
-
-#define guest_linear_l1_table linear_pg_table_32
-#define guest_linear_l2_table linear_l2_table_32
-
-#define guest_va_to_l1mfn va_to_l1mfn_32
-
-#else
-
-#define GUEST_L1_PAGETABLE_ENTRIES L1_PAGETABLE_ENTRIES
-#define GUEST_L2_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES
-#define GUEST_ROOT_PAGETABLE_ENTRIES ROOT_PAGETABLE_ENTRIES
-#define GUEST_L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT
-
-#define guest_l1_pgentry_t l1_pgentry_t
-#define guest_l2_pgentry_t l2_pgentry_t
-#define guest_root_pgentry_t l4_pgentry_t
-
-#define guest_l1e_get_paddr l1e_get_paddr
-#define guest_l2e_get_paddr l2e_get_paddr
-
-#define guest_get_pte_flags get_pte_flags
-#define guest_put_pte_flags put_pte_flags
-
-#define guest_l1e_get_flags l1e_get_flags
-#define guest_l2e_get_flags l2e_get_flags
-#define guest_root_get_flags l4e_get_flags
-#define guest_root_get_intpte l4e_get_intpte
-
-#define guest_l1e_empty l1e_empty
-#define guest_l2e_empty l2e_empty
-
-#define guest_l1e_from_pfn l1e_from_pfn
-#define guest_l2e_from_pfn l2e_from_pfn
-
-#define guest_l1e_from_paddr l1e_from_paddr
-#define guest_l2e_from_paddr l2e_from_paddr
-
-#define guest_l1e_from_page l1e_from_page
-#define guest_l2e_from_page l2e_from_page
-
-#define guest_l1e_add_flags l1e_add_flags
-#define guest_l2e_add_flags l2e_add_flags
-
-#define guest_l1e_remove_flag l1e_remove_flags
-#define guest_l2e_remove_flag l2e_remove_flags
-
-#define guest_l1e_has_changed l1e_has_changed
-#define guest_l2e_has_changed l2e_has_changed
-#define root_entry_has_changed l4e_has_changed
-
-#define guest_l1_table_offset l1_table_offset
-#define guest_l2_table_offset l2_table_offset
-
-#define guest_linear_l1_table linear_pg_table
-#define guest_linear_l2_table linear_l2_table
-
-#define guest_va_to_l1mfn va_to_l1mfn
-#endif
-
-#endif /* _XEN_SHADOW_OPS_H */
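
The deleted shadow_ops.h above was a compile-time switch: shadow code written against the guest_* names resolved to either the 32-bit (page-guest32.h) or the native pagetable accessors, depending on GUEST_PGENTRY_32. A standalone sketch of the pattern, with invented entry types rather than the real Xen ones:

    #include <stdint.h>
    #include <stdio.h>

    /* Two incompatible "guest entry" layouts, standing in for the real ones. */
    typedef struct { uint32_t bits; } l2e32_t;
    typedef struct { uint64_t bits; } l2e64_t;

    static unsigned int l2e32_get_flags(l2e32_t e) { return e.bits & 0xfffU; }
    static unsigned int l2e64_get_flags(l2e64_t e) { return (unsigned int)(e.bits & 0xfffU); }

    /* Compile-time aliasing, mirroring the GUEST_PGENTRY_32 switch above. */
    #ifdef GUEST_PGENTRY_32
    #define guest_l2_pgentry_t  l2e32_t
    #define guest_l2e_get_flags l2e32_get_flags
    #else
    #define guest_l2_pgentry_t  l2e64_t
    #define guest_l2e_get_flags l2e64_get_flags
    #endif

    int main(void)
    {
        guest_l2_pgentry_t gl2e = { 0x1e3 };   /* arbitrary flag bits */
        printf("guest L2E flags = %#x\n", guest_l2e_get_flags(gl2e));
        return 0;
    }

Compiling the same shadow source once per guest width produced specialised copies; judging from the file list in this changeset, the shadow2 replacement keeps a similar multi-compile scheme via shadow2-types.h and shadow2-multi.h.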
diff --git a/xen/include/asm-x86/shadow_public.h b/xen/include/asm-x86/shadow_public.h
deleted file mode 100644
index e2b4b5fd57..0000000000
--- a/xen/include/asm-x86/shadow_public.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/******************************************************************************
- * include/asm-x86/shadow_public.h
- *
- * Copyright (c) 2005 Michael A Fetterman
- * Based on an earlier implementation by Ian Pratt et al
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _XEN_SHADOW_PUBLIC_H
-#define _XEN_SHADOW_PUBLIC_H
-
-#if CONFIG_PAGING_LEVELS >= 3
-#define MFN_PINNED(_x) (mfn_to_page(_x)->u.inuse.type_info & PGT_pinned)
-
-extern void shadow_sync_and_drop_references(
- struct domain *d, struct page_info *page);
-extern void shadow_drop_references(
- struct domain *d, struct page_info *page);
-
-extern int shadow_set_guest_paging_levels(struct domain *d, int levels);
-
-extern void release_out_of_sync_entry(
- struct domain *d, struct out_of_sync_entry *entry);
-
-struct shadow_ops {
- unsigned long guest_paging_levels; /* guest paging levels */
- void (*invlpg)(struct vcpu *v, unsigned long va);
- int (*fault)(unsigned long va, struct cpu_user_regs *regs);
- void (*update_pagetables)(struct vcpu *v);
- void (*sync_all)(struct domain *d);
- int (*remove_all_write_access)(struct domain *d,
- unsigned long readonly_gpfn, unsigned long readonly_gmfn);
- int (*do_update_va_mapping)(unsigned long va, l1_pgentry_t val, struct vcpu *v);
- struct out_of_sync_entry *
- (*mark_mfn_out_of_sync)(struct vcpu *v, unsigned long gpfn,
- unsigned long mfn);
- int (*is_out_of_sync)(struct vcpu *v, unsigned long va);
- unsigned long (*gva_to_gpa)(unsigned long gva);
-};
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
-extern void shadow_l4_normal_pt_update(struct domain *d,
- unsigned long pa, l4_pgentry_t l4e,
- struct domain_mmap_cache *cache);
-#endif
-
-#endif
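
The struct shadow_ops deleted here was a per-guest-paging-mode dispatch table: one instance per mode, with the active one selected at run time. A minimal standalone sketch of that dispatch pattern (invented names and stub behaviour, not the removed implementation):

    #include <stdio.h>

    struct mode_ops {
        unsigned long guest_paging_levels;
        void (*invlpg)(unsigned long va);
        unsigned long (*gva_to_gpa)(unsigned long gva);
    };

    static void invlpg_2(unsigned long va) { printf("2-level invlpg %#lx\n", va); }
    static void invlpg_3(unsigned long va) { printf("3-level invlpg %#lx\n", va); }
    static unsigned long gva_to_gpa_identity(unsigned long gva) { return gva; }

    static const struct mode_ops mode2_ops = { 2, invlpg_2, gva_to_gpa_identity };
    static const struct mode_ops mode3_ops = { 3, invlpg_3, gva_to_gpa_identity };

    int main(void)
    {
        /* Chosen when the guest switches paging mode (cf. set_guest_paging_levels). */
        const struct mode_ops *ops = &mode3_ops;

        ops->invlpg(0xbffff000UL);
        printf("gpa = %#lx\n", ops->gva_to_gpa(0x1000UL));
        return 0;
    }

The shadow2 code appears to keep the same basic idea, a per-mode operations structure selected at run time, judging from the shadow2-multi/shadow2-types split in the file list above.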
diff --git a/xen/include/asm-x86/x86_32/page-2level.h b/xen/include/asm-x86/x86_32/page-2level.h
index 764b1c2c05..7f450c4624 100644
--- a/xen/include/asm-x86/x86_32/page-2level.h
+++ b/xen/include/asm-x86/x86_32/page-2level.h
@@ -46,6 +46,7 @@ typedef l2_pgentry_t root_pgentry_t;
* 12-bit flags = (pte[11:0])
*/
+#define _PAGE_NX_BIT 0U
#define _PAGE_NX 0U
/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
diff --git a/xen/include/asm-x86/x86_32/page-3level.h b/xen/include/asm-x86/x86_32/page-3level.h
index 43e73033e3..e0187478cc 100644
--- a/xen/include/asm-x86/x86_32/page-3level.h
+++ b/xen/include/asm-x86/x86_32/page-3level.h
@@ -59,7 +59,8 @@ typedef l3_pgentry_t root_pgentry_t;
* 32-bit flags = (pte[63:44],pte[11:0])
*/
-#define _PAGE_NX (cpu_has_nx ? (1<<31) : 0)
+#define _PAGE_NX_BIT (1U<<31)
+#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0)
/* Extract flags into 32-bit integer, or turn 32-bit flags into a pte mask. */
#define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
index 0afb5e719b..429cfb8c5d 100644
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -44,6 +44,8 @@ typedef l4_pgentry_t root_pgentry_t;
/* Given a virtual address, get an entry offset into a linear page table. */
#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
#define l2_linear_offset(_a) (((_a) & VADDR_MASK) >> L2_PAGETABLE_SHIFT)
+#define l3_linear_offset(_a) (((_a) & VADDR_MASK) >> L3_PAGETABLE_SHIFT)
+#define l4_linear_offset(_a) (((_a) & VADDR_MASK) >> L4_PAGETABLE_SHIFT)
#define is_guest_l1_slot(_s) (1)
#define is_guest_l2_slot(_t, _s) (1)
@@ -70,7 +72,8 @@ typedef l4_pgentry_t root_pgentry_t;
#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF))
/* Bit 23 of a 24-bit flag mask. This corresponds to bit 63 of a pte.*/
-#define _PAGE_NX (cpu_has_nx ? (1U<<23) : 0U)
+#define _PAGE_NX_BIT (1U<<23)
+#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0U)
#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
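
Both the 3-level and 64-bit page headers gain _PAGE_NX_BIT alongside the existing _PAGE_NX: the new name is always the raw bit position, while _PAGE_NX still evaluates to zero when the CPU lacks NX. A standalone sketch of why the distinction matters (invented names, with host NX support faked as a variable):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_NX_BIT (1U << 23)              /* always the bit position */
    static int cpu_has_nx = 0;                  /* pretend this host lacks NX */
    #define PAGE_NX (cpu_has_nx ? PAGE_NX_BIT : 0U)

    int main(void)
    {
        uint32_t guest_flags = PAGE_NX_BIT | 0x3;   /* guest set NX in its own PTE */

        /* Masking with PAGE_NX loses the guest's intent on a non-NX host;
         * masking with PAGE_NX_BIT still sees it. */
        printf("via _PAGE_NX:     %s\n", (guest_flags & PAGE_NX) ? "NX set" : "NX lost");
        printf("via _PAGE_NX_BIT: %s\n", (guest_flags & PAGE_NX_BIT) ? "NX set" : "NX lost");
        return 0;
    }

Shadow code that must preserve or audit the guest's NX bit regardless of host support presumably needs the unconditional constant, which is likely why it is introduced here.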
diff --git a/xen/include/public/dom0_ops.h b/xen/include/public/dom0_ops.h
index d211ca1624..f12cc93108 100644
--- a/xen/include/public/dom0_ops.h
+++ b/xen/include/public/dom0_ops.h
@@ -262,6 +262,18 @@ DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t);
#define DOM0_SHADOW_CONTROL_OP_CLEAN 11
#define DOM0_SHADOW_CONTROL_OP_PEEK 12
+/* Shadow2 operations */
+#define DOM0_SHADOW2_CONTROL_OP_GET_ALLOCATION 30
+#define DOM0_SHADOW2_CONTROL_OP_SET_ALLOCATION 31
+#define DOM0_SHADOW2_CONTROL_OP_ENABLE 32
+
+/* Mode flags for Shadow2 enable op */
+#define DOM0_SHADOW2_CONTROL_FLAG_ENABLE (1 << 0)
+#define DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT (1 << 1)
+#define DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY (1 << 2)
+#define DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE (1 << 3)
+#define DOM0_SHADOW2_CONTROL_FLAG_EXTERNAL (1 << 4)
+
struct dom0_shadow_control_stats {
uint32_t fault_count;
uint32_t dirty_count;
@@ -277,7 +289,9 @@ struct dom0_shadow_control {
uint32_t op;
XEN_GUEST_HANDLE(ulong) dirty_bitmap;
/* IN/OUT variables. */
- uint64_t pages; /* size of buffer, updated with actual size */
+ uint64_t pages; /* size of buffer, updated with actual size */
+ uint32_t mb; /* Shadow2 memory allocation in MB */
+ uint32_t mode; /* Shadow2 mode to enable */
/* OUT variables. */
struct dom0_shadow_control_stats stats;
};
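
The new shadow2 control constants reuse struct dom0_shadow_control, with mb and mode only meaningful for the new ops. A sketch of how a caller might fill the structure for the ENABLE op, using a trimmed stand-in for the struct; the real call goes through the dom0_op hypercall wrapper (omitted here), and the flag combination is illustrative rather than mandated by the header:

    #include <stdint.h>
    #include <stdio.h>

    #define DOM0_SHADOW2_CONTROL_OP_ENABLE        32
    #define DOM0_SHADOW2_CONTROL_FLAG_ENABLE      (1 << 0)
    #define DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT    (1 << 1)
    #define DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY   (1 << 2)

    /* Trimmed stand-in for the IN/OUT part of struct dom0_shadow_control. */
    struct shadow2_enable_args {
        uint32_t op;
        uint32_t mb;        /* shadow2 memory allocation in MB */
        uint32_t mode;      /* shadow2 mode flags to enable */
    };

    int main(void)
    {
        struct shadow2_enable_args args = {
            .op   = DOM0_SHADOW2_CONTROL_OP_ENABLE,
            .mb   = 16,
            .mode = DOM0_SHADOW2_CONTROL_FLAG_ENABLE |
                    DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT |
                    DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY,
        };

        printf("op=%u mb=%uMB mode=%#x\n", args.op, args.mb, args.mode);
        return 0;
    }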
diff --git a/xen/include/xen/domain_page.h b/xen/include/xen/domain_page.h
index 03d7af5f0f..2a51fcbacb 100644
--- a/xen/include/xen/domain_page.h
+++ b/xen/include/xen/domain_page.h
@@ -26,6 +26,13 @@ extern void *map_domain_page(unsigned long pfn);
*/
extern void unmap_domain_page(void *va);
+/*
+ * Convert a VA (within a page previously mapped in the context of the
+ * currently-executing VCPU via a call to map_domain_page()) to a machine
+ * address.
+ */
+extern paddr_t mapped_domain_page_to_maddr(void *va);
+
/*
* Similar to the above calls, except the mapping is accessible in all
* address spaces (not just within the VCPU that created the mapping). Global
@@ -98,6 +105,7 @@ domain_mmap_cache_destroy(struct domain_mmap_cache *cache)
#define map_domain_page(pfn) maddr_to_virt((pfn)<<PAGE_SHIFT)
#define unmap_domain_page(va) ((void)(va))
+#define mapped_domain_page_to_maddr(va) (virt_to_maddr(va))
#define map_domain_page_global(pfn) maddr_to_virt((pfn)<<PAGE_SHIFT)
#define unmap_domain_page_global(va) ((void)(va))
@@ -112,4 +120,9 @@ struct domain_mmap_cache {
#endif /* !CONFIG_DOMAIN_PAGE */
+#define HERE_I_AM \
+do { \
+ printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__); \
+} while (0)
+
#endif /* __XEN_DOMAIN_PAGE_H__ */
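
The new mapped_domain_page_to_maddr() sits between map_domain_page() and unmap_domain_page() in the usual calling pattern. A standalone sketch of that pattern, with invented stub implementations so it compiles outside the hypervisor (only the call sequence is the point):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t paddr_t;

    /* Stubs standing in for the declarations above. */
    static unsigned char frame[4096];
    static unsigned long mapped_pfn;

    static void *map_domain_page(unsigned long pfn) { mapped_pfn = pfn; return frame; }
    static void unmap_domain_page(void *va) { (void)va; }
    static paddr_t mapped_domain_page_to_maddr(void *va)
    {
        unsigned long offset = (unsigned long)((unsigned char *)va - frame);
        return ((paddr_t)mapped_pfn << 12) + offset;    /* 12 == PAGE_SHIFT */
    }

    int main(void)
    {
        unsigned char *va = map_domain_page(0x1234);   /* map one frame */

        va[0] = 0xab;                                  /* use the mapping */
        printf("maddr of va+8 = %#llx\n",
               (unsigned long long)mapped_domain_page_to_maddr(va + 8));

        unmap_domain_page(va);                         /* balance the map */
        return 0;
    }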
diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h
index e2c67a1d46..e7d84afd92 100644
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -18,7 +18,7 @@ extern void __bug(char *file, int line) __attribute__((noreturn));
#ifndef NDEBUG
#define ASSERT(_p) \
do { \
- if ( !(_p) ) \
+ if ( unlikely(!(_p)) ) \
{ \
printk("Assertion '%s' failed, line %d, file %s\n", #_p , \
__LINE__, __FILE__); \
@@ -41,7 +41,7 @@ struct domain;
void cmdline_parse(char *cmdline);
#ifndef NDEBUG
-extern int debugtrace_send_to_console;
+extern void debugtrace_toggle(void);
extern void debugtrace_dump(void);
extern void debugtrace_printk(const char *fmt, ...);
#else
diff --git a/xen/include/xen/list.h b/xen/include/xen/list.h
index 66cdfc814b..5072d0b924 100644
--- a/xen/include/xen/list.h
+++ b/xen/include/xen/list.h
@@ -162,6 +162,16 @@ static __inline__ void list_splice(struct list_head *list, struct list_head *hea
pos = n, n = pos->next)
/**
+ * list_for_each_backwards_safe - iterate backwards over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_backwards_safe(pos, n, head) \
+ for (pos = (head)->prev, n = pos->prev; pos != (head); \
+ pos = n, n = pos->prev)
+
+/**
* list_for_each_entry - iterate over list of given type
* @pos: the type * to use as a loop counter.
* @head: the head for your list.
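
The new list_for_each_backwards_safe mirrors list_for_each_safe but walks from tail to head, caching pos->prev so the current entry may be unlinked mid-walk. A standalone usage sketch with a pared-down copy of the list primitives, enough to compile outside Xen:

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void list_add(struct list_head *new, struct list_head *head)
    {
        new->next = head->next; new->prev = head;
        head->next->prev = new; head->next = new;
    }

    static void list_del(struct list_head *entry)
    {
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
    }

    #define list_for_each_backwards_safe(pos, n, head)            \
        for (pos = (head)->prev, n = pos->prev; pos != (head);    \
             pos = n, n = pos->prev)

    struct item { int id; struct list_head link; };
    #define to_item(p) ((struct item *)((char *)(p) - offsetof(struct item, link)))

    int main(void)
    {
        struct list_head head = { &head, &head };
        struct item items[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
        struct list_head *pos, *n;
        int i;

        for (i = 0; i < 3; i++)
            list_add(&items[i].link, &head);

        /* Safe to unlink entries while walking from tail to head,
         * because n was read before pos could be removed. */
        list_for_each_backwards_safe(pos, n, &head) {
            printf("visiting %d\n", to_item(pos)->id);
            list_del(pos);
        }
        return 0;
    }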
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c37e60f23a..d90b27adc7 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -376,9 +376,12 @@ extern struct domain *domain_list;
/* VCPU is paused by the hypervisor? */
#define _VCPUF_paused 11
#define VCPUF_paused (1UL<<_VCPUF_paused)
- /* VCPU is blocked awaiting an event to be consumed by Xen. */
+/* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VCPUF_blocked_in_xen 12
#define VCPUF_blocked_in_xen (1UL<<_VCPUF_blocked_in_xen)
+/* HVM vcpu thinks CR0.PG == 0 */
+#define _VCPUF_shadow2_translate 13
+#define VCPUF_shadow2_translate (1UL<<_VCPUF_shadow2_translate)
/*
* Per-domain flags (domain_flags).