commit 49f7c7364e0acbc44604e0315599782096eee522
tree   b9dcdab90c9598f12d5559edc96efdaf7afd0da3 /xen/include
parent e3e351919cc62f3cdabd8cef9b3a6be9ab4f13dd
author    tdeegan@york.uk.xensource.com <tdeegan@york.uk.xensource.com>  2006-08-16 17:02:35 +0100
committer tdeegan@york.uk.xensource.com <tdeegan@york.uk.xensource.com>  2006-08-16 17:02:35 +0100
Replace shadow pagetable code with shadow2.
Diffstat (limited to 'xen/include')
31 files changed, 2486 insertions, 2671 deletions
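Before the per-file changes, the core idea being reworked: a shadow pagetable is Xen's private copy of a guest pagetable, with guest frame numbers translated to machine frame numbers and permissions tightened wherever Xen must intercept writes. A minimal sketch of that propagation step (illustrative code only, not from the patch; the PTE layout and gfn_to_mfn() are simplified stand-ins):

#include <stdint.h>

/* Hypothetical, simplified PTE layout: bit 0 = present, bit 1 = writable,
 * bits 12 and up = frame number.  (Real Xen uses l1_pgentry_t etc.) */
typedef uint64_t pte_t;
#define PTE_P        (1ULL << 0)
#define PTE_RW       (1ULL << 1)
#define PTE_FRAME(e) ((e) >> 12)

/* Stand-in for the guest-frame to machine-frame (p2m) lookup; an
 * identity map keeps the sketch self-contained. */
static uint64_t gfn_to_mfn(uint64_t gfn) { return gfn; }

/* The essence of propagating one guest L1 entry into its shadow:
 * translate the frame number, and drop write access in log-dirty
 * mode so the guest's first write faults and can be logged. */
pte_t shadow_l1e(pte_t guest_e, int log_dirty)
{
    if ( !(guest_e & PTE_P) )
        return 0;                            /* not present: empty shadow */
    pte_t shadow_e = (gfn_to_mfn(PTE_FRAME(guest_e)) << 12)
                   | (guest_e & (PTE_P | PTE_RW));
    if ( log_dirty )
        shadow_e &= ~PTE_RW;                 /* force a fault on first write */
    return shadow_e;
}

shadow2 performs this per pagetable level for 2-, 3- and 4-level guests, which is what the shadow2_entry_points hook and the PGC_SH2_* shadow types in the diff below enumerate.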
diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h index b2ee953361..b9fd2557d0 100644 --- a/xen/include/asm-x86/bitops.h +++ b/xen/include/asm-x86/bitops.h @@ -75,6 +75,24 @@ static __inline__ void clear_bit(int nr, volatile void * addr) :"=m" (ADDR) :"dIr" (nr)); } + +/** + * __clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * Unlike clear_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void __clear_bit(int nr, volatile void * addr) +{ + __asm__( + "btrl %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + #define smp_mb__before_clear_bit() barrier() #define smp_mb__after_clear_bit() barrier() diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index 99c74cf5ad..74a123de6f 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -79,9 +79,14 @@ #ifndef __ASSEMBLY__ extern unsigned long _end; /* standard ELF symbol */ -#endif /* __ASSEMBLY__ */ -#define FORCE_CRASH() __asm__ __volatile__ ( "ud2" ) +static inline void FORCE_CRASH(void) __attribute__((noreturn,always_inline)); +static inline void FORCE_CRASH(void) +{ + __asm__ __volatile__ ( "ud2" ); + while(1); +} +#endif /* __ASSEMBLY__ */ #if defined(__x86_64__) @@ -149,9 +154,14 @@ extern unsigned long _end; /* standard ELF symbol */ /* Slot 256: read-only guest-accessible machine-to-phys translation table. */ #define RO_MPT_VIRT_START (PML4_ADDR(256)) #define RO_MPT_VIRT_END (RO_MPT_VIRT_START + PML4_ENTRY_BYTES/2) + +// current unused? +#if 0 /* Slot 257: read-only guest-accessible linear page table. */ #define RO_LINEAR_PT_VIRT_START (PML4_ADDR(257)) #define RO_LINEAR_PT_VIRT_END (RO_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES) +#endif + /* Slot 258: linear page table (guest table). */ #define LINEAR_PT_VIRT_START (PML4_ADDR(258)) #define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES) @@ -175,7 +185,7 @@ extern unsigned long _end; /* standard ELF symbol */ #define DIRECTMAP_VIRT_START (PML4_ADDR(262)) #define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2) -#define PGT_base_page_table PGT_l4_page_table +#define PGT_base_page_table PGT_l4_page_table #define __HYPERVISOR_CS64 0xe010 #define __HYPERVISOR_CS32 0xe008 @@ -274,9 +284,9 @@ extern unsigned long _end; /* standard ELF symbol */ (L2_PAGETABLE_LAST_XEN_SLOT - L2_PAGETABLE_FIRST_XEN_SLOT + 1) #ifdef CONFIG_X86_PAE -# define PGT_base_page_table PGT_l3_page_table +# define PGT_base_page_table PGT_l3_page_table #else -# define PGT_base_page_table PGT_l2_page_table +# define PGT_base_page_table PGT_l2_page_table #endif #define __HYPERVISOR_CS 0xe008 diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index a0efe89f0a..2ef0775795 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -73,42 +73,42 @@ struct arch_domain /* I/O-port admin-specified access capabilities. */ struct rangeset *ioport_caps; - /* Shadow mode status and controls. 
*/ - struct shadow_ops *ops; - unsigned int shadow_mode; /* flags to control shadow table operation */ - unsigned int shadow_nest; /* Recursive depth of shadow_lock() nesting */ - - /* shadow hashtable */ - struct shadow_status *shadow_ht; - struct shadow_status *shadow_ht_free; - struct shadow_status *shadow_ht_extras; /* extra allocation units */ - unsigned int shadow_extras_count; - - /* shadow dirty bitmap */ + /* HVM stuff */ + struct hvm_domain hvm_domain; + + /* Shadow-translated guest: Pseudophys base address of reserved area. */ + unsigned long first_reserved_pfn; + + /* Shadow2 stuff */ + u32 shadow2_mode; /* flags to control shadow operation */ + spinlock_t shadow2_lock; /* shadow2 domain lock */ + int shadow2_locker; /* processor which holds the lock */ + const char *shadow2_locker_function; /* Func that took it */ + struct list_head shadow2_freelists[SHADOW2_MAX_ORDER + 1]; + struct list_head shadow2_p2m_freelist; + struct list_head shadow2_p2m_inuse; + struct list_head shadow2_toplevel_shadows; + unsigned int shadow2_total_pages; /* number of pages allocated */ + unsigned int shadow2_free_pages; /* number of pages on freelists */ + unsigned int shadow2_p2m_pages; /* number of pages in p2m map */ + + /* Shadow2 hashtable */ + struct shadow2_hash_entry *shadow2_hash_table; + struct shadow2_hash_entry *shadow2_hash_freelist; + struct shadow2_hash_entry *shadow2_hash_allocations; + int shadow2_hash_walking; /* Some function is walking the hash table */ + + /* Shadow log-dirty bitmap */ unsigned long *shadow_dirty_bitmap; unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */ - /* shadow mode stats */ - unsigned int shadow_page_count; - unsigned int hl2_page_count; - unsigned int snapshot_page_count; - + /* Shadow log-dirty mode stats */ unsigned int shadow_fault_count; unsigned int shadow_dirty_count; - /* full shadow mode */ - struct out_of_sync_entry *out_of_sync; /* list of out-of-sync pages */ - struct out_of_sync_entry *out_of_sync_free; - struct out_of_sync_entry *out_of_sync_extras; - unsigned int out_of_sync_extras_count; + /* Shadow translated domain: P2M mapping */ + pagetable_t phys_table; - struct list_head free_shadow_frames; - - pagetable_t phys_table; /* guest 1:1 pagetable */ - struct hvm_domain hvm_domain; - - /* Shadow-translated guest: Pseudophys base address of reserved area. */ - unsigned long first_reserved_pfn; } __cacheline_aligned; #ifdef CONFIG_X86_PAE @@ -166,25 +166,34 @@ struct arch_vcpu */ l1_pgentry_t *perdomain_ptes; - pagetable_t guest_table_user; /* x86/64: user-space pagetable. 
*/ - pagetable_t guest_table; /* (MA) guest notion of cr3 */ - pagetable_t shadow_table; /* (MA) shadow of guest */ - pagetable_t monitor_table; /* (MA) used in hypervisor */ - - l2_pgentry_t *guest_vtable; /* virtual address of pagetable */ - l2_pgentry_t *shadow_vtable; /* virtual address of shadow_table */ - l2_pgentry_t *monitor_vtable; /* virtual address of monitor_table */ - l1_pgentry_t *hl2_vtable; /* virtual address of hl2_table */ - #ifdef CONFIG_X86_64 - l3_pgentry_t *guest_vl3table; - l4_pgentry_t *guest_vl4table; + pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ #endif + pagetable_t guest_table; /* (MFN) guest notion of cr3 */ + /* guest_table holds a ref to the page, and also a type-count unless + * shadow refcounts are in use */ + pagetable_t shadow_table; /* (MFN) shadow of guest */ + pagetable_t monitor_table; /* (MFN) hypervisor PT (for HVM) */ + unsigned long cr3; /* (MA) value to install in HW CR3 */ - unsigned long monitor_shadow_ref; + void *guest_vtable; /* virtual address of pagetable */ + void *shadow_vtable; /* virtual address of shadow_table */ + root_pgentry_t *monitor_vtable; /* virtual address of monitor_table */ /* Current LDT details. */ unsigned long shadow_ldt_mapcnt; + + /* Shadow2 stuff */ + /* -- pointers to mode-specific entry points */ + struct shadow2_entry_points *shadow2; + unsigned long last_emulated_mfn; /* last mfn we emulated a write to */ + u8 shadow2_propagate_fault; /* emulated fault needs to be */ + /* propagated to guest */ +#if CONFIG_PAGING_LEVELS >= 3 + u8 shadow2_pae_flip_pending; /* shadow update requires this PAE cpu + * to recopy/install its L3 table. + */ +#endif } __cacheline_aligned; /* shorthands to improve code legibility */ diff --git a/xen/include/asm-x86/grant_table.h b/xen/include/asm-x86/grant_table.h index 5c6600ac7e..277b93ca0c 100644 --- a/xen/include/asm-x86/grant_table.h +++ b/xen/include/asm-x86/grant_table.h @@ -31,7 +31,7 @@ int destroy_grant_host_mapping( #define gnttab_shared_gmfn(d, t, i) \ (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i))) -#define gnttab_log_dirty(d, f) mark_dirty((d), (f)) +#define gnttab_mark_dirty(d, f) mark_dirty((d), (f)) static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr) { diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index 73f3b31275..cb573e5d9c 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -56,9 +56,16 @@ struct hvm_function_table { */ int (*realmode)(struct vcpu *v); int (*paging_enabled)(struct vcpu *v); + int (*long_mode_enabled)(struct vcpu *v); + int (*guest_x86_mode)(struct vcpu *v); int (*instruction_length)(struct vcpu *v); unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num); + /* + * Re-set the value of CR3 that Xen runs on when handling VM exits + */ + void (*update_host_cr3)(struct vcpu *v); + /* * Update specifics of the guest state: * 1) TS bit in guest cr0 @@ -134,11 +141,29 @@ hvm_paging_enabled(struct vcpu *v) } static inline int +hvm_long_mode_enabled(struct vcpu *v) +{ + return hvm_funcs.long_mode_enabled(v); +} + +static inline int +hvm_guest_x86_mode(struct vcpu *v) +{ + return hvm_funcs.guest_x86_mode(v); +} + +static inline int hvm_instruction_length(struct vcpu *v) { return hvm_funcs.instruction_length(v); } +static inline void +hvm_update_host_cr3(struct vcpu *v) +{ + hvm_funcs.update_host_cr3(v); +} + void hvm_hypercall_page_initialise(struct domain *d, void *hypercall_page); diff --git a/xen/include/asm-x86/hvm/support.h 
b/xen/include/asm-x86/hvm/support.h index 35a0bfe464..6ccfdee678 100644 --- a/xen/include/asm-x86/hvm/support.h +++ b/xen/include/asm-x86/hvm/support.h @@ -116,10 +116,13 @@ enum hval_bitmaps { #define DBG_LEVEL_IOAPIC (1 << 9) extern unsigned int opt_hvm_debug_level; -#define HVM_DBG_LOG(level, _f, _a...) \ - if ( (level) & opt_hvm_debug_level ) \ - printk("[HVM:%d.%d] <%s> " _f "\n", \ - current->domain->domain_id, current->vcpu_id, __func__, ## _a) +#define HVM_DBG_LOG(level, _f, _a...) \ + do { \ + if ( (level) & opt_hvm_debug_level ) \ + printk("[HVM:%d.%d] <%s> " _f "\n", \ + current->domain->domain_id, current->vcpu_id, __func__, \ + ## _a); \ + } while (0) #else #define HVM_DBG_LOG(level, _f, _a...) #endif diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index f89b6ad787..b607a4578b 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -29,6 +29,7 @@ #define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1 struct hvm_vcpu { + unsigned long hw_cr3; /* value we give to HW to use */ unsigned long ioflags; struct hvm_io_op io_op; struct vlapic *vlapic; @@ -40,6 +41,11 @@ struct hvm_vcpu { int xen_port; +#if CONFIG_PAGING_LEVELS >= 3 + l3_pgentry_t hvm_lowmem_l3tab[4] + __attribute__((__aligned__(32))); +#endif + /* Flags */ int flag_dr_dirty; diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index 85ee7046fd..524411be34 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -87,6 +87,7 @@ struct arch_vmx_struct { unsigned long cpu_cr0; /* copy of guest CR0 */ unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */ + unsigned long cpu_shadow_cr4; /* copy of guest read shadow CR4 */ unsigned long cpu_cr2; /* save CR2 */ unsigned long cpu_cr3; unsigned long cpu_state; diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h index 38ae0e3b0f..38e447259c 100644 --- a/xen/include/asm-x86/hvm/vmx/vmx.h +++ b/xen/include/asm-x86/hvm/vmx/vmx.h @@ -298,6 +298,9 @@ static always_inline void __vmwrite_vcpu( case GUEST_CR0: v->arch.hvm_vmx.cpu_cr0 = value; break; + case CR4_READ_SHADOW: + v->arch.hvm_vmx.cpu_shadow_cr4 = value; + break; case CPU_BASED_VM_EXEC_CONTROL: v->arch.hvm_vmx.cpu_based_exec_control = value; break; @@ -317,11 +320,14 @@ static always_inline void __vmread_vcpu( case GUEST_CR0: *value = v->arch.hvm_vmx.cpu_cr0; break; + case CR4_READ_SHADOW: + *value = v->arch.hvm_vmx.cpu_shadow_cr4; + break; case CPU_BASED_VM_EXEC_CONTROL: *value = v->arch.hvm_vmx.cpu_based_exec_control; break; default: - printk("__vmread_cpu: invalid field %lx\n", field); + printk("__vmread_vcpu: invalid field %lx\n", field); break; } } @@ -342,6 +348,7 @@ static inline int __vmwrite(unsigned long field, unsigned long value) switch ( field ) { case CR0_READ_SHADOW: case GUEST_CR0: + case CR4_READ_SHADOW: case CPU_BASED_VM_EXEC_CONTROL: __vmwrite_vcpu(v, field, value); break; @@ -404,6 +411,46 @@ static inline int vmx_paging_enabled(struct vcpu *v) return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG); } +/* Works only for vcpu == current */ +static inline int vmx_long_mode_enabled(struct vcpu *v) +{ + ASSERT(v == current); + return VMX_LONG_GUEST(current); +} + +/* Works only for vcpu == current */ +static inline int vmx_realmode(struct vcpu *v) +{ + unsigned long rflags; + ASSERT(v == current); + + __vmread(GUEST_RFLAGS, &rflags); + return rflags & X86_EFLAGS_VM; +} + +/* Works only for vcpu == current */ +static inline void vmx_update_host_cr3(struct vcpu 
*v) +{ + ASSERT(v == current); + __vmwrite(HOST_CR3, v->arch.cr3); +} + +static inline int vmx_guest_x86_mode(struct vcpu *v) +{ + unsigned long cs_ar_bytes; + ASSERT(v == current); + + if ( vmx_long_mode_enabled(v) ) + { + __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes); + return (cs_ar_bytes & (1u<<13)) ? 8 : 4; + } + if ( vmx_realmode(v) ) + return 2; + __vmread(GUEST_CS_AR_BYTES, &cs_ar_bytes); + return (cs_ar_bytes & (1u<<14)) ? 4 : 2; +} + static inline int vmx_pgbit_test(struct vcpu *v) { unsigned long cr0; diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 06ea598754..0b19fbe7ec 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -20,7 +20,11 @@ struct page_info { /* Each frame can be threaded onto a doubly-linked list. */ - struct list_head list; + union { + struct list_head list; + /* Shadow2 uses this field as an up-pointer in lower-level shadows */ + paddr_t up; + }; /* Reference count and various PGC_xxx flags and fields. */ u32 count_info; @@ -46,8 +50,20 @@ struct page_info } u; - /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ - u32 tlbflush_timestamp; + union { + /* Timestamp from 'TLB clock', used to reduce need for safety + * flushes. Only valid on a) free pages, and b) guest pages with a + * zero type count. */ + u32 tlbflush_timestamp; + + /* Only used on guest pages with a shadow. + * Guest pages with a shadow must have a non-zero type count, so this + * does not conflict with the tlbflush timestamp. */ + u32 shadow2_flags; + + // XXX -- we expect to add another field here, to be used for min/max + // purposes, which is only used for shadow pages. + }; }; /* The following page types are MUTUALLY EXCLUSIVE. */ @@ -60,6 +76,7 @@ struct page_info #define PGT_ldt_page (6U<<29) /* using this page in an LDT? */ #define PGT_writable_page (7U<<29) /* has writable mappings of this page? */ +#ifndef SHADOW2 #define PGT_l1_shadow PGT_l1_page_table #define PGT_l2_shadow PGT_l2_page_table #define PGT_l3_shadow PGT_l3_page_table @@ -69,14 +86,16 @@ struct page_info #define PGT_writable_pred (7U<<29) /* predicted gpfn with writable ref */ #define PGT_fl1_shadow (5U<<29) +#endif + #define PGT_type_mask (7U<<29) /* Bits 29-31. */ - /* Has this page been validated for use as its current type? */ -#define _PGT_validated 28 -#define PGT_validated (1U<<_PGT_validated) /* Owning guest has pinned this page to its current type? */ -#define _PGT_pinned 27 +#define _PGT_pinned 28 #define PGT_pinned (1U<<_PGT_pinned) + /* Has this page been validated for use as its current type? */ +#define _PGT_validated 27 +#define PGT_validated (1U<<_PGT_validated) #if defined(__i386__) /* The 11 most significant bits of virt address if this is a page table. */ #define PGT_va_shift 16 @@ -98,6 +117,7 @@ struct page_info /* 16-bit count of uses of this frame as its current type. */ #define PGT_count_mask ((1U<<16)-1) +#ifndef SHADOW2 #ifdef __x86_64__ #define PGT_high_mfn_shift 52 #define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift) @@ -112,19 +132,53 @@ struct page_info #define PGT_score_shift 23 #define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift) #endif +#endif /* SHADOW2 */ /* Cleared when the owning guest 'frees' this page. 
*/ #define _PGC_allocated 31 #define PGC_allocated (1U<<_PGC_allocated) - /* Set when fullshadow mode marks a page out-of-sync */ + /* Set on a *guest* page to mark it out-of-sync with its shadow */ #define _PGC_out_of_sync 30 #define PGC_out_of_sync (1U<<_PGC_out_of_sync) - /* Set when fullshadow mode is using a page as a page table */ + /* Set when is using a page as a page table */ #define _PGC_page_table 29 #define PGC_page_table (1U<<_PGC_page_table) /* 29-bit count of references to this frame. */ #define PGC_count_mask ((1U<<29)-1) +/* shadow2 uses the count_info on shadow pages somewhat differently */ +/* NB: please coordinate any changes here with the SH2F's in shadow2.h */ +#define PGC_SH2_none (0U<<28) /* on the shadow2 free list */ +#define PGC_SH2_min_shadow (1U<<28) +#define PGC_SH2_l1_32_shadow (1U<<28) /* shadowing a 32-bit L1 guest page */ +#define PGC_SH2_fl1_32_shadow (2U<<28) /* L1 shadow for a 32b 4M superpage */ +#define PGC_SH2_l2_32_shadow (3U<<28) /* shadowing a 32-bit L2 guest page */ +#define PGC_SH2_l1_pae_shadow (4U<<28) /* shadowing a pae L1 page */ +#define PGC_SH2_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */ +#define PGC_SH2_l2_pae_shadow (6U<<28) /* shadowing a pae L2-low page */ +#define PGC_SH2_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */ +#define PGC_SH2_l3_pae_shadow (8U<<28) /* shadowing a pae L3 page */ +#define PGC_SH2_l1_64_shadow (9U<<28) /* shadowing a 64-bit L1 page */ +#define PGC_SH2_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */ +#define PGC_SH2_l2_64_shadow (11U<<28) /* shadowing a 64-bit L2 page */ +#define PGC_SH2_l3_64_shadow (12U<<28) /* shadowing a 64-bit L3 page */ +#define PGC_SH2_l4_64_shadow (13U<<28) /* shadowing a 64-bit L4 page */ +#define PGC_SH2_max_shadow (13U<<28) +#define PGC_SH2_p2m_table (14U<<28) /* in use as the p2m table */ +#define PGC_SH2_monitor_table (15U<<28) /* in use as a monitor table */ +#define PGC_SH2_unused (15U<<28) + +#define PGC_SH2_type_mask (15U<<28) +#define PGC_SH2_type_shift 28 + +#define PGC_SH2_pinned (1U<<27) + +#define _PGC_SH2_log_dirty 26 +#define PGC_SH2_log_dirty (1U<<26) + +/* 26 bit ref count for shadow pages */ +#define PGC_SH2_count_mask ((1U<<26) - 1) + /* We trust the slab allocator in slab.c, and our use of it. */ #define PageSlab(page) (1) #define PageSetSlab(page) ((void)0) @@ -134,16 +188,24 @@ struct page_info #if defined(__i386__) #define pickle_domptr(_d) ((u32)(unsigned long)(_d)) -#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d)) +static inline struct domain *unpickle_domptr(u32 _domain) +{ return (_domain & 1) ? NULL : (void *)_domain; } #define PRtype_info "08lx" /* should only be used for printk's */ #elif defined(__x86_64__) static inline struct domain *unpickle_domptr(u32 _domain) -{ return (_domain == 0) ? NULL : __va(_domain); } +{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); } static inline u32 pickle_domptr(struct domain *domain) { return (domain == NULL) ? 
0 : (u32)__pa(domain); } #define PRtype_info "016lx"/* should only be used for printk's */ #endif +/* The order of the largest allocation unit we use for shadow pages */ +#if CONFIG_PAGING_LEVELS == 2 +#define SHADOW2_MAX_ORDER 0 /* Only ever need 4k allocations */ +#else +#define SHADOW2_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */ +#endif + #define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d)) @@ -165,7 +227,7 @@ extern void invalidate_shadow_ldt(struct vcpu *d); extern int shadow_remove_all_write_access( struct domain *d, unsigned long gmfn, unsigned long mfn); extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn); -extern int _shadow_mode_refcounts(struct domain *d); +extern int _shadow2_mode_refcounts(struct domain *d); static inline void put_page(struct page_info *page) { @@ -197,8 +259,8 @@ static inline int get_page(struct page_info *page, unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */ unlikely(d != _domain) ) /* Wrong owner? */ { - if ( !_shadow_mode_refcounts(domain) ) - DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" + if ( !_shadow2_mode_refcounts(domain) ) + DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n", page_to_mfn(page), domain, unpickle_domptr(d), x, page->u.inuse.type_info); @@ -254,6 +316,16 @@ static inline int page_is_removable(struct page_info *page) ASSERT(((_p)->count_info & PGC_count_mask) != 0); \ ASSERT(page_get_owner(_p) == (_d)) +// Quick test for whether a given page can be represented directly in CR3. +// +#if CONFIG_PAGING_LEVELS == 3 +#define MFN_FITS_IN_CR3(_MFN) !(mfn_x(_MFN) >> 20) + +/* returns a lowmem machine address of the copied L3 root table */ +unsigned long +pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab); +#endif /* CONFIG_PAGING_LEVELS == 3 */ + int check_descriptor(struct desc_struct *d); /* @@ -271,29 +343,44 @@ int check_descriptor(struct desc_struct *d); #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) + +#define mfn_to_gmfn(_d, mfn) \ + ( (shadow2_mode_translate(_d)) \ + ? get_gpfn_from_mfn(mfn) \ + : (mfn) ) + +#define gmfn_to_mfn(_d, gpfn) mfn_x(sh2_gfn_to_mfn(_d, gpfn)) + + /* * The phys_to_machine_mapping is the reversed mapping of MPT for full * virtualization. It is only used by shadow_mode_translate()==true * guests, so we steal the address space that would have normally * been used by the read-only MPT map. 
*/ -#define phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START) -#define NR_P2M_TABLE_ENTRIES ((unsigned long *)RO_MPT_VIRT_END \ - - phys_to_machine_mapping) +#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) #define INVALID_MFN (~0UL) #define VALID_MFN(_mfn) (!((_mfn) & (1U<<31))) -#define set_mfn_from_gpfn(pfn, mfn) (phys_to_machine_mapping[(pfn)] = (mfn)) static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) { - unsigned long mfn; + l1_pgentry_t l1e = l1e_empty(); + int ret; + +#if CONFIG_PAGING_LEVELS > 2 + if ( pfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t) ) + /* This pfn is higher than the p2m map can hold */ + return INVALID_MFN; +#endif + + ret = __copy_from_user(&l1e, + &phys_to_machine_mapping[pfn], + sizeof(l1e)); - if ( unlikely(pfn >= NR_P2M_TABLE_ENTRIES) || - unlikely(__copy_from_user(&mfn, &phys_to_machine_mapping[pfn], - sizeof(mfn))) ) - mfn = INVALID_MFN; + if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) + return l1e_get_pfn(l1e); - return mfn; + return INVALID_MFN; } #ifdef MEMORY_GUARD @@ -333,6 +420,7 @@ void audit_domains(void); #endif int new_guest_cr3(unsigned long pfn); +void make_cr3(struct vcpu *v, unsigned long mfn); void propagate_page_fault(unsigned long addr, u16 error_code); diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h index f1c08cf500..07c09b2ae2 100644 --- a/xen/include/asm-x86/msr.h +++ b/xen/include/asm-x86/msr.h @@ -112,6 +112,10 @@ static inline void wrmsrl(unsigned int msr, __u64 val) #define MSR_IA32_VMX_EXIT_CTLS_MSR 0x483 #define MSR_IA32_VMX_ENTRY_CTLS_MSR 0x484 #define MSR_IA32_VMX_MISC_MSR 0x485 +#define MSR_IA32_VMX_CR0_FIXED0 0x486 +#define MSR_IA32_VMX_CR0_FIXED1 0x487 +#define MSR_IA32_VMX_CR4_FIXED0 0x488 +#define MSR_IA32_VMX_CR4_FIXED1 0x489 #define IA32_FEATURE_CONTROL_MSR 0x3a #define IA32_FEATURE_CONTROL_MSR_LOCK 0x1 #define IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON 0x4 diff --git a/xen/include/asm-x86/page-guest32.h b/xen/include/asm-x86/page-guest32.h index cf5595b078..e93206169a 100644 --- a/xen/include/asm-x86/page-guest32.h +++ b/xen/include/asm-x86/page-guest32.h @@ -89,15 +89,8 @@ static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags) #define linear_l1_table_32 \ ((l1_pgentry_32_t *)(LINEAR_PT_VIRT_START)) -#define __linear_l2_table_32 \ - ((l2_pgentry_32_t *)(LINEAR_PT_VIRT_START + \ - (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)))) #define linear_pg_table_32 linear_l1_table_32 -#define linear_l2_table_32(_ed) ((_ed)->arch.guest_vtable) - -#define va_to_l1mfn_32(_ed, _va) \ - (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT])) #endif /* __X86_PAGE_GUEST_H__ */ diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h index 6432402066..94158c7f3d 100644 --- a/xen/include/asm-x86/page.h +++ b/xen/include/asm-x86/page.h @@ -233,26 +233,18 @@ typedef struct { u64 pfn; } pagetable_t; + DOMAIN_ENTRIES_PER_L4_PAGETABLE) #endif -#define LINEAR_PT_OFFSET (LINEAR_PT_VIRT_START & VADDR_MASK) -#define linear_l1_table \ - ((l1_pgentry_t *)(LINEAR_PT_VIRT_START)) -#define __linear_l2_table \ - ((l2_pgentry_t *)(LINEAR_PT_VIRT_START + \ - (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)))) -#define __linear_l3_table \ - ((l3_pgentry_t *)(LINEAR_PT_VIRT_START + \ - (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) + \ - (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1)))) -#define __linear_l4_table \ - ((l4_pgentry_t *)(LINEAR_PT_VIRT_START + \ - (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) + \ - (LINEAR_PT_OFFSET >> 
(PAGETABLE_ORDER<<1)) + \ - (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<2)))) - +/* Where to find each level of the linear mapping */ +#define __linear_l1_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START)) +#define __linear_l2_table \ + ((l2_pgentry_t *)(__linear_l1_table + l1_linear_offset(LINEAR_PT_VIRT_START))) +#define __linear_l3_table \ + ((l3_pgentry_t *)(__linear_l2_table + l2_linear_offset(LINEAR_PT_VIRT_START))) +#define __linear_l4_table \ + ((l4_pgentry_t *)(__linear_l3_table + l3_linear_offset(LINEAR_PT_VIRT_START))) + +#define linear_l1_table __linear_l1_table #define linear_pg_table linear_l1_table -#define linear_l2_table(v) ((v)->arch.guest_vtable) -#define linear_l3_table(v) ((v)->arch.guest_vl3table) -#define linear_l4_table(v) ((v)->arch.guest_vl4table) +#define linear_l2_table(v) ((l2_pgentry_t *)(v)->arch.guest_vtable) #ifndef __ASSEMBLY__ #if CONFIG_PAGING_LEVELS == 3 @@ -294,6 +286,7 @@ extern void paging_init(void); #define _PAGE_AVAIL1 0x400U #define _PAGE_AVAIL2 0x800U #define _PAGE_AVAIL 0xE00U +#define _PAGE_PSE_PAT 0x1000U /* * Debug option: Ensure that granted mappings are not implicitly unmapped. @@ -307,9 +300,9 @@ extern void paging_init(void); #endif /* - * Disallow unused flag bits plus PAT, PSE and GLOBAL. Also disallow GNTTAB - * if we are using it for grant-table debugging. Permit the NX bit if the - * hardware supports it. + * Disallow unused flag bits plus PAT, PSE and GLOBAL. + * Also disallow GNTTAB if we are using it for grant-table debugging. + * Permit the NX bit if the hardware supports it. */ #define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX) diff --git a/xen/include/asm-x86/perfc_defn.h b/xen/include/asm-x86/perfc_defn.h index 54bc01ea7c..d6e24b207d 100644 --- a/xen/include/asm-x86/perfc_defn.h +++ b/xen/include/asm-x86/perfc_defn.h @@ -144,4 +144,57 @@ PERFCOUNTER_CPU(remove_write_predicted, "remove_write predict hit&exit") PERFCOUNTER_CPU(remove_write_bad_prediction, "remove_write bad prediction") PERFCOUNTER_CPU(update_hl2e_invlpg, "update_hl2e calls invlpg") +/* Shadow2 counters */ +PERFCOUNTER_CPU(shadow2_alloc, "calls to shadow2_alloc") +PERFCOUNTER_CPU(shadow2_alloc_tlbflush, "shadow2_alloc flushed TLBs") +PERFSTATUS(shadow2_alloc_count, "number of shadow pages in use") +PERFCOUNTER_CPU(shadow2_free, "calls to shadow2_free") +PERFCOUNTER_CPU(shadow2_prealloc_1, "shadow2 recycles old shadows") +PERFCOUNTER_CPU(shadow2_prealloc_2, "shadow2 recycles in-use shadows") +PERFCOUNTER_CPU(shadow2_linear_map_failed, "shadow2 hit read-only linear map") +PERFCOUNTER_CPU(shadow2_a_update, "shadow2 A bit update") +PERFCOUNTER_CPU(shadow2_ad_update, "shadow2 A&D bit update") +PERFCOUNTER_CPU(shadow2_fault, "calls to shadow2_fault") +PERFCOUNTER_CPU(shadow2_fault_bail_bad_gfn, "shadow2_fault guest bad gfn") +PERFCOUNTER_CPU(shadow2_fault_bail_not_present, + "shadow2_fault guest not-present") +PERFCOUNTER_CPU(shadow2_fault_bail_nx, "shadow2_fault guest NX fault") +PERFCOUNTER_CPU(shadow2_fault_bail_ro_mapping, "shadow2_fault guest R/W fault") +PERFCOUNTER_CPU(shadow2_fault_bail_user_supervisor, + "shadow2_fault guest U/S fault") +PERFCOUNTER_CPU(shadow2_fault_emulate_read, "shadow2_fault emulates a read") +PERFCOUNTER_CPU(shadow2_fault_emulate_write, "shadow2_fault emulates a write") +PERFCOUNTER_CPU(shadow2_fault_emulate_failed, "shadow2_fault emulator fails") +PERFCOUNTER_CPU(shadow2_fault_mmio, "shadow2_fault handled as mmio") +PERFCOUNTER_CPU(shadow2_fault_fixed, "shadow2_fault fixed fault") +PERFCOUNTER_CPU(shadow2_ptwr_emulate, 
"shadow2 causes ptwr to emulate") +PERFCOUNTER_CPU(shadow2_validate_gl1e_calls, "calls to shadow2_validate_gl1e") +PERFCOUNTER_CPU(shadow2_validate_gl2e_calls, "calls to shadow2_validate_gl2e") +PERFCOUNTER_CPU(shadow2_validate_gl3e_calls, "calls to shadow2_validate_gl3e") +PERFCOUNTER_CPU(shadow2_validate_gl4e_calls, "calls to shadow2_validate_gl4e") +PERFCOUNTER_CPU(shadow2_hash_lookups, "calls to shadow2_hash_lookup") +PERFCOUNTER_CPU(shadow2_hash_lookup_head, "shadow2 hash hit in bucket head") +PERFCOUNTER_CPU(shadow2_hash_lookup_miss, "shadow2 hash misses") +PERFCOUNTER_CPU(shadow2_get_shadow_status, "calls to get_shadow_status") +PERFCOUNTER_CPU(shadow2_hash_inserts, "calls to shadow2_hash_insert") +PERFCOUNTER_CPU(shadow2_hash_deletes, "calls to shadow2_hash_delete") +PERFCOUNTER_CPU(shadow2_writeable, "shadow2 removes write access") +PERFCOUNTER_CPU(shadow2_writeable_h_1, "shadow2 writeable: 32b w2k3") +PERFCOUNTER_CPU(shadow2_writeable_h_2, "shadow2 writeable: 32pae w2k3") +PERFCOUNTER_CPU(shadow2_writeable_h_3, "shadow2 writeable: 64b w2k3") +PERFCOUNTER_CPU(shadow2_writeable_h_4, "shadow2 writeable: 32b linux low") +PERFCOUNTER_CPU(shadow2_writeable_bf, "shadow2 writeable brute-force") +PERFCOUNTER_CPU(shadow2_mappings, "shadow2 removes all mappings") +PERFCOUNTER_CPU(shadow2_mappings_bf, "shadow2 rm-mappings brute-force") +PERFCOUNTER_CPU(shadow2_early_unshadow, "shadow2 unshadows for fork/exit") +PERFCOUNTER_CPU(shadow2_early_unshadow_top, "shadow2 unhooks for fork/exit") +PERFCOUNTER_CPU(shadow2_unshadow, "shadow2 unshadows a page") +PERFCOUNTER_CPU(shadow2_up_pointer, "shadow2 unshadow by up-pointer") +PERFCOUNTER_CPU(shadow2_unshadow_bf, "shadow2 unshadow brute-force") +PERFCOUNTER_CPU(shadow2_get_page_fail, "shadow2_get_page_from_l1e failed") +PERFCOUNTER_CPU(shadow2_guest_walk, "shadow2 walks guest tables") +PERFCOUNTER_CPU(shadow2_walk_cache_hit, "shadow2 walk-cache hits") +PERFCOUNTER_CPU(shadow2_walk_cache_miss, "shadow2 walk-cache misses") + + /*#endif*/ /* __XEN_PERFC_DEFN_H__ */ diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index d460544d3e..81c8757f8e 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -545,6 +545,7 @@ extern always_inline void prefetchw(const void *x) #endif void show_stack(struct cpu_user_regs *regs); +void show_xen_trace(void); void show_stack_overflow(unsigned long esp); void show_registers(struct cpu_user_regs *regs); void show_execution_state(struct cpu_user_regs *regs); diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index 7144b24d8b..efade3021c 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -1,8 +1,7 @@ /****************************************************************************** * include/asm-x86/shadow.h * - * Copyright (c) 2005 Michael A Fetterman - * Based on an earlier implementation by Ian Pratt et al + * Copyright (c) 2006 by XenSource Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,1782 +21,28 @@ #ifndef _XEN_SHADOW_H #define _XEN_SHADOW_H -#include <xen/config.h> -#include <xen/types.h> -#include <xen/perfc.h> -#include <xen/sched.h> -#include <xen/mm.h> -#include <xen/domain_page.h> -#include <asm/current.h> -#include <asm/flushtlb.h> -#include <asm/processor.h> -#include <asm/hvm/hvm.h> -#include <asm/hvm/support.h> -#include <asm/regs.h> -#include <public/dom0_ops.h> -#include <asm/shadow_public.h> -#include <asm/page-guest32.h> -#include <asm/shadow_ops.h> +/* This file is just a wrapper around the new Shadow2 header, + * providing names that must be defined in any shadow implementation. */ -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */ +#include <asm/shadow2.h> -#define SHM_enable (1<<0) /* we're in one of the shadow modes */ -#define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of - guest tables */ -#define SHM_write_all (1<<2) /* allow write access to all guest pt pages, - regardless of pte write permissions */ -#define SHM_log_dirty (1<<3) /* enable log dirty mode */ -#define SHM_translate (1<<4) /* Xen does p2m translation, not guest */ -#define SHM_external (1<<5) /* Xen does not steal address space from the - domain for its own booking; requires VT or - similar mechanisms */ -#define SHM_wr_pt_pte (1<<6) /* guest allowed to set PAGE_RW bit in PTEs which - point to page table pages. */ +/* How to make sure a page is not referred to in a shadow PT */ +/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ +#define shadow_drop_references(_d, _p) \ + shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) +#define shadow_sync_and_drop_references(_d, _p) \ + shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) -#define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode) -#define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts) -#define shadow_mode_write_l1(_d) (VM_ASSIST(_d, VMASST_TYPE_writable_pagetables)) -#define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all) -#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty) -#define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate) -#define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external) -#define shadow_mode_wr_pt_pte(_d) ((_d)->arch.shadow_mode & SHM_wr_pt_pte) +/* Whether we are translating the domain's frame numbers for it */ +#define shadow_mode_translate(d) shadow2_mode_translate(d) -#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START) -#define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \ - (SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))) -#define shadow_linear_l2_table(_v) ((_v)->arch.shadow_vtable) +/* ...and if so, how to add and remove entries in the mapping */ +#define guest_physmap_add_page(_d, _p, _m) \ + shadow2_guest_physmap_add_page((_d), (_p), (_m)) +#define guest_physmap_remove_page(_d, _p, _m ) \ + shadow2_guest_physmap_remove_page((_d), (_p), (_m)) -// easy access to the hl2 table (for translated but not external modes only) -#define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \ - (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))) - -/* - * For now we use the per-domain BIGLOCK rather than a shadow-specific lock. 
- * We usually have the BIGLOCK already acquired anyway, so this is unlikely - * to cause much unnecessary extra serialisation. Also it's a recursive - * lock, and there are some code paths containing nested shadow_lock(). - * The #if0'ed code below is therefore broken until such nesting is removed. - */ -#if 0 -#define shadow_lock_init(_d) \ - spin_lock_init(&(_d)->arch.shadow_lock) -#define shadow_lock_is_acquired(_d) \ - spin_is_locked(&(_d)->arch.shadow_lock) -#define shadow_lock(_d) \ -do { \ - ASSERT(!shadow_lock_is_acquired(_d)); \ - spin_lock(&(_d)->arch.shadow_lock); \ -} while (0) -#define shadow_unlock(_d) \ -do { \ - ASSERT(!shadow_lock_is_acquired(_d)); \ - spin_unlock(&(_d)->arch.shadow_lock); \ -} while (0) -#else -#define shadow_lock_init(_d) \ - ((_d)->arch.shadow_nest = 0) -#define shadow_lock_is_acquired(_d) \ - (spin_is_locked(&(_d)->big_lock) && ((_d)->arch.shadow_nest != 0)) -#define shadow_lock(_d) \ -do { \ - LOCK_BIGLOCK(_d); \ - (_d)->arch.shadow_nest++; \ -} while (0) -#define shadow_unlock(_d) \ -do { \ - ASSERT(shadow_lock_is_acquired(_d)); \ - (_d)->arch.shadow_nest--; \ - UNLOCK_BIGLOCK(_d); \ -} while (0) -#endif - -#if CONFIG_PAGING_LEVELS >= 3 -static inline u64 get_cr3_idxval(struct vcpu *v) -{ - u64 pae_cr3; - - if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 && - !shadow_mode_log_dirty(v->domain) ) - { - pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */ - return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK; - } - else - return 0; -} - -#define shadow_key_t u64 -#define index_to_key(x) ((x) << 32) -#else -#define get_cr3_idxval(v) (0) -#define shadow_key_t unsigned long -#define index_to_key(x) (0) -#endif - - -#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min)) -#define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1)) -#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16)) -extern void shadow_direct_map_clean(struct domain *d); -extern int shadow_direct_map_init(struct domain *d); -extern int shadow_direct_map_fault( - unsigned long vpa, struct cpu_user_regs *regs); -extern void shadow_mode_init(void); -extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc); -extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs); -extern int shadow_mode_enable(struct domain *p, unsigned int mode); -extern void shadow_invlpg(struct vcpu *, unsigned long); -extern struct out_of_sync_entry *shadow_mark_mfn_out_of_sync( - struct vcpu *v, unsigned long gpfn, unsigned long mfn); -extern void free_monitor_pagetable(struct vcpu *v); -extern void __shadow_sync_all(struct domain *d); -extern int __shadow_out_of_sync(struct vcpu *v, unsigned long va); -extern int set_p2m_entry( - struct domain *d, unsigned long pfn, unsigned long mfn, - struct domain_mmap_cache *l2cache, - struct domain_mmap_cache *l1cache); -extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype); - -extern void free_shadow_page(unsigned long smfn); - -extern void shadow_l1_normal_pt_update(struct domain *d, - paddr_t pa, l1_pgentry_t l1e, - struct domain_mmap_cache *cache); -extern void shadow_l2_normal_pt_update(struct domain *d, - paddr_t pa, l2_pgentry_t l2e, - struct domain_mmap_cache *cache); -#if CONFIG_PAGING_LEVELS >= 3 -#include <asm/page-guest32.h> -/* - * va_mask cannot be used because it's used by the shadow hash. - * Use the score area for for now. 
- */ -#define is_xen_l2_slot(t,s) \ - ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) && \ - ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) ) - -extern unsigned long gva_to_gpa(unsigned long gva); -extern void shadow_l3_normal_pt_update(struct domain *d, - paddr_t pa, l3_pgentry_t l3e, - struct domain_mmap_cache *cache); -#endif -#if CONFIG_PAGING_LEVELS >= 4 -extern void shadow_l4_normal_pt_update(struct domain *d, - paddr_t pa, l4_pgentry_t l4e, - struct domain_mmap_cache *cache); -#endif -extern int shadow_do_update_va_mapping(unsigned long va, - l1_pgentry_t val, - struct vcpu *v); - - -static inline unsigned long __shadow_status( - struct domain *d, unsigned long gpfn, unsigned long stype); - -#if CONFIG_PAGING_LEVELS <= 2 -static inline void update_hl2e(struct vcpu *v, unsigned long va); -#endif - -static inline int page_is_page_table(struct page_info *page) -{ - struct domain *owner = page_get_owner(page); - u32 type_info; - - if ( owner && shadow_mode_refcounts(owner) ) - return page->count_info & PGC_page_table; - - type_info = page->u.inuse.type_info & PGT_type_mask; - return type_info && (type_info <= PGT_l4_page_table); -} - -static inline int mfn_is_page_table(unsigned long mfn) -{ - if ( !mfn_valid(mfn) ) - return 0; - - return page_is_page_table(mfn_to_page(mfn)); -} - -static inline int page_out_of_sync(struct page_info *page) -{ - return page->count_info & PGC_out_of_sync; -} - -static inline int mfn_out_of_sync(unsigned long mfn) -{ - if ( !mfn_valid(mfn) ) - return 0; - - return page_out_of_sync(mfn_to_page(mfn)); -} - - -/************************************************************************/ - -static void inline -__shadow_sync_mfn(struct domain *d, unsigned long mfn) -{ - if ( d->arch.out_of_sync ) - { - // XXX - could be smarter - // - __shadow_sync_all(d); - } -} - -static void inline -__shadow_sync_va(struct vcpu *v, unsigned long va) -{ - struct domain *d = v->domain; - - if ( d->arch.out_of_sync && __shadow_out_of_sync(v, va) ) - { - perfc_incrc(shadow_sync_va); - - // XXX - could be smarter - // - __shadow_sync_all(v->domain); - } -#if CONFIG_PAGING_LEVELS <= 2 - // Also make sure the HL2 is up-to-date for this address. - // - if ( unlikely(shadow_mode_translate(v->domain)) ) - update_hl2e(v, va); -#endif -} - -static void inline -shadow_sync_all(struct domain *d) -{ - if ( unlikely(shadow_mode_enabled(d)) ) - { - shadow_lock(d); - - if ( d->arch.out_of_sync ) - __shadow_sync_all(d); - - ASSERT(d->arch.out_of_sync == NULL); - - shadow_unlock(d); - } -} - -// SMP BUG: This routine can't ever be used properly in an SMP context. -// It should be something like get_shadow_and_sync_va(). -// This probably shouldn't exist. -// -static void inline -shadow_sync_va(struct vcpu *v, unsigned long gva) -{ - struct domain *d = v->domain; - if ( unlikely(shadow_mode_enabled(d)) ) - { - shadow_lock(d); - __shadow_sync_va(v, gva); - shadow_unlock(d); - } -} - -extern void __shadow_mode_disable(struct domain *d); -static inline void shadow_mode_disable(struct domain *d) -{ - if ( unlikely(shadow_mode_enabled(d)) ) - { - shadow_lock(d); - __shadow_mode_disable(d); - shadow_unlock(d); - } -} - -/************************************************************************/ - -#define mfn_to_gmfn(_d, mfn) \ - ( (shadow_mode_translate(_d)) \ - ? get_gpfn_from_mfn(mfn) \ - : (mfn) ) - -#define gmfn_to_mfn(_d, gpfn) \ - ({ \ - unlikely(shadow_mode_translate(_d)) \ - ? (likely(current->domain == (_d)) \ - ? 
get_mfn_from_gpfn(gpfn) \ - : get_mfn_from_gpfn_foreign(_d, gpfn)) \ - : (gpfn); \ - }) - -extern unsigned long get_mfn_from_gpfn_foreign( - struct domain *d, unsigned long gpfn); - -/************************************************************************/ - -struct shadow_status { - struct shadow_status *next; /* Pull-to-front list per hash bucket. */ - shadow_key_t gpfn_and_flags; /* Guest pfn plus flags. */ - unsigned long smfn; /* Shadow mfn. */ -}; - -#define shadow_ht_extra_size 128 -#define shadow_ht_buckets 256 - -struct out_of_sync_entry { - struct out_of_sync_entry *next; - struct vcpu *v; - unsigned long gpfn; /* why is this here? */ - unsigned long gmfn; - unsigned long snapshot_mfn; - paddr_t writable_pl1e; /* NB: this is a machine address */ - unsigned long va; -}; - -#define out_of_sync_extra_size 127 - -#define SHADOW_SNAPSHOT_ELSEWHERE (-1L) - -/************************************************************************/ -#define SHADOW_DEBUG 0 -#define SHADOW_VERBOSE_DEBUG 0 -#define SHADOW_VVERBOSE_DEBUG 0 -#define SHADOW_VVVERBOSE_DEBUG 0 -#define SHADOW_HASH_DEBUG 0 -#define FULLSHADOW_DEBUG 0 - -#if SHADOW_DEBUG -extern int shadow_status_noswap; -#define SHADOW_REFLECTS_SNAPSHOT _PAGE_AVAIL0 -#endif - -#if SHADOW_VERBOSE_DEBUG -#define SH_LOG(_f, _a...) \ - printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \ - current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a ) -#define SH_VLOG(_f, _a...) \ - printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \ - current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a ) -#else -#define SH_LOG(_f, _a...) ((void)0) -#define SH_VLOG(_f, _a...) ((void)0) -#endif - -#if SHADOW_VVERBOSE_DEBUG -#define SH_VVLOG(_f, _a...) \ - printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \ - current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a ) -#else -#define SH_VVLOG(_f, _a...) ((void)0) -#endif - -#if SHADOW_VVVERBOSE_DEBUG -#define SH_VVVLOG(_f, _a...) \ - printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \ - current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a ) -#else -#define SH_VVVLOG(_f, _a...) ((void)0) -#endif - -#if FULLSHADOW_DEBUG -#define FSH_LOG(_f, _a...) \ - printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \ - current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a ) -#else -#define FSH_LOG(_f, _a...) ((void)0) -#endif - - -/************************************************************************/ - -static inline int -shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d) -{ - l1_pgentry_t nl1e; - int res; - unsigned long mfn; - struct domain *owner; - - ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT); - - if ( !shadow_mode_refcounts(d) ) - return 1; - - nl1e = l1e; - l1e_remove_flags(nl1e, _PAGE_GLOBAL); - - if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) ) - return 0; - - res = get_page_from_l1e(nl1e, d); - - if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) && - !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) && - (mfn = l1e_get_pfn(nl1e)) && - mfn_valid(mfn) && - (owner = page_get_owner(mfn_to_page(mfn))) && - (d != owner) ) - { - res = get_page_from_l1e(nl1e, owner); - printk("tried to map mfn %lx from domain %d into shadow page tables " - "of domain %d; %s\n", - mfn, owner->domain_id, d->domain_id, - res ? 
"success" : "failed"); - } - - if ( unlikely(!res) ) - { - perfc_incrc(shadow_get_page_fail); - FSH_LOG("%s failed to get ref l1e=%" PRIpte "\n", - __func__, l1e_get_intpte(l1e)); - } - - return res; -} - -static inline void -shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) -{ - if ( !shadow_mode_refcounts(d) ) - return; - - put_page_from_l1e(l1e, d); -} - -static inline void -shadow_put_page_type(struct domain *d, struct page_info *page) -{ - if ( !shadow_mode_refcounts(d) ) - return; - - put_page_type(page); -} - -static inline int shadow_get_page(struct domain *d, - struct page_info *page, - struct domain *owner) -{ - if ( !shadow_mode_refcounts(d) ) - return 1; - return get_page(page, owner); -} - -static inline void shadow_put_page(struct domain *d, - struct page_info *page) -{ - if ( !shadow_mode_refcounts(d) ) - return; - put_page(page); -} - -/************************************************************************/ - -static inline void __mark_dirty(struct domain *d, unsigned long mfn) -{ - unsigned long pfn; - - ASSERT(shadow_lock_is_acquired(d)); - - if ( likely(!shadow_mode_log_dirty(d)) || !VALID_MFN(mfn) ) - return; - - ASSERT(d->arch.shadow_dirty_bitmap != NULL); - - /* We /really/ mean PFN here, even for non-translated guests. */ - pfn = get_gpfn_from_mfn(mfn); - - /* - * Values with the MSB set denote MFNs that aren't really part of the - * domain's pseudo-physical memory map (e.g., the shared info frame). - * Nothing to do here... - */ - if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) ) - return; - - /* N.B. Can use non-atomic TAS because protected by shadow_lock. */ - if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) && - !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) ) - { - d->arch.shadow_dirty_count++; - } -#ifndef NDEBUG - else if ( mfn_valid(mfn) ) - { - SH_VLOG("mark_dirty OOR! 
mfn=%lx pfn=%lx max=%x (dom %p)", - mfn, pfn, d->arch.shadow_dirty_bitmap_size, d); - SH_VLOG("dom=%p caf=%08x taf=%" PRtype_info, - page_get_owner(mfn_to_page(mfn)), - mfn_to_page(mfn)->count_info, - mfn_to_page(mfn)->u.inuse.type_info ); - } -#endif -} - - -static inline void mark_dirty(struct domain *d, unsigned int mfn) -{ - if ( unlikely(shadow_mode_log_dirty(d)) ) - { - shadow_lock(d); - __mark_dirty(d, mfn); - shadow_unlock(d); - } -} - - -/************************************************************************/ -#if CONFIG_PAGING_LEVELS <= 2 -static inline void -__shadow_get_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e) -{ - ASSERT(shadow_mode_enabled(v->domain)); - - *psl2e = v->arch.shadow_vtable[l2_table_offset(va)]; -} - -static inline void -__shadow_set_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t value) -{ - ASSERT(shadow_mode_enabled(v->domain)); - - v->arch.shadow_vtable[l2_table_offset(va)] = value; -} - -static inline void -__guest_get_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e) -{ - *pl2e = v->arch.guest_vtable[l2_table_offset(va)]; -} - -static inline void -__guest_set_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t value) -{ - struct domain *d = v->domain; - - v->arch.guest_vtable[l2_table_offset(va)] = value; - - if ( unlikely(shadow_mode_translate(d)) ) - update_hl2e(v, va); - - __mark_dirty(d, pagetable_get_pfn(v->arch.guest_table)); -} - -static inline void -__direct_get_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e) -{ - l2_pgentry_t *phys_vtable; - - ASSERT(shadow_mode_enabled(v->domain)); - - phys_vtable = map_domain_page( - pagetable_get_pfn(v->domain->arch.phys_table)); - - *psl2e = phys_vtable[l2_table_offset(va)]; - - unmap_domain_page(phys_vtable); -} - -static inline void -__direct_set_l2e( - struct vcpu *v, unsigned long va, l2_pgentry_t value) -{ - l2_pgentry_t *phys_vtable; - - ASSERT(shadow_mode_enabled(v->domain)); - - phys_vtable = map_domain_page( - pagetable_get_pfn(v->domain->arch.phys_table)); - - phys_vtable[l2_table_offset(va)] = value; - - unmap_domain_page(phys_vtable); -} - -static inline void -update_hl2e(struct vcpu *v, unsigned long va) -{ - int index = l2_table_offset(va); - unsigned long mfn; - l2_pgentry_t gl2e = v->arch.guest_vtable[index]; - l1_pgentry_t old_hl2e, new_hl2e; - int need_flush = 0; - - ASSERT(shadow_mode_translate(v->domain)); - - old_hl2e = v->arch.hl2_vtable[index]; - - if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) && - VALID_MFN(mfn = get_mfn_from_gpfn(l2e_get_pfn(gl2e))) ) - new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); - else - new_hl2e = l1e_empty(); - - // only do the ref counting if something has changed. - // - if ( (l1e_has_changed(old_hl2e, new_hl2e, PAGE_FLAG_MASK)) ) - { - if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) && - !shadow_get_page(v->domain, mfn_to_page(l1e_get_pfn(new_hl2e)), - v->domain) ) - new_hl2e = l1e_empty(); - if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT ) - { - shadow_put_page(v->domain, mfn_to_page(l1e_get_pfn(old_hl2e))); - need_flush = 1; - } - - v->arch.hl2_vtable[l2_table_offset(va)] = new_hl2e; - - if ( need_flush ) - { - perfc_incrc(update_hl2e_invlpg); - flush_tlb_one_mask(v->domain->domain_dirty_cpumask, - &linear_pg_table[l1_linear_offset(va)]); - } - } -} - -static inline void shadow_drop_references( - struct domain *d, struct page_info *page) -{ - if ( likely(!shadow_mode_refcounts(d)) || - ((page->u.inuse.type_info & PGT_count_mask) == 0) ) - return; - - /* XXX This needs more thought... 
*/ - printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n", - __func__, page_to_mfn(page)); - printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page), - page->count_info, page->u.inuse.type_info); - - shadow_lock(d); - shadow_remove_all_access(d, page_to_mfn(page)); - shadow_unlock(d); - - printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page), - page->count_info, page->u.inuse.type_info); -} - -/* XXX Needs more thought. Neither pretty nor fast: a place holder. */ -static inline void shadow_sync_and_drop_references( - struct domain *d, struct page_info *page) -{ - if ( likely(!shadow_mode_refcounts(d)) ) - return; - - if ( page_out_of_sync(page) ) - __shadow_sync_mfn(d, page_to_mfn(page)); - - shadow_remove_all_access(d, page_to_mfn(page)); -} -#endif - -/************************************************************************/ - -/* - * Add another shadow reference to smfn. - */ -static inline int -get_shadow_ref(unsigned long smfn) -{ - u32 x, nx; - - ASSERT(mfn_valid(smfn)); - - x = mfn_to_page(smfn)->count_info; - nx = x + 1; - - if ( unlikely(nx == 0) ) - { - printk("get_shadow_ref overflow, gmfn=%" PRtype_info " smfn=%lx\n", - mfn_to_page(smfn)->u.inuse.type_info & PGT_mfn_mask, - smfn); - BUG(); - } - - // Guarded by the shadow lock... - // - mfn_to_page(smfn)->count_info = nx; - - return 1; -} - -/* - * Drop a shadow reference to smfn. - */ -static inline void -put_shadow_ref(unsigned long smfn) -{ - u32 x, nx; - - ASSERT(mfn_valid(smfn)); - - x = mfn_to_page(smfn)->count_info; - nx = x - 1; - - if ( unlikely(x == 0) ) - { - printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%" - PRtype_info "\n", - smfn, - mfn_to_page(smfn)->count_info, - mfn_to_page(smfn)->u.inuse.type_info); - BUG(); - } - - // Guarded by the shadow lock... - // - mfn_to_page(smfn)->count_info = nx; - - if ( unlikely(nx == 0) ) - { - free_shadow_page(smfn); - } -} - -static inline void -shadow_pin(unsigned long smfn) -{ - ASSERT( !(mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) ); - - mfn_to_page(smfn)->u.inuse.type_info |= PGT_pinned; - if ( unlikely(!get_shadow_ref(smfn)) ) - BUG(); -} - -static inline void -shadow_unpin(unsigned long smfn) -{ - ASSERT( (mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) ); - - mfn_to_page(smfn)->u.inuse.type_info &= ~PGT_pinned; - put_shadow_ref(smfn); -} - -/* - * SMP issue. The following code assumes the shadow lock is held. Re-visit - * when working on finer-gained locks for shadow. 
- */ -static inline void set_guest_back_ptr( - struct domain *d, l1_pgentry_t spte, - unsigned long smfn, unsigned int index) -{ - struct page_info *gpage; - - ASSERT(shadow_lock_is_acquired(d)); - - if ( !shadow_mode_external(d) || - ((l1e_get_flags(spte) & (_PAGE_PRESENT|_PAGE_RW)) != - (_PAGE_PRESENT|_PAGE_RW)) ) - return; - - gpage = l1e_get_page(spte); - - ASSERT(smfn != 0); - ASSERT(page_to_mfn(gpage) != 0); - - gpage->tlbflush_timestamp = smfn; - gpage->u.inuse.type_info &= ~PGT_va_mask; - gpage->u.inuse.type_info |= (unsigned long)index << PGT_va_shift; -} - -/************************************************************************/ -#if CONFIG_PAGING_LEVELS <= 2 -extern void shadow_mark_va_out_of_sync( - struct vcpu *v, unsigned long gpfn, unsigned long mfn, - unsigned long va); - -static inline int l1pte_write_fault( - struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p, - unsigned long va) -{ - struct domain *d = v->domain; - l1_pgentry_t gpte = *gpte_p; - l1_pgentry_t spte; - unsigned long gpfn = l1e_get_pfn(gpte); - unsigned long gmfn = gmfn_to_mfn(d, gpfn); - - //printk("l1pte_write_fault gmfn=%lx\n", gmfn); - - if ( unlikely(!VALID_MFN(gmfn)) ) - { - SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn); - *spte_p = l1e_empty(); - return 0; - } - - ASSERT(l1e_get_flags(gpte) & _PAGE_RW); - l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED); - spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL); - - SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte, - l1e_get_intpte(spte), l1e_get_intpte(gpte)); - - __mark_dirty(d, gmfn); - - if ( mfn_is_page_table(gmfn) ) - shadow_mark_va_out_of_sync(v, gpfn, gmfn, va); - - *gpte_p = gpte; - *spte_p = spte; - - return 1; -} - -static inline int l1pte_read_fault( - struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p) -{ - l1_pgentry_t gpte = *gpte_p; - l1_pgentry_t spte = *spte_p; - unsigned long pfn = l1e_get_pfn(gpte); - unsigned long mfn = gmfn_to_mfn(d, pfn); - - if ( unlikely(!VALID_MFN(mfn)) ) - { - SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn); - *spte_p = l1e_empty(); - return 0; - } - - l1e_add_flags(gpte, _PAGE_ACCESSED); - spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL); - - if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) || - mfn_is_page_table(mfn) ) - { - l1e_remove_flags(spte, _PAGE_RW); - } - - SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte, - l1e_get_intpte(spte), l1e_get_intpte(gpte)); - *gpte_p = gpte; - *spte_p = spte; - - return 1; -} -#endif - -static inline void l1pte_propagate_from_guest( - struct domain *d, guest_l1_pgentry_t gpte, l1_pgentry_t *spte_p) -{ - unsigned long mfn; - l1_pgentry_t spte; - - spte = l1e_empty(); - - if ( ((guest_l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == - (_PAGE_PRESENT|_PAGE_ACCESSED)) && - VALID_MFN(mfn = gmfn_to_mfn(d, l1e_get_pfn(gpte))) ) - { - spte = l1e_from_pfn( - mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL)); - - if ( shadow_mode_log_dirty(d) || - !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) || - mfn_is_page_table(mfn) ) - { - l1e_remove_flags(spte, _PAGE_RW); - } - } - - if ( l1e_get_intpte(spte) || l1e_get_intpte(gpte) ) - SH_VVVLOG("%s: gpte=%" PRIpte ", new spte=%" PRIpte, - __func__, l1e_get_intpte(gpte), l1e_get_intpte(spte)); - - *spte_p = spte; -} - -static inline void hl2e_propagate_from_guest( - struct domain *d, l2_pgentry_t gpde, l1_pgentry_t *hl2e_p) -{ - unsigned long pfn = l2e_get_pfn(gpde); - unsigned long mfn; - 
l1_pgentry_t hl2e; - - hl2e = l1e_empty(); - - if ( l2e_get_flags(gpde) & _PAGE_PRESENT ) - { - mfn = gmfn_to_mfn(d, pfn); - if ( VALID_MFN(mfn) && mfn_valid(mfn) ) - hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); - } - - if ( l1e_get_intpte(hl2e) || l2e_get_intpte(gpde) ) - SH_VVLOG("%s: gpde=%" PRIpte " hl2e=%" PRIpte, __func__, - l2e_get_intpte(gpde), l1e_get_intpte(hl2e)); - - *hl2e_p = hl2e; -} - -static inline void l2pde_general( - struct domain *d, - guest_l2_pgentry_t *gpde_p, - l2_pgentry_t *spde_p, - unsigned long sl1mfn) -{ - guest_l2_pgentry_t gpde = *gpde_p; - l2_pgentry_t spde; - - spde = l2e_empty(); - if ( (guest_l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) ) - { - spde = l2e_from_pfn( - sl1mfn, - (guest_l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL); - - /* N.B. PDEs do not have a dirty bit. */ - guest_l2e_add_flags(gpde, _PAGE_ACCESSED); - - *gpde_p = gpde; - } - - if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) ) - SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__, - l2e_get_intpte(gpde), l2e_get_intpte(spde)); - - *spde_p = spde; -} - -static inline void l2pde_propagate_from_guest( - struct domain *d, guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p) -{ - guest_l2_pgentry_t gpde = *gpde_p; - unsigned long sl1mfn = 0; - - if ( guest_l2e_get_flags(gpde) & _PAGE_PRESENT ) - sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow); - l2pde_general(d, gpde_p, spde_p, sl1mfn); -} - -/************************************************************************/ - -// returns true if a tlb flush is needed -// -static int inline -validate_pte_change( - struct domain *d, - guest_l1_pgentry_t new_pte, - l1_pgentry_t *shadow_pte_p) -{ - l1_pgentry_t old_spte, new_spte; - int need_flush = 0; - - perfc_incrc(validate_pte_calls); - - l1pte_propagate_from_guest(d, new_pte, &new_spte); - - if ( shadow_mode_refcounts(d) ) - { - old_spte = *shadow_pte_p; - - if ( l1e_get_intpte(old_spte) == l1e_get_intpte(new_spte) ) - { - // No accounting required... - // - perfc_incrc(validate_pte_changes1); - } - else if ( l1e_get_intpte(old_spte) == (l1e_get_intpte(new_spte)|_PAGE_RW) ) - { - // Fast path for PTEs that have merely been write-protected - // (e.g., during a Unix fork()). A strict reduction in privilege. - // - perfc_incrc(validate_pte_changes2); - if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) ) - shadow_put_page_type(d, mfn_to_page(l1e_get_pfn(new_spte))); - } - else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) & - _PAGE_PRESENT ) && - l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) ) - { - // only do the ref counting if something important changed. - // - perfc_incrc(validate_pte_changes3); - - if ( l1e_get_flags(old_spte) & _PAGE_PRESENT ) - { - shadow_put_page_from_l1e(old_spte, d); - need_flush = 1; - } - if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) && - !shadow_get_page_from_l1e(new_spte, d) ) { - new_spte = l1e_empty(); - need_flush = -1; /* need to unshadow the page */ - } - } - else - { - perfc_incrc(validate_pte_changes4); - } - } - - *shadow_pte_p = new_spte; - - return need_flush; -} - -// returns true if a tlb flush is needed -// -static int inline -validate_hl2e_change( - struct domain *d, - l2_pgentry_t new_gpde, - l1_pgentry_t *shadow_hl2e_p) -{ - l1_pgentry_t old_hl2e, new_hl2e; - int need_flush = 0; - - perfc_incrc(validate_hl2e_calls); - - old_hl2e = *shadow_hl2e_p; - hl2e_propagate_from_guest(d, new_gpde, &new_hl2e); - - // Only do the ref counting if something important changed. 
- // - if ( ((l1e_get_flags(old_hl2e) | l1e_get_flags(new_hl2e)) & _PAGE_PRESENT) && - l1e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT) ) - { - perfc_incrc(validate_hl2e_changes); - - if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) && - !get_page(mfn_to_page(l1e_get_pfn(new_hl2e)), d) ) - new_hl2e = l1e_empty(); - if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT ) - { - put_page(mfn_to_page(l1e_get_pfn(old_hl2e))); - need_flush = 1; - } - } - - *shadow_hl2e_p = new_hl2e; - - return need_flush; -} - -// returns true if a tlb flush is needed -// -static int inline -validate_pde_change( - struct domain *d, - guest_l2_pgentry_t new_gpde, - l2_pgentry_t *shadow_pde_p) -{ - l2_pgentry_t old_spde, new_spde; - int need_flush = 0; - - perfc_incrc(validate_pde_calls); - - old_spde = *shadow_pde_p; - l2pde_propagate_from_guest(d, &new_gpde, &new_spde); - - // Only do the ref counting if something important changed. - // - if ( ((l2e_get_intpte(old_spde) | l2e_get_intpte(new_spde)) & _PAGE_PRESENT) && - l2e_has_changed(old_spde, new_spde, _PAGE_PRESENT) ) - { - perfc_incrc(validate_pde_changes); - - if ( (l2e_get_flags(new_spde) & _PAGE_PRESENT) && - !get_shadow_ref(l2e_get_pfn(new_spde)) ) - BUG(); - if ( l2e_get_flags(old_spde) & _PAGE_PRESENT ) - { - put_shadow_ref(l2e_get_pfn(old_spde)); - need_flush = 1; - } - } - - *shadow_pde_p = new_spde; - - return need_flush; -} - -/*********************************************************************/ - -#if SHADOW_HASH_DEBUG - -static void shadow_audit(struct domain *d, int print) -{ - int live = 0, free = 0, j = 0, abs; - struct shadow_status *a; - - for ( j = 0; j < shadow_ht_buckets; j++ ) - { - a = &d->arch.shadow_ht[j]; - if ( a->gpfn_and_flags ) - { - live++; - ASSERT(a->smfn); - } - else - ASSERT(!a->next); - - a = a->next; - while ( a && (live < 9999) ) - { - live++; - if ( (a->gpfn_and_flags == 0) || (a->smfn == 0) ) - { - printk("XXX live=%d gpfn+flags=%lx sp=%lx next=%p\n", - live, a->gpfn_and_flags, a->smfn, a->next); - BUG(); - } - ASSERT(a->smfn); - a = a->next; - } - ASSERT(live < 9999); - } - - for ( a = d->arch.shadow_ht_free; a != NULL; a = a->next ) - free++; - - if ( print ) - printk("Xlive=%d free=%d\n", live, free); - - // BUG: this only works if there's only a single domain which is - // using shadow tables. - // - abs = ( - perfc_value(shadow_l1_pages) + - perfc_value(shadow_l2_pages) + - perfc_value(hl2_table_pages) + - perfc_value(snapshot_pages) + - perfc_value(writable_pte_predictions) - ) - live; -#ifdef PERF_COUNTERS - if ( (abs < -1) || (abs > 1) ) - { - printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n", - live, free, - perfc_value(shadow_l1_pages), - perfc_value(shadow_l2_pages), - perfc_value(hl2_table_pages), - perfc_value(snapshot_pages), - perfc_value(writable_pte_predictions)); - BUG(); - } -#endif - - // XXX ought to add some code to audit the out-of-sync entries, too. - // -} -#else -#define shadow_audit(p, print) ((void)0) -#endif - - -static inline struct shadow_status *hash_bucket( - struct domain *d, unsigned int gpfn) -{ - return &d->arch.shadow_ht[gpfn % shadow_ht_buckets]; -} - - -/* - * N.B. This takes a guest pfn (i.e. a pfn in the guest's namespace, - * which, depending on full shadow mode, may or may not equal - * its mfn). - * It returns the shadow's mfn, or zero if it doesn't exist. 
- */ -static inline unsigned long __shadow_status( - struct domain *d, unsigned long gpfn, unsigned long stype) -{ - struct shadow_status *p, *x, *head; - shadow_key_t key; -#if CONFIG_PAGING_LEVELS >= 3 - if ( d->arch.ops->guest_paging_levels == PAGING_L3 && stype == PGT_l4_shadow ) - key = gpfn | stype | index_to_key(get_cr3_idxval(current)); - else -#endif - key = gpfn | stype; - - ASSERT(shadow_lock_is_acquired(d)); - ASSERT(gpfn == (gpfn & PGT_mfn_mask)); - ASSERT(stype && !(stype & ~PGT_type_mask)); - - perfc_incrc(shadow_status_calls); - - x = head = hash_bucket(d, gpfn); - p = NULL; - - shadow_audit(d, 0); - - do - { - ASSERT(x->gpfn_and_flags || ((x == head) && (x->next == NULL))); - - if ( x->gpfn_and_flags == key ) - { -#if SHADOW_DEBUG - if ( unlikely(shadow_status_noswap) ) - return x->smfn; -#endif - /* Pull-to-front if 'x' isn't already the head item. */ - if ( unlikely(x != head) ) - { - /* Delete 'x' from list and reinsert immediately after head. */ - p->next = x->next; - x->next = head->next; - head->next = x; - - /* Swap 'x' contents with head contents. */ - SWAP(head->gpfn_and_flags, x->gpfn_and_flags); - SWAP(head->smfn, x->smfn); - } - else - { - perfc_incrc(shadow_status_hit_head); - } - - return head->smfn; - } - - p = x; - x = x->next; - } - while ( x != NULL ); - - perfc_incrc(shadow_status_miss); - return 0; -} - -/* - * Not clear if pull-to-front is worth while for this or not, - * as it generally needs to scan the entire bucket anyway. - * Much simpler without. - * - * Either returns PGT_none, or PGT_l{1,2,3,4}_page_table. - */ -static inline u32 -shadow_max_pgtable_type(struct domain *d, unsigned long gpfn, - unsigned long *smfn) -{ - struct shadow_status *x; - u32 pttype = PGT_none, type; - - ASSERT(shadow_lock_is_acquired(d)); - ASSERT(gpfn == (gpfn & PGT_mfn_mask)); - - perfc_incrc(shadow_max_type); - - x = hash_bucket(d, gpfn); - - while ( x && x->gpfn_and_flags ) - { - if ( (x->gpfn_and_flags & PGT_mfn_mask) == gpfn ) - { - type = x->gpfn_and_flags & PGT_type_mask; - - switch ( type ) - { - case PGT_hl2_shadow: - // Treat an HL2 as if it's an L1 - // - type = PGT_l1_shadow; - break; - case PGT_snapshot: - case PGT_writable_pred: - // Ignore snapshots -- they don't in and of themselves constitute - // treating a page as a page table - // - goto next; - case PGT_base_page_table: - // Early exit if we found the max possible value - // - return type; - default: - break; - } - - if ( type > pttype ) - { - pttype = type; - if ( smfn ) - *smfn = x->smfn; - } - } - next: - x = x->next; - } - - return pttype; -} - -static inline void delete_shadow_status( - struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype, u64 index) -{ - struct shadow_status *p, *x, *n, *head; - - shadow_key_t key = gpfn | stype | index_to_key(index); - - ASSERT(shadow_lock_is_acquired(d)); - ASSERT(!(gpfn & ~PGT_mfn_mask)); - ASSERT(stype && !(stype & ~PGT_type_mask)); - - head = hash_bucket(d, gpfn); - - SH_VLOG("delete gpfn=%lx t=%08x bucket=%p", gpfn, stype, head); - shadow_audit(d, 0); - - /* Match on head item? */ - if ( head->gpfn_and_flags == key ) - { - if ( (n = head->next) != NULL ) - { - /* Overwrite head with contents of following node. */ - head->gpfn_and_flags = n->gpfn_and_flags; - head->smfn = n->smfn; - - /* Delete following node. */ - head->next = n->next; - - /* Add deleted node to the free list. */ - n->gpfn_and_flags = 0; - n->smfn = 0; - n->next = d->arch.shadow_ht_free; - d->arch.shadow_ht_free = n; - } - else - { - /* This bucket is now empty. 
Initialise the head node. */ - head->gpfn_and_flags = 0; - head->smfn = 0; - } - - goto found; - } - - p = head; - x = head->next; - - do - { - if ( x->gpfn_and_flags == key ) - { - /* Delete matching node. */ - p->next = x->next; - - /* Add deleted node to the free list. */ - x->gpfn_and_flags = 0; - x->smfn = 0; - x->next = d->arch.shadow_ht_free; - d->arch.shadow_ht_free = x; - - goto found; - } - - p = x; - x = x->next; - } - while ( x != NULL ); - - /* If we got here, it wasn't in the list! */ - BUG(); - - found: - // release ref to page - if ( stype != PGT_writable_pred ) - put_page(mfn_to_page(gmfn)); - - shadow_audit(d, 0); -} - -static inline void set_shadow_status( - struct domain *d, unsigned long gpfn, unsigned long gmfn, - unsigned long smfn, unsigned long stype, u64 index) -{ - struct shadow_status *x, *head, *extra; - int i; - - shadow_key_t key = gpfn | stype | index_to_key(index); - - SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype); - - ASSERT(shadow_lock_is_acquired(d)); - - ASSERT(shadow_mode_translate(d) || gpfn); - ASSERT(!(gpfn & ~PGT_mfn_mask)); - - // XXX - need to be more graceful. - ASSERT(VALID_MFN(gmfn)); - - ASSERT(stype && !(stype & ~PGT_type_mask)); - - x = head = hash_bucket(d, gpfn); - - SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)", - gpfn, smfn, stype, x, x->next); - shadow_audit(d, 0); - - // grab a reference to the guest page to represent the entry in the shadow - // hash table - // - // XXX - Should PGT_writable_pred grab a page ref? - // - Who/how are these hash table entry refs flushed if/when a page - // is given away by the domain? - // - if ( stype != PGT_writable_pred ) - get_page(mfn_to_page(gmfn), d); - - /* - * STEP 1. If page is already in the table, update it in place. - */ - do - { - if ( unlikely(x->gpfn_and_flags == key) ) - { - if ( stype != PGT_writable_pred ) - BUG(); // we should never replace entries into the hash table - x->smfn = smfn; - if ( stype != PGT_writable_pred ) - put_page(mfn_to_page(gmfn)); // already had a ref... - goto done; - } - - x = x->next; - } - while ( x != NULL ); - - /* - * STEP 2. The page must be inserted into the table. - */ - - /* If the bucket is empty then insert the new page as the head item. */ - if ( head->gpfn_and_flags == 0 ) - { - head->gpfn_and_flags = key; - head->smfn = smfn; - ASSERT(head->next == NULL); - goto done; - } - - /* We need to allocate a new node. Ensure the quicklist is non-empty. */ - if ( unlikely(d->arch.shadow_ht_free == NULL) ) - { - SH_VLOG("Allocate more shadow hashtable blocks."); - - extra = xmalloc_bytes( - sizeof(void *) + (shadow_ht_extra_size * sizeof(*x))); - - /* XXX Should be more graceful here. */ - if ( extra == NULL ) - BUG(); - - memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x))); - - /* Record the allocation block so it can be correctly freed later. */ - d->arch.shadow_extras_count++; - *((struct shadow_status **)&extra[shadow_ht_extra_size]) = - d->arch.shadow_ht_extras; - d->arch.shadow_ht_extras = &extra[0]; - - /* Thread a free chain through the newly-allocated nodes. */ - for ( i = 0; i < (shadow_ht_extra_size - 1); i++ ) - extra[i].next = &extra[i+1]; - extra[i].next = NULL; - - /* Add the new nodes to the free list. */ - d->arch.shadow_ht_free = &extra[0]; - } - - /* Allocate a new node from the quicklist. */ - x = d->arch.shadow_ht_free; - d->arch.shadow_ht_free = x->next; - - /* Initialise the new node and insert directly after the head item. 
*/ - x->gpfn_and_flags = key; - x->smfn = smfn; - x->next = head->next; - head->next = x; - - done: - shadow_audit(d, 0); - - if ( stype <= PGT_l4_shadow ) - { - // add to front of list of pages to check when removing write - // permissions for a page... - // - } -} - -/************************************************************************/ - -static inline void guest_physmap_add_page( - struct domain *d, unsigned long gpfn, unsigned long mfn) -{ - struct domain_mmap_cache c1, c2; - - if ( likely(!shadow_mode_translate(d)) ) - return; - - domain_mmap_cache_init(&c1); - domain_mmap_cache_init(&c2); - shadow_lock(d); - shadow_sync_and_drop_references(d, mfn_to_page(mfn)); - set_p2m_entry(d, gpfn, mfn, &c1, &c2); - set_gpfn_from_mfn(mfn, gpfn); - shadow_unlock(d); - domain_mmap_cache_destroy(&c1); - domain_mmap_cache_destroy(&c2); -} - -static inline void guest_physmap_remove_page( - struct domain *d, unsigned long gpfn, unsigned long mfn) -{ - struct domain_mmap_cache c1, c2; - unsigned long type; - - if ( likely(!shadow_mode_translate(d)) ) - return; - - domain_mmap_cache_init(&c1); - domain_mmap_cache_init(&c2); - shadow_lock(d); - shadow_sync_and_drop_references(d, mfn_to_page(mfn)); - while ( (type = shadow_max_pgtable_type(d, gpfn, NULL)) != PGT_none ) - free_shadow_page(__shadow_status(d, gpfn, type)); - set_p2m_entry(d, gpfn, -1, &c1, &c2); - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); - shadow_unlock(d); - domain_mmap_cache_destroy(&c1); - domain_mmap_cache_destroy(&c2); -} - -/************************************************************************/ - -void static inline -shadow_update_min_max(unsigned long smfn, int index) -{ - struct page_info *sl1page = mfn_to_page(smfn); - u32 min_max = sl1page->tlbflush_timestamp; - int min = SHADOW_MIN(min_max); - int max = SHADOW_MAX(min_max); - int update = 0; - - if ( index < min ) - { - min = index; - update = 1; - } - if ( index > max ) - { - max = index; - update = 1; - } - if ( update ) - sl1page->tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max); -} - -#if CONFIG_PAGING_LEVELS <= 2 -extern void shadow_map_l1_into_current_l2(unsigned long va); - -void static inline -shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - l2_pgentry_t sl2e = {0}; - - __shadow_get_l2e(v, va, &sl2e); - if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) ) - { - /* - * Either the L1 is not shadowed, or the shadow isn't linked into - * the current shadow L2. - */ - if ( create_l1_shadow ) - { - perfc_incrc(shadow_set_l1e_force_map); - shadow_map_l1_into_current_l2(va); - } - else /* check to see if it exists; if so, link it in */ - { - l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)]; - unsigned long gl1pfn = l2e_get_pfn(gpde); - unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow); - - ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT ); - - if ( sl1mfn ) - { - perfc_incrc(shadow_set_l1e_unlinked); - if ( !get_shadow_ref(sl1mfn) ) - BUG(); - l2pde_general(d, &gpde, &sl2e, sl1mfn); - __guest_set_l2e(v, va, gpde); - __shadow_set_l2e(v, va, sl2e); - } - else - { - // no shadow exists, so there's nothing to do. - perfc_incrc(shadow_set_l1e_fail); - return; - } - } - } - - __shadow_get_l2e(v, va, &sl2e); - - if ( shadow_mode_refcounts(d) ) - { - l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)]; - - // only do the ref counting if something important changed. 
- // - if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) ) - { - if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) && - !shadow_get_page_from_l1e(new_spte, d) ) - new_spte = l1e_empty(); - if ( l1e_get_flags(old_spte) & _PAGE_PRESENT ) - shadow_put_page_from_l1e(old_spte, d); - } - - } - - set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va)); - shadow_linear_pg_table[l1_linear_offset(va)] = new_spte; - shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va)); -} -#endif -/************************************************************************/ - -static inline int -shadow_mode_page_writable(unsigned long va, struct cpu_user_regs *regs, unsigned long gpfn) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - unsigned long mfn = gmfn_to_mfn(d, gpfn); - u32 type = mfn_to_page(mfn)->u.inuse.type_info & PGT_type_mask; - - if ( shadow_mode_refcounts(d) && - (type == PGT_writable_page) ) - type = shadow_max_pgtable_type(d, gpfn, NULL); - - // Strange but true: writable page tables allow kernel-mode access - // to L1 page table pages via write-protected PTEs... Similarly, write - // access to all page table pages is granted for shadow_mode_write_all - // clients. - // - if ( ((shadow_mode_write_l1(d) && (type == PGT_l1_page_table)) || - (shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table))) && - ((va < HYPERVISOR_VIRT_START) -#if defined(__x86_64__) - || (va >= HYPERVISOR_VIRT_END) -#endif - ) && - guest_kernel_mode(v, regs) ) - return 1; - - return 0; -} - -#if CONFIG_PAGING_LEVELS <= 2 -static inline l1_pgentry_t gva_to_gpte(unsigned long gva) -{ - l2_pgentry_t gpde; - l1_pgentry_t gpte; - struct vcpu *v = current; - - ASSERT( shadow_mode_translate(current->domain) ); - - __guest_get_l2e(v, gva, &gpde); - if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) ) - return l1e_empty();; - - // This is actually overkill - we only need to make sure the hl2 - // is in-sync. - // - shadow_sync_va(v, gva); - - if ( unlikely(__copy_from_user(&gpte, - &linear_pg_table[gva >> PAGE_SHIFT], - sizeof(gpte))) ) - { - FSH_LOG("gva_to_gpte got a fault on gva=%lx", gva); - return l1e_empty(); - } - - return gpte; -} - -static inline unsigned long gva_to_gpa(unsigned long gva) -{ - l1_pgentry_t gpte; - - gpte = gva_to_gpte(gva); - if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) ) - return 0; - - return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK); -} -#endif - -static inline unsigned long gva_to_mfn(unsigned long gva) -{ - unsigned long gpa = gva_to_gpa(gva); - return get_mfn_from_gpfn(gpa >> PAGE_SHIFT); -} - -/************************************************************************/ - -extern void __update_pagetables(struct vcpu *v); -static inline void update_pagetables(struct vcpu *v) -{ - struct domain *d = v->domain; - int paging_enabled; - - if ( hvm_guest(v) ) - paging_enabled = hvm_paging_enabled(v); - else - // HACK ALERT: there's currently no easy way to figure out if a domU - // has set its arch.guest_table to zero, vs not yet initialized it. - // - paging_enabled = !!pagetable_get_paddr(v->arch.guest_table); - - /* - * We don't call __update_pagetables() when hvm guest paging is - * disabled as we want the linear_pg_table to be inaccessible so that - * we bail out early of shadow_fault() if the hvm guest tries illegal - * accesses while it thinks paging is turned off. 
- */ - if ( unlikely(shadow_mode_enabled(d)) && paging_enabled ) - { - shadow_lock(d); - __update_pagetables(v); - shadow_unlock(d); - } - - if ( likely(!shadow_mode_external(d)) ) - { - if ( shadow_mode_enabled(d) ) - v->arch.monitor_table = v->arch.shadow_table; - else -#if CONFIG_PAGING_LEVELS == 4 - if ( !(v->arch.flags & TF_kernel_mode) ) - v->arch.monitor_table = v->arch.guest_table_user; - else -#endif - v->arch.monitor_table = v->arch.guest_table; - } -} - -void clear_all_shadow_status(struct domain *d); - -#if SHADOW_DEBUG -extern int _check_pagetable(struct vcpu *v, char *s); -extern int _check_all_pagetables(struct vcpu *v, char *s); - -#define check_pagetable(_v, _s) _check_pagetable(_v, _s) -//#define check_pagetable(_v, _s) _check_all_pagetables(_v, _s) - -#else -#define check_pagetable(_v, _s) ((void)0) -#endif - -#endif /* XEN_SHADOW_H */ +#endif /* _XEN_SHADOW_H */ /* * Local variables: diff --git a/xen/include/asm-x86/shadow2-multi.h b/xen/include/asm-x86/shadow2-multi.h new file mode 100644 index 0000000000..3b23a2f198 --- /dev/null +++ b/xen/include/asm-x86/shadow2-multi.h @@ -0,0 +1,116 @@ +/****************************************************************************** + * arch/x86/shadow2-multi.h + * + * Shadow2 declarations which will be multiply compiled. + * Parts of this code are Copyright (c) 2006 by XenSource Inc. + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +extern int +SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl1e, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t gl1mfn, void *new_gl1p, u32 size); +extern int +SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2e, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size); +extern int +SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl2he, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size); +extern int +SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl3e, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t gl3mfn, void *new_gl3p, u32 size); +extern int +SHADOW2_INTERNAL_NAME(sh2_map_and_validate_gl4e, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t gl4mfn, void *new_gl4p, u32 size); + +extern void +SHADOW2_INTERNAL_NAME(sh2_destroy_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t smfn); +extern void +SHADOW2_INTERNAL_NAME(sh2_destroy_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t smfn); +extern void +SHADOW2_INTERNAL_NAME(sh2_destroy_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t smfn); +extern void +SHADOW2_INTERNAL_NAME(sh2_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)( + struct vcpu *v, mfn_t smfn); + +extern void +SHADOW2_INTERNAL_NAME(sh2_unpin_all_l3_subshadows, 3, 3) + (struct vcpu *v, mfn_t smfn); + +extern void +SHADOW2_INTERNAL_NAME(sh2_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl2mfn); +extern void +SHADOW2_INTERNAL_NAME(sh2_unhook_pae_mappings, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl3mfn); +extern void +SHADOW2_INTERNAL_NAME(sh2_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl4mfn); + +extern int +SHADOW2_INTERNAL_NAME(sh2_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn); +extern int +SHADOW2_INTERNAL_NAME(sh2_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn); + +extern void +SHADOW2_INTERNAL_NAME(sh2_clear_shadow_entry, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, void *ep, mfn_t smfn); + +extern int +SHADOW2_INTERNAL_NAME(sh2_remove_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn); +extern int +SHADOW2_INTERNAL_NAME(sh2_remove_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn); +extern int +SHADOW2_INTERNAL_NAME(sh2_remove_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn); + +#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES +int +SHADOW2_INTERNAL_NAME(sh2_audit_l1_table, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl1mfn, mfn_t x); +int +SHADOW2_INTERNAL_NAME(sh2_audit_fl1_table, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl1mfn, mfn_t x); +int +SHADOW2_INTERNAL_NAME(sh2_audit_l2_table, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl2mfn, mfn_t x); +int +SHADOW2_INTERNAL_NAME(sh2_audit_l3_table, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl3mfn, mfn_t x); +int +SHADOW2_INTERNAL_NAME(sh2_audit_l4_table, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t sl4mfn, mfn_t x); +#endif + +#if SHADOW_LEVELS == GUEST_LEVELS +extern mfn_t +SHADOW2_INTERNAL_NAME(sh2_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS) + (struct 
vcpu *v); +extern void +SHADOW2_INTERNAL_NAME(sh2_destroy_monitor_table, SHADOW_LEVELS, GUEST_LEVELS) + (struct vcpu *v, mfn_t mmfn); +#endif + +extern struct shadow2_entry_points +SHADOW2_INTERNAL_NAME(shadow2_entry, SHADOW_LEVELS, GUEST_LEVELS); diff --git a/xen/include/asm-x86/shadow2-private.h b/xen/include/asm-x86/shadow2-private.h new file mode 100644 index 0000000000..7b2ac57572 --- /dev/null +++ b/xen/include/asm-x86/shadow2-private.h @@ -0,0 +1,612 @@ +/****************************************************************************** + * arch/x86/shadow2-private.h + * + * Shadow2 code that is private, and does not need to be multiply compiled. + * Parts of this code are Copyright (c) 2006 by XenSource Inc. + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XEN_SHADOW2_PRIVATE_H +#define _XEN_SHADOW2_PRIVATE_H + +// In order to override the definition of mfn_to_page, we make sure page.h has +// been included... +#include <asm/page.h> +#include <xen/domain_page.h> +#include <asm/x86_emulate.h> +#include <asm/hvm/support.h> + + +/****************************************************************************** + * Definitions for the use of the "available" bits in the shadow PTEs. + * + * Review of the low 12 bits of a shadow page table entry: + * + * in a guest: in a shadow: + * Bit 11: _PAGE_AVAIL2, aka _PAGE_GNTTAB + * Bit 10: _PAGE_AVAIL1 _PAGE_SHADOW_RW ("SW" below) + * Bit 9: _PAGE_AVAIL0 _PAGE_SHADOW_PRESENT ("SP" below) + * Bit 8: _PAGE_GLOBAL _PAGE_SHADOW_MMIO ("MMIO" below), + * aka _PAGE_SHADOW_GUEST_NOT_PRESENT + * Bit 7: _PAGE_PSE, aka _PAGE_PAT + * Bit 6: _PAGE_DIRTY + * Bit 5: _PAGE_ACCESSED + * Bit 4: _PAGE_PCD + * Bit 3: _PAGE_PWT + * Bit 2: _PAGE_USER + * Bit 1: _PAGE_RW ("GW" below) + * Bit 0: _PAGE_PRESENT ("GP" below) + * + * Given a guest entry, as shown below, we can expect the following in the + * corresponding shadow entry: + * + * Guest entry Shadow entry Commentary + * ----------- ---------------- --------------------------------------------- + * Maps + * GP GW IO GP SP GW SW MMIO + * -- -- ---- -- -- -- -- ---- + * - - - 0 0 0 0 0 The guest entry has not yet been shadowed. + * 0 - - 0 0 0 0 1 The guest entry is marked not-present. + * 1 1 no ? 1 ? 1 0 Writable entry in the guest. + * 1 0 no ? 1 0 0 0 Read-only entry in the guest. + * 1 1 yes 0 1 ? 1 1 Writable MMIO mapping in the guest. + * 1 0 yes 0 1 0 0 1 Read-only MMIO mapping in the guest. + * + * Normally, we would expect that GP=1 in the guest to imply GP=1 in the + * shadow, and similarly for GW=1. However, various functionality that may be + * implemented via the shadow can cause GP or GW to be cleared in such cases. + * A & D bit emulation is a prime example of such functionality. 
+ *
+ * If _PAGE_SHADOW_PRESENT is zero, then the _PAGE_PRESENT bit in that same
+ * entry will always be zero, too.
+ *
+ * Bit 11 is used in debug builds as the _PAGE_GNTTAB bit in PV guests. It is
+ * currently available for random (ab)use in shadow entries.
+ *
+ * Bit 8 (the global bit) could be propagated from an HVM guest to the shadow,
+ * but currently there is no benefit, as the guest's TLB is flushed on every
+ * transition of CR3 anyway due to the HVM exit/re-entry.
+ *
+ * In shadow entries in which the _PAGE_SHADOW_PRESENT is set, bit 8 is used
+ * as the _PAGE_SHADOW_MMIO bit. In such entries, if _PAGE_SHADOW_MMIO is
+ * set, then the entry contains the *gfn* directly from the corresponding
+ * guest entry (not an mfn!!).
+ *
+ * Bit 7 is set in a guest L2 to signify a superpage entry. The current
+ * shadow code splinters superpage mappings into 512 or 1024 4K mappings; the
+ * resulting shadow L1 table is called an FL1. Note that there is no guest
+ * page that corresponds to an FL1.
+ *
+ * Bit 7 in a guest L1 is the PAT2 bit. Currently we do not support PAT in
+ * this shadow code.
+ *
+ * Bit 6 is the dirty bit.
+ *
+ * Bit 5 is the accessed bit.
+ *
+ * Bit 4 is the cache disable bit. If set in a guest, the hardware is
+ * supposed to refuse to cache anything found via this entry. It can be set
+ * in an L4e, L3e, L2e, or L1e. This shadow code currently does not support
+ * cache disable bits. They are silently ignored.
+ *
+ * Bit 4 in a guest L1 is also the PAT1 bit. Currently we do not support PAT
+ * in this shadow code.
+ *
+ * Bit 3 is the cache write-thru bit. If set in a guest, the hardware is
+ * supposed to use write-thru instead of write-back caching for anything found
+ * via this entry. It can be set in an L4e, L3e, L2e, or L1e. This shadow
+ * code currently does not support cache write-thru bits. They are silently
+ * ignored.
+ *
+ * Bit 3 in a guest L1 is also the PAT0 bit. Currently we do not support PAT
+ * in this shadow code.
+ *
+ * Bit 2 is the user bit.
+ *
+ * Bit 1 is the read-write bit.
+ *
+ * Bit 0 is the present bit.
+ */
+
+// Copy of the _PAGE_RW bit from the guest's PTE, appropriately zero'ed by
+// the appropriate shadow rules.
+#define _PAGE_SHADOW_RW _PAGE_AVAIL1
+
+// Copy of the _PAGE_PRESENT bit from the guest's PTE
+#define _PAGE_SHADOW_PRESENT _PAGE_AVAIL0
+
+// The matching guest entry maps MMIO space
+#define _PAGE_SHADOW_MMIO _PAGE_GLOBAL
+
+// Shadow flags value used when the guest is not present
+#define _PAGE_SHADOW_GUEST_NOT_PRESENT _PAGE_GLOBAL
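
To make the propagation table above concrete, the following sketch shows how the
available bits could be filled in from a guest entry's P/RW bits. This is an
illustration only, not code from this patch: sh2_flags_for_guest() is a
hypothetical name, and only the _PAGE_* constants defined above are assumed.

    /* Hypothetical helper: derive the SP/SW/MMIO available bits for a
     * shadow entry from the corresponding guest entry, following the
     * propagation table documented above. */
    static inline u32 sh2_flags_for_guest(u32 gflags, int is_mmio)
    {
        u32 sflags = 0;

        if ( !(gflags & _PAGE_PRESENT) )
            /* Guest not-present: SP stays clear; bit 8 records the fact. */
            return _PAGE_SHADOW_GUEST_NOT_PRESENT;

        sflags |= _PAGE_SHADOW_PRESENT;     /* SP=1: guest P bit was set */
        if ( gflags & _PAGE_RW )
            sflags |= _PAGE_SHADOW_RW;      /* SW=1: guest RW bit was set */
        if ( is_mmio )
            sflags |= _PAGE_SHADOW_MMIO;    /* entry maps MMIO space */

        return sflags;
    }

Note how the not-present and MMIO cases share bit 8 (_PAGE_SHADOW_MMIO aka
_PAGE_SHADOW_GUEST_NOT_PRESENT): the overloading is unambiguous because
_PAGE_SHADOW_PRESENT distinguishes the two, as the comment above explains.
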
+
+
+/******************************************************************************
+ * Debug and error-message output
+ */
+#define SHADOW2_PRINTK(_f, _a...)                                     \
+    debugtrace_printk("sh2: %s(): " _f, __func__, ##_a)
+#define SHADOW2_ERROR(_f, _a...)                                      \
+    printk("sh2 error: %s(): " _f, __func__, ##_a)
+#define SHADOW2_DEBUG(flag, _f, _a...)                                \
+    do {                                                              \
+        if (SHADOW2_DEBUG_ ## flag)                                   \
+            debugtrace_printk("sh2debug: %s(): " _f, __func__, ##_a); \
+    } while (0)
+
+// The flags for use with SHADOW2_DEBUG:
+#define SHADOW2_DEBUG_PROPAGATE        0
+#define SHADOW2_DEBUG_MAKE_SHADOW      0
+#define SHADOW2_DEBUG_DESTROY_SHADOW   0
+#define SHADOW2_DEBUG_P2M              0
+#define SHADOW2_DEBUG_A_AND_D          0
+#define SHADOW2_DEBUG_EMULATE          0
+#define SHADOW2_DEBUG_LOGDIRTY         1
+
+
+/******************************************************************************
+ * Auditing routines
+ */
+
+#if SHADOW2_AUDIT & SHADOW2_AUDIT_ENTRIES_FULL
+extern void shadow2_audit_tables(struct vcpu *v);
+#else
+#define shadow2_audit_tables(_v) do {} while(0)
+#endif
+
+#if SHADOW2_AUDIT & SHADOW2_AUDIT_P2M
+extern void shadow2_audit_p2m(struct domain *d);
+#else
+#define shadow2_audit_p2m(_d) do {} while(0)
+#endif
+
+
+/******************************************************************************
+ * Mechanism for double-checking the optimized pagefault path: this
+ * structure contains a record of actions taken by the fault handling
+ * code. In paranoid mode, the fast-path code fills out one of these
+ * structures (but doesn't take any actual action) and then the normal
+ * path fills in another. When the fault handler finishes, the
+ * two are compared */
+
+#ifdef SHADOW2_OPTIMIZATION_PARANOIA
+
+typedef struct shadow2_action_log sh2_log_t;
+struct shadow2_action_log {
+    paddr_t ad[CONFIG_PAGING_LEVELS]; /* A & D bits propagated here */
+    paddr_t mmio;                     /* Address of an mmio operation */
+    int rv;                           /* Result of the fault handler */
+};
+
+/* There are two logs, one for the fast path, one for the normal path */
+enum sh2_log_type { log_slow = 0, log_fast = 1 };
+
+/* Alloc and zero the logs */
+static inline void sh2_init_log(struct vcpu *v)
+{
+    if ( unlikely(!v->arch.shadow2_action_log) )
+        v->arch.shadow2_action_log = xmalloc_array(sh2_log_t, 2);
+    ASSERT(v->arch.shadow2_action_log);
+    memset(v->arch.shadow2_action_log, 0, 2 * sizeof (sh2_log_t));
+}
+
+/* Log an A&D-bit update */
+static inline void sh2_log_ad(struct vcpu *v, paddr_t e, unsigned int level)
+{
+    v->arch.shadow2_action_log[v->arch.shadow2_action_index].ad[level] = e;
+}
+
+/* Log an MMIO address */
+static inline void sh2_log_mmio(struct vcpu *v, paddr_t m)
+{
+    v->arch.shadow2_action_log[v->arch.shadow2_action_index].mmio = m;
+}
+
+/* Log the result */
+static inline void sh2_log_rv(struct vcpu *v, int rv)
+{
+    v->arch.shadow2_action_log[v->arch.shadow2_action_index].rv = rv;
+}
+
+/* Set which mode we're in */
+static inline void sh2_set_log_mode(struct vcpu *v, enum sh2_log_type t)
+{
+    v->arch.shadow2_action_index = t;
+}
+
+/* Know not to take action, because we're only checking the mechanism */
+static inline int sh2_take_no_action(struct vcpu *v)
+{
+    return (v->arch.shadow2_action_index == log_fast);
+}
+
+#else /* Non-paranoid mode: these logs do not exist */
+
+#define sh2_init_log(_v) do { (void)(_v); } while(0)
+#define sh2_set_log_mode(_v,_t) do { (void)(_v); } while(0)
+#define sh2_log_ad(_v,_e,_l) do { (void)(_v),(void)(_e),(void)(_l); } while (0)
+#define sh2_log_mmio(_v,_m) do { (void)(_v),(void)(_m); } while (0)
+#define sh2_log_rv(_v,_r) do { (void)(_v),(void)(_r); } while (0)
+#define sh2_take_no_action(_v) (((void)(_v)), 0)
+
+#endif /* SHADOW2_OPTIMIZATION_PARANOIA */
+
+
+/******************************************************************************
+ * Macro for dealing with the naming of the internal names of the
+ * shadow code's external entry points.
+ */ +#define SHADOW2_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels) \ + name ## __shadow_ ## shadow_levels ## _guest_ ## guest_levels +#define SHADOW2_INTERNAL_NAME(name, shadow_levels, guest_levels) \ + SHADOW2_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels) + +#if CONFIG_PAGING_LEVELS == 2 +#define GUEST_LEVELS 2 +#define SHADOW_LEVELS 2 +#include <asm/shadow2-multi.h> +#undef GUEST_LEVELS +#undef SHADOW_LEVELS +#endif /* CONFIG_PAGING_LEVELS == 2 */ + +#if CONFIG_PAGING_LEVELS == 3 +#define GUEST_LEVELS 2 +#define SHADOW_LEVELS 3 +#include <asm/shadow2-multi.h> +#undef GUEST_LEVELS +#undef SHADOW_LEVELS + +#define GUEST_LEVELS 3 +#define SHADOW_LEVELS 3 +#include <asm/shadow2-multi.h> +#undef GUEST_LEVELS +#undef SHADOW_LEVELS +#endif /* CONFIG_PAGING_LEVELS == 3 */ + +#if CONFIG_PAGING_LEVELS == 4 +#define GUEST_LEVELS 2 +#define SHADOW_LEVELS 3 +#include <asm/shadow2-multi.h> +#undef GUEST_LEVELS +#undef SHADOW_LEVELS + +#define GUEST_LEVELS 3 +#define SHADOW_LEVELS 3 +#include <asm/shadow2-multi.h> +#undef GUEST_LEVELS +#undef SHADOW_LEVELS + +#define GUEST_LEVELS 3 +#define SHADOW_LEVELS 4 +#include <asm/shadow2-multi.h> +#undef GUEST_LEVELS +#undef SHADOW_LEVELS + +#define GUEST_LEVELS 4 +#define SHADOW_LEVELS 4 +#include <asm/shadow2-multi.h> +#undef GUEST_LEVELS +#undef SHADOW_LEVELS +#endif /* CONFIG_PAGING_LEVELS == 4 */ + + +/****************************************************************************** + * Various function declarations + */ + +/* x86 emulator support */ +extern struct x86_emulate_ops shadow2_emulator_ops; + +/* Hash table functions */ +mfn_t shadow2_hash_lookup(struct vcpu *v, unsigned long n, u8 t); +void shadow2_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn); +void shadow2_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn); + +/* shadow promotion */ +void shadow2_promote(struct vcpu *v, mfn_t gmfn, u32 type); +void shadow2_demote(struct vcpu *v, mfn_t gmfn, u32 type); + +/* Shadow page allocation functions */ +void shadow2_prealloc(struct domain *d, unsigned int order); +mfn_t shadow2_alloc(struct domain *d, + u32 shadow_type, + unsigned long backpointer); +void shadow2_free(struct domain *d, mfn_t smfn); + +/* Function to convert a shadow to log-dirty */ +void shadow2_convert_to_log_dirty(struct vcpu *v, mfn_t smfn); + +/* Dispatcher function: call the per-mode function that will unhook the + * non-Xen mappings in this top-level shadow mfn */ +void shadow2_unhook_mappings(struct vcpu *v, mfn_t smfn); + +/* Re-sync copies of PAE shadow L3 tables if they have been changed */ +void sh2_pae_recopy(struct domain *d); + +/* Install the xen mappings in various flavours of shadow */ +void sh2_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn); +void sh2_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn); +void sh2_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn); +void sh2_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); + + +/****************************************************************************** + * MFN/page-info handling + */ + +// Override mfn_to_page from asm/page.h, which was #include'd above, +// in order to make it work with our mfn type. +#undef mfn_to_page +#define mfn_to_page(_mfn) (frame_table + mfn_x(_mfn)) + +// Override page_to_mfn from asm/page.h, which was #include'd above, +// in order to make it work with our mfn type. 
+#undef page_to_mfn +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) + +// Override mfn_valid from asm/page.h, which was #include'd above, +// in order to make it work with our mfn type. +#undef mfn_valid +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) + +// Provide mfn_t-aware versions of common xen functions +static inline void * +sh2_map_domain_page(mfn_t mfn) +{ + /* XXX Using the monitor-table as a map will happen here */ + return map_domain_page(mfn_x(mfn)); +} + +static inline void +sh2_unmap_domain_page(void *p) +{ + /* XXX Using the monitor-table as a map will happen here */ + unmap_domain_page(p); +} + +static inline void * +sh2_map_domain_page_global(mfn_t mfn) +{ + /* XXX Using the monitor-table as a map will happen here */ + return map_domain_page_global(mfn_x(mfn)); +} + +static inline void +sh2_unmap_domain_page_global(void *p) +{ + /* XXX Using the monitor-table as a map will happen here */ + unmap_domain_page_global(p); +} + +static inline int +sh2_mfn_is_dirty(struct domain *d, mfn_t gmfn) +/* Is this guest page dirty? Call only in log-dirty mode. */ +{ + unsigned long pfn; + ASSERT(shadow2_mode_log_dirty(d)); + ASSERT(d->arch.shadow_dirty_bitmap != NULL); + + /* We /really/ mean PFN here, even for non-translated guests. */ + pfn = get_gpfn_from_mfn(mfn_x(gmfn)); + if ( likely(VALID_M2P(pfn)) + && likely(pfn < d->arch.shadow_dirty_bitmap_size) + && test_bit(pfn, d->arch.shadow_dirty_bitmap) ) + return 1; + + return 0; +} + +static inline int +sh2_mfn_is_a_page_table(mfn_t gmfn) +{ + struct page_info *page = mfn_to_page(gmfn); + struct domain *owner; + unsigned long type_info; + + if ( !valid_mfn(gmfn) ) + return 0; + + owner = page_get_owner(page); + if ( owner && shadow2_mode_refcounts(owner) + && (page->count_info & PGC_page_table) ) + return 1; + + type_info = page->u.inuse.type_info & PGT_type_mask; + return type_info && (type_info <= PGT_l4_page_table); +} + + +/**************************************************************************/ +/* Shadow-page refcounting. See comment in shadow2-common.c about the + * use of struct page_info fields for shadow pages */ + +void sh2_destroy_shadow(struct vcpu *v, mfn_t smfn); + +/* Increase the refcount of a shadow page. Arguments are the mfn to refcount, + * and the physical address of the shadow entry that holds the ref (or zero + * if the ref is held by something else) */ +static inline void sh2_get_ref(mfn_t smfn, paddr_t entry_pa) +{ + u32 x, nx; + struct page_info *page = mfn_to_page(smfn); + + ASSERT(mfn_valid(smfn)); + + x = page->count_info & PGC_SH2_count_mask; + nx = x + 1; + + if ( unlikely(nx & ~PGC_SH2_count_mask) ) + { + SHADOW2_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n", + page->u.inuse.type_info, mfn_x(smfn)); + domain_crash_synchronous(); + } + + /* Guarded by the shadow lock, so no need for atomic update */ + page->count_info &= ~PGC_SH2_count_mask; + page->count_info |= nx; + + /* We remember the first shadow entry that points to each shadow. */ + if ( entry_pa != 0 && page->up == 0 ) + page->up = entry_pa; +} + + +/* Decrease the refcount of a shadow page. As for get_ref, takes the + * physical address of the shadow entry that held this reference. 
*/ +static inline void sh2_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa) +{ + u32 x, nx; + struct page_info *page = mfn_to_page(smfn); + + ASSERT(mfn_valid(smfn)); + ASSERT(page_get_owner(page) == NULL); + + /* If this is the entry in the up-pointer, remove it */ + if ( entry_pa != 0 && page->up == entry_pa ) + page->up = 0; + + x = page->count_info & PGC_SH2_count_mask; + nx = x - 1; + + if ( unlikely(x == 0) ) + { + SHADOW2_PRINTK("shadow ref underflow, smfn=%lx oc=%08x t=%" + PRtype_info "\n", + mfn_x(smfn), + page->count_info & PGC_SH2_count_mask, + page->u.inuse.type_info); + domain_crash_synchronous(); + } + + /* Guarded by the shadow lock, so no need for atomic update */ + page->count_info &= ~PGC_SH2_count_mask; + page->count_info |= nx; + + if ( unlikely(nx == 0) ) + sh2_destroy_shadow(v, smfn); +} + + +/* Pin a shadow page: take an extra refcount and set the pin bit. */ +static inline void sh2_pin(mfn_t smfn) +{ + struct page_info *page; + + ASSERT(mfn_valid(smfn)); + page = mfn_to_page(smfn); + if ( !(page->count_info & PGC_SH2_pinned) ) + { + sh2_get_ref(smfn, 0); + page->count_info |= PGC_SH2_pinned; + } +} + +/* Unpin a shadow page: unset the pin bit and release the extra ref. */ +static inline void sh2_unpin(struct vcpu *v, mfn_t smfn) +{ + struct page_info *page; + + ASSERT(mfn_valid(smfn)); + page = mfn_to_page(smfn); + if ( page->count_info & PGC_SH2_pinned ) + { + page->count_info &= ~PGC_SH2_pinned; + sh2_put_ref(v, smfn, 0); + } +} + +/**************************************************************************/ +/* CPU feature support querying */ + +static inline int +guest_supports_superpages(struct vcpu *v) +{ + return hvm_guest(v) && (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE); +} + +static inline int +guest_supports_nx(struct vcpu *v) +{ + if ( !hvm_guest(v) ) + return cpu_has_nx; + + // XXX - fix this! + return 1; +} + +/**************************************************************************/ +/* Guest physmap (p2m) support */ + +/* Read our own P2M table, checking in the linear pagetables first to be + * sure that we will succeed. Call this function if you expect it to + * fail often, as it avoids page faults. If you expect to succeed, use + * vcpu_gfn_to_mfn, which copy_from_user()s the entry */ +static inline mfn_t +vcpu_gfn_to_mfn_nofault(struct vcpu *v, unsigned long gfn) +{ + unsigned long entry_addr = (unsigned long) &phys_to_machine_mapping[gfn]; +#if CONFIG_PAGING_LEVELS >= 4 + l4_pgentry_t *l4e; + l3_pgentry_t *l3e; +#endif + l2_pgentry_t *l2e; + l1_pgentry_t *l1e; + + ASSERT(current == v); + if ( !shadow2_vcpu_mode_translate(v) ) + return _mfn(gfn); + +#if CONFIG_PAGING_LEVELS > 2 + if ( gfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) ) + /* This pfn is higher than the p2m map can hold */ + return _mfn(INVALID_MFN); +#endif + + /* Walk the linear pagetables. 
Note that this is *not* the same as
+ * the walk in sh2_gfn_to_mfn_foreign, which is walking the p2m map */
+#if CONFIG_PAGING_LEVELS >= 4
+    l4e = __linear_l4_table + l4_linear_offset(entry_addr);
+    if ( !(l4e_get_flags(*l4e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+    l3e = __linear_l3_table + l3_linear_offset(entry_addr);
+    if ( !(l3e_get_flags(*l3e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+#endif
+    l2e = __linear_l2_table + l2_linear_offset(entry_addr);
+    if ( !(l2e_get_flags(*l2e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+    l1e = __linear_l1_table + l1_linear_offset(entry_addr);
+    if ( !(l1e_get_flags(*l1e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
+
+    /* Safe to look at this part of the table */
+    if ( l1e_get_flags(phys_to_machine_mapping[gfn]) & _PAGE_PRESENT )
+        return _mfn(l1e_get_pfn(phys_to_machine_mapping[gfn]));
+
+    return _mfn(INVALID_MFN);
+}
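
As a usage sketch (illustrative only; sh2_gfn_mapped_nofault() is a
hypothetical caller, not part of this patch), the no-fault walk above can be
used to probe a translation without risking a page fault:

    /* Hypothetical caller: returns nonzero iff the gfn currently has a
     * valid, present translation reachable through the linear map. */
    static inline int sh2_gfn_mapped_nofault(struct vcpu *v, unsigned long gfn)
    {
        mfn_t mfn = vcpu_gfn_to_mfn_nofault(v, gfn);
        return mfn_x(mfn) != INVALID_MFN;
    }

Callers that expect the lookup to succeed should prefer vcpu_gfn_to_mfn, as
the comment above the function advises.
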
+
+
+#endif /* _XEN_SHADOW2_PRIVATE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/shadow2-types.h b/xen/include/asm-x86/shadow2-types.h
new file mode 100644
index 0000000000..f593c97822
--- /dev/null
+++ b/xen/include/asm-x86/shadow2-types.h
@@ -0,0 +1,705 @@
+/******************************************************************************
+ * include/asm-x86/shadow2-types.h
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_SHADOW2_TYPES_H
+#define _XEN_SHADOW2_TYPES_H
+
+// Map a shadow page
+static inline void *
+map_shadow_page(mfn_t smfn)
+{
+    // XXX -- Possible optimization/measurement question for 32-bit and PAE
+    //        hypervisors:
+    //        How often is this smfn already available in the shadow linear
+    //        table? Might it be worth checking that table first,
+    //        presumably using the reverse map hint in the page_info of this
+    //        smfn, rather than calling map_domain_page()?
+    //
+    return sh2_map_domain_page(smfn);
+}
+
+// matching unmap for map_shadow_page()
+static inline void
+unmap_shadow_page(void *p)
+{
+    sh2_unmap_domain_page(p);
+}
+
+/*
+ * Define various types for handling pagetables, based on these options:
+ * SHADOW_PAGING_LEVELS : Number of levels of shadow pagetables
+ * GUEST_PAGING_LEVELS  : Number of levels of guest pagetables
+ */
+
+#if (CONFIG_PAGING_LEVELS < SHADOW_PAGING_LEVELS)
+#error Cannot have more levels of shadow pagetables than host pagetables
+#endif
+
+#if (SHADOW_PAGING_LEVELS < GUEST_PAGING_LEVELS)
+#error Cannot have more levels of guest pagetables than shadow pagetables
+#endif
+
+#if SHADOW_PAGING_LEVELS == 2
+#define SHADOW_L1_PAGETABLE_ENTRIES 1024
+#define SHADOW_L2_PAGETABLE_ENTRIES 1024
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 22
+#endif
+
+#if SHADOW_PAGING_LEVELS == 3
+#define SHADOW_L1_PAGETABLE_ENTRIES 512
+#define SHADOW_L2_PAGETABLE_ENTRIES 512
+#define SHADOW_L3_PAGETABLE_ENTRIES 4
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 21
+#define SHADOW_L3_PAGETABLE_SHIFT 30
+#endif
+
+#if SHADOW_PAGING_LEVELS == 4
+#define SHADOW_L1_PAGETABLE_ENTRIES 512
+#define SHADOW_L2_PAGETABLE_ENTRIES 512
+#define SHADOW_L3_PAGETABLE_ENTRIES 512
+#define SHADOW_L4_PAGETABLE_ENTRIES 512
+#define SHADOW_L1_PAGETABLE_SHIFT 12
+#define SHADOW_L2_PAGETABLE_SHIFT 21
+#define SHADOW_L3_PAGETABLE_SHIFT 30
+#define SHADOW_L4_PAGETABLE_SHIFT 39
+#endif
+
+/* Types of the shadow page tables */
+typedef l1_pgentry_t shadow_l1e_t;
+typedef l2_pgentry_t shadow_l2e_t;
+#if SHADOW_PAGING_LEVELS >= 3
+typedef l3_pgentry_t shadow_l3e_t;
+#if SHADOW_PAGING_LEVELS >= 4
+typedef l4_pgentry_t shadow_l4e_t;
+#endif
+#endif
+
+/* Access functions for them */
+static inline paddr_t shadow_l1e_get_paddr(shadow_l1e_t sl1e)
+{ return l1e_get_paddr(sl1e); }
+static inline paddr_t shadow_l2e_get_paddr(shadow_l2e_t sl2e)
+{ return l2e_get_paddr(sl2e); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline paddr_t shadow_l3e_get_paddr(shadow_l3e_t sl3e)
+{ return l3e_get_paddr(sl3e); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline paddr_t shadow_l4e_get_paddr(shadow_l4e_t sl4e)
+{ return l4e_get_paddr(sl4e); }
+#endif
+#endif
+
+static inline mfn_t shadow_l1e_get_mfn(shadow_l1e_t sl1e)
+{ return _mfn(l1e_get_pfn(sl1e)); }
+static inline mfn_t shadow_l2e_get_mfn(shadow_l2e_t sl2e)
+{ return _mfn(l2e_get_pfn(sl2e)); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline mfn_t shadow_l3e_get_mfn(shadow_l3e_t sl3e)
+{ return _mfn(l3e_get_pfn(sl3e)); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline mfn_t shadow_l4e_get_mfn(shadow_l4e_t sl4e)
+{ return _mfn(l4e_get_pfn(sl4e)); }
+#endif
+#endif
+
+static inline u32 shadow_l1e_get_flags(shadow_l1e_t sl1e)
+{ return l1e_get_flags(sl1e); }
+static inline u32 shadow_l2e_get_flags(shadow_l2e_t sl2e)
+{ return l2e_get_flags(sl2e); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline u32 shadow_l3e_get_flags(shadow_l3e_t sl3e)
+{ return l3e_get_flags(sl3e); }
+#if SHADOW_PAGING_LEVELS >= 4
+static inline u32 shadow_l4e_get_flags(shadow_l4e_t sl4e)
+{ return l4e_get_flags(sl4e); }
+#endif
+#endif
+
+static inline shadow_l1e_t
+shadow_l1e_remove_flags(shadow_l1e_t sl1e, u32 flags)
+{ l1e_remove_flags(sl1e, flags); return sl1e; }
+
+static inline shadow_l1e_t shadow_l1e_empty(void)
+{ return l1e_empty(); }
+static inline shadow_l2e_t shadow_l2e_empty(void)
+{ return l2e_empty(); }
+#if SHADOW_PAGING_LEVELS >= 3
+static inline shadow_l3e_t shadow_l3e_empty(void)
+{ return l3e_empty(); }
+#if SHADOW_PAGING_LEVELS >= 4 +static inline shadow_l4e_t shadow_l4e_empty(void) +{ return l4e_empty(); } +#endif +#endif + +static inline shadow_l1e_t shadow_l1e_from_mfn(mfn_t mfn, u32 flags) +{ return l1e_from_pfn(mfn_x(mfn), flags); } +static inline shadow_l2e_t shadow_l2e_from_mfn(mfn_t mfn, u32 flags) +{ return l2e_from_pfn(mfn_x(mfn), flags); } +#if SHADOW_PAGING_LEVELS >= 3 +static inline shadow_l3e_t shadow_l3e_from_mfn(mfn_t mfn, u32 flags) +{ return l3e_from_pfn(mfn_x(mfn), flags); } +#if SHADOW_PAGING_LEVELS >= 4 +static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags) +{ return l4e_from_pfn(mfn_x(mfn), flags); } +#endif +#endif + +#define shadow_l1_table_offset(a) l1_table_offset(a) +#define shadow_l2_table_offset(a) l2_table_offset(a) +#define shadow_l3_table_offset(a) l3_table_offset(a) +#define shadow_l4_table_offset(a) l4_table_offset(a) + +/**************************************************************************/ +/* Access to the linear mapping of shadow page tables. */ + +/* Offsets into each level of the linear mapping for a virtual address. */ +#define shadow_l1_linear_offset(_a) \ + (((_a) & VADDR_MASK) >> SHADOW_L1_PAGETABLE_SHIFT) +#define shadow_l2_linear_offset(_a) \ + (((_a) & VADDR_MASK) >> SHADOW_L2_PAGETABLE_SHIFT) +#define shadow_l3_linear_offset(_a) \ + (((_a) & VADDR_MASK) >> SHADOW_L3_PAGETABLE_SHIFT) +#define shadow_l4_linear_offset(_a) \ + (((_a) & VADDR_MASK) >> SHADOW_L4_PAGETABLE_SHIFT) + +/* Where to find each level of the linear mapping. For PV guests, we use + * the shadow linear-map self-entry as many times as we need. For HVM + * guests, the shadow doesn't have a linear-map self-entry so we must use + * the monitor-table's linear-map entry N-1 times and then the shadow-map + * entry once. */ +#define __sh2_linear_l1_table ((shadow_l1e_t *)(SH_LINEAR_PT_VIRT_START)) +#define __sh2_linear_l2_table ((shadow_l2e_t *) \ + (__sh2_linear_l1_table + shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START))) + +// shadow linear L3 and L4 tables only exist in 4 level paging... +#if SHADOW_PAGING_LEVELS == 4 +#define __sh2_linear_l3_table ((shadow_l3e_t *) \ + (__sh2_linear_l2_table + shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START))) +#define __sh2_linear_l4_table ((shadow_l4e_t *) \ + (__sh2_linear_l3_table + shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START))) +#endif + +#define sh2_linear_l1_table(v) ({ \ + ASSERT(current == (v)); \ + __sh2_linear_l1_table; \ +}) + +#define sh2_linear_l2_table(v) ({ \ + ASSERT(current == (v)); \ + ((shadow_l2e_t *) \ + (hvm_guest(v) ? __linear_l1_table : __sh2_linear_l1_table) + \ + shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \ +}) + +// shadow linear L3 and L4 tables only exist in 4 level paging... +#if SHADOW_PAGING_LEVELS == 4 +#define sh2_linear_l3_table(v) ({ \ + ASSERT(current == (v)); \ + ((shadow_l3e_t *) \ + (hvm_guest(v) ? __linear_l2_table : __sh2_linear_l2_table) + \ + shadow_l2_linear_offset(SH_LINEAR_PT_VIRT_START)); \ +}) + +// we use l4_pgentry_t instead of shadow_l4e_t below because shadow_l4e_t is +// not defined for when xen_levels==4 & shadow_levels==3... +#define sh2_linear_l4_table(v) ({ \ + ASSERT(current == (v)); \ + ((l4_pgentry_t *) \ + (hvm_guest(v) ? 
__linear_l3_table : __sh2_linear_l3_table) +                       \
+     shadow_l3_linear_offset(SH_LINEAR_PT_VIRT_START));                \
+})
+#endif
+
+#if GUEST_PAGING_LEVELS == 2
+
+#include <asm/page-guest32.h>
+
+#define GUEST_L1_PAGETABLE_ENTRIES 1024
+#define GUEST_L2_PAGETABLE_ENTRIES 1024
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 22
+
+/* Type of the guest's frame numbers */
+TYPE_SAFE(u32,gfn)
+#define INVALID_GFN ((u32)(-1u))
+#define SH2_PRI_gfn "05x"
+
+/* Types of the guest's page tables */
+typedef l1_pgentry_32_t guest_l1e_t;
+typedef l2_pgentry_32_t guest_l2e_t;
+
+/* Access functions for them */
+static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
+{ return l1e_get_paddr_32(gl1e); }
+static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
+{ return l2e_get_paddr_32(gl2e); }
+
+static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
+{ return _gfn(l1e_get_paddr_32(gl1e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
+{ return _gfn(l2e_get_paddr_32(gl2e) >> PAGE_SHIFT); }
+
+static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
+{ return l1e_get_flags_32(gl1e); }
+static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
+{ return l2e_get_flags_32(gl2e); }
+
+static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
+{ l1e_add_flags_32(gl1e, flags); return gl1e; }
+static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
+{ l2e_add_flags_32(gl2e, flags); return gl2e; }
+
+static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
+{ return l1e_from_pfn_32(gfn_x(gfn), flags); }
+static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
+{ return l2e_from_pfn_32(gfn_x(gfn), flags); }
+
+#define guest_l1_table_offset(a) l1_table_offset_32(a)
+#define guest_l2_table_offset(a) l2_table_offset_32(a)
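
For illustration (a hypothetical helper, not part of this patch), these
wrappers let level-generic shadow code read a 2-level guest's PDE without
touching the l2_pgentry_32_t layout directly:

    /* Hypothetical example: extract the gfn from a 32-bit guest PDE,
     * yielding INVALID_GFN when the entry is not present. */
    static inline gfn_t guest_l2e_gfn_if_present(guest_l2e_t gl2e)
    {
        if ( !(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT) )
            return _gfn(INVALID_GFN);
        return guest_l2e_get_gfn(gl2e);
    }

The same caller then compiles unchanged against the 3- and 4-level
definitions below, which is the point of the TYPE_SAFE gfn abstraction and
of these accessor shims.
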
*/ +#define PGC_SH2_l1_shadow PGC_SH2_l1_32_shadow +#define PGC_SH2_l2_shadow PGC_SH2_l2_32_shadow +#define PGC_SH2_fl1_shadow PGC_SH2_fl1_32_shadow + +#else /* GUEST_PAGING_LEVELS != 2 */ + +#if GUEST_PAGING_LEVELS == 3 +#define GUEST_L1_PAGETABLE_ENTRIES 512 +#define GUEST_L2_PAGETABLE_ENTRIES 512 +#define GUEST_L3_PAGETABLE_ENTRIES 4 +#define GUEST_L1_PAGETABLE_SHIFT 12 +#define GUEST_L2_PAGETABLE_SHIFT 21 +#define GUEST_L3_PAGETABLE_SHIFT 30 +#else /* GUEST_PAGING_LEVELS == 4 */ +#define GUEST_L1_PAGETABLE_ENTRIES 512 +#define GUEST_L2_PAGETABLE_ENTRIES 512 +#define GUEST_L3_PAGETABLE_ENTRIES 512 +#define GUEST_L4_PAGETABLE_ENTRIES 512 +#define GUEST_L1_PAGETABLE_SHIFT 12 +#define GUEST_L2_PAGETABLE_SHIFT 21 +#define GUEST_L3_PAGETABLE_SHIFT 30 +#define GUEST_L4_PAGETABLE_SHIFT 39 +#endif + +/* Type of the guest's frame numbers */ +TYPE_SAFE(unsigned long,gfn) +#define INVALID_GFN ((unsigned long)(-1ul)) +#define SH2_PRI_gfn "05lx" + +/* Types of the guest's page tables */ +typedef l1_pgentry_t guest_l1e_t; +typedef l2_pgentry_t guest_l2e_t; +typedef l3_pgentry_t guest_l3e_t; +#if GUEST_PAGING_LEVELS >= 4 +typedef l4_pgentry_t guest_l4e_t; +#endif + +/* Access functions for them */ +static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e) +{ return l1e_get_paddr(gl1e); } +static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e) +{ return l2e_get_paddr(gl2e); } +static inline paddr_t guest_l3e_get_paddr(guest_l3e_t gl3e) +{ return l3e_get_paddr(gl3e); } +#if GUEST_PAGING_LEVELS >= 4 +static inline paddr_t guest_l4e_get_paddr(guest_l4e_t gl4e) +{ return l4e_get_paddr(gl4e); } +#endif + +static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e) +{ return _gfn(l1e_get_paddr(gl1e) >> PAGE_SHIFT); } +static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e) +{ return _gfn(l2e_get_paddr(gl2e) >> PAGE_SHIFT); } +static inline gfn_t guest_l3e_get_gfn(guest_l3e_t gl3e) +{ return _gfn(l3e_get_paddr(gl3e) >> PAGE_SHIFT); } +#if GUEST_PAGING_LEVELS >= 4 +static inline gfn_t guest_l4e_get_gfn(guest_l4e_t gl4e) +{ return _gfn(l4e_get_paddr(gl4e) >> PAGE_SHIFT); } +#endif + +static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e) +{ return l1e_get_flags(gl1e); } +static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e) +{ return l2e_get_flags(gl2e); } +static inline u32 guest_l3e_get_flags(guest_l3e_t gl3e) +{ return l3e_get_flags(gl3e); } +#if GUEST_PAGING_LEVELS >= 4 +static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e) +{ return l4e_get_flags(gl4e); } +#endif + +static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags) +{ l1e_add_flags(gl1e, flags); return gl1e; } +static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags) +{ l2e_add_flags(gl2e, flags); return gl2e; } +static inline guest_l3e_t guest_l3e_add_flags(guest_l3e_t gl3e, u32 flags) +{ l3e_add_flags(gl3e, flags); return gl3e; } +#if GUEST_PAGING_LEVELS >= 4 +static inline guest_l4e_t guest_l4e_add_flags(guest_l4e_t gl4e, u32 flags) +{ l4e_add_flags(gl4e, flags); return gl4e; } +#endif + +static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags) +{ return l1e_from_pfn(gfn_x(gfn), flags); } +static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags) +{ return l2e_from_pfn(gfn_x(gfn), flags); } +static inline guest_l3e_t guest_l3e_from_gfn(gfn_t gfn, u32 flags) +{ return l3e_from_pfn(gfn_x(gfn), flags); } +#if GUEST_PAGING_LEVELS >= 4 +static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags) +{ return l4e_from_pfn(gfn_x(gfn), flags); } +#endif + +#define guest_l1_table_offset(a) 
l1_table_offset(a) +#define guest_l2_table_offset(a) l2_table_offset(a) +#define guest_l3_table_offset(a) l3_table_offset(a) +#define guest_l4_table_offset(a) l4_table_offset(a) + +/* The shadow types needed for the various levels. */ +#if GUEST_PAGING_LEVELS == 3 +#define PGC_SH2_l1_shadow PGC_SH2_l1_pae_shadow +#define PGC_SH2_fl1_shadow PGC_SH2_fl1_pae_shadow +#define PGC_SH2_l2_shadow PGC_SH2_l2_pae_shadow +#define PGC_SH2_l2h_shadow PGC_SH2_l2h_pae_shadow +#define PGC_SH2_l3_shadow PGC_SH2_l3_pae_shadow +#else +#define PGC_SH2_l1_shadow PGC_SH2_l1_64_shadow +#define PGC_SH2_fl1_shadow PGC_SH2_fl1_64_shadow +#define PGC_SH2_l2_shadow PGC_SH2_l2_64_shadow +#define PGC_SH2_l3_shadow PGC_SH2_l3_64_shadow +#define PGC_SH2_l4_shadow PGC_SH2_l4_64_shadow +#endif + +#endif /* GUEST_PAGING_LEVELS != 2 */ + +#define VALID_GFN(m) (m != INVALID_GFN) + +static inline int +valid_gfn(gfn_t m) +{ + return VALID_GFN(gfn_x(m)); +} + +#if GUEST_PAGING_LEVELS == 2 +#define PGC_SH2_guest_root_type PGC_SH2_l2_32_shadow +#elif GUEST_PAGING_LEVELS == 3 +#define PGC_SH2_guest_root_type PGC_SH2_l3_pae_shadow +#else +#define PGC_SH2_guest_root_type PGC_SH2_l4_64_shadow +#endif + +/* Translation between mfns and gfns */ +static inline mfn_t +vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn) +{ + return sh2_vcpu_gfn_to_mfn(v, gfn_x(gfn)); +} + +static inline gfn_t +mfn_to_gfn(struct domain *d, mfn_t mfn) +{ + return _gfn(sh2_mfn_to_gfn(d, mfn)); +} + +static inline paddr_t +gfn_to_paddr(gfn_t gfn) +{ + return ((paddr_t)gfn_x(gfn)) << PAGE_SHIFT; +} + +/* Type used for recording a walk through guest pagetables. It is + * filled in by the pagetable walk function, and also used as a cache + * for later walks. + * Any non-null pointer in this structure represents a mapping of guest + * memory. We must always call walk_init() before using a walk_t, and + * call walk_unmap() when we're done. + * The "Effective l1e" field is used when there isn't an l1e to point to, + * but we have fabricated an l1e for propagation to the shadow (e.g., + * for splintering guest superpages into many shadow l1 entries). */ +typedef struct shadow2_walk_t walk_t; +struct shadow2_walk_t +{ + unsigned long va; /* Address we were looking for */ +#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 + guest_l4e_t *l4e; /* Pointer to guest's level 4 entry */ +#endif + guest_l3e_t *l3e; /* Pointer to guest's level 3 entry */ +#endif + guest_l2e_t *l2e; /* Pointer to guest's level 2 entry */ + guest_l1e_t *l1e; /* Pointer to guest's level 1 entry */ + guest_l1e_t eff_l1e; /* Effective level 1 entry */ +#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 + mfn_t l4mfn; /* MFN that the level 4 entry is in */ +#endif + mfn_t l3mfn; /* MFN that the level 3 entry is in */ +#endif + mfn_t l2mfn; /* MFN that the level 2 entry is in */ + mfn_t l1mfn; /* MFN that the level 1 entry is in */ +}; + + +/* X86 error code bits: + * These bits certainly ought to be defined somewhere other than here, + * but until that place is determined, here they sit. 
+ * + * "PFEC" == "Page Fault Error Code" + */ +#define X86_PFEC_PRESENT 1 /* 0 == page was not present */ +#define X86_PFEC_WRITE_FAULT 2 /* 0 == reading, 1 == writing */ +#define X86_PFEC_SUPERVISOR_FAULT 4 /* 0 == supervisor-mode, 1 == user */ +#define X86_PFEC_RESERVED_BIT_FAULT 8 /* 1 == reserved bits set in pte */ +#define X86_PFEC_INSN_FETCH_FAULT 16 /* 0 == normal, 1 == instr'n fetch */ + +/* macros for dealing with the naming of the internal function names of the + * shadow code's external entry points. + */ +#define INTERNAL_NAME(name) \ + SHADOW2_INTERNAL_NAME(name, SHADOW_PAGING_LEVELS, GUEST_PAGING_LEVELS) + +/* macros for renaming the primary entry points, so that they are more + * easily distinguished from a debugger + */ +#define sh2_page_fault INTERNAL_NAME(sh2_page_fault) +#define sh2_invlpg INTERNAL_NAME(sh2_invlpg) +#define sh2_gva_to_gpa INTERNAL_NAME(sh2_gva_to_gpa) +#define sh2_gva_to_gfn INTERNAL_NAME(sh2_gva_to_gfn) +#define sh2_update_cr3 INTERNAL_NAME(sh2_update_cr3) +#define sh2_remove_write_access INTERNAL_NAME(sh2_remove_write_access) +#define sh2_remove_all_mappings INTERNAL_NAME(sh2_remove_all_mappings) +#define sh2_remove_l1_shadow INTERNAL_NAME(sh2_remove_l1_shadow) +#define sh2_remove_l2_shadow INTERNAL_NAME(sh2_remove_l2_shadow) +#define sh2_remove_l3_shadow INTERNAL_NAME(sh2_remove_l3_shadow) +#define sh2_map_and_validate_gl4e INTERNAL_NAME(sh2_map_and_validate_gl4e) +#define sh2_map_and_validate_gl3e INTERNAL_NAME(sh2_map_and_validate_gl3e) +#define sh2_map_and_validate_gl2e INTERNAL_NAME(sh2_map_and_validate_gl2e) +#define sh2_map_and_validate_gl2he INTERNAL_NAME(sh2_map_and_validate_gl2he) +#define sh2_map_and_validate_gl1e INTERNAL_NAME(sh2_map_and_validate_gl1e) +#define sh2_destroy_l4_shadow INTERNAL_NAME(sh2_destroy_l4_shadow) +#define sh2_destroy_l3_shadow INTERNAL_NAME(sh2_destroy_l3_shadow) +#define sh2_destroy_l3_subshadow INTERNAL_NAME(sh2_destroy_l3_subshadow) +#define sh2_unpin_all_l3_subshadows INTERNAL_NAME(sh2_unpin_all_l3_subshadows) +#define sh2_destroy_l2_shadow INTERNAL_NAME(sh2_destroy_l2_shadow) +#define sh2_destroy_l1_shadow INTERNAL_NAME(sh2_destroy_l1_shadow) +#define sh2_unhook_32b_mappings INTERNAL_NAME(sh2_unhook_32b_mappings) +#define sh2_unhook_pae_mappings INTERNAL_NAME(sh2_unhook_pae_mappings) +#define sh2_unhook_64b_mappings INTERNAL_NAME(sh2_unhook_64b_mappings) +#define shadow2_entry INTERNAL_NAME(shadow2_entry) +#define sh2_detach_old_tables INTERNAL_NAME(sh2_detach_old_tables) +#define sh2_x86_emulate_write INTERNAL_NAME(sh2_x86_emulate_write) +#define sh2_x86_emulate_cmpxchg INTERNAL_NAME(sh2_x86_emulate_cmpxchg) +#define sh2_x86_emulate_cmpxchg8b INTERNAL_NAME(sh2_x86_emulate_cmpxchg8b) +#define sh2_audit_l1_table INTERNAL_NAME(sh2_audit_l1_table) +#define sh2_audit_fl1_table INTERNAL_NAME(sh2_audit_fl1_table) +#define sh2_audit_l2_table INTERNAL_NAME(sh2_audit_l2_table) +#define sh2_audit_l3_table INTERNAL_NAME(sh2_audit_l3_table) +#define sh2_audit_l4_table INTERNAL_NAME(sh2_audit_l4_table) +#define sh2_guess_wrmap INTERNAL_NAME(sh2_guess_wrmap) +#define sh2_clear_shadow_entry INTERNAL_NAME(sh2_clear_shadow_entry) + +/* sh2_make_monitor_table only depends on the number of shadow levels */ +#define sh2_make_monitor_table \ + SHADOW2_INTERNAL_NAME(sh2_make_monitor_table, \ + SHADOW_PAGING_LEVELS, \ + SHADOW_PAGING_LEVELS) +#define sh2_destroy_monitor_table \ + SHADOW2_INTERNAL_NAME(sh2_destroy_monitor_table, \ + SHADOW_PAGING_LEVELS, \ + SHADOW_PAGING_LEVELS) + + +#if GUEST_PAGING_LEVELS == 3 +/* + * Accounting 
+#if GUEST_PAGING_LEVELS == 3
+/*
+ * Accounting information stored in the shadow of PAE Guest L3 pages.
+ * Because these "L3 pages" are only 32 bytes, it is inconvenient to keep
+ * various refcounts, etc., on the page_info of their page. We provide extra
+ * bookkeeping space in the shadow itself, and this is the structure
+ * definition for that bookkeeping information.
+ */
+struct pae_l3_bookkeeping {
+    u32 vcpus;    /* bitmap of which vcpus are currently storing
+                   * copies of this 32-byte page */
+    u32 refcount; /* refcount for this 32-byte page */
+    u8 pinned;    /* is this 32-byte page pinned or not? */
+};
+
+// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
+#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *) \
+                            (((unsigned long)(_ptr) & ~31) + 32))
+
+static void sh2_destroy_l3_subshadow(struct vcpu *v,
+                                     shadow_l3e_t *sl3e);
+
+/* Increment a subshadow ref.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. */
+static inline void sh2_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
+{
+    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+    /* First ref to the subshadow takes a ref to the full shadow */
+    if ( bk->refcount == 0 )
+        sh2_get_ref(smfn, 0);
+    if ( unlikely(++(bk->refcount) == 0) )
+    {
+        SHADOW2_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH2_PRI_mfn " sh=%p\n",
+                       mfn_x(smfn), sl3e);
+        domain_crash_synchronous();
+    }
+}
+
+/* Decrement a subshadow ref.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. Calling this may cause the
+ * entire shadow to disappear, so the caller must immediately unmap
+ * the pointer after calling. */
+static inline void sh2_put_ref_l3_subshadow(struct vcpu *v,
+                                            shadow_l3e_t *sl3e,
+                                            mfn_t smfn)
+{
+    struct pae_l3_bookkeeping *bk;
+
+    bk = sl3p_to_info(sl3e);
+
+    ASSERT(bk->refcount > 0);
+    if ( --(bk->refcount) == 0 )
+    {
+        /* Need to destroy this subshadow */
+        sh2_destroy_l3_subshadow(v, sl3e);
+        /* Last ref to the subshadow had a ref to the full shadow */
+        sh2_put_ref(v, smfn, 0);
+    }
+}
+
+/* Pin a subshadow.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. */
+static inline void sh2_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
+{
+    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+#if 0
+    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
+                      __func__, mfn_x(smfn),
+                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
+#endif
+
+    if ( !bk->pinned )
+    {
+        bk->pinned = 1;
+        sh2_get_ref_l3_subshadow(sl3e, smfn);
+    }
+}
+
+/* Unpin a subshadow.
+ * Called with a pointer to the subshadow, and the mfn of the
+ * *first* page of the overall shadow. Calling this may cause the
+ * entire shadow to disappear, so the caller must immediately unmap
+ * the pointer after calling. */
+static inline void sh2_unpin_l3_subshadow(struct vcpu *v,
+                                          shadow_l3e_t *sl3e,
+                                          mfn_t smfn)
+{
+    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
+
+#if 0
+    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
+                      __func__, mfn_x(smfn),
+                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
+#endif
+
+    if ( bk->pinned )
+    {
+        bk->pinned = 0;
+        sh2_put_ref_l3_subshadow(v, sl3e, smfn);
+    }
+}
+
+#endif /* GUEST_PAGING_LEVELS == 3 */
+
+#if SHADOW_PAGING_LEVELS == 3
+#define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
+#endif
+
+#if SHADOW_PAGING_LEVELS == 2
+#define SH2_PRI_pte "08x"
+#else /* SHADOW_PAGING_LEVELS >= 3 */
+#ifndef __x86_64__
+#define SH2_PRI_pte "016llx"
+#else
+#define SH2_PRI_pte "016lx"
+#endif
+#endif /* SHADOW_PAGING_LEVELS >= 3 */
+
+#if GUEST_PAGING_LEVELS == 2
+#define SH2_PRI_gpte "08x"
+#else /* GUEST_PAGING_LEVELS >= 3 */
+#ifndef __x86_64__
+#define SH2_PRI_gpte "016llx"
+#else
+#define SH2_PRI_gpte "016lx"
+#endif
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+static inline u32
+accumulate_guest_flags(walk_t *gw)
+{
+    u32 accumulated_flags;
+
+    // We accumulate the permission flags with bitwise ANDing.
+    // This works for the PRESENT bit, RW bit, and USER bit.
+    // For the NX bit, however, the polarity is wrong, so we accumulate the
+    // inverse of the NX bit.
+    //
+    accumulated_flags = guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
+
+    // Note that PAE guests do not have USER or RW or NX bits in their L3s.
+    //
+#if GUEST_PAGING_LEVELS == 3
+    accumulated_flags &=
+        ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
+#elif GUEST_PAGING_LEVELS >= 4
+    accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
+#endif
+
+    // Finally, revert the NX bit back to its original polarity
+    accumulated_flags ^= _PAGE_NX_BIT;
+
+    return accumulated_flags;
+}
+
+#endif /* _XEN_SHADOW2_TYPES_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
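The XOR trick in accumulate_guest_flags above is worth spelling out: NX is a "deny" bit, so across levels it must effectively be ORed, while PRESENT/RW/USER must be ANDed. Flipping the NX bit's polarity first lets a single AND accumulate everything, with a final XOR restoring the original sense. A self-contained check of that logic, with flag values chosen only for illustration:

    #include <assert.h>

    #define _PAGE_PRESENT 0x001u
    #define _PAGE_RW      0x002u
    #define _PAGE_NX_BIT  0x800u   /* bit position invented for the sketch */

    static unsigned int accumulate(unsigned int l1f, unsigned int l2f)
    {
        unsigned int acc = l1f ^ _PAGE_NX_BIT;  /* invert NX polarity */
        acc &= l2f ^ _PAGE_NX_BIT;              /* AND accumulates all bits */
        return acc ^ _PAGE_NX_BIT;              /* restore NX polarity */
    }

    int main(void)
    {
        /* NX set at either level makes the combined mapping no-execute. */
        assert(accumulate(_PAGE_PRESENT | _PAGE_RW | _PAGE_NX_BIT,
                          _PAGE_PRESENT | _PAGE_RW)
               == (_PAGE_PRESENT | _PAGE_RW | _PAGE_NX_BIT));
        /* RW must be granted at both levels to survive. */
        assert(accumulate(_PAGE_PRESENT | _PAGE_RW, _PAGE_PRESENT)
               == _PAGE_PRESENT);
        return 0;
    }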
diff --git a/xen/include/asm-x86/shadow2.h b/xen/include/asm-x86/shadow2.h
new file mode 100644
index 0000000000..94de7781f8
--- /dev/null
+++ b/xen/include/asm-x86/shadow2.h
@@ -0,0 +1,627 @@
+/******************************************************************************
+ * include/asm-x86/shadow2.h
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_SHADOW2_H
+#define _XEN_SHADOW2_H
+
+#include <public/dom0_ops.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/flushtlb.h>
+
+/* Shadow PT operation mode: shadow-mode variable in arch_domain. */
+
+#define SHM2_shift 10
+/* We're in one of the shadow modes */
+#define SHM2_enable (DOM0_SHADOW2_CONTROL_FLAG_ENABLE << SHM2_shift)
+/* Refcounts based on shadow tables instead of guest tables */
+#define SHM2_refcounts (DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT << SHM2_shift)
+/* Enable log dirty mode */
+#define SHM2_log_dirty (DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY << SHM2_shift)
+/* Xen does p2m translation, not guest */
+#define SHM2_translate (DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE << SHM2_shift)
+/* Xen does not steal address space from the domain for its own bookkeeping;
+ * requires VT or similar mechanisms */
+#define SHM2_external (DOM0_SHADOW2_CONTROL_FLAG_EXTERNAL << SHM2_shift)
+
+#define shadow2_mode_enabled(_d) ((_d)->arch.shadow2_mode)
+#define shadow2_mode_refcounts(_d) ((_d)->arch.shadow2_mode & SHM2_refcounts)
+#define shadow2_mode_log_dirty(_d) ((_d)->arch.shadow2_mode & SHM2_log_dirty)
+#define shadow2_mode_translate(_d) ((_d)->arch.shadow2_mode & SHM2_translate)
+#define shadow2_mode_external(_d) ((_d)->arch.shadow2_mode & SHM2_external)
+
+/* Xen traps & emulates all reads of all page table pages:
+ * not yet supported.
+ */
+#define shadow2_mode_trap_reads(_d) ({ (void)(_d); 0; })
+
+// Flags used in the return value of the shadow_set_lXe() functions...
+#define SHADOW2_SET_CHANGED 0x1
+#define SHADOW2_SET_FLUSH 0x2
+#define SHADOW2_SET_ERROR 0x4
+#define SHADOW2_SET_L3PAE_RECOPY 0x8
+
+// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
+#ifdef __x86_64__
+#define pv_32bit_guest(_v) 0 // not yet supported
+#else
+#define pv_32bit_guest(_v) (!hvm_guest(_v))
+#endif
+
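The SHM2_* values are just the dom0 control flags (defined in the dom0_ops.h hunk at the end of this patch) shifted up by SHM2_shift, so a single word in arch_domain can carry the whole mode. A standalone sketch of how the modes compose and are tested, using only values taken from this patch (the particular flag combination is chosen for illustration):

    #include <assert.h>

    #define DOM0_SHADOW2_CONTROL_FLAG_ENABLE    (1 << 0)
    #define DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT  (1 << 1)
    #define DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY (1 << 2)
    #define DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE (1 << 3)

    #define SHM2_shift 10
    #define SHM2_enable    (DOM0_SHADOW2_CONTROL_FLAG_ENABLE    << SHM2_shift)
    #define SHM2_refcounts (DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT  << SHM2_shift)
    #define SHM2_log_dirty (DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY << SHM2_shift)
    #define SHM2_translate (DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE << SHM2_shift)

    int main(void)
    {
        /* e.g. a refcounted domain with log-dirty tracking switched on */
        unsigned int shadow2_mode = SHM2_enable | SHM2_refcounts | SHM2_log_dirty;

        assert(shadow2_mode & SHM2_log_dirty);    /* log-dirty is on */
        assert(!(shadow2_mode & SHM2_translate)); /* no p2m translation */
        return 0;
    }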
+/* The shadow2 lock.
+ *
+ * This lock is per-domain. It is intended to allow us to make atomic
+ * updates to the software TLB that the shadow tables provide.
+ *
+ * Specifically, it protects:
+ *   - all changes to shadow page table pages
+ *   - the shadow hash table
+ *   - the shadow page allocator
+ *   - all changes to guest page table pages; if/when the notion of
+ *     out-of-sync pages is added to this code, then the shadow lock is
+ *     protecting all guest page table pages which are not currently listed
+ *     as both guest-writable and out-of-sync...
+ *     XXX -- need to think about this relative to writable page tables.
+ *   - all changes to the page_info->tlbflush_timestamp
+ *   - the page_info->count fields on shadow pages
+ *   - the shadow dirty bit array and count
+ *   - XXX
+ */
+#ifndef CONFIG_SMP
+#error shadow2.h currently requires CONFIG_SMP
+#endif
+
+#define shadow2_lock_init(_d) \
+    do { \
+        spin_lock_init(&(_d)->arch.shadow2_lock); \
+        (_d)->arch.shadow2_locker = -1; \
+        (_d)->arch.shadow2_locker_function = "nobody"; \
+    } while (0)
+
+#define shadow2_lock_is_acquired(_d) \
+    (current->processor == (_d)->arch.shadow2_locker)
+
+#define shadow2_lock(_d) \
+    do { \
+        if ( unlikely((_d)->arch.shadow2_locker == current->processor) ) \
+        { \
+            printk("Error: shadow2 lock held by %s\n", \
+                   (_d)->arch.shadow2_locker_function); \
+            BUG(); \
+        } \
+        spin_lock(&(_d)->arch.shadow2_lock); \
+        ASSERT((_d)->arch.shadow2_locker == -1); \
+        (_d)->arch.shadow2_locker = current->processor; \
+        (_d)->arch.shadow2_locker_function = __func__; \
+    } while (0)
+
+#define shadow2_unlock(_d) \
+    do { \
+        ASSERT((_d)->arch.shadow2_locker == current->processor); \
+        (_d)->arch.shadow2_locker = -1; \
+        (_d)->arch.shadow2_locker_function = "nobody"; \
+        spin_unlock(&(_d)->arch.shadow2_lock); \
+    } while (0)
+
+/*
+ * Levels of self-test and paranoia
+ * XXX should go in config files somewhere?
+ */
+#define SHADOW2_AUDIT_HASH 0x01 /* Check current hash bucket */
+#define SHADOW2_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */
+#define SHADOW2_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */
+#define SHADOW2_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */
+#define SHADOW2_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */
+#define SHADOW2_AUDIT_P2M 0x20 /* Check the p2m table */
+
+#ifdef NDEBUG
+#define SHADOW2_AUDIT 0
+#define SHADOW2_AUDIT_ENABLE 0
+#else
+#define SHADOW2_AUDIT 0x15 /* Basic audit of all except p2m. */
+#define SHADOW2_AUDIT_ENABLE shadow2_audit_enable
+extern int shadow2_audit_enable;
+#endif
+
+/*
+ * Levels of optimization
+ * XXX should go in config files somewhere?
+ */
+#define SH2OPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */
+#define SH2OPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */
+
+#define SHADOW2_OPTIMIZATIONS 0x03
+
+
+/* With shadow pagetables, the different kinds of addresses start
+ * to get confusing.
+ *
+ * Virtual addresses are what they usually are: the addresses that are used
+ * to access memory while the guest is running. The MMU translates from
+ * virtual addresses to machine addresses.
+ *
+ * (Pseudo-)physical addresses are the abstraction of physical memory the
+ * guest uses for allocation and so forth. For the purposes of this code,
+ * we can largely ignore them.
+ *
+ * Guest frame numbers (gfns) are the entries that the guest puts in its
+ * pagetables. For normal paravirtual guests, they are actual frame numbers,
+ * with the translation done by the guest.
+ *
+ * Machine frame numbers (mfns) are the entries that the hypervisor puts
+ * in the shadow page tables.
+ *
+ * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
+ * to a "machine frame number, from the guest's perspective", or in other
+ * words, pseudo-physical frame numbers. However, in the shadow code, the
+ * term "gmfn" means "the mfn of a guest page"; this combines naturally with
+ * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
+ * guest L2 page), etc...
+ */
+
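The shadow2_lock/unlock macros above layer recursion detection and owner bookkeeping on top of an ordinary spinlock. The same pattern, reduced to portable C as a sketch (pthreads stand in for Xen's spinlocks, and a plain thread id for current->processor):

    #include <assert.h>
    #include <pthread.h>

    struct domain {
        pthread_mutex_t shadow2_lock;
        int shadow2_locker;                  /* thread id, -1 == unlocked */
        const char *shadow2_locker_function; /* who took it, for error reports */
    };

    static void shadow2_lock(struct domain *d, int me, const char *fn)
    {
        assert(d->shadow2_locker != me);     /* catch recursive locking early */
        pthread_mutex_lock(&d->shadow2_lock);
        assert(d->shadow2_locker == -1);
        d->shadow2_locker = me;
        d->shadow2_locker_function = fn;
    }

    static void shadow2_unlock(struct domain *d, int me)
    {
        assert(d->shadow2_locker == me);     /* only the owner may unlock */
        d->shadow2_locker = -1;
        d->shadow2_locker_function = "nobody";
        pthread_mutex_unlock(&d->shadow2_lock);
    }

    int main(void)
    {
        struct domain d = { PTHREAD_MUTEX_INITIALIZER, -1, "nobody" };
        shadow2_lock(&d, 0, __func__);
        /* ... critical section: shadow table updates would go here ... */
        shadow2_unlock(&d, 0);
        return 0;
    }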
+/* With this defined, we do some ugly things to force the compiler to
+ * give us type safety between mfns and gfns and other integers.
+ * TYPE_SAFE(int,foo) defines a foo_t, and _foo() and foo_x() functions
+ * that translate between int and foo_t.
+ *
+ * It does have some performance cost because the types now have
+ * a different storage attribute, so we may not want it on all the time. */
+#ifndef NDEBUG
+#define TYPE_SAFETY 1
+#endif
+
+#ifdef TYPE_SAFETY
+#define TYPE_SAFE(_type,_name) \
+typedef struct { _type _name; } _name##_t; \
+static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
+static inline _type _name##_x(_name##_t n) { return n._name; }
+#else
+#define TYPE_SAFE(_type,_name) \
+typedef _type _name##_t; \
+static inline _name##_t _##_name(_type n) { return n; } \
+static inline _type _name##_x(_name##_t n) { return n; }
+#endif
+
+TYPE_SAFE(unsigned long,mfn)
+#define SH2_PRI_mfn "05lx"
+
+static inline int
+valid_mfn(mfn_t m)
+{
+    return VALID_MFN(mfn_x(m));
+}
+
+static inline mfn_t
+pagetable_get_mfn(pagetable_t pt)
+{
+    return _mfn(pagetable_get_pfn(pt));
+}
+
+static inline pagetable_t
+pagetable_from_mfn(mfn_t mfn)
+{
+    return pagetable_from_pfn(mfn_x(mfn));
+}
+
+static inline int
+shadow2_vcpu_mode_translate(struct vcpu *v)
+{
+    // Returns true if this VCPU needs to be using the P2M table to translate
+    // between GFNs and MFNs.
+    //
+    // This is true of translated HVM domains on a vcpu which has paging
+    // enabled. (HVM vcpus with paging disabled are using the p2m table as
+    // their paging table, so no translation occurs in this case.)
+    //
+    return v->vcpu_flags & VCPUF_shadow2_translate;
+}
+
+
+/**************************************************************************/
+/* Mode-specific entry points into the shadow code */
+
+struct x86_emulate_ctxt;
+struct shadow2_entry_points {
+    int (*page_fault)(struct vcpu *v, unsigned long va,
+                      struct cpu_user_regs *regs);
+    int (*invlpg)(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gpa)(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gfn)(struct vcpu *v, unsigned long va);
+    void (*update_cr3)(struct vcpu *v);
+    int (*map_and_validate_gl1e)(struct vcpu *v, mfn_t gmfn,
+                                 void *new_guest_entry, u32 size);
+    int (*map_and_validate_gl2e)(struct vcpu *v, mfn_t gmfn,
+                                 void *new_guest_entry, u32 size);
+    int (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn,
+                                  void *new_guest_entry, u32 size);
+    int (*map_and_validate_gl3e)(struct vcpu *v, mfn_t gmfn,
+                                 void *new_guest_entry, u32 size);
+    int (*map_and_validate_gl4e)(struct vcpu *v, mfn_t gmfn,
+                                 void *new_guest_entry, u32 size);
+    void (*detach_old_tables)(struct vcpu *v);
+    int (*x86_emulate_write)(struct vcpu *v, unsigned long va,
+                             void *src, u32 bytes,
+                             struct x86_emulate_ctxt *ctxt);
+    int (*x86_emulate_cmpxchg)(struct vcpu *v, unsigned long va,
+                               unsigned long old,
+                               unsigned long new,
+                               unsigned int bytes,
+                               struct x86_emulate_ctxt *ctxt);
+    int (*x86_emulate_cmpxchg8b)(struct vcpu *v, unsigned long va,
+                                 unsigned long old_lo,
+                                 unsigned long old_hi,
+                                 unsigned long new_lo,
+                                 unsigned long new_hi,
+                                 struct x86_emulate_ctxt *ctxt);
+    mfn_t (*make_monitor_table)(struct vcpu *v);
+    void (*destroy_monitor_table)(struct vcpu *v, mfn_t mmfn);
+#if SHADOW2_OPTIMIZATIONS & SH2OPT_WRITABLE_HEURISTIC
+    int (*guess_wrmap)(struct vcpu *v,
+                       unsigned long vaddr, mfn_t gmfn);
+#endif
+    /* For outsiders to tell what mode we're in */
+    unsigned int shadow_levels;
+    unsigned int guest_levels;
+};
+
+static inline int shadow2_guest_paging_levels(struct vcpu *v)
+{
+    ASSERT(v->arch.shadow2 != NULL);
+    return v->arch.shadow2->guest_levels;
+}
+
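Each compiled (shadow levels, guest levels) mode provides one such table of function pointers, and v->arch.shadow2 selects the active one, so callers get mode dispatch through a single indirection. A reduced, self-contained sketch of the pattern, with names invented for illustration:

    #include <stdio.h>

    struct vcpu;   /* forward declaration, as in the real header */

    /* A cut-down stand-in for struct shadow2_entry_points. */
    struct entry_points {
        int (*page_fault)(struct vcpu *v, unsigned long va);
        unsigned int shadow_levels, guest_levels;
    };

    struct vcpu { const struct entry_points *shadow2; };

    static int pf_shadow3_guest3(struct vcpu *v, unsigned long va)
    { (void)v; printf("3-on-3 page fault at %#lx\n", va); return 1; }

    /* One table exists per compiled mode. */
    static const struct entry_points mode_3_3 = { pf_shadow3_guest3, 3, 3 };

    int main(void)
    {
        struct vcpu v = { &mode_3_3 };
        /* Callers never name a mode directly; they indirect via the vcpu. */
        return !v.shadow2->page_fault(&v, 0xdeadb000UL);
    }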
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Turning on shadow2 test mode */
+int shadow2_test_enable(struct domain *d);
+
+/* Handler for shadow control ops: enabling and disabling shadow modes,
+ * and log-dirty bitmap ops all happen through here. */
+int shadow2_control_op(struct domain *d,
+                       dom0_shadow_control_t *sc,
+                       XEN_GUEST_HANDLE(dom0_op_t) u_dom0_op);
+
+/* Call when destroying a domain */
+void shadow2_teardown(struct domain *d);
+
+/* Call once all of the references to the domain have gone away */
+void shadow2_final_teardown(struct domain *d);
+
+
+/* Mark a page as dirty in the bitmap */
+void sh2_do_mark_dirty(struct domain *d, mfn_t gmfn);
+static inline void mark_dirty(struct domain *d, unsigned long gmfn)
+{
+    if ( shadow2_mode_log_dirty(d) )
+    {
+        shadow2_lock(d);
+        sh2_do_mark_dirty(d, _mfn(gmfn));
+        shadow2_unlock(d);
+    }
+}
+
+/* Internal version, for when the shadow lock is already held */
+static inline void sh2_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    ASSERT(shadow2_lock_is_acquired(d));
+    if ( shadow2_mode_log_dirty(d) )
+        sh2_do_mark_dirty(d, gmfn);
+}
+
+static inline int
+shadow2_fault(unsigned long va, struct cpu_user_regs *regs)
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults. Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
+{
+    struct vcpu *v = current;
+    perfc_incrc(shadow2_fault);
+    return v->arch.shadow2->page_fault(v, va, regs);
+}
+
+static inline int
+shadow2_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg. Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+    return v->arch.shadow2->invlpg(v, va);
+}
+
+static inline unsigned long
+shadow2_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to the physical address
+ * that the *guest* pagetables would map it to. */
+{
+    return v->arch.shadow2->gva_to_gpa(v, va);
+}
+
+static inline unsigned long
+shadow2_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to the frame number
+ * that the *guest* pagetables would map it to. */
+{
+    return v->arch.shadow2->gva_to_gfn(v, va);
+}
+
+static inline void
+shadow2_update_cr3(struct vcpu *v)
+/* Updates all the things that are derived from the guest's CR3.
+ * Called when the guest changes CR3. */
+{
+    shadow2_lock(v->domain);
+    v->arch.shadow2->update_cr3(v);
+    shadow2_unlock(v->domain);
+}
+
+
+/* Should be called after CR3 is updated.
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+ *
+ * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
+ * shadow_vtable, etc).
+ *
+ * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * for HVM guests, arch.monitor_table and hvm's guest CR3.
+ *
+ * Updates ref counts on shadow tables appropriately.
+ * For PAE, relocates L3 entries, if necessary, into low memory.
+ */
+static inline void update_cr3(struct vcpu *v)
+{
+    unsigned long cr3_mfn = 0;
+
+    if ( shadow2_mode_enabled(v->domain) )
+    {
+        shadow2_update_cr3(v);
+        return;
+    }
+
+#if CONFIG_PAGING_LEVELS == 4
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
+    else
+#endif
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+
+    /* Update vcpu->arch.cr3 */
+    BUG_ON(cr3_mfn == 0);
+    make_cr3(v, cr3_mfn);
+}
+
+extern void sh2_update_paging_modes(struct vcpu *v);
+
+/* Should be called to initialise paging structures if the paging mode
+ * has changed, and when bringing up a VCPU for the first time. */
+static inline void shadow2_update_paging_modes(struct vcpu *v)
+{
+    ASSERT(shadow2_mode_enabled(v->domain));
+    shadow2_lock(v->domain);
+    sh2_update_paging_modes(v);
+    shadow2_unlock(v->domain);
+}
+
+static inline void
+shadow2_detach_old_tables(struct vcpu *v)
+{
+    v->arch.shadow2->detach_old_tables(v);
+}
+
+static inline mfn_t
+shadow2_make_monitor_table(struct vcpu *v)
+{
+    return v->arch.shadow2->make_monitor_table(v);
+}
+
+static inline void
+shadow2_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+    v->arch.shadow2->destroy_monitor_table(v, mmfn);
+}
+
+/* Validate a pagetable change from the guest and update the shadows. */
+extern int shadow2_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+                                        void *new_guest_entry);
+
+/* Update the shadows in response to a pagetable write from an HVM guest */
+extern void shadow2_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+                                            void *entry, u32 size);
+
+/* Remove all writeable mappings of a guest frame from the shadows.
+ * Returns non-zero if we need to flush TLBs.
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+extern int shadow2_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
+                                       unsigned int level,
+                                       unsigned long fault_addr);
+
+/* Remove all mappings of the guest mfn from the shadows.
+ * Returns non-zero if we need to flush TLBs. */
+extern int shadow2_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
+
+/* This is an HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+void
+shadow2_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
+
+/* Remove all shadows of the guest mfn. */
+extern void sh2_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
+static inline void shadow2_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
+{
+    sh2_remove_shadows(v, gmfn, 1);
+}
+
+/* Add a page to a domain */
+void
+shadow2_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+                               unsigned long mfn);
+
+/* Remove a page from a domain */
+void
+shadow2_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+                                  unsigned long mfn);
+
+/*
+ * Definitions for the shadow2_flags field in page_info.
+ * These flags are stored on *guest* pages...
+ * Bits 1-13 are encodings for the shadow types.
+ */
+#define PGC_SH2_type_to_index(_type) ((_type) >> PGC_SH2_type_shift)
+#define SH2F_page_type_mask \
+    (((1u << PGC_SH2_type_to_index(PGC_SH2_max_shadow + 1u)) - 1u) - \
+     ((1u << PGC_SH2_type_to_index(PGC_SH2_min_shadow)) - 1u))
+
+#define SH2F_L1_32 (1u << PGC_SH2_type_to_index(PGC_SH2_l1_32_shadow))
+#define SH2F_FL1_32 (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_32_shadow))
+#define SH2F_L2_32 (1u << PGC_SH2_type_to_index(PGC_SH2_l2_32_shadow))
+#define SH2F_L1_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l1_pae_shadow))
+#define SH2F_FL1_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_pae_shadow))
+#define SH2F_L2_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l2_pae_shadow))
+#define SH2F_L2H_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l2h_pae_shadow))
+#define SH2F_L3_PAE (1u << PGC_SH2_type_to_index(PGC_SH2_l3_pae_shadow))
+#define SH2F_L1_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l1_64_shadow))
+#define SH2F_FL1_64 (1u << PGC_SH2_type_to_index(PGC_SH2_fl1_64_shadow))
+#define SH2F_L2_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l2_64_shadow))
+#define SH2F_L3_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l3_64_shadow))
+#define SH2F_L4_64 (1u << PGC_SH2_type_to_index(PGC_SH2_l4_64_shadow))
+
+/* Used for hysteresis when automatically unhooking mappings on fork/exit */
+#define SH2F_unhooked_mappings (1u<<31)
+
+/*
+ * Allocation of shadow pages
+ */
+
+/* Return the minimum acceptable number of shadow pages a domain needs */
+unsigned int shadow2_min_acceptable_pages(struct domain *d);
+
+/* Set the pool of shadow pages to the required number of MB.
+ * Input will be rounded up to at least shadow2_min_acceptable_pages().
+ * Returns 0 for success, 1 for failure. */
+unsigned int shadow2_set_allocation(struct domain *d,
+                                    unsigned int megabytes,
+                                    int *preempted);
+
+/* Return the size of the shadow2 pool, rounded up to the nearest MB */
+static inline unsigned int shadow2_get_allocation(struct domain *d)
+{
+    unsigned int pg = d->arch.shadow2_total_pages;
+    return ((pg >> (20 - PAGE_SHIFT))
+            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
+
+/*
+ * Linked list for chaining entries in the shadow hash table.
+ */
+struct shadow2_hash_entry {
+    struct shadow2_hash_entry *next;
+    mfn_t smfn;         /* MFN of the shadow */
+#ifdef __x86_64__ /* Shorten 'n' so we don't waste a whole word on storing 't' */
+    unsigned long n:56; /* MFN of guest PT or GFN of guest superpage */
+#else
+    unsigned long n;    /* MFN of guest PT or GFN of guest superpage */
+#endif
+    unsigned char t;    /* shadow type bits, or 0 for empty */
+};
+
+#define SHADOW2_HASH_BUCKETS 251
+/* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
+
+
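shadow2_get_allocation above converts a page count into whole megabytes, rounding any remainder up. The same arithmetic, lifted out into a standalone check (a PAGE_SHIFT of 12, i.e. 4kB pages, is assumed here):

    #include <assert.h>

    #define PAGE_SHIFT 12   /* 4kB pages, as on x86 */

    /* Same expression as shadow2_get_allocation, on an explicit page count. */
    static unsigned int shadow_pages_to_mb(unsigned int pg)
    {
        return (pg >> (20 - PAGE_SHIFT))
               + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0);
    }

    int main(void)
    {
        assert(shadow_pages_to_mb(0) == 0);
        assert(shadow_pages_to_mb(256) == 1);  /* exactly 1MB of 4kB pages */
        assert(shadow_pages_to_mb(257) == 2);  /* any remainder rounds up */
        return 0;
    }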
+#if SHADOW2_OPTIMIZATIONS & SH2OPT_CACHE_WALKS
+/* Optimization: cache the results of guest walks. This helps with MMIO
+ * and emulated writes, which tend to issue very similar walk requests
+ * repeatedly. We keep the results of the last few walks, and blow
+ * away the cache on guest cr3 write, mode change, or page fault. */
+
+#define SH2_WALK_CACHE_ENTRIES 4
+
+/* Rather than cache a guest walk, which would include mapped pointers
+ * to pages, we cache what a TLB would remember about the walk: the
+ * permissions and the l1 gfn */
+struct shadow2_walk_cache {
+    unsigned long va;  /* The virtual address (or 0 == unused) */
+    unsigned long gfn; /* The gfn from the effective l1e */
+    u32 permissions;   /* The aggregated permission bits */
+};
+#endif
+
+
+/**************************************************************************/
+/* Guest physmap (p2m) support */
+
+/* Walk another domain's P2M table, mapping pages as we go */
+extern mfn_t
+sh2_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+
+
+/* General conversion function from gfn to mfn */
+static inline mfn_t
+sh2_gfn_to_mfn(struct domain *d, unsigned long gfn)
+{
+    if ( !shadow2_mode_translate(d) )
+        return _mfn(gfn);
+    else if ( likely(current->domain == d) )
+        return _mfn(get_mfn_from_gpfn(gfn));
+    else
+        return sh2_gfn_to_mfn_foreign(d, gfn);
+}
+
+// vcpu-specific version of gfn_to_mfn(). This is where we hide the dirty
+// little secret that, for hvm guests with paging disabled, nearly all of the
+// shadow code actually thinks that the guest is running on *untranslated*
+// page tables (which is actually domain->phys_table).
+//
+static inline mfn_t
+sh2_vcpu_gfn_to_mfn(struct vcpu *v, unsigned long gfn)
+{
+    if ( !shadow2_vcpu_mode_translate(v) )
+        return _mfn(gfn);
+    if ( likely(current->domain == v->domain) )
+        return _mfn(get_mfn_from_gpfn(gfn));
+    return sh2_gfn_to_mfn_foreign(v->domain, gfn);
+}
+
+static inline unsigned long
+sh2_mfn_to_gfn(struct domain *d, mfn_t mfn)
+{
+    if ( shadow2_mode_translate(d) )
+        return get_gpfn_from_mfn(mfn_x(mfn));
+    else
+        return mfn_x(mfn);
+}
+
+
+
+#endif /* _XEN_SHADOW2_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/xen/include/asm-x86/shadow_64.h b/xen/include/asm-x86/shadow_64.h
deleted file mode 100644
index d9afbdca18..0000000000
--- a/xen/include/asm-x86/shadow_64.h
+++ /dev/null
@@ -1,587 +0,0 @@
-/******************************************************************************
- * include/asm-x86/shadow_64.h
- *
- * Copyright (c) 2005 Michael A Fetterman
- * Based on an earlier implementation by Ian Pratt et al
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-/*
- * Jun Nakajima <jun.nakajima@intel.com>
- * Chengyuan Li <chengyuan.li@intel.com>
- *
- * Extended to support 64-bit guests.
- */ -#ifndef _XEN_SHADOW_64_H -#define _XEN_SHADOW_64_H -#include <asm/shadow.h> -#include <asm/shadow_ops.h> -#include <asm/hvm/hvm.h> - -/* - * The naming convention of the shadow_ops: - * MODE_<pgentry size>_<guest paging levels>_HANDLER - */ -extern struct shadow_ops MODE_64_2_HANDLER; -extern struct shadow_ops MODE_64_3_HANDLER; -extern struct shadow_ops MODE_64_PAE_HANDLER; -#if CONFIG_PAGING_LEVELS == 4 -extern struct shadow_ops MODE_64_4_HANDLER; -#endif - -#if CONFIG_PAGING_LEVELS == 3 -#define L4_PAGETABLE_SHIFT 39 -#define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) -typedef struct { intpte_t l4; } l4_pgentry_t; -#define is_guest_l4_slot(_s) (1) -#endif - -#define READ_FAULT 0 -#define WRITE_FAULT 1 - -#define ERROR_P 1 -#define ERROR_W 2 -#define ERROR_U 4 -#define ERROR_I (1 << 4) - -#define X86_64_SHADOW_DEBUG 0 - -#if X86_64_SHADOW_DEBUG -#define ESH_LOG(_f, _a...) \ - printk(_f, ##_a) -#else -#define ESH_LOG(_f, _a...) ((void)0) -#endif - -#define L_MASK 0xff - -#define PAE_PAGING_LEVELS 3 - -#define ROOT_LEVEL_64 PAGING_L4 -#define ROOT_LEVEL_32 PAGING_L2 - -#define DIRECT_ENTRY (4UL << 16) -#define SHADOW_ENTRY (2UL << 16) -#define GUEST_ENTRY (1UL << 16) - -#define GET_ENTRY (2UL << 8) -#define SET_ENTRY (1UL << 8) - -#define PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) - -/* For 32-bit VMX guest to allocate shadow L1 & L2*/ -#define SL1_ORDER 1 -#define SL2_ORDER 2 - -typedef struct { intpte_t lo; } pgentry_64_t; -#define shadow_level_to_type(l) (l << 29) -#define shadow_type_to_level(t) (t >> 29) - -#define entry_get_value(_x) ((_x).lo) -#define entry_get_pfn(_x) \ - (((_x).lo & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT) -#define entry_get_paddr(_x) (((_x).lo & (PADDR_MASK&PAGE_MASK))) -#define entry_get_flags(_x) (get_pte_flags((_x).lo)) - -#define entry_empty() ((pgentry_64_t) { 0 }) -#define entry_from_pfn(pfn, flags) \ - ((pgentry_64_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) }) -#define entry_from_page(page, flags) (entry_from_pfn(page_to_mfn(page),(flags))) -#define entry_add_flags(x, flags) ((x).lo |= put_pte_flags(flags)) -#define entry_remove_flags(x, flags) ((x).lo &= ~put_pte_flags(flags)) -#define entry_has_changed(x,y,flags) \ - ( !!(((x).lo ^ (y).lo) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) - -/******************************************************************************/ -/* - * The macro and inlines are for 32-bit PAE guest - */ -#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */ - -#define PAE_SHADOW_SELF_ENTRY 259 -#define PAE_L3_PAGETABLE_ENTRIES 4 - -/******************************************************************************/ -static inline int table_offset_64(unsigned long va, int level) -{ - switch(level) { - case 1: - return (((va) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)); - case 2: - return (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)); - case 3: - return (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)); -#if CONFIG_PAGING_LEVELS == 3 - case 4: - return PAE_SHADOW_SELF_ENTRY; -#endif - -#if CONFIG_PAGING_LEVELS >= 4 -#ifndef GUEST_PGENTRY_32 -#ifndef GUEST_32PAE - case 4: - return (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)); -#else - case 4: - return PAE_SHADOW_SELF_ENTRY; -#endif -#else - case 4: - return PAE_SHADOW_SELF_ENTRY; -#endif -#endif - default: - return -1; - } -} - -/*****************************************************************************/ - -#if defined( GUEST_32PAE ) -static inline int guest_table_offset_64(unsigned long va, int level, unsigned int 
index) -{ - switch(level) { - case 1: - return (((va) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)); - case 2: - return (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)); - case 3: - return (index * 4 + ((va) >> L3_PAGETABLE_SHIFT)); -#if CONFIG_PAGING_LEVELS == 3 - case 4: - return PAE_SHADOW_SELF_ENTRY; -#endif - -#if CONFIG_PAGING_LEVELS >= 4 -#ifndef GUEST_PGENTRY_32 - case 4: - return (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)); -#else - case 4: - return PAE_SHADOW_SELF_ENTRY; -#endif -#endif - default: - return -1; - } -} - -#define SH_GUEST_32PAE 1 -#else -#define guest_table_offset_64(va, level, index) \ - table_offset_64((va),(level)) -#define SH_GUEST_32PAE 0 -#endif - -/********************************************************************************/ - -static inline void free_out_of_sync_state(struct domain *d) -{ - struct out_of_sync_entry *entry; - - // NB: Be careful not to call something that manipulates this list - // while walking it. Remove one item at a time, and always - // restart from start of list. - // - while ( (entry = d->arch.out_of_sync) ) - { - d->arch.out_of_sync = entry->next; - release_out_of_sync_entry(d, entry); - - entry->next = d->arch.out_of_sync_free; - d->arch.out_of_sync_free = entry; - } -} - -static inline int __entry( - struct vcpu *v, unsigned long va, pgentry_64_t *e_p, u32 flag) -{ - int i; - pgentry_64_t *le_e; - pgentry_64_t *le_p = NULL; - pgentry_64_t *phys_vtable = NULL; - unsigned long mfn; - int index; - u32 level = flag & L_MASK; - struct domain *d = v->domain; - int root_level; - unsigned int base_idx; - - base_idx = get_cr3_idxval(v); - - if ( flag & SHADOW_ENTRY ) - { - root_level = ROOT_LEVEL_64; - index = table_offset_64(va, root_level); - le_e = (pgentry_64_t *)&v->arch.shadow_vtable[index]; - } - else if ( flag & GUEST_ENTRY ) - { - root_level = v->domain->arch.ops->guest_paging_levels; - if ( root_level == PAGING_L3 ) - index = guest_table_offset_64(va, PAGING_L3, base_idx); - else - index = guest_table_offset_64(va, root_level, base_idx); - le_e = (pgentry_64_t *)&v->arch.guest_vtable[index]; - } - else /* direct mode */ - { - root_level = PAE_PAGING_LEVELS; - index = table_offset_64(va, root_level); - phys_vtable = (pgentry_64_t *)map_domain_page( - pagetable_get_pfn(v->domain->arch.phys_table)); - le_e = &phys_vtable[index]; - } - - /* - * If it's not external mode, then mfn should be machine physical. 
- */ - for ( i = root_level - level; i > 0; i-- ) - { - if ( unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)) ) - { - if ( le_p ) - unmap_domain_page(le_p); - - if ( phys_vtable ) - unmap_domain_page(phys_vtable); - - return 0; - } - - mfn = entry_get_pfn(*le_e); - if ( (flag & GUEST_ENTRY) && shadow_mode_translate(d) ) - mfn = get_mfn_from_gpfn(mfn); - - if ( le_p ) - unmap_domain_page(le_p); - le_p = (pgentry_64_t *)map_domain_page(mfn); - - if ( flag & SHADOW_ENTRY ) - index = table_offset_64(va, (level + i - 1)); - else - index = guest_table_offset_64(va, (level + i - 1), base_idx); - le_e = &le_p[index]; - } - - if ( flag & SET_ENTRY ) - *le_e = *e_p; - else - *e_p = *le_e; - - if ( le_p ) - unmap_domain_page(le_p); - - if ( phys_vtable ) - unmap_domain_page(phys_vtable); - - return 1; -} - -static inline int __rw_entry( - struct vcpu *v, unsigned long va, void *e_p, u32 flag) -{ - pgentry_64_t *e = (pgentry_64_t *)e_p; - - if (e) { - return __entry(v, va, e, flag); - } - - return 0; -} - -#define __shadow_set_l4e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4) -#define __shadow_get_l4e(v, va, sl4e) \ - __rw_entry(v, va, sl4e, SHADOW_ENTRY | GET_ENTRY | PAGING_L4) -#define __shadow_set_l3e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L3) -#define __shadow_get_l3e(v, va, sl3e) \ - __rw_entry(v, va, sl3e, SHADOW_ENTRY | GET_ENTRY | PAGING_L3) -#define __shadow_set_l2e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L2) -#define __shadow_get_l2e(v, va, sl2e) \ - __rw_entry(v, va, sl2e, SHADOW_ENTRY | GET_ENTRY | PAGING_L2) -#define __shadow_set_l1e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L1) -#define __shadow_get_l1e(v, va, sl1e) \ - __rw_entry(v, va, sl1e, SHADOW_ENTRY | GET_ENTRY | PAGING_L1) - -#define __guest_set_l4e(v, va, value) \ - __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L4) -#define __guest_get_l4e(v, va, gl4e) \ - __rw_entry(v, va, gl4e, GUEST_ENTRY | GET_ENTRY | PAGING_L4) -#define __guest_set_l3e(v, va, value) \ - __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L3) -#define __guest_get_l3e(v, va, sl3e) \ - __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3) - -#define __direct_set_l3e(v, va, value) \ - __rw_entry(v, va, value, DIRECT_ENTRY | SET_ENTRY | PAGING_L3) -#define __direct_get_l3e(v, va, sl3e) \ - __rw_entry(v, va, sl3e, DIRECT_ENTRY | GET_ENTRY | PAGING_L3) -#define __direct_set_l2e(v, va, value) \ - __rw_entry(v, va, value, DIRECT_ENTRY | SET_ENTRY | PAGING_L2) -#define __direct_get_l2e(v, va, sl2e) \ - __rw_entry(v, va, sl2e, DIRECT_ENTRY | GET_ENTRY | PAGING_L2) -#define __direct_set_l1e(v, va, value) \ - __rw_entry(v, va, value, DIRECT_ENTRY | SET_ENTRY | PAGING_L1) -#define __direct_get_l1e(v, va, sl1e) \ - __rw_entry(v, va, sl1e, DIRECT_ENTRY | GET_ENTRY | PAGING_L1) - - -static inline int __guest_set_l2e( - struct vcpu *v, unsigned long va, void *value, int size) -{ - switch(size) { - case 4: - // 32-bit guest - { - l2_pgentry_32_t *l2va; - - l2va = (l2_pgentry_32_t *)v->arch.guest_vtable; - if (value) - l2va[l2_table_offset_32(va)] = *(l2_pgentry_32_t *)value; - return 1; - } - case 8: - return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L2); - default: - BUG(); - return 0; - } - return 0; -} - -#define __guest_set_l2e(v, va, value) \ - __guest_set_l2e(v, (unsigned long)va, value, sizeof(*value)) - -static inline int __guest_get_l2e( - struct vcpu *v, unsigned long va, void 
*gl2e, int size) -{ - switch(size) { - case 4: - // 32-bit guest - { - l2_pgentry_32_t *l2va; - l2va = (l2_pgentry_32_t *)v->arch.guest_vtable; - if (gl2e) - *(l2_pgentry_32_t *)gl2e = l2va[l2_table_offset_32(va)]; - return 1; - } - case 8: - return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | PAGING_L2); - default: - BUG(); - return 0; - } - return 0; -} - -#define __guest_get_l2e(v, va, gl2e) \ - __guest_get_l2e(v, (unsigned long)va, gl2e, sizeof(*gl2e)) - -static inline int __guest_set_l1e( - struct vcpu *v, unsigned long va, void *value, int size) -{ - switch(size) { - case 4: - // 32-bit guest - { - l2_pgentry_32_t gl2e; - l1_pgentry_32_t *l1va; - unsigned long l1mfn; - - if (!__guest_get_l2e(v, va, &gl2e)) - return 0; - if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT))) - return 0; - - l1mfn = get_mfn_from_gpfn( - l2e_get_pfn(gl2e)); - - l1va = (l1_pgentry_32_t *)map_domain_page(l1mfn); - if (value) - l1va[l1_table_offset_32(va)] = *(l1_pgentry_32_t *)value; - unmap_domain_page(l1va); - - return 1; - } - - case 8: - return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L1); - default: - BUG(); - return 0; - } - return 0; -} - -#define __guest_set_l1e(v, va, value) \ - __guest_set_l1e(v, (unsigned long)va, value, sizeof(*value)) - -static inline int __guest_get_l1e( - struct vcpu *v, unsigned long va, void *gl1e, int size) -{ - switch(size) { - case 4: - // 32-bit guest - { - l2_pgentry_32_t gl2e; - l1_pgentry_32_t *l1va; - unsigned long l1mfn; - - if (!(__guest_get_l2e(v, va, &gl2e))) - return 0; - - - if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT))) - return 0; - - - l1mfn = get_mfn_from_gpfn( - l2e_get_pfn(gl2e)); - l1va = (l1_pgentry_32_t *) map_domain_page(l1mfn); - if (gl1e) - *(l1_pgentry_32_t *)gl1e = l1va[l1_table_offset_32(va)]; - unmap_domain_page(l1va); - return 1; - } - case 8: - // 64-bit guest - return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | PAGING_L1); - default: - BUG(); - return 0; - } - return 0; -} - -#define __guest_get_l1e(v, va, gl1e) \ - __guest_get_l1e(v, (unsigned long)va, gl1e, sizeof(*gl1e)) - -static inline void entry_general( - struct domain *d, - pgentry_64_t *gle_p, - pgentry_64_t *sle_p, - unsigned long smfn, u32 level) - -{ - pgentry_64_t gle = *gle_p; - pgentry_64_t sle; - - sle = entry_empty(); - if ( (entry_get_flags(gle) & _PAGE_PRESENT) && (smfn != 0) ) - { - if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) { - sle = entry_from_pfn(smfn, entry_get_flags(gle)); - entry_remove_flags(sle, _PAGE_PSE); - - if ( shadow_mode_log_dirty(d) || - !(entry_get_flags(gle) & _PAGE_DIRTY) ) - { - pgentry_64_t *l1_p; - int i; - - l1_p =(pgentry_64_t *)map_domain_page(smfn); - for (i = 0; i < L1_PAGETABLE_ENTRIES; i++) - { - if ( mfn_is_page_table(entry_get_pfn(l1_p[i])) ) - entry_remove_flags(l1_p[i], _PAGE_RW); - } - - unmap_domain_page(l1_p); - } - } else { - if (d->arch.ops->guest_paging_levels <= PAGING_L3 - && level == PAGING_L3) { - sle = entry_from_pfn(smfn, entry_get_flags(gle)); - } else { - - sle = entry_from_pfn( - smfn, - (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL); - entry_add_flags(gle, _PAGE_ACCESSED); - } - } - // XXX mafetter: Hmm... - // Shouldn't the dirty log be checked/updated here? - // Actually, it needs to be done in this function's callers. 
- // - *gle_p = gle; - } - - if ( entry_get_value(sle) || entry_get_value(gle) ) - SH_VVLOG("%s: gpde=%lx, new spde=%lx", __func__, - entry_get_value(gle), entry_get_value(sle)); - - *sle_p = sle; -} - -static inline void entry_propagate_from_guest( - struct domain *d, pgentry_64_t *gle_p, pgentry_64_t *sle_p, u32 level) -{ - pgentry_64_t gle = *gle_p; - unsigned long smfn = 0; - - if ( entry_get_flags(gle) & _PAGE_PRESENT ) { - if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) { - smfn = __shadow_status(d, entry_get_pfn(gle), PGT_fl1_shadow); - } else { - smfn = __shadow_status(d, entry_get_pfn(gle), - shadow_level_to_type((level -1 ))); - } - } - entry_general(d, gle_p, sle_p, smfn, level); - -} - -static int inline -validate_entry_change( - struct domain *d, - pgentry_64_t *new_gle_p, - pgentry_64_t *shadow_le_p, - u32 level) -{ - pgentry_64_t old_sle, new_sle; - pgentry_64_t new_gle = *new_gle_p; - - old_sle = *shadow_le_p; - entry_propagate_from_guest(d, &new_gle, &new_sle, level); - - ESH_LOG("old_sle: %lx, new_gle: %lx, new_sle: %lx\n", - entry_get_value(old_sle), entry_get_value(new_gle), - entry_get_value(new_sle)); - - if ( ((entry_get_value(old_sle) | entry_get_value(new_sle)) & _PAGE_PRESENT) && - entry_has_changed(old_sle, new_sle, _PAGE_PRESENT) ) - { - perfc_incrc(validate_entry_changes); - - if ( (entry_get_flags(new_sle) & _PAGE_PRESENT) && - !get_shadow_ref(entry_get_pfn(new_sle)) ) - BUG(); - if ( entry_get_flags(old_sle) & _PAGE_PRESENT ) - put_shadow_ref(entry_get_pfn(old_sle)); - } - - *shadow_le_p = new_sle; - - return 1; -} - -#endif - - diff --git a/xen/include/asm-x86/shadow_ops.h b/xen/include/asm-x86/shadow_ops.h deleted file mode 100644 index 8765ed8b10..0000000000 --- a/xen/include/asm-x86/shadow_ops.h +++ /dev/null @@ -1,138 +0,0 @@ -/****************************************************************************** - * include/asm-x86/shadow_ops.h - * - * Copyright (c) 2005 Michael A Fetterman - * Based on an earlier implementation by Ian Pratt et al - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _XEN_SHADOW_OPS_H -#define _XEN_SHADOW_OPS_H - -#define PAGING_L4 4UL -#define PAGING_L3 3UL -#define PAGING_L2 2UL -#define PAGING_L1 1UL - -#define PAE_CR3_ALIGN 5 -#define PAE_CR3_IDX_MASK 0x7f - -#if defined( GUEST_PGENTRY_32 ) - -#define GUEST_L1_PAGETABLE_ENTRIES L1_PAGETABLE_ENTRIES_32 -#define GUEST_L2_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES_32 -#define GUEST_ROOT_PAGETABLE_ENTRIES ROOT_PAGETABLE_ENTRIES_32 -#define GUEST_L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT_32 - -#define guest_l1_pgentry_t l1_pgentry_32_t -#define guest_l2_pgentry_t l2_pgentry_32_t -#define guest_root_pgentry_t l2_pgentry_32_t - -#define guest_l1e_get_paddr l1e_get_paddr_32 -#define guest_l2e_get_paddr l2e_get_paddr_32 - -#define guest_get_pte_flags get_pte_flags_32 -#define guest_put_pte_flags put_pte_flags_32 - -#define guest_l1e_get_flags l1e_get_flags_32 -#define guest_l2e_get_flags l2e_get_flags_32 -#define guest_root_get_flags l2e_get_flags_32 -#define guest_root_get_intpte l2e_get_intpte - -#define guest_l1e_empty l1e_empty_32 -#define guest_l2e_empty l2e_empty_32 - -#define guest_l1e_from_pfn l1e_from_pfn_32 -#define guest_l2e_from_pfn l2e_from_pfn_32 - -#define guest_l1e_from_paddr l1e_from_paddr_32 -#define guest_l2e_from_paddr l2e_from_paddr_32 - -#define guest_l1e_from_page l1e_from_page_32 -#define guest_l2e_from_page l2e_from_page_32 - -#define guest_l1e_add_flags l1e_add_flags_32 -#define guest_l2e_add_flags l2e_add_flags_32 - -#define guest_l1e_remove_flag l1e_remove_flags_32 -#define guest_l2e_remove_flag l2e_remove_flags_32 - -#define guest_l1e_has_changed l1e_has_changed_32 -#define guest_l2e_has_changed l2e_has_changed_32 -#define root_entry_has_changed l2e_has_changed_32 - -#define guest_l1_table_offset l1_table_offset_32 -#define guest_l2_table_offset l2_table_offset_32 - -#define guest_linear_l1_table linear_pg_table_32 -#define guest_linear_l2_table linear_l2_table_32 - -#define guest_va_to_l1mfn va_to_l1mfn_32 - -#else - -#define GUEST_L1_PAGETABLE_ENTRIES L1_PAGETABLE_ENTRIES -#define GUEST_L2_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES -#define GUEST_ROOT_PAGETABLE_ENTRIES ROOT_PAGETABLE_ENTRIES -#define GUEST_L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT - -#define guest_l1_pgentry_t l1_pgentry_t -#define guest_l2_pgentry_t l2_pgentry_t -#define guest_root_pgentry_t l4_pgentry_t - -#define guest_l1e_get_paddr l1e_get_paddr -#define guest_l2e_get_paddr l2e_get_paddr - -#define guest_get_pte_flags get_pte_flags -#define guest_put_pte_flags put_pte_flags - -#define guest_l1e_get_flags l1e_get_flags -#define guest_l2e_get_flags l2e_get_flags -#define guest_root_get_flags l4e_get_flags -#define guest_root_get_intpte l4e_get_intpte - -#define guest_l1e_empty l1e_empty -#define guest_l2e_empty l2e_empty - -#define guest_l1e_from_pfn l1e_from_pfn -#define guest_l2e_from_pfn l2e_from_pfn - -#define guest_l1e_from_paddr l1e_from_paddr -#define guest_l2e_from_paddr l2e_from_paddr - -#define guest_l1e_from_page l1e_from_page -#define guest_l2e_from_page l2e_from_page - -#define guest_l1e_add_flags l1e_add_flags -#define guest_l2e_add_flags l2e_add_flags - -#define guest_l1e_remove_flag l1e_remove_flags -#define guest_l2e_remove_flag l2e_remove_flags - -#define guest_l1e_has_changed l1e_has_changed -#define guest_l2e_has_changed l2e_has_changed -#define root_entry_has_changed 
l4e_has_changed - -#define guest_l1_table_offset l1_table_offset -#define guest_l2_table_offset l2_table_offset - -#define guest_linear_l1_table linear_pg_table -#define guest_linear_l2_table linear_l2_table - -#define guest_va_to_l1mfn va_to_l1mfn -#endif - -#endif /* _XEN_SHADOW_OPS_H */ diff --git a/xen/include/asm-x86/shadow_public.h b/xen/include/asm-x86/shadow_public.h deleted file mode 100644 index e2b4b5fd57..0000000000 --- a/xen/include/asm-x86/shadow_public.h +++ /dev/null @@ -1,61 +0,0 @@ -/****************************************************************************** - * include/asm-x86/shadow_public.h - * - * Copyright (c) 2005 Michael A Fetterman - * Based on an earlier implementation by Ian Pratt et al - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _XEN_SHADOW_PUBLIC_H -#define _XEN_SHADOW_PUBLIC_H - -#if CONFIG_PAGING_LEVELS >= 3 -#define MFN_PINNED(_x) (mfn_to_page(_x)->u.inuse.type_info & PGT_pinned) - -extern void shadow_sync_and_drop_references( - struct domain *d, struct page_info *page); -extern void shadow_drop_references( - struct domain *d, struct page_info *page); - -extern int shadow_set_guest_paging_levels(struct domain *d, int levels); - -extern void release_out_of_sync_entry( - struct domain *d, struct out_of_sync_entry *entry); - -struct shadow_ops { - unsigned long guest_paging_levels; /* guest paging levels */ - void (*invlpg)(struct vcpu *v, unsigned long va); - int (*fault)(unsigned long va, struct cpu_user_regs *regs); - void (*update_pagetables)(struct vcpu *v); - void (*sync_all)(struct domain *d); - int (*remove_all_write_access)(struct domain *d, - unsigned long readonly_gpfn, unsigned long readonly_gmfn); - int (*do_update_va_mapping)(unsigned long va, l1_pgentry_t val, struct vcpu *v); - struct out_of_sync_entry * - (*mark_mfn_out_of_sync)(struct vcpu *v, unsigned long gpfn, - unsigned long mfn); - int (*is_out_of_sync)(struct vcpu *v, unsigned long va); - unsigned long (*gva_to_gpa)(unsigned long gva); -}; -#endif - -#if CONFIG_PAGING_LEVELS >= 4 -extern void shadow_l4_normal_pt_update(struct domain *d, - unsigned long pa, l4_pgentry_t l4e, - struct domain_mmap_cache *cache); -#endif - -#endif diff --git a/xen/include/asm-x86/x86_32/page-2level.h b/xen/include/asm-x86/x86_32/page-2level.h index 764b1c2c05..7f450c4624 100644 --- a/xen/include/asm-x86/x86_32/page-2level.h +++ b/xen/include/asm-x86/x86_32/page-2level.h @@ -46,6 +46,7 @@ typedef l2_pgentry_t root_pgentry_t; * 12-bit flags = (pte[11:0]) */ +#define _PAGE_NX_BIT 0U #define _PAGE_NX 0U /* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. 
*/ diff --git a/xen/include/asm-x86/x86_32/page-3level.h b/xen/include/asm-x86/x86_32/page-3level.h index 43e73033e3..e0187478cc 100644 --- a/xen/include/asm-x86/x86_32/page-3level.h +++ b/xen/include/asm-x86/x86_32/page-3level.h @@ -59,7 +59,8 @@ typedef l3_pgentry_t root_pgentry_t; * 32-bit flags = (pte[63:44],pte[11:0]) */ -#define _PAGE_NX (cpu_has_nx ? (1<<31) : 0) +#define _PAGE_NX_BIT (1U<<31) +#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0) /* Extract flags into 32-bit integer, or turn 32-bit flags into a pte mask. */ #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF)) diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h index 0afb5e719b..429cfb8c5d 100644 --- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -44,6 +44,8 @@ typedef l4_pgentry_t root_pgentry_t; /* Given a virtual address, get an entry offset into a linear page table. */ #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT) #define l2_linear_offset(_a) (((_a) & VADDR_MASK) >> L2_PAGETABLE_SHIFT) +#define l3_linear_offset(_a) (((_a) & VADDR_MASK) >> L3_PAGETABLE_SHIFT) +#define l4_linear_offset(_a) (((_a) & VADDR_MASK) >> L4_PAGETABLE_SHIFT) #define is_guest_l1_slot(_s) (1) #define is_guest_l2_slot(_t, _s) (1) @@ -70,7 +72,8 @@ typedef l4_pgentry_t root_pgentry_t; #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF)) /* Bit 23 of a 24-bit flag mask. This corresponds to bit 63 of a pte.*/ -#define _PAGE_NX (cpu_has_nx ? (1U<<23) : 0U) +#define _PAGE_NX_BIT (1U<<23) +#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0U) #define L1_DISALLOW_MASK BASE_DISALLOW_MASK #define L2_DISALLOW_MASK BASE_DISALLOW_MASK diff --git a/xen/include/public/dom0_ops.h b/xen/include/public/dom0_ops.h index d211ca1624..f12cc93108 100644 --- a/xen/include/public/dom0_ops.h +++ b/xen/include/public/dom0_ops.h @@ -262,6 +262,18 @@ DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t); #define DOM0_SHADOW_CONTROL_OP_CLEAN 11 #define DOM0_SHADOW_CONTROL_OP_PEEK 12 +/* Shadow2 operations */ +#define DOM0_SHADOW2_CONTROL_OP_GET_ALLOCATION 30 +#define DOM0_SHADOW2_CONTROL_OP_SET_ALLOCATION 31 +#define DOM0_SHADOW2_CONTROL_OP_ENABLE 32 + +/* Mode flags for Shadow2 enable op */ +#define DOM0_SHADOW2_CONTROL_FLAG_ENABLE (1 << 0) +#define DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT (1 << 1) +#define DOM0_SHADOW2_CONTROL_FLAG_LOG_DIRTY (1 << 2) +#define DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE (1 << 3) +#define DOM0_SHADOW2_CONTROL_FLAG_EXTERNAL (1 << 4) + struct dom0_shadow_control_stats { uint32_t fault_count; uint32_t dirty_count; @@ -277,7 +289,9 @@ struct dom0_shadow_control { uint32_t op; XEN_GUEST_HANDLE(ulong) dirty_bitmap; /* IN/OUT variables. */ - uint64_t pages; /* size of buffer, updated with actual size */ + uint64_t pages; /* size of buffer, updated with actual size */ + uint32_t mb; /* Shadow2 memory allocation in MB */ + uint32_t mode; /* Shadow2 mode to enable */ /* OUT variables. 
*/ struct dom0_shadow_control_stats stats; };
diff --git a/xen/include/xen/domain_page.h b/xen/include/xen/domain_page.h index 03d7af5f0f..2a51fcbacb 100644 --- a/xen/include/xen/domain_page.h +++ b/xen/include/xen/domain_page.h @@ -26,6 +26,13 @@ extern void *map_domain_page(unsigned long pfn); */ extern void unmap_domain_page(void *va); +/* + * Convert a VA (within a page previously mapped in the context of the + * currently-executing VCPU via a call to map_domain_page()) to a machine + * address + */ +extern paddr_t mapped_domain_page_to_maddr(void *va); + /* * Similar to the above calls, except the mapping is accessible in all * address spaces (not just within the VCPU that created the mapping). Global @@ -98,6 +105,7 @@ domain_mmap_cache_destroy(struct domain_mmap_cache *cache) #define map_domain_page(pfn) maddr_to_virt((pfn)<<PAGE_SHIFT) #define unmap_domain_page(va) ((void)(va)) +#define mapped_domain_page_to_maddr(va) (virt_to_maddr(va)) #define map_domain_page_global(pfn) maddr_to_virt((pfn)<<PAGE_SHIFT) #define unmap_domain_page_global(va) ((void)(va)) @@ -112,4 +120,9 @@ struct domain_mmap_cache { #endif /* !CONFIG_DOMAIN_PAGE */ +#define HERE_I_AM \ +do { \ + printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__); \ +} while (0) + #endif /* __XEN_DOMAIN_PAGE_H__ */
diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h index e2c67a1d46..e7d84afd92 100644 --- a/xen/include/xen/lib.h +++ b/xen/include/xen/lib.h @@ -18,7 +18,7 @@ extern void __bug(char *file, int line) __attribute__((noreturn)); #ifndef NDEBUG #define ASSERT(_p) \ do { \ - if ( !(_p) ) \ + if ( unlikely(!(_p)) ) \ { \ printk("Assertion '%s' failed, line %d, file %s\n", #_p , \ __LINE__, __FILE__); \ @@ -41,7 +41,7 @@ struct domain; void cmdline_parse(char *cmdline); #ifndef NDEBUG -extern int debugtrace_send_to_console; +extern void debugtrace_toggle(void); extern void debugtrace_dump(void); extern void debugtrace_printk(const char *fmt, ...); #else
diff --git a/xen/include/xen/list.h b/xen/include/xen/list.h index 66cdfc814b..5072d0b924 100644 --- a/xen/include/xen/list.h +++ b/xen/include/xen/list.h @@ -162,6 +162,16 @@ static __inline__ void list_splice(struct list_head *list, struct list_head *hea pos = n, n = pos->next) /** + * list_for_each_backwards_safe - iterate backwards over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_backwards_safe(pos, n, head) \ + for (pos = (head)->prev, n = pos->prev; pos != (head); \ + pos = n, n = pos->prev) + +/** * list_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop counter. * @head: the head for your list.
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index c37e60f23a..d90b27adc7 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -376,9 +376,12 @@ extern struct domain *domain_list; /* VCPU is paused by the hypervisor? */ #define _VCPUF_paused 11 #define VCPUF_paused (1UL<<_VCPUF_paused) - /* VCPU is blocked awaiting an event to be consumed by Xen. */ +/* VCPU is blocked awaiting an event to be consumed by Xen. */ #define _VCPUF_blocked_in_xen 12 #define VCPUF_blocked_in_xen (1UL<<_VCPUF_blocked_in_xen) + /* HVM vcpu thinks CR0.PG == 0 */ +#define _VCPUF_shadow2_translate 13 +#define VCPUF_shadow2_translate (1UL<<_VCPUF_shadow2_translate) /* * Per-domain flags (domain_flags).
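
A note on the _PAGE_NX_BIT/_PAGE_NX split in the page.h hunks above: _PAGE_NX evaluates to 0 on CPUs without NX support, so it cannot be used to name the bit position itself. Code that must strip or test the NX position unconditionally needs the new constant. A minimal sketch of that distinction (sanitise_pte_flags is a hypothetical helper, not part of this changeset):

    /* Strip the NX position from guest-supplied flags, then reapply it
     * only when the CPU supports NX. Illustration only. */
    static inline unsigned int sanitise_pte_flags(unsigned int gflags)
    {
        unsigned int f = gflags & ~_PAGE_NX_BIT; /* valid even without NX */

        if ( gflags & _PAGE_NX_BIT )
            f |= _PAGE_NX;                       /* 0 when !cpu_has_nx */
        return f;
    }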
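The new DOM0_SHADOW2_CONTROL_* operations in dom0_ops.h take an allocation size via the new 'mb' field and a 'mode' built from the flag bits. As an illustration only — the header does not say which flag combinations are valid, so the HVM-style combination below is an assumption:

    /* Sketch of a toolstack-side request enabling a refcounted,
     * translated, externally-managed shadow2 mode (assumed combination,
     * using only the fields visible in the hunk above). */
    struct dom0_shadow_control sc = {
        .op   = DOM0_SHADOW2_CONTROL_OP_ENABLE,
        .mode = DOM0_SHADOW2_CONTROL_FLAG_ENABLE |
                DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT |
                DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE |
                DOM0_SHADOW2_CONTROL_FLAG_EXTERNAL,
    };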
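domain_page.h gains mapped_domain_page_to_maddr(), the reverse lookup for a live map_domain_page() mapping; the !CONFIG_DOMAIN_PAGE fallback shows the intended semantics (virt_to_maddr on the direct map). A usage sketch, assuming a valid, mappable mfn (check_mapping is invented for illustration):

    /* Round-trip one frame through the mapping API. */
    static void check_mapping(unsigned long mfn)
    {
        void *va = map_domain_page(mfn);
        paddr_t maddr = mapped_domain_page_to_maddr(va);

        ASSERT((maddr >> PAGE_SHIFT) == mfn);
        unmap_domain_page(va);
    }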
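list.h adds list_for_each_backwards_safe(), the reverse-order counterpart of list_for_each_safe(): 'n' caches pos->prev before the loop body runs, so the current entry may be unlinked mid-walk. A sketch of the idiom (struct pending and drain_backwards are invented for illustration):

    struct pending {
        struct list_head list;
        int id;
    };

    /* Free every entry, newest first; deleting 'pos' is safe because
     * 'n' already holds pos->prev. */
    static void drain_backwards(struct list_head *head)
    {
        struct list_head *pos, *n;

        list_for_each_backwards_safe ( pos, n, head )
        {
            struct pending *p = list_entry(pos, struct pending, list);

            list_del(pos);
            xfree(p);
        }
    }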