From a779a482fb9b9f8fcdf8b2519c789b4b9bb5dd05 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 Jul 2017 16:56:48 +0200
Subject: build: add a hack for removing non-essential module info

With CONFIG_MODULE_STRIPPED enabled, non-essential .modinfo strings
(author, description, version, firmware, parameter descriptions) are
compiled out of modules, modpost stops emitting device table aliases and
the vermagic/intree/staging/srcversion records, and check_modinfo()
skips the vermagic comparison when loading such modules. This reduces
the on-disk size of .ko files; a short illustration of the effect is
included below the diffstat and after the last hunk.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/linux/module.h      | 13 ++++++++-----
 include/linux/moduleparam.h | 15 ++++++++++++---
 init/Kconfig                |  7 +++++++
 kernel/module.c             |  5 ++++-
 scripts/mod/modpost.c       | 12 ++++++++++++
 5 files changed, 43 insertions(+), 9 deletions(-)
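
For illustration only (this note sits below the "---" marker and is
commentary, not part of the patch itself): a rough sketch of what the
affected macros boil down to for a hypothetical driver, following the
__MODULE_INFO_STRIP / __MODULE_INFO_DISABLED definitions added to
include/linux/moduleparam.h below.

	/* Hypothetical driver code: */
	MODULE_AUTHOR("Jane Doe");
	MODULE_DESCRIPTION("Example driver");
	MODULE_PARM_DESC(debug, "Enable debug output");

	/*
	 * Without CONFIG_MODULE_STRIPPED (and when built as a module), each
	 * of these goes through __MODULE_INFO() and places a "tag=info"
	 * string, e.g. "author=Jane Doe", in the module's .modinfo section.
	 *
	 * With CONFIG_MODULE_STRIPPED=y, the MODULE_INFO_STRIP() /
	 * __MODULE_INFO_STRIP() paths now used by MODULE_AUTHOR(),
	 * MODULE_DESCRIPTION() and MODULE_PARM_DESC() collapse to something
	 * like
	 *
	 *     struct __UNIQUE_ID(author) {};
	 *
	 * an unused empty struct declaration that emits nothing into the
	 * object file.
	 */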

--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -158,6 +158,7 @@ extern void cleanup_module(void);
 
 /* Generic info of form tag = "info" */
 #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
+#define MODULE_INFO_STRIP(tag, info) __MODULE_INFO_STRIP(tag, tag, info)
 
 /* For userspace: you can also call me... */
 #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
@@ -201,12 +202,12 @@ extern void cleanup_module(void);
  * Author(s), use "Name <email>" or just "Name", for multiple
  * authors use multiple MODULE_AUTHOR() statements/lines.
  */
-#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
+#define MODULE_AUTHOR(_author) MODULE_INFO_STRIP(author, _author)
 
 /* What your module does. */
-#define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description)
+#define MODULE_DESCRIPTION(_description) MODULE_INFO_STRIP(description, _description)
 
-#ifdef MODULE
+#if defined(MODULE) && !defined(CONFIG_MODULE_STRIPPED)
 /* Creates an alias so file2alias.c can find device table. */
 #define MODULE_DEVICE_TABLE(type, name)					\
 extern typeof(name) __mod_##type##__##name##_device_table		\
@@ -233,7 +234,9 @@ extern typeof(name) __mod_##type##__##na
  */
 
 #if defined(MODULE) || !defined(CONFIG_SYSFS)
-#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
+#define MODULE_VERSION(_version) MODULE_INFO_STRIP(version, _version)
+#elif defined(CONFIG_MODULE_STRIPPED)
+#define MODULE_VERSION(_version) __MODULE_INFO_DISABLED(version)
 #else
 #define MODULE_VERSION(_version)					\
 	static struct module_version_attribute ___modver_attr = {	\
@@ -255,7 +258,7 @@ extern typeof(name) __mod_##type##__##na
 /* Optional firmware file (or files) needed by the module
  * format is simply firmware file name.  Multiple firmware
  * files require multiple MODULE_FIRMWARE() specifiers */
-#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
+#define MODULE_FIRMWARE(_firmware) MODULE_INFO_STRIP(firmware, _firmware)
 
 struct notifier_block;
 
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -17,6 +17,16 @@
 /* Chosen so that structs with an unsigned long line up. */
 #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
 
+/* This struct is here for syntactic coherency, it is not used */
+#define __MODULE_INFO_DISABLED(name)					  \
+  struct __UNIQUE_ID(name) {}
+
+#ifdef CONFIG_MODULE_STRIPPED
+#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO_DISABLED(name)
+#else
+#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO(tag, name, info)
+#endif
+
 #ifdef MODULE
 #define __MODULE_INFO(tag, name, info)					  \
 static const char __UNIQUE_ID(name)[]					  \
@@ -24,8 +34,7 @@ static const char __UNIQUE_ID(name)[]
   = __stringify(tag) "=" info
 #else  /* !MODULE */
 /* This struct is here for syntactic coherency, it is not used */
-#define __MODULE_INFO(tag, name, info)					  \
-  struct __UNIQUE_ID(name) {}
+#define __MODULE_INFO(tag, name, info) __MODULE_INFO_DISABLED(name)
 #endif
 #define __MODULE_PARM_TYPE(name, _type)					  \
   __MODULE_INFO(parmtype, name##type, #name ":" _type)
@@ -33,7 +42,7 @@ static const char __UNIQUE_ID(name)[]
 /* One for each parameter, describing how to use it.  Some files do
    multiple of these per line, so can't just use MODULE_INFO. */
 #define MODULE_PARM_DESC(_parm, desc) \
-	__MODULE_INFO(parm, _parm, #_parm ":" desc)
+	__MODULE_INFO_STRIP(parm, _parm, #_parm ":" desc)
 
 struct kernel_param;
 
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1896,6 +1896,13 @@ config TRIM_UNUSED_KSYMS
 
 	  If unsure, or if you need to build out-of-tree modules, say N.
 
+config MODULE_STRIPPED
+	bool "Reduce module size"
+	depends on MODULES
+	help
+	  Remove module parameter descriptions, author info, version, aliases,
+	  device tables, etc.
+
 endif # MODULES
 
 config MODULES_TREE_LOOKUP
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2997,9 +2997,11 @@ static struct module *setup_load_info(st
 
 static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 {
-	const char *modmagic = get_modinfo(info, "vermagic");
 	int err;
 
+#ifndef CONFIG_MODULE_STRIPPED
+	const char *modmagic = get_modinfo(info, "vermagic");
+
 	if (flags & MODULE_INIT_IGNORE_VERMAGIC)
 		modmagic = NULL;
 
@@ -3020,6 +3022,7 @@ static int check_modinfo(struct module *
 				mod->name);
 		add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
 	}
+#endif
 
 	if (get_modinfo(info, "staging")) {
 		add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1982,7 +1982,9 @@ static void read_symbols(char *modname)
 		symname = remove_dot(info.strtab + sym->st_name);
 
 		handle_modversions(mod, &info, sym, symname);
+#ifndef CONFIG_MODULE_STRIPPED
 		handle_moddevtable(mod, &info, sym, symname);
+#endif
 	}
 	if (!is_vmlinux(modname) ||
 	     (is_vmlinux(modname) && vmlinux_section_warnings))
@@ -2143,8 +2145,10 @@ static void add_header(struct buffer *b,
 	buf_printf(b, "#include <linux/vermagic.h>\n");
 	buf_printf(b, "#include <linux/compiler.h>\n");
 	buf_printf(b, "\n");
+#ifndef CONFIG_MODULE_STRIPPED
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
 	buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
+#endif
 	buf_printf(b, "\n");
 	buf_printf(b, "__visible struct module __this_module\n");
 	buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n");
@@ -2161,16 +2165,20 @@ static void add_header(struct buffer *b,
 
 static void add_intree_flag(struct buffer *b, int is_intree)
 {
+#ifndef CONFIG_MODULE_STRIPPED
 	if (is_intree)
 		buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
+#endif
 }
 
 static void add_staging_flag(struct buffer *b, const char *name)
 {
+#ifndef CONFIG_MODULE_STRIPPED
 	static const char *staging_dir = "drivers/staging";
 
 	if (strncmp(staging_dir, name, strlen(staging_dir)) == 0)
 		buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n");
+#endif
 }
 
 /**
@@ -2269,11 +2277,13 @@ static void add_depends(struct buffer *b
 
 static void add_srcversion(struct buffer *b, struct module *mod)
 {
+#ifndef CONFIG_MODULE_STRIPPED
 	if (mod->srcversion[0]) {
 		buf_printf(b, "\n");
 		buf_printf(b, "MODULE_INFO(srcversion, \"%s\");\n",
 			   mod->srcversion);
 	}
+#endif
 }
 
 static void write_if_changed(struct buffer *b, const char *fname)
@@ -2509,7 +2519,9 @@ int main(int argc, char **argv)
 		add_staging_flag(&buf, mod->name);
 		err |= add_versions(&buf, mod);
 		add_depends(&buf, mod, modules);
+#ifndef CONFIG_MODULE_STRIPPED
 		add_moddevtable(&buf, mod);
+#endif
 		add_srcversion(&buf, mod);
 
 		sprintf(fname, "%s.mod.c", mod->name);
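
For illustration only (commentary, not part of the patch): a hypothetical
excerpt of a generated foo.mod.c. The module name is made up; the point is
which lines modpost stops emitting once CONFIG_MODULE_STRIPPED is in effect
(assuming the config symbol is visible to the modpost build, which is
handled outside this diff).

	/* foo.mod.c as generated WITHOUT CONFIG_MODULE_STRIPPED (roughly): */
	#include <linux/module.h>
	#include <linux/vermagic.h>
	#include <linux/compiler.h>

	MODULE_INFO(vermagic, VERMAGIC_STRING);
	MODULE_INFO(name, KBUILD_MODNAME);

	__visible struct module __this_module
	__attribute__((section(".gnu.linkonce.this_module"))) = {
		/* ... */
	};

	MODULE_INFO(intree, "Y");

	/*
	 * WITH CONFIG_MODULE_STRIPPED, the vermagic/name/intree/staging/
	 * srcversion MODULE_INFO() lines and the MODULE_ALIAS() entries
	 * derived from device tables are not written at all, and
	 * check_modinfo() in kernel/module.c no longer compares vermagic
	 * when the module is loaded.
	 */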