author     Andres Lagar-Cavilla <andres@lagarcavilla.org>    2012-01-26 12:46:26 +0000
committer  Andres Lagar-Cavilla <andres@lagarcavilla.org>    2012-01-26 12:46:26 +0000
commit     6a11f31bd53381b925e67413e8230af8979e3807 (patch)
tree       fad775401614a2662a24511f2600bb6f54aa98d2 /xen
parent     3881f1a903ada4573be2eec5458a1defe5a34313 (diff)
x86/mm: Add per-page locking for memory sharing, when audits are disabled
With the removal of the hash table, all that is needed now is locking of
individual shared pages, as new (gfn,domain) pairs are added to or removed
from the list of mappings. We recycle PGT_locked and use it to lock
individual pages. We ensure deadlock is averted by locking pages in
increasing order.

The global lock remains for the benefit of the auditing code, and is thus
enabled only as a compile-time option.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Signed-off-by: Adin Scannell <adin@scannell.ca>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
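The increasing-MFN rule is the one applied by mem_sharing_share_pages() in the
diff below. As a minimal sketch of the idea, built on the patch's own
__grab_shared_page() and mem_sharing_page_unlock() helpers (the wrapper
function itself is hypothetical and not part of the patch):

/* Sketch only: take the per-page locks of two distinct shared pages in
 * increasing MFN order, so that two callers racing on the same pair of
 * pages can never hold the locks in opposite (ABBA) order. */
static int grab_two_shared_pages(mfn_t lower, mfn_t higher,
                                 struct page_info **first,
                                 struct page_info **second)
{
    ASSERT(mfn_x(lower) < mfn_x(higher));

    /* Always lock the lower MFN first. */
    *first = __grab_shared_page(lower);
    if ( *first == NULL )
        return -ESRCH;

    *second = __grab_shared_page(higher);
    if ( *second == NULL )
    {
        /* Back out in reverse order of acquisition. */
        mem_sharing_page_unlock(*first);
        return -ESRCH;
    }

    return 0;
}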
Diffstat (limited to 'xen')
-rw-r--r--  xen/arch/x86/mm.c              |  74
-rw-r--r--  xen/arch/x86/mm/mem_sharing.c  | 257
-rw-r--r--  xen/arch/x86/mm/mm-locks.h     |  15
-rw-r--r--  xen/include/asm-x86/mm.h       |  27
4 files changed, 275 insertions(+), 98 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1f996e0be3..ef9b202773 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1719,7 +1719,7 @@ static int free_l4_table(struct page_info *page, int preemptible)
#define free_l4_table(page, preemptible) (-EINVAL)
#endif
-static int page_lock(struct page_info *page)
+int page_lock(struct page_info *page)
{
unsigned long x, nx;
@@ -1736,7 +1736,7 @@ static int page_lock(struct page_info *page)
return 1;
}
-static void page_unlock(struct page_info *page)
+void page_unlock(struct page_info *page)
{
unsigned long x, nx, y = page->u.inuse.type_info;
@@ -4295,76 +4295,6 @@ int steal_page(
return -1;
}
-int page_make_sharable(struct domain *d,
- struct page_info *page,
- int expected_refcnt)
-{
- spin_lock(&d->page_alloc_lock);
-
- /* Change page type and count atomically */
- if ( !get_page_and_type(page, d, PGT_shared_page) )
- {
- spin_unlock(&d->page_alloc_lock);
- return -EINVAL;
- }
-
- /* Check it wasn't already sharable and undo if it was */
- if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
- {
- put_page_and_type(page);
- spin_unlock(&d->page_alloc_lock);
- return -EEXIST;
- }
-
- /* Check if the ref count is 2. The first from PGC_allocated, and
- * the second from get_page_and_type at the top of this function */
- if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
- {
- /* Return type count back to zero */
- put_page_and_type(page);
- spin_unlock(&d->page_alloc_lock);
- return -E2BIG;
- }
-
- page_set_owner(page, dom_cow);
- d->tot_pages--;
- page_list_del(page, &d->page_list);
- spin_unlock(&d->page_alloc_lock);
- return 0;
-}
-
-int page_make_private(struct domain *d, struct page_info *page)
-{
- if ( !get_page(page, dom_cow) )
- return -EINVAL;
-
- spin_lock(&d->page_alloc_lock);
-
- /* We can only change the type if count is one */
- if ( (page->u.inuse.type_info & (PGT_type_mask | PGT_count_mask))
- != (PGT_shared_page | 1) )
- {
- put_page(page);
- spin_unlock(&d->page_alloc_lock);
- return -EEXIST;
- }
-
- /* Drop the final typecount */
- put_page_and_type(page);
-
- /* Change the owner */
- ASSERT(page_get_owner(page) == dom_cow);
- page_set_owner(page, d);
-
- d->tot_pages++;
- page_list_add_tail(page, &d->page_list);
- spin_unlock(&d->page_alloc_lock);
-
- put_page(page);
-
- return 0;
-}
-
static int __do_update_va_mapping(
unsigned long va, u64 val64, unsigned long flags, struct domain *pg_owner)
{
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 4e3873409f..a9ed1aa012 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -35,10 +35,21 @@
#include "mm-locks.h"
+static shr_handle_t next_handle = 1;
+
#if MEM_SHARING_AUDIT
+
+static mm_lock_t shr_lock;
+
+#define shr_lock() _shr_lock()
+#define shr_unlock() _shr_unlock()
+#define shr_locked_by_me() _shr_locked_by_me()
+
static void mem_sharing_audit(void);
+
#define MEM_SHARING_DEBUG(_f, _a...) \
debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
+
static struct list_head shr_audit_list;
static inline void audit_add_list(struct page_info *page)
@@ -51,11 +62,53 @@ static inline void audit_del_list(struct page_info *page)
{
list_del(&page->shared_info->entry);
}
+
+static inline int mem_sharing_page_lock(struct page_info *p)
+{
+ return 1;
+}
+#define mem_sharing_page_unlock(p) ((void)0)
+
+#define get_next_handle() next_handle++;
#else
+
+#define shr_lock() ((void)0)
+#define shr_unlock() ((void)0)
+/* Only used inside audit code */
+//#define shr_locked_by_me() ((void)0)
+
#define mem_sharing_audit() ((void)0)
#define audit_add_list(p) ((void)0)
#define audit_del_list(p) ((void)0)
+
+static inline int mem_sharing_page_lock(struct page_info *pg)
+{
+ int rc;
+ rc = page_lock(pg);
+ if ( rc )
+ {
+ preempt_disable();
+ }
+ return rc;
+}
+
+static inline void mem_sharing_page_unlock(struct page_info *pg)
+{
+ preempt_enable();
+ page_unlock(pg);
+}
+
+static inline shr_handle_t get_next_handle(void)
+{
+ /* Get the next handle get_page style */
+ uint64_t x, y = next_handle;
+ do {
+ x = y;
+ }
+ while ( (y = cmpxchg(&next_handle, x, x + 1)) != x );
+ return x + 1;
+}
#endif /* MEM_SHARING_AUDIT */
#define mem_sharing_enabled(d) \
@@ -68,7 +121,6 @@ static inline void audit_del_list(struct page_info *page)
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
-static shr_handle_t next_handle = 1;
static atomic_t nr_saved_mfns = ATOMIC_INIT(0);
typedef struct gfn_info
@@ -78,8 +130,6 @@ typedef struct gfn_info
struct list_head list;
} gfn_info_t;
-static mm_lock_t shr_lock;
-
/* Returns true if list has only one entry. O(1) complexity. */
static inline int list_has_one_entry(struct list_head *head)
{
@@ -398,6 +448,113 @@ int mem_sharing_debug_gref(struct domain *d, grant_ref_t ref)
return mem_sharing_debug_gfn(d, gfn);
}
+/* Functions that change a page's type and ownership */
+static int page_make_sharable(struct domain *d,
+ struct page_info *page,
+ int expected_refcnt)
+{
+ spin_lock(&d->page_alloc_lock);
+
+ /* Change page type and count atomically */
+ if ( !get_page_and_type(page, d, PGT_shared_page) )
+ {
+ spin_unlock(&d->page_alloc_lock);
+ return -EINVAL;
+ }
+
+ /* Check it wasn't already sharable and undo if it was */
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
+ {
+ put_page_and_type(page);
+ spin_unlock(&d->page_alloc_lock);
+ return -EEXIST;
+ }
+
+ /* Check if the ref count is 2. The first from PGC_allocated, and
+ * the second from get_page_and_type at the top of this function */
+ if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
+ {
+ /* Return type count back to zero */
+ put_page_and_type(page);
+ spin_unlock(&d->page_alloc_lock);
+ return -E2BIG;
+ }
+
+ page_set_owner(page, dom_cow);
+ d->tot_pages--;
+ page_list_del(page, &d->page_list);
+ spin_unlock(&d->page_alloc_lock);
+ return 0;
+}
+
+static int page_make_private(struct domain *d, struct page_info *page)
+{
+ unsigned long expected_type;
+
+ if ( !get_page(page, dom_cow) )
+ return -EINVAL;
+
+ spin_lock(&d->page_alloc_lock);
+
+ /* We can only change the type if count is one */
+ /* If we are locking pages individually, then we need to drop
+ * the lock here, while the page is typed. We cannot risk the
+ * race of page_unlock and then put_page_type. */
+#if MEM_SHARING_AUDIT
+ expected_type = (PGT_shared_page | PGT_validated | 1);
+#else
+ expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
+#endif
+ if ( page->u.inuse.type_info != expected_type )
+ {
+ put_page(page);
+ spin_unlock(&d->page_alloc_lock);
+ return -EEXIST;
+ }
+
+ /* Drop the final typecount */
+ put_page_and_type(page);
+
+#ifndef MEM_SHARING_AUDIT
+ /* Now that we've dropped the type, we can unlock */
+ mem_sharing_page_unlock(page);
+#endif
+
+ /* Change the owner */
+ ASSERT(page_get_owner(page) == dom_cow);
+ page_set_owner(page, d);
+
+ d->tot_pages++;
+ page_list_add_tail(page, &d->page_list);
+ spin_unlock(&d->page_alloc_lock);
+
+ put_page(page);
+
+ return 0;
+}
+
+static inline struct page_info *__grab_shared_page(mfn_t mfn)
+{
+ struct page_info *pg = NULL;
+
+ if ( !mfn_valid(mfn) )
+ return NULL;
+ pg = mfn_to_page(mfn);
+
+ /* If the page is not validated we can't lock it, and if it's
+ * not validated it's obviously not shared. */
+ if ( !mem_sharing_page_lock(pg) )
+ return NULL;
+
+ if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
+ {
+ mem_sharing_page_unlock(pg);
+ return NULL;
+ }
+
+ return pg;
+}
+
int mem_sharing_nominate_page(struct domain *d,
unsigned long gfn,
int expected_refcnt,
@@ -405,7 +562,7 @@ int mem_sharing_nominate_page(struct domain *d,
{
p2m_type_t p2mt;
mfn_t mfn;
- struct page_info *page;
+ struct page_info *page = NULL; /* gcc... */
int ret;
struct gfn_info *gfn_info;
@@ -420,10 +577,17 @@ int mem_sharing_nominate_page(struct domain *d,
goto out;
/* Return the handle if the page is already shared */
- page = mfn_to_page(mfn);
if ( p2m_is_shared(p2mt) ) {
- *phandle = page->shared_info->handle;
+ struct page_info *pg = __grab_shared_page(mfn);
+ if ( !pg )
+ {
+ gdprintk(XENLOG_ERR, "Shared p2m entry gfn %lx, but could not "
+ "grab page %lx dom %d\n", gfn, mfn_x(mfn), d->domain_id);
+ BUG();
+ }
+ *phandle = pg->shared_info->handle;
ret = 0;
+ mem_sharing_page_unlock(pg);
goto out;
}
@@ -432,15 +596,24 @@ int mem_sharing_nominate_page(struct domain *d,
goto out;
/* Try to convert the mfn to the sharable type */
+ page = mfn_to_page(mfn);
ret = page_make_sharable(d, page, expected_refcnt);
if ( ret )
goto out;
+ /* Now that the page is validated, we can lock it. There is no
+ * race because we're holding the p2m entry, so no one else
+ * could be nominating this gfn */
+ ret = -ENOENT;
+ if ( !mem_sharing_page_lock(page) )
+ goto out;
+
/* Initialize the shared state */
ret = -ENOMEM;
if ( (page->shared_info =
xmalloc(struct page_sharing_info)) == NULL )
{
+ /* Making a page private atomically unlocks it */
BUG_ON(page_make_private(d, page) != 0);
goto out;
}
@@ -448,7 +621,7 @@ int mem_sharing_nominate_page(struct domain *d,
INIT_LIST_HEAD(&page->shared_info->gfns);
/* Create the handle */
- page->shared_info->handle = next_handle++;
+ page->shared_info->handle = get_next_handle();
/* Create the local gfn info */
if ( (gfn_info = mem_sharing_gfn_alloc(page, d, gfn)) == NULL )
@@ -479,6 +652,7 @@ int mem_sharing_nominate_page(struct domain *d,
*phandle = page->shared_info->handle;
audit_add_list(page);
+ mem_sharing_page_unlock(page);
ret = 0;
out:
@@ -490,7 +664,7 @@ out:
int mem_sharing_share_pages(struct domain *sd, unsigned long sgfn, shr_handle_t sh,
struct domain *cd, unsigned long cgfn, shr_handle_t ch)
{
- struct page_info *spage, *cpage;
+ struct page_info *spage, *cpage, *firstpg, *secondpg;
struct list_head *le, *te;
gfn_info_t *gfn;
struct domain *d;
@@ -505,26 +679,63 @@ int mem_sharing_share_pages(struct domain *sd, unsigned long sgfn, shr_handle_t
smfn = get_gfn(sd, sgfn, &smfn_type);
cmfn = get_gfn(cd, cgfn, &cmfn_type);
- ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
- spage = mem_sharing_lookup(mfn_x(smfn));
- if ( spage == NULL )
+ /* This tricky business is to avoid two callers deadlocking if
+ * grabbing pages in opposite client/source order */
+ if( mfn_x(smfn) == mfn_x(cmfn) )
+ {
+ /* The pages are already the same. We could return some
+ * kind of error here, but no matter how you look at it,
+ * the pages are already 'shared'. It possibly represents
+ * a big problem somewhere else, but as far as sharing is
+ * concerned: great success! */
+ ret = 0;
goto err_out;
+ }
+ else if ( mfn_x(smfn) < mfn_x(cmfn) )
+ {
+ ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+ spage = firstpg = __grab_shared_page(smfn);
+ if ( spage == NULL )
+ goto err_out;
+
+ ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+ cpage = secondpg = __grab_shared_page(cmfn);
+ if ( cpage == NULL )
+ {
+ mem_sharing_page_unlock(spage);
+ goto err_out;
+ }
+ } else {
+ ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+ cpage = firstpg = __grab_shared_page(cmfn);
+ if ( cpage == NULL )
+ goto err_out;
+
+ ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+ spage = secondpg = __grab_shared_page(smfn);
+ if ( spage == NULL )
+ {
+ mem_sharing_page_unlock(cpage);
+ goto err_out;
+ }
+ }
+
ASSERT(smfn_type == p2m_ram_shared);
- ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
- cpage = mem_sharing_lookup(mfn_x(cmfn));
- if ( cpage == NULL )
- goto err_out;
ASSERT(cmfn_type == p2m_ram_shared);
/* Check that the handles match */
if ( spage->shared_info->handle != sh )
{
ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
+ mem_sharing_page_unlock(secondpg);
+ mem_sharing_page_unlock(firstpg);
goto err_out;
}
if ( cpage->shared_info->handle != ch )
{
ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
+ mem_sharing_page_unlock(secondpg);
+ mem_sharing_page_unlock(firstpg);
goto err_out;
}
@@ -551,6 +762,9 @@ int mem_sharing_share_pages(struct domain *sd, unsigned long sgfn, shr_handle_t
xfree(cpage->shared_info);
cpage->shared_info = NULL;
+ mem_sharing_page_unlock(secondpg);
+ mem_sharing_page_unlock(firstpg);
+
/* Free the client page */
if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
put_page(cpage);
@@ -592,7 +806,7 @@ int mem_sharing_unshare_page(struct domain *d,
return 0;
}
- page = mem_sharing_lookup(mfn_x(mfn));
+ page = __grab_shared_page(mfn);
if ( page == NULL )
{
gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: "
@@ -628,18 +842,20 @@ gfn_found:
* (possibly freeing the page), and exit early */
if ( flags & MEM_SHARING_DESTROY_GFN )
{
- put_gfn(d, gfn);
- shr_unlock();
put_page_and_type(page);
+ mem_sharing_page_unlock(page);
if ( last_gfn &&
test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
+ put_gfn(d, gfn);
+ shr_unlock();
return 0;
}
if ( last_gfn )
{
+ /* Making a page private atomically unlocks it */
BUG_ON(page_make_private(d, page) != 0);
goto private_page_found;
}
@@ -648,6 +864,7 @@ gfn_found:
page = alloc_domheap_page(d, 0);
if ( !page )
{
+ mem_sharing_page_unlock(old_page);
put_gfn(d, gfn);
mem_sharing_notify_helper(d, gfn);
shr_unlock();
@@ -661,6 +878,7 @@ gfn_found:
unmap_domain_page(t);
BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) == 0);
+ mem_sharing_page_unlock(old_page);
put_page_and_type(old_page);
private_page_found:
@@ -678,6 +896,7 @@ private_page_found:
/* Now that the gfn<->mfn map is properly established,
* marking dirty is feasible */
paging_mark_dirty(d, mfn_x(page_to_mfn(page)));
+ /* We do not need to unlock a private page */
put_gfn(d, gfn);
shr_unlock();
return 0;
@@ -826,8 +1045,8 @@ int mem_sharing_domctl(struct domain *d, xen_domctl_mem_sharing_op_t *mec)
void __init mem_sharing_init(void)
{
printk("Initing memory sharing.\n");
- mm_lock_init(&shr_lock);
#if MEM_SHARING_AUDIT
+ mm_lock_init(&shr_lock);
INIT_LIST_HEAD(&shr_audit_list);
#endif
}
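Because the global shr_lock no longer serializes nominations in the non-audit
build, handle generation in the hunk above relies on a cmpxchg retry loop. A
standalone, compilable illustration of the same pattern, using plain C11
atomics rather than Xen's cmpxchg (not part of the patch):

/* Illustration only: a lock-free monotonic counter.  The compare-exchange
 * loop retries whenever another thread bumped the counter between our read
 * and our swap, so every caller gets a unique handle. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t next_handle = 1;

static uint64_t get_next_handle(void)
{
    uint64_t x = atomic_load(&next_handle);
    while ( !atomic_compare_exchange_weak(&next_handle, &x, x + 1) )
        ;   /* on failure, x is reloaded with the current value; retry */
    return x + 1;   /* the value we just installed */
}

int main(void)
{
    printf("%llu\n", (unsigned long long)get_next_handle()); /* prints 2 */
    return 0;
}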
diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index 058de9a943..eaafbe1086 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -26,6 +26,8 @@
#ifndef _MM_LOCKS_H
#define _MM_LOCKS_H
+#include <asm/mem_sharing.h>
+
/* Per-CPU variable for enforcing the lock ordering */
DECLARE_PER_CPU(int, mm_lock_level);
#define __get_lock_level() (this_cpu(mm_lock_level))
@@ -141,15 +143,22 @@ static inline void mm_enforce_order_unlock(int unlock_level,
* *
************************************************************************/
+#if MEM_SHARING_AUDIT
/* Page-sharing lock (global)
*
* A single global lock that protects the memory-sharing code's
* hash tables. */
declare_mm_lock(shr)
-#define shr_lock() mm_lock(shr, &shr_lock)
-#define shr_unlock() mm_unlock(&shr_lock)
-#define shr_locked_by_me() mm_locked_by_me(&shr_lock)
+#define _shr_lock() mm_lock(shr, &shr_lock)
+#define _shr_unlock() mm_unlock(&shr_lock)
+#define _shr_locked_by_me() mm_locked_by_me(&shr_lock)
+
+#else
+
+/* We use an efficient per-page lock when AUDIT is not enabled. */
+
+#endif /* MEM_SHARING_AUDIT */
/* Nested P2M lock (per-domain)
*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index b23479c481..c8d5d66e4f 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -337,6 +337,29 @@ int is_iomem_page(unsigned long mfn);
void clear_superpage_mark(struct page_info *page);
+/* Per page locks:
+ * page_lock() is used for two purposes: pte serialization, and memory sharing.
+ *
+ * All users of page lock for pte serialization live in mm.c, use it
+ * to lock a page table page during pte updates, do not take other locks within
+ * the critical section delimited by page_lock/unlock, and perform no
+ * nesting.
+ *
+ * All users of page lock for memory sharing live in mm/mem_sharing.c. Page_lock
+ * is used in memory sharing to protect addition (share) and removal (unshare)
+ * of (gfn,domain) tupples to a list of gfn's that the shared page is currently
+ * backing. Nesting may happen when sharing (and locking) two pages -- deadlock
+ * is avoided by locking pages in increasing order.
+ * Memory sharing may take the p2m_lock within a page_lock/unlock
+ * critical section.
+ *
+ * These two users (pte serialization and memory sharing) do not collide, since
+ * sharing is only supported for hvm guests, which do not perform pv pte updates.
+ *
+ */
+int page_lock(struct page_info *page);
+void page_unlock(struct page_info *page);
+
struct domain *page_get_owner_and_reference(struct page_info *page);
void put_page(struct page_info *page);
int get_page(struct page_info *page, struct domain *domain);
@@ -588,10 +611,6 @@ int steal_page(
struct domain *d, struct page_info *page, unsigned int memflags);
int donate_page(
struct domain *d, struct page_info *page, unsigned int memflags);
-int page_make_sharable(struct domain *d,
- struct page_info *page,
- int expected_refcnt);
-int page_make_private(struct domain *d, struct page_info *page);
int map_ldt_shadow_page(unsigned int);
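As a rough usage sketch of the discipline described in the comment added to
asm-x86/mm.h above (the function below is illustrative only, not part of the
patch):

/* Illustrative only: a memory-sharing critical section under the new
 * per-page locking.  mem_sharing_page_lock() takes PGT_locked and disables
 * preemption; mem_sharing_page_unlock() reverses both. */
static void example_shared_list_update(struct page_info *pg)
{
    if ( !mem_sharing_page_lock(pg) )
        return;

    /* ... add or remove a (gfn,domain) tuple on pg->shared_info->gfns ... */

    mem_sharing_page_unlock(pg);
}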