Re-order calls to put_gfn() around wait queue invocations

Since we use wait queues to handle potential ring congestion cases, code paths that try to generate a mem event while holding a gfn lock would go to sleep in non-preemptible mode. Most such code paths can be fixed by simply postponing event generation until locks are released.

Signed-off-by: Adin Scannell <adin@scannell.ca>
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
author: Andres Lagar-Cavilla <andres@lagarcavilla.org> 2012-02-10 16:07:07 +0000
committer: Andres Lagar-Cavilla <andres@lagarcavilla.org> 2012-02-10 16:07:07 +0000
commit: 2981fb7d9dcbbe36228042fbf00618bb56ebafea (patch)
tree: 21e024a9049fee2493626b68116913b83d187025 /xen
parent: 99af3cd40b6e9fde63daddcec86ee5e6db338d94 (diff)
download: xen-2981fb7d9dcbbe36228042fbf00618bb56ebafea.tar.gz
xen-2981fb7d9dcbbe36228042fbf00618bb56ebafea.tar.bz2
xen-2981fb7d9dcbbe36228042fbf00618bb56ebafea.zip
13 files changed, 83 insertions, 47 deletions
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 53cddbecf4..c40c3d61a4 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -67,8 +67,8 @@ static int hvmemul_do_io(
     ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
     if ( p2m_is_paging(p2mt) )
     {
-        p2m_mem_paging_populate(curr->domain, ram_gfn);
         put_gfn(curr->domain, ram_gfn); 
+        p2m_mem_paging_populate(curr->domain, ram_gfn);
         return X86EMUL_RETRY;
     }
     if ( p2m_is_shared(p2mt) )
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index f07b5939de..c6f5c63ad0 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -64,6 +64,7 @@
 #include <public/version.h>
 #include <public/memory.h>
 #include <asm/mem_event.h>
+#include <asm/mem_access.h>
 #include <public/mem_event.h>
 
 bool_t __read_mostly hvm_enabled;
@@ -363,8 +364,8 @@ static int hvm_set_ioreq_page(
     }
     if ( p2m_is_paging(p2mt) )
     {
-        p2m_mem_paging_populate(d, gmfn);
         put_gfn(d, gmfn);
+        p2m_mem_paging_populate(d, gmfn);
         return -ENOENT;
     }
     if ( p2m_is_shared(p2mt) )
@@ -1195,7 +1196,8 @@ int hvm_hap_nested_page_fault(unsigned long gpa,
     mfn_t mfn;
     struct vcpu *v = current;
     struct p2m_domain *p2m;
-    int rc, fall_through = 0;
+    int rc, fall_through = 0, paged = 0;
+    mem_event_request_t *req_ptr = NULL;
 
     /* On Nested Virtualization, walk the guest page table.
      * If this succeeds, all is fine.
@@ -1270,7 +1272,7 @@ int hvm_hap_nested_page_fault(unsigned long gpa,
         if ( violation )
         {
             if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r, 
-                                        access_w, access_x) )
+                                        access_w, access_x, &req_ptr) )
             {
                 fall_through = 1;
             } else {
@@ -1297,7 +1299,7 @@ int hvm_hap_nested_page_fault(unsigned long gpa,
 #ifdef __x86_64__
     /* Check if the page has been paged out */
     if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) )
-        p2m_mem_paging_populate(v->domain, gfn);
+        paged = 1;
 
     /* Mem sharing: unshare the page and try again */
     if ( access_w && (p2mt == p2m_ram_shared) )
@@ -1343,6 +1345,13 @@ int hvm_hap_nested_page_fault(unsigned long gpa,
 
 out_put_gfn:
     put_gfn(p2m->domain, gfn);
+    if ( paged )
+        p2m_mem_paging_populate(v->domain, gfn);
+    if ( req_ptr )
+    {
+        mem_access_send_req(v->domain, req_ptr);
+        xfree(req_ptr);
+    }
     return rc;
 }
 
@@ -1849,8 +1858,8 @@ static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
     }
     if ( p2m_is_paging(p2mt) )
     {
-        p2m_mem_paging_populate(d, gfn);
         put_gfn(d, gfn);
+        p2m_mem_paging_populate(d, gfn);
         return NULL;
     }
 
@@ -2325,8 +2334,8 @@ static enum hvm_copy_result __hvm_copy(
 
         if ( p2m_is_paging(p2mt) )
         {
-            p2m_mem_paging_populate(curr->domain, gfn);
             put_gfn(curr->domain, gfn);
+            p2m_mem_paging_populate(curr->domain, gfn);
             return HVMCOPY_gfn_paged_out;
         }
         if ( p2m_is_shared(p2mt) )
@@ -3923,8 +3932,8 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
             mfn_t mfn = get_gfn_unshare(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
-                p2m_mem_paging_populate(d, pfn);
                 put_gfn(d, pfn);
+                p2m_mem_paging_populate(d, pfn);
                 rc = -EINVAL;
                 goto param_fail3;
             }
@@ -4040,8 +4049,8 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
             mfn = get_gfn_unshare(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
-                p2m_mem_paging_populate(d, pfn);
                 put_gfn(d, pfn);
+                p2m_mem_paging_populate(d, pfn);
                 rc = -EINVAL;
                 goto param_fail4;
             }
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index c8829c30d7..120033beeb 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3536,8 +3536,8 @@ int do_mmu_update(
 
             if ( p2m_is_paged(p2mt) )
             {
-                p2m_mem_paging_populate(pg_owner, gmfn);
                 put_gfn(pt_owner, gmfn);
+                p2m_mem_paging_populate(pg_owner, gmfn);
                 rc = -ENOENT;
                 break;
             }
@@ -3568,8 +3568,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l1e_p2mt) )
                     {
-                        p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
                         put_gfn(pg_owner, l1egfn);
+                        p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
                         rc = -ENOENT;
                         break;
                     }
@@ -3617,8 +3617,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l2e_p2mt) )
                     {
-                        p2m_mem_paging_populate(pg_owner, l2egfn);
                         put_gfn(pg_owner, l2egfn);
+                        p2m_mem_paging_populate(pg_owner, l2egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3652,8 +3652,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l3e_p2mt) )
                     {
-                        p2m_mem_paging_populate(pg_owner, l3egfn);
                         put_gfn(pg_owner, l3egfn);
+                        p2m_mem_paging_populate(pg_owner, l3egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3687,8 +3687,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l4e_p2mt) )
                     {
-                        p2m_mem_paging_populate(pg_owner, l4egfn);
                         put_gfn(pg_owner, l4egfn);
+                        p2m_mem_paging_populate(pg_owner, l4egfn);
                         rc = -ENOENT;
                         break;
                     }
diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c
index 699bd7e468..bc4a82f878 100644
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -102,8 +102,8 @@ static inline void *map_domain_gfn(struct p2m_domain *p2m,
     if ( p2m_is_paging(*p2mt) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
-        p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
         __put_gfn(p2m, gfn_x(gfn));
+        p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
         *rc = _PAGE_PAGED;
         return NULL;
     }
diff --git a/xen/arch/x86/mm/hap/guest_walk.c b/xen/arch/x86/mm/hap/guest_walk.c
index 3191e76225..afa2a81f37 100644
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -64,10 +64,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
     if ( p2m_is_paging(p2mt) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
-        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
-
         pfec[0] = PFEC_page_paged;
         __put_gfn(p2m, top_gfn);
+        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
         return INVALID_GFN;
     }
     if ( p2m_is_shared(p2mt) )
@@ -101,10 +100,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
         if ( p2m_is_paging(p2mt) )
         {
             ASSERT(!p2m_is_nestedp2m(p2m));
-            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
-
             pfec[0] = PFEC_page_paged;
             __put_gfn(p2m, gfn_x(gfn));
+            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
             return INVALID_GFN;
         }
         if ( p2m_is_shared(p2mt) )
diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index 74fa4d3424..f52d993840 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -47,6 +47,16 @@ int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
     return rc;
 }
 
+int mem_access_send_req(struct domain *d, mem_event_request_t *req)
+{
+    int rc = mem_event_claim_slot(d, &d->mem_event->access);
+    if ( rc < 0 )
+        return rc;
+
+    mem_event_put_request(d, &d->mem_event->access, req);
+
+    return 0;
+} 
 
 /*
  * Local variables:
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index 0752e3273a..b1333e1330 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -423,6 +423,11 @@ static int mem_event_wait_slot(struct mem_event_domain *med)
     return rc;
 }
 
+bool_t mem_event_check_ring(struct mem_event_domain *med)
+{
+    return (med->ring_page != NULL);
+}
+
 /*
  * Determines whether or not the current vCPU belongs to the target domain,
  * and calls the appropriate wait function.  If it is a guest vCPU, then we
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index c7669dff1b..8bbcce04e5 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1130,17 +1130,18 @@ void p2m_mem_paging_resume(struct domain *d)
 }
 
 bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, 
-                          bool_t access_r, bool_t access_w, bool_t access_x)
+                          bool_t access_r, bool_t access_w, bool_t access_x,
+                          mem_event_request_t **req_ptr)
 {
     struct vcpu *v = current;
-    mem_event_request_t req;
     unsigned long gfn = gpa >> PAGE_SHIFT;
     struct domain *d = v->domain;    
     struct p2m_domain* p2m = p2m_get_hostp2m(d);
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
-    
+    mem_event_request_t *req;
+
     /* First, handle rx2rw conversion automatically */
     gfn_lock(p2m, gfn, 0);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query, NULL);
@@ -1159,7 +1160,7 @@ bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long g
     gfn_unlock(p2m, gfn, 0);
 
     /* Otherwise, check if there is a memory event listener, and send the message along */
-    if ( mem_event_claim_slot(d, &d->mem_event->access) == -ENOSYS )
+    if ( !mem_event_check_ring(&d->mem_event->access) || !req_ptr ) 
     {
         /* No listener */
         if ( p2m->access_required ) 
@@ -1183,29 +1184,34 @@ bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long g
         }
     }
 
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_ACCESS;
-    req.reason = MEM_EVENT_REASON_VIOLATION;
+    *req_ptr = NULL;
+    req = xmalloc(mem_event_request_t);
+    if ( req )
+    {
+        *req_ptr = req;
+        memset(req, 0, sizeof(*req)); /* whole struct, not pointer size */
+        req->type = MEM_EVENT_TYPE_ACCESS;
+        req->reason = MEM_EVENT_REASON_VIOLATION;
+
+        /* Tell the listener that the current VCPU is being paused */
+        if ( p2ma != p2m_access_n2rwx )
+            req->flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+
+        /* Fill in the request; the caller queues it after dropping gfn locks */
+        req->gfn = gfn;
+        req->offset = gpa & ((1 << PAGE_SHIFT) - 1);
+        req->gla_valid = gla_valid;
+        req->gla = gla;
+        req->access_r = access_r;
+        req->access_w = access_w;
+        req->access_x = access_x;
+    
+        req->vcpu_id = v->vcpu_id;
+    }
 
     /* Pause the current VCPU */
     if ( p2ma != p2m_access_n2rwx )
-    {
         vcpu_pause_nosync(v);
-        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-    } 
-
-    /* Send request to mem event */
-    req.gfn = gfn;
-    req.offset = gpa & ((1 << PAGE_SHIFT) - 1);
-    req.gla_valid = gla_valid;
-    req.gla = gla;
-    req.access_r = access_r;
-    req.access_w = access_w;
-    req.access_x = access_x;
-    
-    req.vcpu_id = v->vcpu_id;
-
-    mem_event_put_request(d, &d->mem_event->access, &req);
 
     /* VCPU may be paused, return whether we promoted automatically */
     return (p2ma == p2m_access_n2rwx);
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 8a0f38664d..9defa4d74c 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -164,8 +164,8 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, int readon
         *frame = mfn_x(mfn);
         if ( p2m_is_paging(p2mt) )
         {
-            p2m_mem_paging_populate(rd, gfn);
             put_gfn(rd, gfn);
+            p2m_mem_paging_populate(rd, gfn);
             rc = GNTST_eagain;
         }
     } else {
diff --git a/xen/common/memory.c b/xen/common/memory.c
index df94dc2ca7..781053d00c 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -166,8 +166,8 @@ int guest_remove_page(struct domain *d, unsigned long gmfn)
     if ( unlikely(p2m_is_paging(p2mt)) )
     {
         guest_physmap_remove_page(d, gmfn, mfn, 0);
-        p2m_mem_paging_drop_page(d, gmfn, p2mt);
         put_gfn(d, gmfn);
+        p2m_mem_paging_drop_page(d, gmfn, p2mt);
         return 1;
     }
 #else
diff --git a/xen/include/asm-x86/mem_access.h b/xen/include/asm-x86/mem_access.h
index 4f038f97b0..fe3c349aa7 100644
--- a/xen/include/asm-x86/mem_access.h
+++ b/xen/include/asm-x86/mem_access.h
@@ -23,6 +23,7 @@
 
 int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                       XEN_GUEST_HANDLE(void) u_domctl);
+int mem_access_send_req(struct domain *d, mem_event_request_t *req);
 
 
 /*
diff --git a/xen/include/asm-x86/mem_event.h b/xen/include/asm-x86/mem_event.h
index c157900c80..7b2d4d3a88 100644
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,6 +24,9 @@
 #ifndef __MEM_EVENT_H__
 #define __MEM_EVENT_H__
 
+/* Returns whether a ring has been set up */
+bool_t mem_event_check_ring(struct mem_event_domain *med);
+
 /* Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is no
  * available space. For success or -EBUSY, the vCPU may be left blocked
  * temporarily to ensure that the ring does not lose future events.  In
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index bf05de16a5..e03eb10a27 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -494,9 +494,12 @@ static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 #ifdef __x86_64__
 /* Send mem event based on the access (gla is -1ull if not available).  Handles
  * the rw2rx conversion. Boolean return value indicates if access rights have 
- * been promoted with no underlying vcpu pause. */
+ * been promoted with no underlying vcpu pause. If the req_ptr has been populated, 
+ * then the caller must put the event in the ring (once having released get_gfn*
+ * locks) -- caller must also xfree the request. */
 bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, 
-                          bool_t access_r, bool_t access_w, bool_t access_x);
+                          bool_t access_r, bool_t access_w, bool_t access_x,
+                          mem_event_request_t **req_ptr);
 /* Resumes the running of the VCPU, restarting the last instruction */
 void p2m_mem_access_resume(struct domain *d);
 
@@ -513,7 +516,8 @@ int p2m_get_mem_access(struct domain *d, unsigned long pfn,
 #else
 static inline bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, 
                                         unsigned long gla, bool_t access_r, 
-                                        bool_t access_w, bool_t access_x)
+                                        bool_t access_w, bool_t access_x,
+                                        mem_event_request_t **req_ptr)
 { return 1; }
 static inline int p2m_set_mem_access(struct domain *d, 
                                      unsigned long start_pfn,
author	Andres Lagar-Cavilla <andres@lagarcavilla.org>	2012-02-10 16:07:07 +0000
committer	Andres Lagar-Cavilla <andres@lagarcavilla.org>	2012-02-10 16:07:07 +0000
commit	2981fb7d9dcbbe36228042fbf00618bb56ebafea (patch)
tree	21e024a9049fee2493626b68116913b83d187025 /xen
parent	99af3cd40b6e9fde63daddcec86ee5e6db338d94 (diff)
download	xen-2981fb7d9dcbbe36228042fbf00618bb56ebafea.tar.gz xen-2981fb7d9dcbbe36228042fbf00618bb56ebafea.tar.bz2 xen-2981fb7d9dcbbe36228042fbf00618bb56ebafea.zip