author    Andres Lagar-Cavilla <andres@lagarcavilla.org>    2012-01-19 10:38:47 +0000
committer Andres Lagar-Cavilla <andres@lagarcavilla.org>    2012-01-19 10:38:47 +0000
commit    3643a961195f76ba849a213628c1979240e6fbdd (patch)
tree      f52a9b0fd47756a1ddd35ec1493fd6271ddf60fd /xen
parent    a6ec974704c7ed6a48ca75957655a79cb9a55a7f (diff)
x86/mm: Improve ring management for memory events. Do not lose guest events
This patch is an amalgamation of the work done by Olaf Hering <olaf@aepfle.de> and our work. It combines logic changes that simplify the memory event API with the use of wait queues to handle extreme conditions in which a guest vcpu generates too many events.

To generate a new event, a slot in the ring is claimed. If a guest vcpu is generating the event and there is no space, it is put on a wait queue. If a foreign vcpu is generating the event and there is no space, the vcpu is expected to retry its operation. If an error happens later, the claimed slot is returned via a cancel operation. Thus, the API has only four calls: claim a slot, cancel a claimed slot, put a request in the ring, and consume a response.

With all these mechanisms, no guest events are lost.

Our testing covers two scenarios: (1) ballooning down 512 MiB; (2) using mem access n2rwx, in which every page access in a four-vCPU guest results in an event, with no vCPU pausing, while the four vCPUs touch all of RAM. No guest events were lost in either case, and qemu-dm had no mapping problems.

Signed-off-by: Adin Scannell <adin@scannell.ca>
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
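[Editor's note: as an illustration of the calling convention described above, a producer path under the new API looks roughly like the sketch below. It is a composite of the call sites touched by this patch (hvm.c, p2m.c), not code from the patch itself; the helper name send_access_event() and its still_needed trigger are hypothetical.]

/* Hypothetical producer path: claim a slot, then either post the request
 * or cancel the claim.  Mirrors hvm_memory_event_traps() and
 * p2m_mem_paging_populate() as modified by this patch. */
static int send_access_event(struct domain *d, unsigned long gfn,
                             bool_t still_needed)
{
    mem_event_request_t req;
    int rc;

    rc = mem_event_claim_slot(d, &d->mem_event->access);
    if ( rc == -ENOSYS )
        return 0;          /* No ring configured: nothing is listening. */
    else if ( rc < 0 )
        return rc;         /* Foreign vcpu and ring full: caller retries. */

    if ( !still_needed )
    {
        /* The condition resolved while we held the claim: give the
         * slot back instead of posting a stale event. */
        mem_event_cancel_slot(d, &d->mem_event->access);
        return 0;
    }

    memset(&req, 0, sizeof(req));
    req.type = MEM_EVENT_TYPE_ACCESS;
    req.gfn = gfn;
    req.vcpu_id = current->vcpu_id;

    if ( current->domain == d )
    {
        /* Pause the faulting vcpu until the listener responds. */
        vcpu_pause_nosync(current);
        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
    }

    /* Guaranteed to succeed after a successful claim. */
    mem_event_put_request(d, &d->mem_event->access, &req);

    return 0;
}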
Diffstat (limited to 'xen')
-rw-r--r--   xen/arch/x86/hvm/hvm.c            |  16
-rw-r--r--   xen/arch/x86/mm/mem_event.c       | 298
-rw-r--r--   xen/arch/x86/mm/mem_sharing.c     |  30
-rw-r--r--   xen/arch/x86/mm/p2m.c             |  71
-rw-r--r--   xen/include/asm-x86/mem_event.h   |  22
-rw-r--r--   xen/include/xen/mm.h              |   2
-rw-r--r--   xen/include/xen/sched.h           |  22
7 files changed, 340 insertions, 121 deletions
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 160a47f4b6..01c1411e0b 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4198,15 +4198,21 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
-
- rc = mem_event_check_ring(d, &d->mem_event->access);
- if ( rc )
+
+ rc = mem_event_claim_slot(d, &d->mem_event->access);
+ if ( rc == -ENOSYS )
+ {
+ /* If there was no ring to handle the event, then
+ * simply continue executing normally. */
+ return 1;
+ }
+ else if ( rc < 0 )
return rc;
-
+
memset(&req, 0, sizeof(req));
req.type = MEM_EVENT_TYPE_ACCESS;
req.reason = reason;
-
+
if ( (p & HVMPME_MODE_MASK) == HVMPME_mode_sync )
{
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index d666dfc7d8..0752e3273a 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
@@ -41,6 +42,7 @@ static int mem_event_enable(
struct domain *d,
xen_domctl_mem_event_op_t *mec,
struct mem_event_domain *med,
+ int pause_flag,
xen_event_channel_notification_t notification_fn)
{
int rc;
@@ -75,6 +77,8 @@ static int mem_event_enable(
return -EINVAL;
}
+ mem_event_ring_lock_init(med);
+
/* Get MFN of shared page */
guest_get_eff_l1e(v, shared_addr, &l1e);
shared_gfn = l1e_get_pfn(l1e);
@@ -93,6 +97,9 @@ static int mem_event_enable(
put_gfn(dom_mem_event, ring_gfn);
put_gfn(dom_mem_event, shared_gfn);
+ /* Set the number of currently blocked vCPUs to 0. */
+ med->blocked = 0;
+
/* Allocate event channel */
rc = alloc_unbound_xen_event_channel(d->vcpu[0],
current->domain->domain_id,
@@ -108,10 +115,11 @@ static int mem_event_enable(
(mem_event_sring_t *)med->ring_page,
PAGE_SIZE);
- mem_event_ring_lock_init(med);
+ /* Save the pause flag for this particular ring. */
+ med->pause_flag = pause_flag;
- /* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ /* Initialize the last-chance wait queue. */
+ init_waitqueue_head(&med->wq);
return 0;
@@ -125,26 +133,177 @@ static int mem_event_enable(
return rc;
}
-static int mem_event_disable(struct mem_event_domain *med)
+static unsigned int mem_event_ring_available(struct mem_event_domain *med)
{
- unmap_domain_page(med->ring_page);
- med->ring_page = NULL;
+ int avail_req = RING_FREE_REQUESTS(&med->front_ring);
+ avail_req -= med->target_producers;
+ avail_req -= med->foreign_producers;
- unmap_domain_page(med->shared_page);
- med->shared_page = NULL;
+ BUG_ON(avail_req < 0);
+
+ return avail_req;
+}
+
+/*
+ * mem_event_wake_blocked() will wake up vCPUs waiting for room in the
+ * ring. These vCPUs were paused on their way out after placing an event,
+ * but need to be resumed once the ring is able to process at least one
+ * event from them.
+ */
+static void mem_event_wake_blocked(struct domain *d, struct mem_event_domain *med)
+{
+ struct vcpu *v;
+ int online = d->max_vcpus;
+ unsigned int avail_req = mem_event_ring_available(med);
+
+ if ( avail_req == 0 || med->blocked == 0 )
+ return;
+
+ /*
+ * We ensure that we only have vCPUs online if there are enough free slots
+ * for their memory events to be processed. This will ensure that no
+ * memory events are lost (due to the fact that certain types of events
+ * cannot be replayed, we need to ensure that there is space in the ring
+ * for when they are hit).
+ * See comment below in mem_event_put_request().
+ */
+ for_each_vcpu ( d, v )
+ if ( test_bit(med->pause_flag, &v->pause_flags) )
+ online--;
+
+ ASSERT(online == (d->max_vcpus - med->blocked));
+
+ /* We remember which vcpu last woke up, to avoid always scanning linearly
+ * from zero and starving higher-numbered vcpus under high load. */
+ if ( d->vcpu )
+ {
+ int i, j, k;
+
+ for (i = med->last_vcpu_wake_up + 1, j = 0; j < d->max_vcpus; i++, j++)
+ {
+ k = i % d->max_vcpus;
+ v = d->vcpu[k];
+ if ( !v )
+ continue;
+
+ if ( !(med->blocked) || online >= avail_req )
+ break;
+
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_unpause(v);
+ online++;
+ med->blocked--;
+ med->last_vcpu_wake_up = k;
+ }
+ }
+ }
+}
+
+/*
+ * In the event that a vCPU attempted to place an event in the ring and
+ * was unable to do so, it is queued on a wait queue. These are woken as
+ * needed, and take precedence over the blocked vCPUs.
+ */
+static void mem_event_wake_queued(struct domain *d, struct mem_event_domain *med)
+{
+ unsigned int avail_req = mem_event_ring_available(med);
+
+ if ( avail_req > 0 )
+ wake_up_nr(&med->wq, avail_req);
+}
+
+/*
+ * mem_event_wake() will wake up all vCPUs waiting for the ring to
+ * become available. If we have queued vCPUs, they get top priority. We
+ * are guaranteed that they will go through code paths that will eventually
+ * call mem_event_wake() again, ensuring that any blocked vCPUs will get
+ * unpaused once all the queued vCPUs have made it through.
+ */
+void mem_event_wake(struct domain *d, struct mem_event_domain *med)
+{
+ if (!list_empty(&med->wq.list))
+ mem_event_wake_queued(d, med);
+ else
+ mem_event_wake_blocked(d, med);
+}
+
+static int mem_event_disable(struct domain *d, struct mem_event_domain *med)
+{
+ if ( med->ring_page )
+ {
+ struct vcpu *v;
+
+ mem_event_ring_lock(med);
+
+ if ( !list_empty(&med->wq.list) )
+ {
+ mem_event_ring_unlock(med);
+ return -EBUSY;
+ }
+
+ unmap_domain_page(med->ring_page);
+ med->ring_page = NULL;
+
+ unmap_domain_page(med->shared_page);
+ med->shared_page = NULL;
+
+ /* Unblock all vCPUs */
+ for_each_vcpu ( d, v )
+ {
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_unpause(v);
+ med->blocked--;
+ }
+ }
+
+ mem_event_ring_unlock(med);
+ }
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static inline void mem_event_release_slot(struct domain *d,
+ struct mem_event_domain *med)
{
- mem_event_front_ring_t *front_ring;
- RING_IDX req_prod;
+ /* Update the accounting */
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
+
+ /* Kick any waiters */
+ mem_event_wake(d, med);
+}
- mem_event_ring_lock(med);
+/*
+ * mem_event_mark_and_pause() tags a vCPU and puts it to sleep.
+ * The vCPU will resume execution in mem_event_wake_blocked().
+ */
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
+{
+ if ( !test_and_set_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_pause_nosync(v);
+ med->blocked++;
+ }
+}
- front_ring = &med->front_ring;
- req_prod = front_ring->req_prod_pvt;
+/*
+ * This must be preceded by a call to claim_slot(), and is guaranteed to
+ * succeed. As a side-effect however, the vCPU may be paused if the ring is
+ * overly full and its continued execution would cause stalling and excessive
+ * waiting. The vCPU will be automatically unpaused when the ring clears.
+ */
+void mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ mem_event_front_ring_t *front_ring;
+ int free_req;
+ unsigned int avail_req;
+ RING_IDX req_prod;
if ( current->domain != d )
{
@@ -152,21 +311,38 @@ void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_e
ASSERT( !(req->flags & MEM_EVENT_FLAG_VCPU_PAUSED) );
}
+ mem_event_ring_lock(med);
+
+ /* Due to the reservations, this step must succeed. */
+ front_ring = &med->front_ring;
+ free_req = RING_FREE_REQUESTS(front_ring);
+ ASSERT(free_req > 0);
+
/* Copy request */
+ req_prod = front_ring->req_prod_pvt;
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
+ /* We've actually *used* our reservation, so release the slot. */
+ mem_event_release_slot(d, med);
+
+ /* Give this vCPU a black eye if necessary, on the way out.
+ * See the comments above wake_blocked() for more information
+ * on how this mechanism works to avoid waiting. */
+ avail_req = mem_event_ring_available(med);
+ if( current->domain == d && avail_req < d->max_vcpus )
+ mem_event_mark_and_pause(current, med);
+
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
}
-int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp)
{
mem_event_front_ring_t *front_ring;
RING_IDX rsp_cons;
@@ -190,57 +366,81 @@ int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *r
front_ring->rsp_cons = rsp_cons;
front_ring->sring->rsp_event = rsp_cons + 1;
+ /* Kick any waiters -- since we've just consumed an event,
+ * there may be additional space available in the ring. */
+ mem_event_wake(d, med);
+
mem_event_ring_unlock(med);
return 1;
}
-void mem_event_unpause_vcpus(struct domain *d)
-{
- struct vcpu *v;
-
- for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
- vcpu_wake(v);
-}
-
-void mem_event_mark_and_pause(struct vcpu *v)
-{
- set_bit(_VPF_mem_event, &v->pause_flags);
- vcpu_sleep_nosync(v);
-}
-
-void mem_event_put_req_producers(struct mem_event_domain *med)
+void mem_event_cancel_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ mem_event_release_slot(d, med);
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+static int mem_event_grab_slot(struct mem_event_domain *med, int foreign)
{
- struct vcpu *curr = current;
- int free_requests;
- int ring_full = 1;
+ unsigned int avail_req;
if ( !med->ring_page )
- return -1;
+ return -ENOSYS;
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ avail_req = mem_event_ring_available(med);
+ if ( avail_req == 0 )
{
- med->req_producers++;
- ring_full = 0;
+ mem_event_ring_unlock(med);
+ return -EBUSY;
}
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
+ if ( !foreign )
+ med->target_producers++;
+ else
+ med->foreign_producers++;
mem_event_ring_unlock(med);
- return ring_full;
+ return 0;
+}
+
+/* Simple try_grab wrapper for use in the wait_event() macro. */
+static int mem_event_wait_try_grab(struct mem_event_domain *med, int *rc)
+{
+ *rc = mem_event_grab_slot(med, 0);
+ return *rc;
+}
+
+/* Call mem_event_grab_slot() until the ring doesn't exist, or is available. */
+static int mem_event_wait_slot(struct mem_event_domain *med)
+{
+ int rc = -EBUSY;
+ wait_event(med->wq, mem_event_wait_try_grab(med, &rc) != -EBUSY);
+ return rc;
+}
+
+/*
+ * Determines whether or not the current vCPU belongs to the target domain,
+ * and calls the appropriate wait function. If it is a guest vCPU, then we
+ * use mem_event_wait_slot() to reserve a slot. As long as there is a ring,
+ * this function will always return 0 for a guest. For a non-guest, we check
+ * for space and return -EBUSY if the ring is not available.
+ *
+ * Return codes: -ENOSYS: the ring is not yet configured
+ * -EBUSY: the ring is busy
+ * 0: a spot has been reserved
+ *
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
+{
+ if ( current->domain == d )
+ return mem_event_wait_slot(med);
+ else
+ return mem_event_grab_slot(med, 1);
}
/* Registered with Xen-bound event channel for incoming notifications. */
@@ -316,14 +516,14 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
if ( p2m->pod.entry_count )
break;
- rc = mem_event_enable(d, mec, med, mem_paging_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(med);
+ rc = mem_event_disable(d, med);
}
break;
@@ -355,14 +555,14 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med, mem_access_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(med);
+ rc = mem_event_disable(d, med);
}
break;
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 7726d5ba70..946242cafa 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
#endif
-static struct page_info* mem_sharing_alloc_page(struct domain *d,
- unsigned long gfn)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
{
- struct page_info* page;
struct vcpu *v = current;
- mem_event_request_t req;
-
- page = alloc_domheap_page(d, 0);
- if(page != NULL) return page;
-
- memset(&req, 0, sizeof(req));
- req.type = MEM_EVENT_TYPE_SHARED;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
if ( v->domain != d )
{
@@ -275,20 +267,21 @@ static struct page_info* mem_sharing_alloc_page(struct domain *d,
gdprintk(XENLOG_ERR,
"Failed alloc on unshare path for foreign (%d) lookup\n",
d->domain_id);
- return page;
+ return;
}
- vcpu_pause_nosync(v);
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+ if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+ {
+ return;
+ }
- if(mem_event_check_ring(d, &d->mem_event->share)) return page;
+ req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
+ vcpu_pause_nosync(v);
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
mem_event_put_request(d, &d->mem_event->share, &req);
-
- return page;
}
unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -301,7 +294,7 @@ int mem_sharing_sharing_resume(struct domain *d)
mem_event_response_t rsp;
/* Get all requests off the ring */
- while ( mem_event_get_response(&d->mem_event->share, &rsp) )
+ while ( mem_event_get_response(d, &d->mem_event->share, &rsp) )
{
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
@@ -658,13 +651,14 @@ gfn_found:
if(ret == 0) goto private_page_found;
old_page = page;
- page = mem_sharing_alloc_page(d, gfn);
+ page = alloc_domheap_page(d, 0);
if(!page)
{
/* We've failed to obtain memory for private page. Need to re-add the
* gfn_info to relevant list */
list_add(&gfn_info->list, &hash_entry->gfns);
put_gfn(d, gfn);
+ mem_sharing_notify_helper(d, gfn);
shr_unlock();
return -ENOMEM;
}
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 6a1cad08bc..91851bb883 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -861,20 +861,23 @@ int p2m_mem_paging_evict(struct domain *d, unsigned long gfn)
*/
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
{
- struct vcpu *v = current;
mem_event_request_t req;
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* We allow no ring in this unique case, because it won't affect
+ * correctness of the guest execution at this point. If this is the only
+ * page that happens to be paged out, we'll be okay, but it's likely the
+ * guest will crash shortly anyway. */
+ int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+ if ( rc < 0 )
+ return;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ /* Send release notification to pager */
+ memset(&req, 0, sizeof(req));
+ req.type = MEM_EVENT_TYPE_PAGING;
+ req.gfn = gfn;
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
+
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -907,8 +910,16 @@ void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
mfn_t mfn;
struct p2m_domain *p2m = p2m_get_hostp2m(d);
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ /* We're paging. There should be a ring */
+ int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+ if ( rc == -ENOSYS )
+ {
+ gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
+ "in place\n", d->domain_id, gfn);
+ domain_crash(d);
+ return;
+ }
+ else if ( rc < 0 )
return;
memset(&req, 0, sizeof(req));
@@ -929,7 +940,7 @@ void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
p2m_unlock(p2m);
/* Pause domain if request came from guest and gfn has paging type */
- if ( p2m_is_paging(p2mt) && v->domain == d )
+ if ( p2m_is_paging(p2mt) && v->domain == d )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
@@ -938,7 +949,7 @@ void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_cancel_slot(d, &d->mem_event->paging);
return;
}
@@ -1065,7 +1076,7 @@ void p2m_mem_paging_resume(struct domain *d)
mfn_t mfn;
/* Pull all responses off the ring */
- while( mem_event_get_response(&d->mem_event->paging, &rsp) )
+ while( mem_event_get_response(d, &d->mem_event->paging, &rsp) )
{
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
@@ -1090,9 +1101,6 @@ void p2m_mem_paging_resume(struct domain *d)
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
-
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
}
bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
@@ -1103,7 +1111,6 @@ bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long g
unsigned long gfn = gpa >> PAGE_SHIFT;
struct domain *d = v->domain;
struct p2m_domain* p2m = p2m_get_hostp2m(d);
- int res;
mfn_t mfn;
p2m_type_t p2mt;
p2m_access_t p2ma;
@@ -1126,17 +1133,16 @@ bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long g
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
- if ( res < 0 )
+ if ( mem_event_claim_slot(d, &d->mem_event->access) == -ENOSYS )
{
/* No listener */
if ( p2m->access_required )
{
- printk(XENLOG_INFO
- "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
- v->vcpu_id, d->domain_id);
-
- mem_event_mark_and_pause(v);
+ gdprintk(XENLOG_INFO, "Memory access permissions failure, "
+ "no mem_event listener VCPU %d, dom %d\n",
+ v->vcpu_id, d->domain_id);
+ domain_crash(v->domain);
+ return 0;
}
else
{
@@ -1149,11 +1155,7 @@ bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long g
}
return 1;
}
-
- return 0;
}
- else if ( res > 0 )
- return 0; /* No space in buffer; VCPU paused */
memset(&req, 0, sizeof(req));
req.type = MEM_EVENT_TYPE_ACCESS;
@@ -1188,7 +1190,7 @@ void p2m_mem_access_resume(struct domain *d)
mem_event_response_t rsp;
/* Pull all responses off the ring */
- while( mem_event_get_response(&d->mem_event->access, &rsp) )
+ while( mem_event_get_response(d, &d->mem_event->access, &rsp) )
{
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
@@ -1196,13 +1198,8 @@ void p2m_mem_access_resume(struct domain *d)
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
-
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
}
-
/* Set access type for a region of pfns.
* If start_pfn == -1ul, sets the default access type */
int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
diff --git a/xen/include/asm-x86/mem_event.h b/xen/include/asm-x86/mem_event.h
index c0233052e9..c157900c80 100644
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,18 +24,24 @@
#ifndef __MEM_EVENT_H__
#define __MEM_EVENT_H__
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
-int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+/* Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is no
+ * available space. For success or -EBUSY, the vCPU may be left blocked
+ * temporarily to ensure that the ring does not lose future events. In
+ * general, you must follow a claim_slot() call with either put_request() or
+ * cancel_slot(), both of which are guaranteed to succeed. */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+
+void mem_event_cancel_slot(struct domain *d, struct mem_event_domain *med);
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+ mem_event_request_t *req);
+
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med,
+ mem_event_response_t *rsp);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
XEN_GUEST_HANDLE(void) u_domctl);
-
#endif /* __MEM_EVENT_H__ */
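[Editor's note: the consumer side of the interface declared above is exercised by the resume paths in this patch (p2m_mem_paging_resume(), mem_sharing_sharing_resume()), which all follow the shape sketched below; the helper name drain_responses() is hypothetical.]

/* Hypothetical consumer loop: pull every response off the ring and unpause
 * any vcpu that was paused when its request was posted.  With this patch,
 * mem_event_get_response() also wakes producers that were queued or blocked
 * waiting for ring space. */
static void drain_responses(struct domain *d, struct mem_event_domain *med)
{
    mem_event_response_t rsp;

    while ( mem_event_get_response(d, med, &rsp) )
    {
        if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
            continue;

        /* ... act on the response: fix up the p2m entry, etc. ... */

        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
    }
}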
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index cea95b27f8..313a459be3 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -317,6 +317,8 @@ page_list_splice(struct page_list_head *list, struct page_list_head *head)
void scrub_one_page(struct page_info *);
+/* Returns 1 on success, 0 on error, negative if the ring
+ * for event propagation is full in the presence of paging */
int guest_remove_page(struct domain *d, unsigned long gmfn);
#define RAM_TYPE_CONVENTIONAL 0x00000001
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 6441546401..dbfb8b39f3 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -13,6 +13,7 @@
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
+#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
@@ -182,7 +183,9 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
- unsigned int req_producers;
+ /* The ring has 64 entries */
+ unsigned char foreign_producers;
+ unsigned char target_producers;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
@@ -191,6 +194,14 @@ struct mem_event_domain
mem_event_front_ring_t front_ring;
/* event channel port (vcpu0 only) */
int xen_port;
+ /* mem_event bit for vcpu->pause_flags */
+ int pause_flag;
+ /* list of vcpus waiting for room in the ring */
+ struct waitqueue_head wq;
+ /* the number of vCPUs blocked */
+ unsigned int blocked;
+ /* The last vcpu woken up */
+ unsigned int last_vcpu_wake_up;
};
struct mem_event_per_domain
@@ -614,9 +625,12 @@ static inline struct domain *next_domain_in_cpupool(
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating 3
#define VPF_migrating (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event 4
-#define VPF_mem_event (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging 4
+#define VPF_mem_paging (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access 5
+#define VPF_mem_access (1UL<<_VPF_mem_access)
static inline int vcpu_runnable(struct vcpu *v)
{