author     Tim Deegan <tim@xen.org>   2013-01-17 15:55:02 +0000
committer  Tim Deegan <tim@xen.org>   2013-01-17 15:55:02 +0000
commit     d633c601bd604ed68597aea4b2632458309ce1f1
tree       ee339f6596e3230c05f1104b9c814d4e8d0d527f
parent     f8079d5208755a9fa35aac4047af150a81420b78
x86/mm: revert 26399:b0e618cb0233 (multiple vram areas)
Although this passed my smoke-tests at commit time, I'm now seeing screen
corruption on 32-bit WinXP guests.  Reverting for now. :(

Signed-off-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
-rw-r--r--  tools/libxc/xenctrl.h             |  20
-rw-r--r--  xen/arch/x86/hvm/hvm.c            |   8
-rw-r--r--  xen/arch/x86/mm/Makefile          |   1
-rw-r--r--  xen/arch/x86/mm/hap/hap.c         | 111
-rw-r--r--  xen/arch/x86/mm/p2m.c             |  11
-rw-r--r--  xen/arch/x86/mm/paging.c          |  57
-rw-r--r--  xen/arch/x86/mm/shadow/common.c   | 335
-rw-r--r--  xen/arch/x86/mm/shadow/multi.c    | 174
-rw-r--r--  xen/arch/x86/mm/shadow/multi.h    |   7
-rw-r--r--  xen/arch/x86/mm/shadow/types.h    |   1
-rw-r--r--  xen/include/asm-x86/hap.h         |   4
-rw-r--r--  xen/include/asm-x86/hvm/domain.h  |   2
-rw-r--r--  xen/include/asm-x86/paging.h      |  15
-rw-r--r--  xen/include/asm-x86/shadow.h      |   6
14 files changed, 397 insertions, 355 deletions
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index cd4e1ef0d8..32122fd303 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1563,23 +1563,15 @@ int xc_hvm_inject_msi(
xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
/*
- * Track dirty bit changes in a VRAM region defined by
- * [ first_pfn : first_pfn + nr - 1 ]
+ * Track dirty bit changes in the VRAM area
*
* All of this is done atomically:
- * - gets the dirty bitmap since the last call, all zeroes for
- * the first call with some new region
- * - sets up a dirty tracking region for period up to the next call
- * - clears the specified dirty tracking region.
+ * - get the dirty bitmap since the last call
+ * - set up dirty tracking area for period up to the next call
+ * - clear the dirty tracking area.
*
- * Creating a new region causes any existing regions that it overlaps
- * to be discarded.
- *
- * Specifying nr == 0 causes all regions to be discarded and
- * disables dirty bit tracking.
- *
- * If nr is not a multiple of 64, only the first nr bits of bitmap
- * are well defined.
+ * Returns -ENODATA and does not fill bitmap if the area has changed since the
+ * last call.
*/
int xc_hvm_track_dirty_vram(
xc_interface *xch, domid_t dom,
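For orientation, a minimal caller-side sketch of the interface this hunk reverts to.
The tail of the prototype is cut off by the diff context above, so the
first_pfn/nr/bitmap parameters and the bitmap type used here are assumptions based
on the comment, not a quote of the header:

    /* Hypothetical libxc caller: fetch the dirty bitmap once per display
     * refresh.  Bit i of the bitmap corresponds to pfn first_pfn + i. */
    #include <stdint.h>
    #include <string.h>
    #include <xenctrl.h>

    static int refresh_vram(xc_interface *xch, domid_t dom,
                            uint64_t first_pfn, uint64_t nr)
    {
        unsigned long bitmap[(nr + 63) / 64];
        unsigned int dirty = 0;
        uint64_t i;
        int rc;

        memset(bitmap, 0, sizeof(bitmap));
        rc = xc_hvm_track_dirty_vram(xch, dom, first_pfn, nr, bitmap);
        if ( rc < 0 )
            return rc;   /* e.g. the tracked area changed since the last call */

        for ( i = 0; i < nr; i++ )
            if ( bitmap[i / (8 * sizeof(unsigned long))] &
                 (1UL << (i % (8 * sizeof(unsigned long)))) )
                dirty++;   /* page first_pfn + i needs to be redrawn */

        return dirty;
    }
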
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 4b8c60adce..c75f7c5920 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -57,7 +57,6 @@
#include <asm/hvm/cacheattr.h>
#include <asm/hvm/trace.h>
#include <asm/hvm/nestedhvm.h>
-#include <asm/dirty_vram.h>
#include <asm/mtrr.h>
#include <asm/apic.h>
#include <public/sched.h>
@@ -1437,11 +1436,8 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
*/
if ( access_w )
{
- if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
- p2m_ram_rw) == p2m_ram_logdirty )
- {
- paging_mark_dirty_gpfn(v->domain, gfn);
- }
+ paging_mark_dirty(v->domain, mfn_x(mfn));
+ p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
}
rc = 1;
goto out_put_gfn;
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index becd0c9bea..73dcdf4cb1 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -5,7 +5,6 @@ obj-y += paging.o
obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
obj-y += guest_walk_2.o
obj-y += guest_walk_3.o
-obj-y += dirty_vram.o
obj-$(x86_64) += guest_walk_4.o
obj-$(x86_64) += mem_event.o
obj-$(x86_64) += mem_paging.o
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index f7d979bcdc..a95ccbf62c 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -53,6 +53,117 @@
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/************************************************/
+/* HAP VRAM TRACKING SUPPORT */
+/************************************************/
+
+/*
+ * hap_track_dirty_vram()
+ * Create the domain's dv_dirty_vram struct on demand.
+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
+ * first encountered.
+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
+ * calling paging_log_dirty_range(), which interrogates each vram
+ * page's p2m type looking for pages that have been made writable.
+ */
+
+int hap_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+{
+ long rc = 0;
+ struct sh_dirty_vram *dirty_vram;
+ uint8_t *dirty_bitmap = NULL;
+
+ if ( nr )
+ {
+ int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
+
+ if ( !paging_mode_log_dirty(d) )
+ {
+ hap_logdirty_init(d);
+ rc = paging_log_dirty_enable(d);
+ if ( rc )
+ goto out;
+ }
+
+ rc = -ENOMEM;
+ dirty_bitmap = xzalloc_bytes(size);
+ if ( !dirty_bitmap )
+ goto out;
+
+ paging_lock(d);
+
+ dirty_vram = d->arch.hvm_domain.dirty_vram;
+ if ( !dirty_vram )
+ {
+ rc = -ENOMEM;
+ if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
+ {
+ paging_unlock(d);
+ goto out;
+ }
+
+ d->arch.hvm_domain.dirty_vram = dirty_vram;
+ }
+
+ if ( begin_pfn != dirty_vram->begin_pfn ||
+ begin_pfn + nr != dirty_vram->end_pfn )
+ {
+ dirty_vram->begin_pfn = begin_pfn;
+ dirty_vram->end_pfn = begin_pfn + nr;
+
+ paging_unlock(d);
+
+ /* set l1e entries of range within P2M table to be read-only. */
+ p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
+ p2m_ram_rw, p2m_ram_logdirty);
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+
+ memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
+ }
+ else
+ {
+ paging_unlock(d);
+
+ domain_pause(d);
+
+ /* get the bitmap */
+ paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+
+ domain_unpause(d);
+ }
+
+ rc = -EFAULT;
+ if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
+ rc = 0;
+ }
+ else
+ {
+ paging_lock(d);
+
+ dirty_vram = d->arch.hvm_domain.dirty_vram;
+ if ( dirty_vram )
+ {
+ /*
+ * If zero pages specified while tracking dirty vram
+ * then stop tracking
+ */
+ xfree(dirty_vram);
+ d->arch.hvm_domain.dirty_vram = NULL;
+ }
+
+ paging_unlock(d);
+ }
+out:
+ if ( dirty_bitmap )
+ xfree(dirty_bitmap);
+
+ return rc;
+}
+
+/************************************************/
/* HAP LOG DIRTY SUPPORT */
/************************************************/
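Two conventions in the restored HAP path are worth spelling out: the bitmap is
sized by rounding the page count up to whole bytes, and a newly (re)defined range
reports every page dirty (the memset to 0xff above).  A small illustrative helper,
not part of the patch:

    /* Bytes needed for a 1-bit-per-page dirty bitmap covering nr pages,
     * e.g. nr = 2048 pages (an 8 MiB framebuffer) -> 256 bytes. */
    #define BITS_PER_BYTE 8
    static inline unsigned long dirty_bitmap_size(unsigned long nr)
    {
        return (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
    }
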
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 6f638a27d1..de1dd822ea 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -741,23 +741,20 @@ void p2m_change_type_range(struct domain *d,
struct p2m_domain *p2m = p2m_get_hostp2m(d);
BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
- p2m_lock(p2m);
+ p2m_lock(p2m);
p2m->defer_nested_flush = 1;
-
+
for ( gfn = start; gfn < end; gfn++ )
{
mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
if ( pt == ot )
- set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
- p2m->default_access);
+ set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
}
-
- p2m->defer_nested_flush = 0;
+ p2m->defer_nested_flush = 0;
if ( nestedhvm_enabled(d) )
p2m_flush_nestedp2m(d);
-
p2m_unlock(p2m);
}
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index d59e8d6523..ac9bb1a0ed 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -27,7 +27,6 @@
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/hvm/nestedhvm.h>
-#include <asm/dirty_vram.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
@@ -193,11 +192,15 @@ int paging_log_dirty_disable(struct domain *d)
return ret;
}
-/* Given a guest mfn, mark a page as dirty */
+/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
{
unsigned long pfn;
mfn_t gmfn;
+ int changed;
+ mfn_t mfn, *l4, *l3, *l2;
+ unsigned long *l1;
+ int i1, i2, i3, i4;
gmfn = _mfn(guest_mfn);
@@ -207,19 +210,6 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
/* We /really/ mean PFN here, even for non-translated guests. */
pfn = get_gpfn_from_mfn(mfn_x(gmfn));
- paging_mark_dirty_gpfn(d, pfn);
-}
-
-
-/* Given a guest pfn, mark a page as dirty */
-void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
-{
- int changed;
- mfn_t mfn, *l4, *l3, *l2;
- unsigned long *l1;
- int i1, i2, i3, i4;
- dv_range_t *range;
-
/* Shared MFNs should NEVER be marked dirty */
BUG_ON(SHARED_M2P(pfn));
@@ -239,11 +229,6 @@ void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
/* Recursive: this is called from inside the shadow code */
paging_lock_recursive(d);
- d->arch.paging.log_dirty.dirty_count++;
- range = dirty_vram_range_find_gfn(d, pfn);
- if ( range )
- range->dirty_count++;
-
if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
{
d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
@@ -460,31 +445,7 @@ void paging_log_dirty_range(struct domain *d,
struct p2m_domain *p2m = p2m_get_hostp2m(d);
int i;
unsigned long pfn;
- dv_range_t *range;
- unsigned int range_dirty_count;
- paging_lock(d);
- range = dirty_vram_range_find_gfn(d, begin_pfn);
- if ( !range )
- {
- paging_unlock(d);
- goto out;
- }
-
- range_dirty_count = range->dirty_count;
- range->dirty_count = 0;
-
- paging_unlock(d);
-
- if ( !range_dirty_count)
- goto out;
-
- PAGING_DEBUG(LOGDIRTY,
- "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
- d->domain_id,
- begin_pfn,
- begin_pfn + nr,
- range_dirty_count);
/*
* Set l1e entries of P2M table to be read-only.
*
@@ -499,17 +460,15 @@ void paging_log_dirty_range(struct domain *d,
for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
{
- if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
- p2m_ram_rw )
+ p2m_type_t pt;
+ pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
+ if ( pt == p2m_ram_rw )
dirty_bitmap[i >> 3] |= (1 << (i & 7));
}
p2m_unlock(p2m);
flush_tlb_mask(d->domain_dirty_cpumask);
-
- out:
- return;
}
/* Note that this function takes three function pointers. Callers must supply
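The bit layout used when paging_log_dirty_range() fills the bitmap is
least-significant-bit first within each byte; equivalent accessors, written out
for clarity (illustrative only, not part of the tree):

    /* Page (begin_pfn + i) maps to byte i >> 3, bit i & 7. */
    #include <stdint.h>

    static inline void set_dirty(uint8_t *bitmap, unsigned long i)
    {
        bitmap[i >> 3] |= 1 << (i & 7);
    }

    static inline int test_dirty(const uint8_t *bitmap, unsigned long i)
    {
        return (bitmap[i >> 3] >> (i & 7)) & 1;
    }
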
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 1e4b880187..ce79131e1f 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,7 +36,6 @@
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/shadow.h>
-#include <asm/dirty_vram.h>
#include <xen/numa.h>
#include "private.h"
@@ -3122,7 +3121,12 @@ void shadow_teardown(struct domain *d)
* calls now that we've torn down the bitmap */
d->arch.paging.mode &= ~PG_log_dirty;
- dirty_vram_free(d);
+ if (d->arch.hvm_domain.dirty_vram) {
+ xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
+ xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
+ xfree(d->arch.hvm_domain.dirty_vram);
+ d->arch.hvm_domain.dirty_vram = NULL;
+ }
paging_unlock(d);
@@ -3460,217 +3464,178 @@ void shadow_clean_dirty_bitmap(struct domain *d)
/**************************************************************************/
/* VRAM dirty tracking support */
+int shadow_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+{
+ int rc;
+ unsigned long end_pfn = begin_pfn + nr;
+ unsigned long dirty_size = (nr + 7) / 8;
+ int flush_tlb = 0;
+ unsigned long i;
+ p2m_type_t t;
+ struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ if (end_pfn < begin_pfn
+ || begin_pfn > p2m->max_mapped_pfn
+ || end_pfn >= p2m->max_mapped_pfn)
+ return -EINVAL;
-/* Support functions for shadow-based dirty VRAM code */
+ /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
+ p2m_lock(p2m_get_hostp2m(d));
+ paging_lock(d);
-#define DEBUG_unshadow_sl1ma 0
-#define DEBUG_unshadow_sl1ma_detail 0
-#define DEBUG_count_initial_mappings 0
+ if ( dirty_vram && (!nr ||
+ ( begin_pfn != dirty_vram->begin_pfn
+ || end_pfn != dirty_vram->end_pfn )) )
+ {
+ /* Different tracking, tear the previous down. */
+ gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
+ xfree(dirty_vram->sl1ma);
+ xfree(dirty_vram->dirty_bitmap);
+ xfree(dirty_vram);
+ dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+ }
-/* smfn is no longer a shadow page. Remove it from any
- * dirty vram range mapping. */
-void
-dirty_vram_delete_shadow(struct vcpu *v,
- unsigned long gfn,
- unsigned int shadow_type,
- mfn_t smfn)
-{
- static unsigned int l1_shadow_mask =
- 1 << SH_type_l1_32_shadow
- | 1 << SH_type_fl1_32_shadow
- | 1 << SH_type_l1_pae_shadow
- | 1 << SH_type_fl1_pae_shadow
- | 1 << SH_type_l1_64_shadow
- | 1 << SH_type_fl1_64_shadow
- ;
- struct domain *d = v->domain;
- dv_dirty_vram_t *dirty_vram;
- struct list_head *curr, *next;
-
- ASSERT(paging_locked_by_me(d));
- /* Ignore all but level 1 shadows */
-
- if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
+ if ( !nr )
{
+ rc = 0;
goto out;
}
- dirty_vram = d->arch.hvm_domain.dirty_vram;
+ /* This should happen seldomly (Video mode change),
+ * no need to be careful. */
if ( !dirty_vram )
{
- goto out;
+ /* Throw away all the shadows rather than walking through them
+ * up to nr times getting rid of mappings of each pfn */
+ shadow_blow_tables(d);
+
+ gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
+
+ rc = -ENOMEM;
+ if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
+ goto out;
+ dirty_vram->begin_pfn = begin_pfn;
+ dirty_vram->end_pfn = end_pfn;
+ d->arch.hvm_domain.dirty_vram = dirty_vram;
+
+ if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
+ goto out_dirty_vram;
+ memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
+
+ if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
+ goto out_sl1ma;
+
+ dirty_vram->last_dirty = NOW();
+
+ /* Tell the caller that this time we could not track dirty bits. */
+ rc = -ENODATA;
}
-
- list_for_each_safe(curr, next, &dirty_vram->range_head)
+ else if (dirty_vram->last_dirty == -1)
{
- dv_range_t *range = list_entry(curr, dv_range_t, range_link);
- unsigned long i;
- int max_mappings = 1, mappings = 0;
- int unshadowed = 0;
- for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
- {
- dv_paddr_link_t *pl = &range->pl_tab[ i ].mapping;
- dv_paddr_link_t **ppl = NULL;
- mappings = 0;
-
- while ( pl != NULL )
+ /* still completely clean, just copy our empty bitmap */
+ rc = -EFAULT;
+ if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
+ rc = 0;
+ }
+ else
+ {
+ /* Iterate over VRAM to track dirty bits. */
+ for ( i = 0; i < nr; i++ ) {
+ mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
+ struct page_info *page;
+ int dirty = 0;
+ paddr_t sl1ma = dirty_vram->sl1ma[i];
+
+ if (mfn_x(mfn) == INVALID_MFN)
{
- paddr_t sl1ma = pl->sl1ma;
- unsigned long sl1mn;
-
- if ( sl1ma == INVALID_PADDR )
- break;
-
- sl1mn = sl1ma >> PAGE_SHIFT;
- if ( sl1mn == mfn_x(smfn) ) {
-#if DEBUG_unshadow_sl1ma_detail
- gdprintk(XENLOG_DEBUG,
- "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
- mfn_x(smfn),
- range->begin_pfn + i,
- sl1ma);
-#endif
- unshadowed++;
- pl = free_paddr_link(d, ppl, pl);
- --range->nr_mappings;
- }
- else
+ dirty = 1;
+ }
+ else
+ {
+ page = mfn_to_page(mfn);
+ switch (page->u.inuse.type_info & PGT_count_mask)
{
- ppl = &pl->pl_next;
- pl = *ppl;
- mappings++;
+ case 0:
+ /* No guest reference, nothing to track. */
+ break;
+ case 1:
+ /* One guest reference. */
+ if ( sl1ma == INVALID_PADDR )
+ {
+ /* We don't know which sl1e points to this, too bad. */
+ dirty = 1;
+ /* TODO: Heuristics for finding the single mapping of
+ * this gmfn */
+ flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
+ }
+ else
+ {
+ /* Hopefully the most common case: only one mapping,
+ * whose dirty bit we can use. */
+ l1_pgentry_t *sl1e = maddr_to_virt(sl1ma);
+
+ if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+ {
+ dirty = 1;
+ /* Note: this is atomic, so we may clear a
+ * _PAGE_ACCESSED set by another processor. */
+ l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+ flush_tlb = 1;
+ }
+ }
+ break;
+ default:
+ /* More than one guest reference,
+ * we don't afford tracking that. */
+ dirty = 1;
+ break;
}
}
- }
- if ( mappings > max_mappings )
- max_mappings = mappings;
-
- if ( unshadowed ) {
-#if DEBUG_unshadow_sl1ma
- gdprintk(XENLOG_DEBUG,
- "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
- "max_mappings:%d\n",
- mfn_x(smfn),
- range->begin_pfn, range->end_pfn,
- unshadowed, range->nr_mappings, max_mappings);
-#endif
- if ( range->nr_mappings == 0 )
+
+ if ( dirty )
{
- dirty_vram_range_free(d, range);
+ dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+ dirty_vram->last_dirty = NOW();
}
}
- }
- out:
- return;
-}
-
-typedef int (*hash_pfn_callback_t)(struct vcpu *v,
- mfn_t smfn,
- unsigned long begin_pfn,
- unsigned long end_pfn,
- int *removed);
-
-static int hash_pfn_foreach(struct vcpu *v,
- unsigned int callback_mask,
- hash_pfn_callback_t callbacks[],
- unsigned long begin_pfn,
- unsigned long end_pfn)
-/* Walk the hash table looking at the types of the entries and
- * calling the appropriate callback function for each entry.
- * The mask determines which shadow types we call back for, and the array
- * of callbacks tells us which function to call.
- * Any callback may return non-zero to let us skip the rest of the scan.
- *
- * WARNING: Callbacks MUST NOT add or remove hash entries unless they
- * then return non-zero to terminate the scan. */
-{
- int i, done = 0, removed = 0;
- struct domain *d = v->domain;
- struct page_info *x;
-
- /* Say we're here, to stop hash-lookups reordering the chains */
- ASSERT(paging_locked_by_me(d));
- ASSERT(d->arch.paging.shadow.hash_walking == 0);
- d->arch.paging.shadow.hash_walking = 1;
-
- for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
- {
- /* WARNING: This is not safe against changes to the hash table.
- * The callback *must* return non-zero if it has inserted or
- * deleted anything from the hash (lookups are OK, though). */
- for ( x = d->arch.paging.shadow.hash_table[i];
- x;
- x = next_shadow(x) )
- {
- if ( callback_mask & (1 << x->u.sh.type) )
+ rc = -EFAULT;
+ if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
+ memset(dirty_vram->dirty_bitmap, 0, dirty_size);
+ if (dirty_vram->last_dirty + SECONDS(2) < NOW())
{
- ASSERT(x->u.sh.type <= 15);
- ASSERT(callbacks[x->u.sh.type] != NULL);
- done = callbacks[x->u.sh.type](v, page_to_mfn(x),
- begin_pfn, end_pfn,
- &removed);
- if ( done ) break;
+ /* was clean for more than two seconds, try to disable guest
+ * write access */
+ for ( i = begin_pfn; i < end_pfn; i++ ) {
+ mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
+ if (mfn_x(mfn) != INVALID_MFN)
+ flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
+ }
+ dirty_vram->last_dirty = -1;
}
+ rc = 0;
}
- if ( done ) break;
}
- d->arch.paging.shadow.hash_walking = 0;
- return removed;
-}
-
-void sh_find_all_vram_mappings(struct vcpu *v,
- dv_range_t *range)
-{
- /* Dispatch table for getting per-type functions */
- static hash_pfn_callback_t callbacks[SH_type_unused] = {
- NULL, /* none */
- SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32 */
- SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32 */
- NULL, /* l2_32 */
- SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae */
- SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
- NULL, /* l2_pae */
- NULL, /* l2h_pae */
-#if CONFIG_PAGING_LEVELS >= 4
- SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64 */
- SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64 */
-#else
- NULL, /* l1_64 */
- NULL, /* fl1_64 */
-#endif
- NULL, /* l2_64 */
- NULL, /* l2h_64 */
- NULL, /* l3_64 */
- NULL, /* l4_64 */
- NULL, /* p2m */
- NULL /* unused */
- };
-
- static unsigned int callback_mask =
- 1 << SH_type_l1_32_shadow
- | 1 << SH_type_fl1_32_shadow
- | 1 << SH_type_l1_pae_shadow
- | 1 << SH_type_fl1_pae_shadow
- | 1 << SH_type_l1_64_shadow
- | 1 << SH_type_fl1_64_shadow
- ;
-
- perfc_incr(shadow_mappings);
+ if ( flush_tlb )
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ goto out;
- hash_pfn_foreach(v, callback_mask, callbacks,
- range->begin_pfn,
- range->end_pfn);
+out_sl1ma:
+ xfree(dirty_vram->sl1ma);
+out_dirty_vram:
+ xfree(dirty_vram);
+ dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-#if DEBUG_count_initial_mappings
- gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings:%d\n",
- range->begin_pfn, range->end_pfn,
- range->nr_mappings);
-#endif
+out:
+ paging_unlock(d);
+ p2m_unlock(p2m_get_hostp2m(d));
+ return rc;
}
-
/**************************************************************************/
/* Shadow-control XEN_DOMCTL dispatcher */
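The control flow restored in shadow_track_dirty_vram() can be summarised as: the
first call for a new range returns -ENODATA (the caller should treat everything as
dirty), subsequent calls report and clear the accumulated bitmap, and a range that
stays clean for about two seconds has guest write access revoked, after which
last_dirty == -1 marks it as known-clean until the next write fault.  A condensed
sketch of that last step; remove_write_access_to_range() is a hypothetical
stand-in for the sh_remove_write_access() loop above:

    /* Clean for > 2s: make the guest fault on its next VRAM write. */
    if ( dirty_vram->last_dirty != -1 &&
         dirty_vram->last_dirty + SECONDS(2) < NOW() )
    {
        remove_write_access_to_range(d, begin_pfn, end_pfn);
        dirty_vram->last_dirty = -1;   /* known clean */
    }
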
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index bb983bcecc..4967da1d35 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -35,7 +35,6 @@
#include <asm/flushtlb.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/cacheattr.h>
-#include <asm/dirty_vram.h>
#include <asm/mtrr.h>
#include <asm/guest_pt.h>
#include <public/sched.h>
@@ -150,10 +149,6 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
ASSERT(mfn_to_page(smfn)->u.sh.head);
-
- /* Removing any dv_paddr_links to the erstwhile shadow page */
- dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
-
shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
}
@@ -165,10 +160,6 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
v->domain->domain_id, v->vcpu_id,
mfn_x(gmfn), shadow_type, mfn_x(smfn));
ASSERT(mfn_to_page(smfn)->u.sh.head);
-
- /* Removing any dv_paddr_links to the erstwhile shadow page */
- dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
-
shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
/* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
@@ -525,6 +516,7 @@ _sh_propagate(struct vcpu *v,
guest_l1e_t guest_entry = { guest_intpte };
shadow_l1e_t *sp = shadow_entry_ptr;
struct domain *d = v->domain;
+ struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
u32 pass_thru_flags;
u32 gflags, sflags;
@@ -671,6 +663,17 @@ _sh_propagate(struct vcpu *v,
}
}
+ if ( unlikely((level == 1) && dirty_vram
+ && dirty_vram->last_dirty == -1
+ && gfn_x(target_gfn) >= dirty_vram->begin_pfn
+ && gfn_x(target_gfn) < dirty_vram->end_pfn) )
+ {
+ if ( ft & FETCH_TYPE_WRITE )
+ dirty_vram->last_dirty = NOW();
+ else
+ sflags &= ~_PAGE_RW;
+ }
+
/* Read-only memory */
if ( p2m_is_readonly(p2mt) ||
(p2mt == p2m_mmio_direct &&
@@ -1069,60 +1072,101 @@ static int shadow_set_l2e(struct vcpu *v,
return flags;
}
-/* shadow_vram_fix_l1e()
- *
- * Tests L1PTEs as they are modified, looking for when they start to (or
- * cease to) point to frame buffer pages. If the old and new gfns differ,
- * calls dirty_vram_range_update() to updates the dirty_vram structures.
- */
-static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
- shadow_l1e_t new_sl1e,
+static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
shadow_l1e_t *sl1e,
mfn_t sl1mfn,
struct domain *d)
{
- mfn_t new_mfn, old_mfn;
- unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
- paddr_t sl1ma;
- dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
+ mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
+ int flags = shadow_l1e_get_flags(new_sl1e);
+ unsigned long gfn;
+ struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
- if ( !dirty_vram )
+ if ( !dirty_vram /* tracking disabled? */
+ || !(flags & _PAGE_RW) /* read-only mapping? */
+ || !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */
return;
- sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
+ gfn = mfn_to_gfn(d, mfn);
+ /* Page sharing not supported on shadow PTs */
+ BUG_ON(SHARED_M2P(gfn));
- old_mfn = shadow_l1e_get_mfn(old_sl1e);
-
- if ( !sh_l1e_is_magic(old_sl1e) &&
- (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
- mfn_valid(old_mfn))
+ if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
{
- old_gfn = mfn_to_gfn(d, old_mfn);
- }
-
- new_mfn = shadow_l1e_get_mfn(new_sl1e);
- if ( !sh_l1e_is_magic(new_sl1e) &&
- (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
- mfn_valid(new_mfn))
- {
- new_gfn = mfn_to_gfn(d, new_mfn);
+ unsigned long i = gfn - dirty_vram->begin_pfn;
+ struct page_info *page = mfn_to_page(mfn);
+
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
+ /* Initial guest reference, record it */
+ dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
+ | ((unsigned long)sl1e & ~PAGE_MASK);
}
+}
+
+static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
+ shadow_l1e_t *sl1e,
+ mfn_t sl1mfn,
+ struct domain *d)
+{
+ mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
+ int flags = shadow_l1e_get_flags(old_sl1e);
+ unsigned long gfn;
+ struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+
+ if ( !dirty_vram /* tracking disabled? */
+ || !(flags & _PAGE_RW) /* read-only mapping? */
+ || !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */
+ return;
- if ( old_gfn == new_gfn ) return;
+ gfn = mfn_to_gfn(d, mfn);
+ /* Page sharing not supported on shadow PTs */
+ BUG_ON(SHARED_M2P(gfn));
- if ( VALID_M2P(old_gfn) )
- if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
+ if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+ {
+ unsigned long i = gfn - dirty_vram->begin_pfn;
+ struct page_info *page = mfn_to_page(mfn);
+ int dirty = 0;
+ paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
+ | ((unsigned long)sl1e & ~PAGE_MASK);
+
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
+ {
+ /* Last reference */
+ if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
+ /* We didn't know it was that one, let's say it is dirty */
+ dirty = 1;
+ }
+ else
+ {
+ ASSERT(dirty_vram->sl1ma[i] == sl1ma);
+ dirty_vram->sl1ma[i] = INVALID_PADDR;
+ if ( flags & _PAGE_DIRTY )
+ dirty = 1;
+ }
+ }
+ else
{
- SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
- old_gfn, mfn_x(old_mfn));
+ /* We had more than one reference, just consider the page dirty. */
+ dirty = 1;
+ /* Check that it's not the one we recorded. */
+ if ( dirty_vram->sl1ma[i] == sl1ma )
+ {
+ /* Too bad, we remembered the wrong one... */
+ dirty_vram->sl1ma[i] = INVALID_PADDR;
+ }
+ else
+ {
+ /* Ok, our recorded sl1e is still pointing to this page, let's
+ * just hope it will remain. */
+ }
}
-
- if ( VALID_M2P(new_gfn) )
- if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
+ if ( dirty )
{
- SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
- new_gfn, mfn_x(new_mfn));
+ dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+ dirty_vram->last_dirty = NOW();
}
+ }
}
static int shadow_set_l1e(struct vcpu *v,
@@ -1167,13 +1211,12 @@ static int shadow_set_l1e(struct vcpu *v,
shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
/* fall through */
case 0:
+ shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
break;
}
}
}
- shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
-
/* Write the new entry */
shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
flags |= SHADOW_SET_CHANGED;
@@ -1188,6 +1231,7 @@ static int shadow_set_l1e(struct vcpu *v,
* trigger a flush later. */
if ( shadow_mode_refcounts(d) )
{
+ shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
shadow_put_page_from_l1e(old_sl1e, d);
TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
}
@@ -1974,6 +2018,7 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
&& !sh_l1e_is_magic(*sl1e) ) {
+ shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
shadow_put_page_from_l1e(*sl1e, d);
}
});
@@ -4291,37 +4336,6 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
return done;
}
-
-int sh_find_vram_mappings_in_l1(struct vcpu *v,
- mfn_t sl1mfn,
- unsigned long begin_pfn,
- unsigned long end_pfn,
- int *removed)
-/* Find all VRAM mappings in this shadow l1 table */
-{
- struct domain *d = v->domain;
- shadow_l1e_t *sl1e;
- int done = 0;
-
- /* only returns _PAGE_PRESENT entries */
- SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
- {
- unsigned long gfn;
- mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
- if ( !mfn_valid(gmfn) )
- continue;
- gfn = mfn_to_gfn(d, gmfn);
- if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) )
- {
- paddr_t sl1ma =
- pfn_to_paddr(mfn_x(sl1mfn)) |
- ( (unsigned long)sl1e & ~PAGE_MASK );
- dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
- }
- });
- return 0;
-}
-
/**************************************************************************/
/* Functions to excise all pointers to shadows from higher-level shadows. */
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 436a4ace8a..835121e494 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -66,12 +66,7 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
extern int
SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
-extern int
-SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl1mfn,
- unsigned long begin_pfn,
- unsigned long end_pfn,
- int *removed);
+
extern void
SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
(struct vcpu *v, void *ep, mfn_t smfn);
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 5b0f9f7644..43ce1dbb0a 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -229,7 +229,6 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
#define sh_update_cr3 INTERNAL_NAME(sh_update_cr3)
#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
#define sh_rm_mappings_from_l1 INTERNAL_NAME(sh_rm_mappings_from_l1)
-#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
#define sh_remove_l1_shadow INTERNAL_NAME(sh_remove_l1_shadow)
#define sh_remove_l2_shadow INTERNAL_NAME(sh_remove_l2_shadow)
#define sh_remove_l3_shadow INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index 3e3a1f5537..916a35bdc7 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -57,6 +57,10 @@ void hap_final_teardown(struct domain *d);
void hap_teardown(struct domain *d);
void hap_vcpu_init(struct vcpu *v);
void hap_logdirty_init(struct domain *d);
+int hap_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 0cc7b05b17..27b3de50dc 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -74,7 +74,7 @@ struct hvm_domain {
struct list_head pinned_cacheattr_ranges;
/* VRAM dirty support. */
- struct dv_dirty_vram *dirty_vram;
+ struct sh_dirty_vram *dirty_vram;
/* If one of vcpus of this domain is in no_fill_mode or
* mtrr/pat between vcpus is not the same, set is_in_uc_mode
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index e22df388ba..c3a8848a56 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -154,13 +154,9 @@ void paging_log_dirty_init(struct domain *d,
int (*disable_log_dirty)(struct domain *d),
void (*clean_dirty_bitmap)(struct domain *d));
-/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
+/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
-/* mark a gpfn as dirty */
-void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
-
-
/* is this guest page dirty?
* This is called from inside paging code, with the paging lock held. */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
@@ -187,6 +183,15 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
#define L4_LOGDIRTY_IDX(pfn) 0
#endif
+/* VRAM dirty tracking support */
+struct sh_dirty_vram {
+ unsigned long begin_pfn;
+ unsigned long end_pfn;
+ paddr_t *sl1ma;
+ uint8_t *dirty_bitmap;
+ s_time_t last_dirty;
+};
+
/*****************************************************************************
* Entry points into the paging-assistance code */
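The fields are sized against the tracked pfn range; the shadow code earlier in
this patch sets them up roughly as follows (a condensed sketch using Xen-internal
allocators, error-path checks omitted, not a quote of the tree):

    struct sh_dirty_vram *dv = xzalloc(struct sh_dirty_vram);
    dv->begin_pfn    = begin_pfn;
    dv->end_pfn      = begin_pfn + nr;
    dv->sl1ma        = xmalloc_array(paddr_t, nr);        /* one slot per pfn */
    memset(dv->sl1ma, ~0, sizeof(paddr_t) * nr);           /* all INVALID_PADDR */
    dv->dirty_bitmap = xzalloc_array(uint8_t, (nr + 7) / 8);  /* one bit per pfn */
    dv->last_dirty   = NOW();
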
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 940d7fde18..2eb6efc18e 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,6 +62,12 @@ void shadow_vcpu_init(struct vcpu *v);
/* Enable an arbitrary shadow mode. Call once at domain creation. */
int shadow_enable(struct domain *d, u32 mode);
+/* Enable VRAM dirty bit tracking. */
+int shadow_track_dirty_vram(struct domain *d,
+ unsigned long first_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
/* Handler for shadow control ops: operations from user-space to enable
* and disable ephemeral shadow modes (test mode and log-dirty mode) and
* manipulate the log-dirty bitmap. */
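
With both prototypes back in place, the HVMOP_track_dirty_vram handler in hvm.c
(not shown in this diff) picks a backend per domain; roughly, and as an assumption
about code outside this patch:

    /* Hedged sketch of the dispatcher, not a quote of hvm.c. */
    rc = hap_enabled(d)
         ? hap_track_dirty_vram(d, a.first_pfn, a.nr, dirty_bitmap)
         : shadow_track_dirty_vram(d, a.first_pfn, a.nr, dirty_bitmap);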