aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2008-05-02 15:08:27 +0100
committerKeir Fraser <keir.fraser@citrix.com>2008-05-02 15:08:27 +0100
commit4fe64c6d1b6c1e81901ed9fc3e4498fb578b6d91 (patch)
tree4ea3659b511b1c865a14d1c4f63aa7cf9c40df85
parent3238785d35b0cef87d619a1bdaa12dcd7032cf14 (diff)
downloadxen-4fe64c6d1b6c1e81901ed9fc3e4498fb578b6d91.tar.gz
xen-4fe64c6d1b6c1e81901ed9fc3e4498fb578b6d91.tar.bz2
xen-4fe64c6d1b6c1e81901ed9fc3e4498fb578b6d91.zip
shadow: track video RAM dirty bits
This adds a new HVM op that enables tracking dirty bits of a range of video RAM. The idea is to optimize just for the most common case (only one guest mapping, with sometimes some temporary other mappings), which permits to keep the overhead on shadow as low as possible.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
-rw-r--r--tools/ioemu/hw/cirrus_vga.c48
-rw-r--r--tools/ioemu/hw/vga.c83
-rw-r--r--tools/ioemu/hw/vga_int.h2
-rw-r--r--tools/libxc/xc_misc.c31
-rw-r--r--tools/libxc/xenctrl.h16
-rw-r--r--xen/arch/ia64/vmx/vmx_hypercall.c4
-rw-r--r--xen/arch/x86/hvm/hvm.c48
-rw-r--r--xen/arch/x86/mm/shadow/common.c165
-rw-r--r--xen/arch/x86/mm/shadow/multi.c83
-rw-r--r--xen/arch/x86/mm/shadow/private.h9
-rw-r--r--xen/include/asm-ia64/config.h1
-rw-r--r--xen/include/asm-powerpc/types.h1
-rw-r--r--xen/include/asm-x86/shadow.h6
-rw-r--r--xen/include/asm-x86/types.h3
-rw-r--r--xen/include/public/hvm/hvm_op.h16
-rw-r--r--xen/include/xen/sched.h3
16 files changed, 488 insertions, 31 deletions
diff --git a/tools/ioemu/hw/cirrus_vga.c b/tools/ioemu/hw/cirrus_vga.c
index 8572cf85b7..f96ede93cb 100644
--- a/tools/ioemu/hw/cirrus_vga.c
+++ b/tools/ioemu/hw/cirrus_vga.c
@@ -234,8 +234,6 @@ typedef struct CirrusVGAState {
int cirrus_linear_io_addr;
int cirrus_linear_bitblt_io_addr;
int cirrus_mmio_io_addr;
- unsigned long cirrus_lfb_addr;
- unsigned long cirrus_lfb_end;
uint32_t cirrus_addr_mask;
uint32_t linear_mmio_mask;
uint8_t cirrus_shadow_gr0;
@@ -2657,11 +2655,11 @@ static void cirrus_update_memory_access(CirrusVGAState *s)
mode = s->gr[0x05] & 0x7;
if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
- if (s->cirrus_lfb_addr && s->cirrus_lfb_end && !s->map_addr) {
+ if (s->lfb_addr && s->lfb_end && !s->map_addr) {
void *vram_pointer, *old_vram;
- vram_pointer = set_vram_mapping(s->cirrus_lfb_addr,
- s->cirrus_lfb_end);
+ vram_pointer = set_vram_mapping(s->lfb_addr,
+ s->lfb_end);
if (!vram_pointer)
fprintf(stderr, "NULL vram_pointer\n");
else {
@@ -2669,21 +2667,21 @@ static void cirrus_update_memory_access(CirrusVGAState *s)
VGA_RAM_SIZE);
qemu_free(old_vram);
}
- s->map_addr = s->cirrus_lfb_addr;
- s->map_end = s->cirrus_lfb_end;
+ s->map_addr = s->lfb_addr;
+ s->map_end = s->lfb_end;
}
s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
} else {
generic_io:
- if (s->cirrus_lfb_addr && s->cirrus_lfb_end && s->map_addr) {
+ if (s->lfb_addr && s->lfb_end && s->map_addr) {
void *old_vram;
old_vram = vga_update_vram((VGAState *)s, NULL, VGA_RAM_SIZE);
- unset_vram_mapping(s->cirrus_lfb_addr,
- s->cirrus_lfb_end,
+ unset_vram_mapping(s->lfb_addr,
+ s->lfb_end,
old_vram);
s->map_addr = s->map_end = 0;
@@ -3049,27 +3047,27 @@ void cirrus_stop_acc(CirrusVGAState *s)
if (s->map_addr){
int error;
s->map_addr = 0;
- error = unset_vram_mapping(s->cirrus_lfb_addr,
- s->cirrus_lfb_end, s->vram_ptr);
+ error = unset_vram_mapping(s->lfb_addr,
+ s->lfb_end, s->vram_ptr);
fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n");
}
}
void cirrus_restart_acc(CirrusVGAState *s)
{
- if (s->cirrus_lfb_addr && s->cirrus_lfb_end) {
+ if (s->lfb_addr && s->lfb_end) {
void *vram_pointer, *old_vram;
fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n",
- s->cirrus_lfb_addr, s->cirrus_lfb_end);
- vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end);
+ s->lfb_addr, s->lfb_end);
+ vram_pointer = set_vram_mapping(s->lfb_addr ,s->lfb_end);
if (!vram_pointer){
fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n");
} else {
old_vram = vga_update_vram((VGAState *)s, vram_pointer,
VGA_RAM_SIZE);
qemu_free(old_vram);
- s->map_addr = s->cirrus_lfb_addr;
- s->map_end = s->cirrus_lfb_end;
+ s->map_addr = s->lfb_addr;
+ s->map_end = s->lfb_end;
}
}
}
@@ -3120,8 +3118,8 @@ static void cirrus_vga_save(QEMUFile *f, void *opaque)
vga_acc = (!!s->map_addr);
qemu_put_8s(f, &vga_acc);
- qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
- qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+ qemu_put_be64s(f, (uint64_t*)&s->lfb_addr);
+ qemu_put_be64s(f, (uint64_t*)&s->lfb_end);
qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE);
}
@@ -3175,8 +3173,8 @@ static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
qemu_get_be32s(f, &s->hw_cursor_y);
qemu_get_8s(f, &vga_acc);
- qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
- qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+ qemu_get_be64s(f, (uint64_t*)&s->lfb_addr);
+ qemu_get_be64s(f, (uint64_t*)&s->lfb_end);
qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE);
if (vga_acc){
cirrus_restart_acc(s);
@@ -3337,11 +3335,11 @@ static void cirrus_pci_lfb_map(PCIDevice *d, int region_num,
/* XXX: add byte swapping apertures */
cpu_register_physical_memory(addr, s->vram_size,
s->cirrus_linear_io_addr);
- s->cirrus_lfb_addr = addr;
- s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
+ s->lfb_addr = addr;
+ s->lfb_end = addr + VGA_RAM_SIZE;
- if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) &&
- (s->cirrus_lfb_end != s->map_end))
+ if (s->map_addr && (s->lfb_addr != s->map_addr) &&
+ (s->lfb_end != s->map_end))
fprintf(logfile, "cirrus vga map change while on lfb mode\n");
cpu_register_physical_memory(addr + 0x1000000, 0x400000,
diff --git a/tools/ioemu/hw/vga.c b/tools/ioemu/hw/vga.c
index 9a6922aa54..f5a9c6c14c 100644
--- a/tools/ioemu/hw/vga.c
+++ b/tools/ioemu/hw/vga.c
@@ -1086,6 +1086,9 @@ static void vga_draw_text(VGAState *s, int full_update)
vga_draw_glyph8_func *vga_draw_glyph8;
vga_draw_glyph9_func *vga_draw_glyph9;
+ /* Disable dirty bit tracking */
+ xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
+
if (s->ds->dpy_colourdepth != NULL && s->ds->depth != 0)
s->ds->dpy_colourdepth(s->ds, 0);
s->rgb_to_pixel =
@@ -1485,7 +1488,7 @@ void check_sse2(void)
static void vga_draw_graphic(VGAState *s, int full_update)
{
int y1, y, update, linesize, y_start, double_scan, mask, depth;
- int width, height, shift_control, line_offset, bwidth, ds_depth;
+ int width, height, shift_control, line_offset, bwidth, ds_depth, bits;
ram_addr_t page0, page1;
int disp_width, multi_scan, multi_run;
uint8_t *d;
@@ -1533,6 +1536,7 @@ static void vga_draw_graphic(VGAState *s, int full_update)
} else {
v = VGA_DRAW_LINE4;
}
+ bits = 4;
} else if (shift_control == 1) {
full_update |= update_palette16(s);
if (s->sr[0x01] & 8) {
@@ -1541,28 +1545,35 @@ static void vga_draw_graphic(VGAState *s, int full_update)
} else {
v = VGA_DRAW_LINE2;
}
+ bits = 4;
} else {
switch(s->get_bpp(s)) {
default:
case 0:
full_update |= update_palette256(s);
v = VGA_DRAW_LINE8D2;
+ bits = 4;
break;
case 8:
full_update |= update_palette256(s);
v = VGA_DRAW_LINE8;
+ bits = 8;
break;
case 15:
v = VGA_DRAW_LINE15;
+ bits = 16;
break;
case 16:
v = VGA_DRAW_LINE16;
+ bits = 16;
break;
case 24:
v = VGA_DRAW_LINE24;
+ bits = 24;
break;
case 32:
v = VGA_DRAW_LINE32;
+ bits = 32;
break;
}
}
@@ -1590,12 +1601,72 @@ static void vga_draw_graphic(VGAState *s, int full_update)
width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
#endif
- for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
- if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+ y = 0;
+
+ if (height - 1 > s->line_compare || multi_run || (s->cr[0x17] & 3) != 3
+ || !s->lfb_addr) {
+ /* Tricky things happen, disable dirty bit tracking */
+ xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
+
+ for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE)
+ if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+ cpu_physical_memory_set_dirty(s->vram_offset + y);
+ } else {
+ /* Tricky things won't have any effect, i.e. we are in the very simple
+ * (and very usual) case of a linear buffer. */
+ unsigned long end;
+
+ for ( ; y < ((s->start_addr * 4) & TARGET_PAGE_MASK); y += TARGET_PAGE_SIZE)
+ /* We will not read that anyway. */
+ cpu_physical_memory_set_dirty(s->vram_offset + y);
+
+ if (y < (s->start_addr * 4)) {
+ /* start address not aligned on a page, track dirtyness by hand. */
+ if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+ cpu_physical_memory_set_dirty(s->vram_offset + y);
+ y += TARGET_PAGE_SIZE;
+ }
+
+ /* use page table dirty bit tracking for the inner of the LFB */
+ end = s->start_addr * 4 + height * line_offset;
+ {
+ unsigned long npages = ((end & TARGET_PAGE_MASK) - y) / TARGET_PAGE_SIZE;
+ const int width = sizeof(unsigned long) * 8;
+ unsigned long bitmap[(npages + width - 1) / width];
+ int err;
+
+ if (!(err = xc_hvm_track_dirty_vram(xc_handle, domid,
+ (s->lfb_addr + y) / TARGET_PAGE_SIZE, npages, bitmap))) {
+ int i, j;
+ for (i = 0; i < sizeof(bitmap) / sizeof(*bitmap); i++) {
+ unsigned long map = bitmap[i];
+ for (j = i * width; map && j < npages; map >>= 1, j++)
+ if (map & 1)
+ cpu_physical_memory_set_dirty(s->vram_offset + y
+ + j * TARGET_PAGE_SIZE);
+ }
+ y += npages * TARGET_PAGE_SIZE;
+ } else {
+ /* ENODATA just means we have changed mode and will succeed
+ * next time */
+ if (err != -ENODATA)
+ fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d)\n", s->lfb_addr + y, npages, err);
+ }
+ }
+
+ for ( ; y < s->vram_size && y < end; y += TARGET_PAGE_SIZE)
+ /* failed or end address not aligned on a page, track dirtyness by
+ * hand. */
+ if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+ cpu_physical_memory_set_dirty(s->vram_offset + y);
+
+ for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE)
+ /* We will not read that anyway. */
cpu_physical_memory_set_dirty(s->vram_offset + y);
+ }
addr1 = (s->start_addr * 4);
- bwidth = width * 4;
+ bwidth = (width * bits + 7) / 8;
y_start = -1;
page_min = 0;
page_max = 0;
@@ -1681,6 +1752,10 @@ static void vga_draw_blank(VGAState *s, int full_update)
return;
if (s->last_scr_width <= 0 || s->last_scr_height <= 0)
return;
+
+ /* Disable dirty bit tracking */
+ xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
+
s->rgb_to_pixel =
rgb_to_pixel_dup_table[get_depth_index(s->ds)];
if (s->ds->depth == 8)
diff --git a/tools/ioemu/hw/vga_int.h b/tools/ioemu/hw/vga_int.h
index a8dad45af8..f5f7ffd685 100644
--- a/tools/ioemu/hw/vga_int.h
+++ b/tools/ioemu/hw/vga_int.h
@@ -87,6 +87,8 @@
unsigned int vram_size; \
unsigned long bios_offset; \
unsigned int bios_size; \
+ unsigned long lfb_addr; \
+ unsigned long lfb_end; \
PCIDevice *pci_dev; \
uint32_t latch; \
uint8_t sr_index; \
diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index c79e14a563..c68461a496 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -236,6 +236,37 @@ int xc_hvm_set_pci_link_route(
return rc;
}
+int xc_hvm_track_dirty_vram(
+ int xc_handle, domid_t dom,
+ uint64_t first_pfn, uint64_t nr,
+ unsigned long *dirty_bitmap)
+{
+ DECLARE_HYPERCALL;
+ struct xen_hvm_track_dirty_vram arg;
+ int rc;
+
+ hypercall.op = __HYPERVISOR_hvm_op;
+ hypercall.arg[0] = HVMOP_track_dirty_vram;
+ hypercall.arg[1] = (unsigned long)&arg;
+
+ arg.domid = dom;
+ arg.first_pfn = first_pfn;
+ arg.nr = nr;
+ set_xen_guest_handle(arg.dirty_bitmap, (uint8_t *)dirty_bitmap);
+
+ if ( (rc = lock_pages(&arg, sizeof(arg))) != 0 )
+ {
+ PERROR("Could not lock memory");
+ return rc;
+ }
+
+ rc = do_xen_hypercall(xc_handle, &hypercall);
+
+ unlock_pages(&arg, sizeof(arg));
+
+ return rc;
+}
+
void *xc_map_foreign_pages(int xc_handle, uint32_t dom, int prot,
const xen_pfn_t *arr, int num)
{
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index dc532cbe09..380f024da9 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -882,6 +882,22 @@ int xc_hvm_set_pci_link_route(
int xc_handle, domid_t dom, uint8_t link, uint8_t isa_irq);
+/*
+ * Track dirty bit changes in the VRAM area
+ *
+ * All of this is done atomically:
+ * - get the dirty bitmap since the last call
+ * - set up dirty tracking area for period up to the next call
+ * - clear the dirty tracking area.
+ *
+ * Returns -ENODATA and does not fill bitmap if the area has changed since the
+ * last call.
+ */
+int xc_hvm_track_dirty_vram(
+ int xc_handle, domid_t dom,
+ uint64_t first_pfn, uint64_t nr,
+ unsigned long *bitmap);
+
typedef enum {
XC_ERROR_NONE = 0,
XC_INTERNAL_ERROR = 1,
diff --git a/xen/arch/ia64/vmx/vmx_hypercall.c b/xen/arch/ia64/vmx/vmx_hypercall.c
index 84510d7b78..97cc1bffb6 100644
--- a/xen/arch/ia64/vmx/vmx_hypercall.c
+++ b/xen/arch/ia64/vmx/vmx_hypercall.c
@@ -200,6 +200,10 @@ do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
rc = 0;
break;
+ case HVMOP_track_dirty_vram:
+ rc = -ENOSYS;
+ break;
+
default:
gdprintk(XENLOG_INFO, "Bad HVM op %ld.\n", op);
rc = -ENOSYS;
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ca472fa18d..6a7f20dc88 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2345,6 +2345,54 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -ENOSYS;
break;
+ case HVMOP_track_dirty_vram:
+ {
+ struct xen_hvm_track_dirty_vram a;
+ struct domain *d;
+
+ if ( copy_from_guest(&a, arg, 1) )
+ return -EFAULT;
+
+ if ( a.domid == DOMID_SELF )
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
+ return -ESRCH;
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
+ rc = -EPERM;
+ goto param_fail2;
+ }
+ }
+
+ rc = -EINVAL;
+ if ( !is_hvm_domain(d) )
+ goto param_fail2;
+
+ rc = xsm_hvm_param(d, op);
+ if ( rc )
+ goto param_fail2;
+
+ rc = -ESRCH;
+ if ( d->is_dying )
+ goto param_fail2;
+
+ rc = -EINVAL;
+ if ( !shadow_mode_enabled(d))
+ goto param_fail2;
+ if ( d->vcpu[0] == NULL )
+ goto param_fail2;
+
+ rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
+
+ param_fail2:
+ rcu_unlock_domain(d);
+ break;
+ }
+
default:
{
gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 3769721198..81e9111056 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -2589,6 +2589,13 @@ void shadow_teardown(struct domain *d)
* calls now that we've torn down the bitmap */
d->arch.paging.mode &= ~PG_log_dirty;
+ if (d->dirty_vram) {
+ xfree(d->dirty_vram->sl1ma);
+ xfree(d->dirty_vram->dirty_bitmap);
+ xfree(d->dirty_vram);
+ d->dirty_vram = NULL;
+ }
+
shadow_unlock(d);
}
@@ -2849,6 +2856,164 @@ void shadow_clean_dirty_bitmap(struct domain *d)
shadow_blow_tables(d);
shadow_unlock(d);
}
+
+
+/**************************************************************************/
+/* VRAM dirty tracking support */
+int shadow_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+{
+ int rc;
+ unsigned long end_pfn = begin_pfn + nr;
+ unsigned long dirty_size = (nr + 7) / 8;
+ int flush_tlb = 0;
+
+ if (end_pfn < begin_pfn
+ || begin_pfn > d->arch.p2m->max_mapped_pfn
+ || end_pfn >= d->arch.p2m->max_mapped_pfn)
+ return -EINVAL;
+
+ shadow_lock(d);
+
+ if ( d->dirty_vram && (!nr ||
+ ( begin_pfn != d->dirty_vram->begin_pfn
+ || end_pfn != d->dirty_vram->end_pfn )) ) {
+ /* Different tracking, tear the previous down. */
+ gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", d->dirty_vram->begin_pfn, d->dirty_vram->end_pfn);
+ xfree(d->dirty_vram->sl1ma);
+ xfree(d->dirty_vram->dirty_bitmap);
+ xfree(d->dirty_vram);
+ d->dirty_vram = NULL;
+ }
+
+ if ( !nr ) {
+ rc = 0;
+ goto out;
+ }
+
+ /* This should happen seldomly (Video mode change),
+ * no need to be careful. */
+ if ( !d->dirty_vram ) {
+ unsigned long i;
+ p2m_type_t t;
+
+ /* Just recount from start. */
+ for ( i = begin_pfn; i < end_pfn; i++ )
+ flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t));
+
+ gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
+
+ rc = -ENOMEM;
+ if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
+ goto out;
+ d->dirty_vram->begin_pfn = begin_pfn;
+ d->dirty_vram->end_pfn = end_pfn;
+
+ if ( (d->dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
+ goto out_dirty_vram;
+ memset(d->dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
+
+ if ( (d->dirty_vram->dirty_bitmap = xmalloc_array(uint8_t, dirty_size)) == NULL )
+ goto out_sl1ma;
+ memset(d->dirty_vram->dirty_bitmap, 0, dirty_size);
+
+ /* Tell the caller that this time we could not track dirty bits. */
+ rc = -ENODATA;
+ } else {
+ int i;
+#ifdef __i386__
+ unsigned long map_mfn = INVALID_MFN;
+ void *map_sl1p = NULL;
+#endif
+
+ /* Iterate over VRAM to track dirty bits. */
+ for ( i = 0; i < nr; i++ ) {
+ p2m_type_t t;
+ mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
+ struct page_info *page = mfn_to_page(mfn);
+ u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+ int dirty = 0;
+ paddr_t sl1ma = d->dirty_vram->sl1ma[i];
+
+ switch (count_info) {
+ case 0:
+ /* No guest reference, nothing to track. */
+ break;
+ case 1:
+ /* One guest reference. */
+ if ( sl1ma == INVALID_PADDR ) {
+ /* We don't know which sl1e points to this, too bad. */
+ dirty = 1;
+ /* TODO: Heuristics for finding the single mapping of
+ * this gmfn */
+ flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t));
+ } else {
+ /* Hopefully the most common case: only one mapping,
+ * whose dirty bit we can use. */
+ l1_pgentry_t *sl1e;
+#ifdef __i386__
+ void *sl1p = map_sl1p;
+ unsigned long sl1mfn = paddr_to_pfn(sl1ma);
+
+ if ( sl1mfn != map_mfn ) {
+ if ( map_sl1p )
+ sh_unmap_domain_page(map_sl1p);
+ map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
+ map_mfn = sl1mfn;
+ }
+ sl1e = sl1p + (sl1ma & ~PAGE_MASK);
+#else
+ sl1e = maddr_to_virt(sl1ma);
+#endif
+
+ if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY ) {
+ dirty = 1;
+ /* Note: this is atomic, so we may clear a
+ * _PAGE_ACCESSED set by another processor. */
+ l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+ flush_tlb = 1;
+ }
+ }
+ break;
+ default:
+ /* More than one guest reference,
+ * we don't afford tracking that. */
+ dirty = 1;
+ break;
+ }
+
+ if ( dirty )
+ d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+ }
+
+#ifdef __i386__
+ if ( map_sl1p )
+ sh_unmap_domain_page(map_sl1p);
+#endif
+
+ rc = -EFAULT;
+ if ( copy_to_guest(dirty_bitmap, d->dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
+ memset(d->dirty_vram->dirty_bitmap, 0, dirty_size);
+ rc = 0;
+ }
+ }
+ if ( flush_tlb )
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ goto out;
+
+out_sl1ma:
+ xfree(d->dirty_vram->sl1ma);
+out_dirty_vram:
+ xfree(d->dirty_vram);
+ d->dirty_vram = NULL;
+
+out:
+ shadow_unlock(d);
+ return rc;
+}
+
/**************************************************************************/
/* Shadow-control XEN_DOMCTL dispatcher */
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 95f968940c..c15ff639ed 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -801,7 +801,7 @@ _sh_propagate(struct vcpu *v,
// Since we know the guest's PRESENT bit is set, we also set the shadow's
// SHADOW_PRESENT bit.
//
- pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+ pass_thru_flags = (_PAGE_ACCESSED | _PAGE_USER |
_PAGE_RW | _PAGE_PRESENT);
if ( guest_supports_nx(v) )
pass_thru_flags |= _PAGE_NX_BIT;
@@ -1251,6 +1251,80 @@ static int shadow_set_l2e(struct vcpu *v,
return flags;
}
+static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+ shadow_l1e_t *sl1e,
+ mfn_t sl1mfn,
+ struct domain *d)
+{
+ mfn_t mfn;
+ unsigned long gfn;
+
+ if ( !d->dirty_vram ) return;
+
+ mfn = shadow_l1e_get_mfn(new_sl1e);
+ gfn = mfn_to_gfn(d, mfn);
+
+ if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
+ unsigned long i = gfn - d->dirty_vram->begin_pfn;
+ struct page_info *page = mfn_to_page(mfn);
+ u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+
+ if ( count_info == 1 )
+ /* Initial guest reference, record it */
+ d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
+ | ((paddr_t) sl1e & ~PAGE_MASK);
+ }
+}
+
+/* Called when a shadow l1e mapping a tracked VRAM gfn is torn down.
+ * Records the page as dirty in the bitmap if its shadow DIRTY bit was
+ * set (or if we cannot tell), and forgets the recorded sl1e location. */
+static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t *sl1e,
+                                       mfn_t sl1mfn,
+                                       struct domain *d)
+{
+    mfn_t mfn;
+    unsigned long gfn;
+
+    if ( !d->dirty_vram ) return;
+
+    mfn = shadow_l1e_get_mfn(old_sl1e);
+    gfn = mfn_to_gfn(d, mfn);
+
+    if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
+        unsigned long i = gfn - d->dirty_vram->begin_pfn;
+        struct page_info *page = mfn_to_page(mfn);
+        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+        int dirty = 0;
+        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
+            | ((paddr_t) sl1e & ~PAGE_MASK);
+
+        if ( count_info == 1 ) {
+            /* Last reference */
+            if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
+                /* We didn't know it was that one, let's say it is dirty */
+                dirty = 1;
+            } else {
+                ASSERT(d->dirty_vram->sl1ma[i] == sl1ma);
+                d->dirty_vram->sl1ma[i] = INVALID_PADDR;
+                if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_DIRTY )
+                    dirty = 1;
+            }
+        } else {
+            /* We had more than one reference, just consider the page dirty. */
+            dirty = 1;
+            /* Check that it's not the one we recorded. */
+            if ( d->dirty_vram->sl1ma[i] == sl1ma ) {
+                /* Too bad, we remembered the wrong one... */
+                d->dirty_vram->sl1ma[i] = INVALID_PADDR;
+            } else {
+                /* Ok, our recorded sl1e is still pointing to this page, let's
+                 * just hope it will remain. */
+            }
+        }
+        if ( dirty )
+            /* Set bit i of the bitmap (same idiom as shadow_track_dirty_vram);
+             * the original OR'ed in dirty_bitmap[i % 8], losing updates. */
+            d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+    }
+}
+
static int shadow_set_l1e(struct vcpu *v,
shadow_l1e_t *sl1e,
shadow_l1e_t new_sl1e,
@@ -1275,6 +1349,8 @@ static int shadow_set_l1e(struct vcpu *v,
/* Doesn't look like a pagetable. */
flags |= SHADOW_SET_ERROR;
new_sl1e = shadow_l1e_empty();
+ } else {
+ shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
}
}
}
@@ -1293,6 +1369,7 @@ static int shadow_set_l1e(struct vcpu *v,
* trigger a flush later. */
if ( shadow_mode_refcounts(d) )
{
+ shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
shadow_put_page_from_l1e(old_sl1e, d);
}
}
@@ -2248,8 +2325,10 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
mfn_t sl1mfn = smfn;
SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
- && !sh_l1e_is_magic(*sl1e) )
+ && !sh_l1e_is_magic(*sl1e) ) {
+ shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
shadow_put_page_from_l1e(*sl1e, d);
+ }
});
}
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index 68d0c8e0ab..7a49231d51 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -528,6 +528,15 @@ sh_mfn_is_dirty(struct domain *d, mfn_t gmfn)
return rv;
}
+/**************************************************************************/
+/* VRAM dirty tracking support */
+
+struct sh_dirty_vram {
+ unsigned long begin_pfn;
+ unsigned long end_pfn;
+ paddr_t *sl1ma;
+ uint8_t *dirty_bitmap;
+};
/**************************************************************************/
/* Shadow-page refcounting. */
diff --git a/xen/include/asm-ia64/config.h b/xen/include/asm-ia64/config.h
index 40a09377ed..eb5c01c3eb 100644
--- a/xen/include/asm-ia64/config.h
+++ b/xen/include/asm-ia64/config.h
@@ -71,6 +71,7 @@ typedef int pid_t;
// now needed for xen/include/mm.h
typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
// from include/linux/kernel.h
#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
diff --git a/xen/include/asm-powerpc/types.h b/xen/include/asm-powerpc/types.h
index 5f6b4a6b0e..6af61881b5 100644
--- a/xen/include/asm-powerpc/types.h
+++ b/xen/include/asm-powerpc/types.h
@@ -61,6 +61,7 @@ typedef unsigned long size_t;
#endif
typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
#define PRIpaddr "08lx"
/* DMA addresses come in generic and 64-bit flavours. */
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 85e6866d63..64a6ab31ba 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -62,6 +62,12 @@ void shadow_vcpu_init(struct vcpu *v);
/* Enable an arbitrary shadow mode. Call once at domain creation. */
int shadow_enable(struct domain *d, u32 mode);
+/* Enable VRAM dirty bit tracking. */
+int shadow_track_dirty_vram(struct domain *d,
+ unsigned long first_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
/* Handler for shadow control ops: operations from user-space to enable
* and disable ephemeral shadow modes (test mode and log-dirty mode) and
* manipulate the log-dirty bitmap. */
diff --git a/xen/include/asm-x86/types.h b/xen/include/asm-x86/types.h
index fd2fd069b6..4a08b22df9 100644
--- a/xen/include/asm-x86/types.h
+++ b/xen/include/asm-x86/types.h
@@ -38,15 +38,18 @@ typedef signed long long s64;
typedef unsigned long long u64;
#if defined(CONFIG_X86_PAE)
typedef u64 paddr_t;
+#define INVALID_PADDR (~0ULL)
#define PRIpaddr "016llx"
#else
typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
#define PRIpaddr "08lx"
#endif
#elif defined(__x86_64__)
typedef signed long s64;
typedef unsigned long u64;
typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
#define PRIpaddr "016lx"
#endif
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
index b21b0f7abe..c2828c1f93 100644
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -73,4 +73,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
/* Flushes all VCPU TLBs: @arg must be NULL. */
#define HVMOP_flush_tlbs 5
+/* Track dirty VRAM. */
+#define HVMOP_track_dirty_vram 6
+struct xen_hvm_track_dirty_vram {
+ /* Domain to be tracked. */
+ domid_t domid;
+ /* First pfn to track. */
+ uint64_aligned_t first_pfn;
+ /* Number of pages to track. */
+ uint64_aligned_t nr;
+ /* OUT variable. */
+ /* Dirty bitmap buffer. */
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+};
+typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t);
+
#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ff5241a532..05977e054b 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -236,6 +236,9 @@ struct domain
* cause a deadlock. Acquirers don't spin waiting; they preempt.
*/
spinlock_t hypercall_deadlock_mutex;
+
+ /* VRAM dirty support. */
+ struct sh_dirty_vram *dirty_vram;
};
struct domain_setup_info