author     Keir Fraser <keir.fraser@citrix.com>    2010-09-13 17:11:04 +0100
committer  Keir Fraser <keir.fraser@citrix.com>    2010-09-13 17:11:04 +0100
commit     4678d349a17fba51811f1471c85d6ca15a235b68 (patch)
tree       72be0593cabad62e565fab2bf3d4a17bdb915a9b /tools/libxc/xc_tmem.c
parent     b0797898d35e15be37d9a679d0c77b2311bacd89 (diff)
tmem (tools): move to new ABI version to handle long object-ids
After a great deal of discussion and review with Linux kernel developers, it appears there are "next-generation" filesystems (such as btrfs, xfs, and Lustre) that will not be able to use tmem because of an ABI limitation: the field that represents a unique file identifier is 64 bits wide in the tmem ABI, but may need to be as large as 192 bits. To support these guest filesystems, the tmem ABI must be revised from "v0" to "v1".

I *think* it is still the case that tmem is experimental and is not yet used anywhere in production. The tmem ABI is designed to support multiple revisions, so the Xen tmem implementation could be updated to handle both v0 and v1. However, this is a bit messy and would require data structures for both v0 and v1 to appear in public Xen header files. I am inclined to update the Xen tmem implementation to support only v1 and fail v0 gracefully. This would result only in a performance loss (as if tmem were disabled) for newly launched tmem-v0-enabled guests, but live migration between old tmem-v0 Xen and new tmem-v1 Xen machines would fail, and saved tmem-v0 guests would not be restorable on a tmem-v1 Xen machine. I plan to update both pre-4.0.2 and unstable (the future 4.1) to support only v1.

I believe these restrictions are reasonable at this point in the tmem lifecycle, though they may not be reasonable in the near future; should the tmem ABI need to be revised from v1 to v2, I understand that backwards compatibility will be required.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
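For orientation, here is a rough sketch of the object-id change described above. The struct tmem_oid name and the oid[0]/oid[1]/oid[2] fields are taken from the diff below; the three-by-64-bit layout is inferred from those accesses and the -1L terminator checks, and the v0 typedef name is purely illustrative (the authoritative definitions live in Xen's public tmem header).

    #include <stdint.h>

    /* v0 ABI: an object (e.g. a file) is named by a single 64-bit id;
     * this is the field that next-generation filesystems outgrow. */
    typedef uint64_t v0_object_id;               /* illustrative name */

    /* v1 ABI: the object id grows to three 64-bit words (192 bits),
     * matching the oid[0]/oid[1]/oid[2] accesses in the diff below. */
    struct tmem_oid {
        uint64_t oid[3];
    };

    /* In the save/restore stream an "all ones" oid marks the end of a
     * page list, hence the oid[0..2] == -1L checks added below. */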
Diffstat (limited to 'tools/libxc/xc_tmem.c')
 -rw-r--r--  tools/libxc/xc_tmem.c | 71
 1 file changed, 61 insertions, 10 deletions
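The patch splits the old all-purpose xc_tmem_control() into xc_tmem_control() (which zeroes the oid) and a new xc_tmem_control_oid() that takes a full struct tmem_oid in place of the old 64-bit arg3. A minimal caller-side sketch follows, modelled on the TMEMC_RESTORE_FLUSH_PAGE call in the last hunk below; it assumes an already-opened xc_interface handle in xch, and the pool, domain, index, and oid values are illustrative placeholders only.

    /* Flush one page of an object via the new long-oid control call. */
    int32_t pool_id = 0;                              /* illustrative */
    uint32_t dom = 1, index = 0;                      /* illustrative */
    struct tmem_oid oid = { .oid = { 0, 0, 0 } };     /* illustrative */

    if ( xc_tmem_control_oid(xch, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom,
                             0 /* arg1 */, index /* arg2 */, oid, NULL) <= 0 )
        return -1;   /* same failure handling as xc_tmem_restore_extra() */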
diff --git a/tools/libxc/xc_tmem.c b/tools/libxc/xc_tmem.c
index 20d8de61f1..e5268813c6 100644
--- a/tools/libxc/xc_tmem.c
+++ b/tools/libxc/xc_tmem.c
@@ -63,7 +63,56 @@ int xc_tmem_control(xc_interface *xch,
     set_xen_guest_handle(op.u.ctrl.buf,buf);
     op.u.ctrl.arg1 = arg1;
     op.u.ctrl.arg2 = arg2;
-    op.u.ctrl.arg3 = arg3;
+    /* use xc_tmem_control_oid if arg3 is required */
+    op.u.ctrl.oid[0] = 0;
+    op.u.ctrl.oid[1] = 0;
+    op.u.ctrl.oid[2] = 0;
+
+    if (subop == TMEMC_LIST) {
+        if ((arg1 != 0) && (lock_pages(buf, arg1) != 0))
+        {
+            PERROR("Could not lock memory for Xen hypercall");
+            return -ENOMEM;
+        }
+    }
+
+#ifdef VALGRIND
+    if (arg1 != 0)
+        memset(buf, 0, arg1);
+#endif
+
+    rc = do_tmem_op(xch, &op);
+
+    if (subop == TMEMC_LIST) {
+        if (arg1 != 0)
+            unlock_pages(buf, arg1);
+    }
+
+    return rc;
+}
+
+int xc_tmem_control_oid(xc_interface *xch,
+                        int32_t pool_id,
+                        uint32_t subop,
+                        uint32_t cli_id,
+                        uint32_t arg1,
+                        uint32_t arg2,
+                        struct tmem_oid oid,
+                        void *buf)
+{
+    tmem_op_t op;
+    int rc;
+
+    op.cmd = TMEM_CONTROL;
+    op.pool_id = pool_id;
+    op.u.ctrl.subop = subop;
+    op.u.ctrl.cli_id = cli_id;
+    set_xen_guest_handle(op.u.ctrl.buf,buf);
+    op.u.ctrl.arg1 = arg1;
+    op.u.ctrl.arg2 = arg2;
+    op.u.ctrl.oid[0] = oid.oid[0];
+    op.u.ctrl.oid[1] = oid.oid[1];
+    op.u.ctrl.oid[2] = oid.oid[2];
 
     if (subop == TMEMC_LIST) {
         if ((arg1 != 0) && (lock_pages(buf, arg1) != 0))
@@ -254,7 +303,7 @@ int xc_tmem_save(xc_interface *xch,
                 } else {
                     /* page list terminator */
                     h = (struct tmem_handle *)buf;
-                    h->oid = -1;
+                    h->oid[0] = h->oid[1] = h->oid[2] = -1L;
                     if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
                         return -1;
                     break;
@@ -291,7 +340,8 @@ int xc_tmem_save_extra(xc_interface *xch, int dom, int io_fd, int field_marker)
         if ( write_exact(io_fd, &handle.index, sizeof(handle.index)) )
             return -1;
         count++;
-        checksum += handle.pool_id + handle.oid + handle.index;
+        checksum += handle.pool_id + handle.oid[0] + handle.oid[1] +
+                    handle.oid[2] + handle.index;
     }
     if ( count )
         DPRINTF("needed %d tmem invalidates, check=%d\n",count,checksum);
@@ -401,20 +451,21 @@ int xc_tmem_restore(xc_interface *xch, int dom, int io_fd)
         }
         for ( j = n_pages; j > 0; j-- )
         {
-            uint64_t oid;
+            struct tmem_oid oid;
             uint32_t index;
             int rc;
             if ( read_exact(io_fd, &oid, sizeof(oid)) )
                 return -1;
-            if ( oid == -1 )
+            if ( oid.oid[0] == -1L && oid.oid[1] == -1L && oid.oid[2] == -1L )
                 break;
             if ( read_exact(io_fd, &index, sizeof(index)) )
                 return -1;
             if ( read_exact(io_fd, buf, pagesize) )
                 return -1;
             checksum += *buf;
-            if ( (rc = xc_tmem_control(xch, pool_id, TMEMC_RESTORE_PUT_PAGE,
-                                       dom, bufsize, index, oid, buf)) <= 0 )
+            if ( (rc = xc_tmem_control_oid(xch, pool_id,
+                                           TMEMC_RESTORE_PUT_PAGE, dom,
+                                           bufsize, index, oid, buf)) <= 0 )
             {
                 DPRINTF("xc_tmem_restore: putting page failed, rc=%d\n",rc);
                 return -1;
@@ -434,7 +485,7 @@ int xc_tmem_restore(xc_interface *xch, int dom, int io_fd)
 int xc_tmem_restore_extra(xc_interface *xch, int dom, int io_fd)
 {
     uint32_t pool_id;
-    uint64_t oid;
+    struct tmem_oid oid;
     uint32_t index;
     int count = 0;
     int checksum = 0;
@@ -445,11 +496,11 @@ int xc_tmem_restore_extra(xc_interface *xch, int dom, int io_fd)
             return -1;
         if ( read_exact(io_fd, &index, sizeof(index)) )
             return -1;
-        if ( xc_tmem_control(xch, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom,
+        if ( xc_tmem_control_oid(xch, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom,
                              0,index,oid,NULL) <= 0 )
             return -1;
         count++;
-        checksum += pool_id + oid + index;
+        checksum += pool_id + oid.oid[0] + oid.oid[1] + oid.oid[2] + index;
     }
     if ( pool_id != -1 )
         return -1;