author     Keir Fraser <keir.fraser@citrix.com>  2009-08-06 09:19:55 +0100
committer  Keir Fraser <keir.fraser@citrix.com>  2009-08-06 09:19:55 +0100
commit     734a7f6677f43502a2550ab89bf13439ab56c611 (patch)
tree       eb407ee7afa14e46f0e279e32f4cbc0b495480ee
parent     70fc2398f035f16a1e6e6fc99af73e4d539a40c2 (diff)
tmem: save/restore/migrate/livemigrate and shared pool authentication
Attached patch implements save/restore/migration/live-migration for transcendent memory ("tmem"). Without this patch, domains using tmem may in some cases lose data when doing save/restore or migrate/live-migrate. Also included in this patch is support for a new (privileged) hypercall for authorizing domains to share pools; this provides the foundation to accommodate upstream Linux requests for shared-pool security.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
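As an illustration only (not part of the patch), a toolstack could drive the new authorization path roughly as follows; the domid and UUID here are made-up values, and the sketch assumes the era's int-valued libxc handle:

    /* Minimal sketch: authorize domid 5 for one shared pool via the
     * xc_tmem_auth() wrapper added below (arg1 == 1 authorizes, 0 revokes). */
    #include <stdio.h>
    #include "xenctrl.h"

    int main(void)
    {
        char uuid[] = "abcdef01-2345-6789-0123-456789abcdef";
        int xc = xc_interface_open();   /* libxc handle (an int at this time) */

        if ( xc < 0 )
            return 1;
        if ( xc_tmem_auth(xc, 5 /* domid */, uuid, 1) < 0 )
            fprintf(stderr, "tmem shared-pool auth failed\n");
        xc_interface_close(xc);
        return 0;
    }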
-rw-r--r--  tools/libxc/xc_domain_restore.c                 21
-rw-r--r--  tools/libxc/xc_domain_save.c                    18
-rw-r--r--  tools/libxc/xc_tmem.c                          374
-rw-r--r--  tools/libxc/xenctrl.h                           15
-rw-r--r--  tools/python/xen/lowlevel/xc/xc.c               40
-rw-r--r--  tools/python/xen/xend/XendAPI.py                11
-rw-r--r--  tools/python/xen/xend/XendNode.py               26
-rw-r--r--  tools/python/xen/xend/balloon.py                 6
-rw-r--r--  tools/python/xen/xend/server/XMLRPCServer.py     3
-rw-r--r--  tools/python/xen/xm/main.py                     48
-rw-r--r--  xen/common/tmem.c                              534
-rw-r--r--  xen/common/tmem_xen.c                           50
-rw-r--r--  xen/include/public/tmem.h                       62
-rw-r--r--  xen/include/xen/tmem_xen.h                      27
14 files changed, 1102 insertions(+), 133 deletions(-)
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 13e83c3f13..7fad7d0efb 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -536,6 +536,27 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
continue;
}
+ if ( j == -5 )
+ {
+ DPRINTF("xc_domain_restore start tmem\n");
+ if ( xc_tmem_restore(xc_handle, dom, io_fd) )
+ {
+ ERROR("error reading/restoring tmem");
+ goto out;
+ }
+ continue;
+ }
+
+ if ( j == -6 )
+ {
+ if ( xc_tmem_restore_extra(xc_handle, dom, io_fd) )
+ {
+ ERROR("error reading/restoring tmem extra");
+ goto out;
+ }
+ continue;
+ }
+
if ( j == 0 )
break; /* our work here is done */
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 90557e26cf..0c3a21e73b 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -758,6 +758,7 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
int live = (flags & XCFLAGS_LIVE);
int debug = (flags & XCFLAGS_DEBUG);
int race = 0, sent_last_iter, skip_this_iter;
+ int tmem_saved = 0;
/* The new domain's shared-info frame number. */
unsigned long shared_info_frame;
@@ -996,6 +997,13 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
print_stats(xc_handle, dom, 0, &stats, 0);
+ tmem_saved = xc_tmem_save(xc_handle, dom, io_fd, live, -5);
+ if ( tmem_saved == -1 )
+ {
+ ERROR("Error when writing to state file (tmem)");
+ goto out;
+ }
+
/* Now write out each data page, canonicalising page tables as we go... */
for ( ; ; )
{
@@ -1316,6 +1324,13 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
}
DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
+ if ( (tmem_saved > 0) &&
+ (xc_tmem_save_extra(xc_handle,dom,io_fd,-6) == -1) )
+ {
+ ERROR("Error when writing to state file (tmem)");
+ goto out;
+ }
+
}
if ( xc_shadow_control(xc_handle, dom,
@@ -1605,6 +1620,9 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
out:
+ if ( tmem_saved != 0 && live )
+ xc_tmem_save_done(xc_handle, dom);
+
if ( live )
{
if ( xc_shadow_control(xc_handle, dom,
diff --git a/tools/libxc/xc_tmem.c b/tools/libxc/xc_tmem.c
index 8d8141c07a..926f848a90 100644
--- a/tools/libxc/xc_tmem.c
+++ b/tools/libxc/xc_tmem.c
@@ -36,6 +36,7 @@ int xc_tmem_control(int xc,
uint32_t cli_id,
uint32_t arg1,
uint32_t arg2,
+ uint64_t arg3,
void *buf)
{
tmem_op_t op;
@@ -45,9 +46,10 @@ int xc_tmem_control(int xc,
op.pool_id = pool_id;
op.u.ctrl.subop = subop;
op.u.ctrl.cli_id = cli_id;
+ set_xen_guest_handle(op.u.ctrl.buf,buf);
op.u.ctrl.arg1 = arg1;
op.u.ctrl.arg2 = arg2;
- op.u.ctrl.buf.p = buf;
+ op.u.ctrl.arg3 = arg3;
if (subop == TMEMC_LIST) {
if ((arg1 != 0) && (lock_pages(buf, arg1) != 0))
@@ -72,6 +74,376 @@ int xc_tmem_control(int xc,
return rc;
}
+static int xc_tmem_uuid_parse(char *uuid_str, uint64_t *uuid_lo, uint64_t *uuid_hi)
+{
+ char *p = uuid_str;
+ uint64_t *x = uuid_hi;
+ int i = 0, digit;
+
+ *uuid_lo = 0; *uuid_hi = 0;
+ for ( p = uuid_str, i = 0; i != 36 && *p != '\0'; p++, i++ )
+ {
+ if ( (i == 8 || i == 13 || i == 18 || i == 23) )
+ {
+ if ( *p != '-' )
+ return -1;
+ if ( i == 18 )
+ x = uuid_lo;
+ continue;
+ }
+ else if ( *p >= '0' && *p <= '9' )
+ digit = *p - '0';
+ else if ( *p >= 'A' && *p <= 'F' )
+ digit = *p - 'A' + 10;
+ else if ( *p >= 'a' && *p <= 'f' )
+ digit = *p - 'a' + 10;
+ else
+ return -1;
+ *x = (*x << 4) | digit;
+ }
+ if ( (i != 1 && i != 36) || *p != '\0' )
+ return -1;
+ return 0;
+}
+
+int xc_tmem_auth(int xc,
+ int cli_id,
+ char *uuid_str,
+ int arg1)
+{
+ tmem_op_t op;
+
+ op.cmd = TMEM_AUTH;
+ op.pool_id = 0;
+ op.u.new.arg1 = cli_id;
+ op.u.new.flags = arg1;
+ if ( xc_tmem_uuid_parse(uuid_str, &op.u.new.uuid[0],
+ &op.u.new.uuid[1]) < 0 )
+ {
+ PERROR("Can't parse uuid, use xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+ return -1;
+ }
+
+ return do_tmem_op(xc, &op);
+}
+
+/* Save/restore/live migrate */
+
+/*
+ Note that live migration complicates the save/restore format in
+ multiple ways: Though saving/migration can only occur when all
+ tmem pools belonging to the domain-being-saved are frozen and
+ this ensures that new pools can't be created or existing pools
+ grown (in number of pages), it is possible during a live migration
+ that pools may be destroyed and pages invalidated while the migration
+ is in process. As a result, (1) it is not safe to pre-specify counts
+ for these values precisely, but only as a "max", and (2) a "invalidation"
+ list (of pools, objects, pages) must be appended when the domain is truly
+ suspended.
+ */
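+
+/*
+ Informational sketch (derived from the save routines below, not a
+ normative spec) of the resulting stream format:
+ -5, version, max_pools, -1, client flags, weight, cap, -1,
+ then per pool: pool_id, flags, n_pages, uuid[2],
+ then per persistent page: oid, index, page data
+ (oid == -1 terminates a pool's page list; pool_id == -1
+ terminates the pool list);
+ -6 followed by { pool_id, oid, index } tuples, terminated by
+ pool_id == -1 -- the invalidation list, appended at final
+ suspend during live migration only.
+ */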
+
+/* returns 0 if nothing to save, -1 if error saving, 1 if saved successfully */
+int xc_tmem_save(int xc, int dom, int io_fd, int live, int field_marker)
+{
+ int marker = field_marker;
+ int i, j;
+ uint32_t max_pools, version;
+ uint32_t weight, cap, flags;
+ uint32_t pool_id;
+ uint32_t minusone = -1;
+ struct tmem_handle *h;
+
+ if ( xc_tmem_control(xc,0,TMEMC_SAVE_BEGIN,dom,live,0,0,NULL) <= 0 )
+ return 0;
+
+ if ( write_exact(io_fd, &marker, sizeof(marker)) )
+ return -1;
+ version = xc_tmem_control(xc,0,TMEMC_SAVE_GET_VERSION,0,0,0,0,NULL);
+ if ( write_exact(io_fd, &version, sizeof(version)) )
+ return -1;
+ max_pools = xc_tmem_control(xc,0,TMEMC_SAVE_GET_MAXPOOLS,0,0,0,0,NULL);
+ if ( write_exact(io_fd, &max_pools, sizeof(max_pools)) )
+ return -1;
+ if ( version == -1 || max_pools == -1 )
+ return -1;
+ if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+ return -1;
+ flags = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_FLAGS,dom,0,0,0,NULL);
+ if ( write_exact(io_fd, &flags, sizeof(flags)) )
+ return -1;
+ weight = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_WEIGHT,dom,0,0,0,NULL);
+ if ( write_exact(io_fd, &weight, sizeof(weight)) )
+ return -1;
+ cap = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_CAP,dom,0,0,0,NULL);
+ if ( write_exact(io_fd, &cap, sizeof(cap)) )
+ return -1;
+ if ( flags == -1 || weight == -1 || cap == -1 )
+ return -1;
+ if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+ return -1;
+ for ( i = 0; i < max_pools; i++ )
+ {
+ uint64_t uuid[2];
+ uint32_t n_pages;
+ uint32_t pagesize;
+ char *buf = NULL;
+ int bufsize = 0;
+ int checksum = 0;
+
+ /* get pool id, flags, pagesize, n_pages, uuid */
+ flags = xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_FLAGS,dom,0,0,0,NULL);
+ if ( flags != -1 )
+ {
+ pool_id = i;
+ n_pages = xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_NPAGES,dom,0,0,0,NULL);
+ if ( !(flags & TMEM_POOL_PERSIST) )
+ n_pages = 0;
+ (void)xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_UUID,dom,sizeof(uuid),0,0,&uuid);
+ if ( write_exact(io_fd, &pool_id, sizeof(pool_id)) )
+ return -1;
+ if ( write_exact(io_fd, &flags, sizeof(flags)) )
+ return -1;
+ if ( write_exact(io_fd, &n_pages, sizeof(n_pages)) )
+ return -1;
+ if ( write_exact(io_fd, &uuid, sizeof(uuid)) )
+ return -1;
+ if ( n_pages == 0 )
+ continue;
+
+ pagesize = 1 << (((flags >> TMEM_POOL_PAGESIZE_SHIFT) &
+ TMEM_POOL_PAGESIZE_MASK) + 12);
+ if ( pagesize > bufsize )
+ {
+ bufsize = pagesize + sizeof(struct tmem_handle);
+ if ( (buf = realloc(buf,bufsize)) == NULL )
+ return -1;
+ }
+ for ( j = n_pages; j > 0; j-- )
+ {
+ int ret;
+ if ( (ret = xc_tmem_control(xc, pool_id,
+ TMEMC_SAVE_GET_NEXT_PAGE, dom,
+ bufsize, 0, 0, buf)) > 0 )
+ {
+ h = (struct tmem_handle *)buf;
+ if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
+ return -1;
+ if ( write_exact(io_fd, &h->index, sizeof(h->index)) )
+ return -1;
+ h++;
+ checksum += *(char *)h;
+ if ( write_exact(io_fd, h, pagesize) )
+ return -1;
+ } else if ( ret == 0 ) {
+ continue;
+ } else {
+ /* page list terminator */
+ h = (struct tmem_handle *)buf;
+ h->oid = -1;
+ if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
+ return -1;
+ break;
+ }
+ }
+ DPRINTF("saved %d tmem pages for dom=%d pool=%d, checksum=%x\n",
+ n_pages-j,dom,pool_id,checksum);
+ }
+ }
+ /* pool list terminator */
+ minusone = -1;
+ if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+ return -1;
+
+ return 1;
+}
+
+/* only called for live migration */
+int xc_tmem_save_extra(int xc, int dom, int io_fd, int field_marker)
+{
+ struct tmem_handle handle;
+ int marker = field_marker;
+ uint32_t minusone;
+ int count = 0, checksum = 0;
+
+ if ( write_exact(io_fd, &marker, sizeof(marker)) )
+ return -1;
+ while ( xc_tmem_control(xc, 0, TMEMC_SAVE_GET_NEXT_INV, dom,
+ sizeof(handle),0,0,&handle) > 0 ) {
+ if ( write_exact(io_fd, &handle.pool_id, sizeof(handle.pool_id)) )
+ return -1;
+ if ( write_exact(io_fd, &handle.oid, sizeof(handle.oid)) )
+ return -1;
+ if ( write_exact(io_fd, &handle.index, sizeof(handle.index)) )
+ return -1;
+ count++;
+ checksum += handle.pool_id + handle.oid + handle.index;
+ }
+ if ( count )
+ DPRINTF("needed %d tmem invalidates, check=%d\n",count,checksum);
+ minusone = -1;
+ if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+ return -1;
+ return 0;
+}
+
+/* only called for live migration */
+void xc_tmem_save_done(int xc, int dom)
+{
+ xc_tmem_control(xc,0,TMEMC_SAVE_END,dom,0,0,0,NULL);
+}
+
+/* restore routines */
+
+static int xc_tmem_restore_new_pool(
+ int xc,
+ int cli_id,
+ uint32_t pool_id,
+ uint32_t flags,
+ uint64_t uuid_lo,
+ uint64_t uuid_hi)
+{
+ tmem_op_t op;
+
+ op.cmd = TMEM_RESTORE_NEW;
+ op.pool_id = pool_id;
+ op.u.new.arg1 = cli_id;
+ op.u.new.flags = flags;
+ op.u.new.uuid[0] = uuid_lo;
+ op.u.new.uuid[1] = uuid_hi;
+
+ return do_tmem_op(xc, &op);
+}
+
+int xc_tmem_restore(int xc, int dom, int io_fd)
+{
+ uint32_t save_max_pools, save_version;
+ uint32_t this_max_pools, this_version;
+ uint32_t pool_id;
+ uint32_t minusone;
+ uint32_t weight, cap, flags;
+ int checksum = 0;
+
+ save_version = xc_tmem_control(xc,0,TMEMC_SAVE_GET_VERSION,dom,0,0,0,NULL);
+ if ( save_version == -1 )
+ return -1; /* domain doesn't exist */
+ save_max_pools = xc_tmem_control(xc,0,TMEMC_SAVE_GET_MAXPOOLS,0,0,0,0,NULL);
+ if ( read_exact(io_fd, &this_version, sizeof(this_version)) )
+ return -1;
+ if ( read_exact(io_fd, &this_max_pools, sizeof(this_max_pools)) )
+ return -1;
+ /* FIXME check here to ensure no version mismatch or maxpools mismatch */
+ if ( read_exact(io_fd, &minusone, sizeof(minusone)) )
+ return -1;
+ if ( minusone != -1 )
+ return -1;
+ if ( xc_tmem_control(xc,0,TMEMC_RESTORE_BEGIN,dom,0,0,0,NULL) < 0 )
+ return -1;
+ if ( read_exact(io_fd, &flags, sizeof(flags)) )
+ return -1;
+ if ( flags & TMEM_CLIENT_COMPRESS )
+ if ( xc_tmem_control(xc,0,TMEMC_SET_COMPRESS,dom,1,0,0,NULL) < 0 )
+ return -1;
+ if ( flags & TMEM_CLIENT_FROZEN )
+ if ( xc_tmem_control(xc,0,TMEMC_FREEZE,dom,0,0,0,NULL) < 0 )
+ return -1;
+ if ( read_exact(io_fd, &weight, sizeof(weight)) )
+ return -1;
+ if ( xc_tmem_control(xc,0,TMEMC_SET_WEIGHT,dom,0,0,0,NULL) < 0 )
+ return -1;
+ if ( read_exact(io_fd, &cap, sizeof(cap)) )
+ return -1;
+ if ( xc_tmem_control(xc,0,TMEMC_SET_CAP,dom,0,0,0,NULL) < 0 )
+ return -1;
+ if ( read_exact(io_fd, &minusone, sizeof(minusone)) )
+ return -1;
+ while ( read_exact(io_fd, &pool_id, sizeof(pool_id)) == 0 && pool_id != -1 )
+ {
+ uint64_t uuid[2];
+ uint32_t n_pages;
+ char *buf = NULL;
+ int bufsize = 0, pagesize;
+ int j;
+
+ if ( read_exact(io_fd, &flags, sizeof(flags)) )
+ return -1;
+ if ( read_exact(io_fd, &n_pages, sizeof(n_pages)) )
+ return -1;
+ if ( read_exact(io_fd, &uuid, sizeof(uuid)) )
+ return -1;
+ if ( xc_tmem_restore_new_pool(xc, dom, pool_id,
+ flags, uuid[0], uuid[1]) < 0)
+ return -1;
+ if ( n_pages <= 0 )
+ continue;
+
+ pagesize = 1 << (((flags >> TMEM_POOL_PAGESIZE_SHIFT) &
+ TMEM_POOL_PAGESIZE_MASK) + 12);
+ if ( pagesize > bufsize )
+ {
+ bufsize = pagesize;
+ if ( (buf = realloc(buf,bufsize)) == NULL )
+ return -1;
+ }
+ for ( j = n_pages; j > 0; j-- )
+ {
+ uint64_t oid;
+ uint32_t index;
+ int rc;
+ if ( read_exact(io_fd, &oid, sizeof(oid)) )
+ return -1;
+ if ( oid == -1 )
+ break;
+ if ( read_exact(io_fd, &index, sizeof(index)) )
+ return -1;
+ if ( read_exact(io_fd, buf, pagesize) )
+ return -1;
+ checksum += *buf;
+ if ( (rc = xc_tmem_control(xc, pool_id, TMEMC_RESTORE_PUT_PAGE,
+ dom, bufsize, index, oid, buf)) <= 0 )
+ {
+ DPRINTF("xc_tmem_restore: putting page failed, rc=%d\n",rc);
+ return -1;
+ }
+ }
+ if ( n_pages )
+ DPRINTF("restored %d tmem pages for dom=%d pool=%d, check=%x\n",
+ n_pages-j,dom,pool_id,checksum);
+ }
+ if ( pool_id != -1 )
+ return -1;
+
+ return 0;
+}
+
+/* only called for live migration, must be called after suspend */
+int xc_tmem_restore_extra(int xc, int dom, int io_fd)
+{
+ uint32_t pool_id;
+ uint64_t oid;
+ uint32_t index;
+ int count = 0;
+ int checksum = 0;
+
+ while ( read_exact(io_fd, &pool_id, sizeof(pool_id)) == 0 && pool_id != -1 )
+ {
+ if ( read_exact(io_fd, &oid, sizeof(oid)) )
+ return -1;
+ if ( read_exact(io_fd, &index, sizeof(index)) )
+ return -1;
+ if ( xc_tmem_control(xc, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom,
+ 0,index,oid,NULL) <= 0 )
+ return -1;
+ count++;
+ checksum += pool_id + oid + index;
+ }
+ if ( pool_id != -1 )
+ return -1;
+ if ( count )
+ DPRINTF("invalidated %d tmem pages, check=%d\n",count,checksum);
+
+ return 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 4c7c693a8d..1eade658a0 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1276,12 +1276,13 @@ int xc_set_cpuidle_max_cstate(int xc_handle, uint32_t value);
/**
* tmem operations
*/
-int xc_tmem_control(int xc,
- int32_t pool_id,
- uint32_t subop,
- uint32_t cli_id,
- uint32_t arg1,
- uint32_t arg2,
- void *buf);
+int xc_tmem_control(int xc, int32_t pool_id, uint32_t subop, uint32_t cli_id,
+ uint32_t arg1, uint32_t arg2, uint64_t arg3, void *buf);
+int xc_tmem_auth(int xc_handle, int cli_id, char *uuid_str, int arg1);
+int xc_tmem_save(int xc_handle, int dom, int fd, int live, int field_marker);
+int xc_tmem_save_extra(int xc_handle, int dom, int fd, int field_marker);
+void xc_tmem_save_done(int xc_handle, int dom);
+int xc_tmem_restore(int xc_handle, int dom, int fd);
+int xc_tmem_restore_extra(int xc_handle, int dom, int fd);
#endif /* XENCTRL_H */
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index c33fcc3b9c..f15a5f6280 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -1523,20 +1523,21 @@ static PyObject *pyxc_tmem_control(XcObject *self,
uint32_t cli_id;
uint32_t arg1;
uint32_t arg2;
+ uint64_t arg3;
char *buf;
char _buffer[32768], *buffer = _buffer;
int rc;
- static char *kwd_list[] = { "pool_id", "subop", "cli_id", "arg1", "arg2", "buf", NULL };
+ static char *kwd_list[] = { "pool_id", "subop", "cli_id", "arg1", "arg2", "arg3", "buf", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiiis", kwd_list,
- &pool_id, &subop, &cli_id, &arg1, &arg2, &buf) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiiiis", kwd_list,
+ &pool_id, &subop, &cli_id, &arg1, &arg2, &arg3, &buf) )
return NULL;
if ( (subop == TMEMC_LIST) && (arg1 > 32768) )
arg1 = 32768;
- if ( (rc = xc_tmem_control(self->xc_handle, pool_id, subop, cli_id, arg1, arg2, buffer)) < 0 )
+ if ( (rc = xc_tmem_control(self->xc_handle, pool_id, subop, cli_id, arg1, arg2, arg3, buffer)) < 0 )
return Py_BuildValue("i", rc);
switch (subop) {
@@ -1558,6 +1559,28 @@ static PyObject *pyxc_tmem_control(XcObject *self,
return zero;
}
+static PyObject *pyxc_tmem_shared_auth(XcObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ uint32_t cli_id;
+ uint32_t arg1;
+ char *uuid_str;
+ int rc;
+
+ static char *kwd_list[] = { "cli_id", "uuid_str", "arg1" };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "isi", kwd_list,
+ &cli_id, &uuid_str, &arg1) )
+ return NULL;
+
+ if ( (rc = xc_tmem_auth(self->xc_handle, cli_id, uuid_str, arg1)) < 0 )
+ return Py_BuildValue("i", rc);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
static PyMethodDef pyxc_methods[] = {
{ "handle",
(PyCFunction)pyxc_handle,
@@ -2029,6 +2052,15 @@ static PyMethodDef pyxc_methods[] = {
" buf [str]: Buffer.\n\n"
"Returns: [int] 0 or [str] tmem info on success; exception on error.\n" },
+ { "tmem_shared_auth",
+ (PyCFunction)pyxc_tmem_shared_auth,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "De/authenticate a shared tmem pool.\n"
+ " cli_id [int]: Client identifier (-1 == all).\n"
+ " uuid_str [str]: uuid.\n"
+ " auth [int]: 0|1 .\n"
+ "Returns: [int] 0 on success; exception on error.\n" },
+
{ NULL, NULL, 0, NULL }
};
diff --git a/tools/python/xen/xend/XendAPI.py b/tools/python/xen/xend/XendAPI.py
index 126db6421c..4e9e16dfbc 100644
--- a/tools/python/xen/xend/XendAPI.py
+++ b/tools/python/xen/xend/XendAPI.py
@@ -933,7 +933,8 @@ class XendAPI(object):
('tmem_list', None),
('tmem_set_weight', None),
('tmem_set_cap', None),
- ('tmem_set_compress', None)]
+ ('tmem_set_compress', None),
+ ('tmem_shared_auth', None)]
host_funcs = [('get_by_name_label', None),
('list_methods', None)]
@@ -1133,6 +1134,14 @@ class XendAPI(object):
return xen_api_error(e)
return xen_api_success_void()
+ def host_tmem_shared_auth(self, _, host_ref, cli_id, uuid_str, auth):
+ node = XendNode.instance()
+ try:
+ node.tmem_shared_auth(cli_id, uuid_str, auth)
+ except Exception, e:
+ return xen_api_error(e)
+ return xen_api_success_void()
+
# class methods
def host_get_all(self, session):
return xen_api_success((XendNode.instance().uuid,))
diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py
index ed9bb9d84b..ac71e657c3 100644
--- a/tools/python/xen/xend/XendNode.py
+++ b/tools/python/xen/xend/XendNode.py
@@ -948,62 +948,72 @@ class XendNode:
subop = TMEMC_LIST
arg1 = 32768
arg2 = use_long
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
def tmem_thaw(self, cli_id):
pool_id = -1
subop = TMEMC_THAW
arg1 = 0
arg2 = 0
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
def tmem_freeze(self, cli_id):
pool_id = -1
subop = TMEMC_FREEZE
arg1 = 0
arg2 = 0
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
def tmem_flush(self, cli_id, pages):
pool_id = -1
subop = TMEMC_FLUSH
arg1 = pages
arg2 = 0
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
def tmem_destroy(self, cli_id):
pool_id = -1
subop = TMEMC_DESTROY
arg1 = 0
arg2 = 0
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
def tmem_set_weight(self, cli_id, arg1):
pool_id = -1
subop = TMEMC_SET_WEIGHT
arg2 = 0
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
def tmem_set_cap(self, cli_id, arg1):
pool_id = -1
subop = TMEMC_SET_CAP
arg2 = 0
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
def tmem_set_compress(self, cli_id, arg1):
pool_id = -1
subop = TMEMC_SET_COMPRESS
arg2 = 0
+ arg3 = 0
buf = ''
- return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+ return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf)
+ def tmem_shared_auth(self, cli_id, uuid_str, auth):
+ return self.xc.tmem_auth(cli_id, uuid_str, auth)
def instance():
global inst
diff --git a/tools/python/xen/xend/balloon.py b/tools/python/xen/xend/balloon.py
index 42c8ea0aa7..b5d312970a 100644
--- a/tools/python/xen/xend/balloon.py
+++ b/tools/python/xen/xend/balloon.py
@@ -111,7 +111,7 @@ def free(need_mem, dominfo):
rlimit = RETRY_LIMIT
# stop tmem from absorbing any more memory (must THAW when done!)
- xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, "")
+ xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, 0, "")
# If unreasonable memory size is required, we give up waiting
# for ballooning or scrubbing, as if had retried.
@@ -130,7 +130,7 @@ def free(need_mem, dominfo):
if freeable_mem < need_mem and need_mem < max_free_mem:
# flush memory from tmem to scrub_mem and reobtain physinfo
need_tmem_kb = need_mem - freeable_mem
- tmem_kb = xc.tmem_control(0,TMEMC_FLUSH,-1, need_tmem_kb, 0, "")
+ tmem_kb = xc.tmem_control(0,TMEMC_FLUSH,-1, need_tmem_kb, 0, 0, "")
log.debug("Balloon: tmem relinquished %d KiB of %d KiB requested.",
tmem_kb, need_tmem_kb)
physinfo = xc.physinfo()
@@ -232,5 +232,5 @@ def free(need_mem, dominfo):
finally:
# allow tmem to accept pages again
- xc.tmem_control(0,TMEMC_THAW,-1, 0, 0, "")
+ xc.tmem_control(0,TMEMC_THAW,-1, 0, 0, 0, "")
del xc
diff --git a/tools/python/xen/xend/server/XMLRPCServer.py b/tools/python/xen/xend/server/XMLRPCServer.py
index 93c6caef1b..accaabc47b 100644
--- a/tools/python/xen/xend/server/XMLRPCServer.py
+++ b/tools/python/xen/xend/server/XMLRPCServer.py
@@ -202,7 +202,8 @@ class XMLRPCServer:
['info', 'pciinfo', 'send_debug_keys',
'tmem_list', 'tmem_freeze', 'tmem_thaw',
'tmem_flush', 'tmem_destroy', 'tmem_set_weight',
- 'tmem_set_cap', 'tmem_set_compress'],
+ 'tmem_set_cap', 'tmem_set_compress',
+ 'tmem_shared_auth'],
'node'),
(XendDmesg, ['info', 'clear'], 'node.dmesg')]:
inst = type.instance()
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index e2bf004d49..5f3a449fee 100644
--- a/tools/python/xen/xm/main.py
+++ b/tools/python/xen/xm/main.py
@@ -207,6 +207,7 @@ SUBCOMMAND_HELP = {
'tmem-set' : ('[<Domain>|-a|--all] [weight=<weight>] [cap=<cap>] '
'[compress=<compress>]',
'Change tmem settings.'),
+ 'tmem-shared-auth' : ('[<Domain>|-a|--all] [--uuid=<uuid>] [--auth=<0|1>]', 'De/authenticate shared tmem pool.'),
# security
@@ -307,6 +308,11 @@ SUBCOMMAND_OPTIONS = {
'tmem-set': (
('-a', '--all', 'Operate on all tmem.'),
),
+ 'tmem-shared-auth': (
+ ('-a', '--all', 'Authenticate for all tmem pools.'),
+ ('-u', '--uuid', 'Specify uuid (abcdef01-2345-6789-0123-456789abcdef).'),
+ ('-A', '--auth', '1=auth,0=deauth'),
+ ),
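+ # Illustrative example (uuid is made up):
+ #   xm tmem-shared-auth 1 -u abcdef01-2345-6789-0123-456789abcdef -A 1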
}
common_commands = [
@@ -427,6 +433,7 @@ tmem_commands = [
"tmem-freeze",
"tmem-destroy",
"tmem-set",
+ "tmem-shared-auth",
]
all_commands = (domain_commands + host_commands + scheduler_commands +
@@ -3129,6 +3136,46 @@ def xm_tmem_set(args):
if compress is not None:
server.xend.node.tmem_set_compress(domid, compress)
+def xm_tmem_shared_auth(args):
+ try:
+ (options, params) = getopt.gnu_getopt(args, 'au:A:', ['all','uuid=','auth='])
+ except getopt.GetoptError, opterr:
+ err(opterr)
+ usage('tmem-shared-auth')
+
+ all = False
+ for (k, v) in options:
+ if k in ['-a', '--all']:
+ all = True
+
+ if not all and len(params) == 0:
+ err('You must specify -a or --all or a domain id.')
+ usage('tmem-shared-auth')
+
+ if all:
+ domid = -1
+ else:
+ try:
+ domid = int(params[0])
+ params = params[1:]
+ except:
+ err('Unrecognized domain id: %s' % params[0])
+ usage('tmem-shared-auth')
+
+ for (k, v) in options:
+ if k in ['-u', '--uuid']:
+ uuid_str = v
+
+ auth = 0
+ for (k, v) in options:
+ if k in ['-A', '--auth']:
+ auth = v
+
+ if serverType == SERVER_XEN_API:
+ return server.xenapi.host.tmem_shared_auth(domid,uuid_str,auth)
+ else:
+ return server.xend.node.tmem_shared_auth(domid,uuid_str,auth)
+
commands = {
"shell": xm_shell,
@@ -3211,6 +3258,7 @@ commands = {
"tmem-destroy": xm_tmem_destroy,
"tmem-list": xm_tmem_list,
"tmem-set": xm_tmem_set,
+ "tmem-shared-auth": xm_tmem_shared_auth,
}
## The commands supported by a separate argument parser in xend.xm.
diff --git a/xen/common/tmem.c b/xen/common/tmem.c
index f45d880614..c350de9d62 100644
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -27,6 +27,8 @@
#define EXPORT /* indicates code other modules are dependent upon */
#define FORWARD
+#define TMEM_SPEC_VERSION 0
+
/************ INTERFACE TO TMEM HOST-DEPENDENT (tmh) CODE ************/
#define CLI_ID_NULL TMH_CLI_ID_NULL
@@ -105,6 +107,7 @@ DECL_CYC_COUNTER(decompress);
#define MAX_GLOBAL_SHARED_POOLS 16
struct tm_pool;
+struct tmem_page_descriptor;
struct client {
struct list_head client_list;
struct tm_pool *pools[MAX_POOLS_PER_DOMAIN];
@@ -116,11 +119,20 @@ struct client {
uint32_t cap;
bool_t compress;
bool_t frozen;
+ bool_t shared_auth_required;
+ /* for save/restore/migration */
+ bool_t live_migrating;
+ bool_t was_frozen;
+ struct list_head persistent_invalidated_list;
+ struct tmem_page_descriptor *cur_pgp;
+ /* statistics collection */
unsigned long compress_poor, compress_nomem;
unsigned long compressed_pages;
uint64_t compressed_sum_size;
uint64_t total_cycles;
unsigned long succ_pers_puts, succ_eph_gets, succ_pers_gets;
+ /* shared pool authentication */
+ uint64_t shared_auth_uuid[MAX_GLOBAL_SHARED_POOLS][2];
};
typedef struct client client_t;
@@ -137,6 +149,7 @@ typedef struct share_list sharelist_t;
struct tm_pool {
bool_t shared;
bool_t persistent;
+ int pageshift; /* 0 == 2**12 */
struct list_head pool_list; /* FIXME do we need this anymore? */
client_t *client;
uint64_t uuid[2]; /* 0 for private, non-zero for shared */
@@ -144,8 +157,11 @@ struct tm_pool {
rwlock_t pool_rwlock;
struct rb_root obj_rb_root[OBJ_HASH_BUCKETS]; /* protected by pool_rwlock */
struct list_head share_list; /* valid if shared */
- DECL_SENTINEL
int shared_count; /* valid if shared */
+ /* for save/restore/migration */
+ struct list_head persistent_page_list;
+ struct tmem_page_descriptor *cur_pgp;
+ /* statistics collection */
atomic_t pgp_count;
int pgp_count_max;
long obj_count; /* atomicity depends on pool_rwlock held for write */
@@ -158,6 +174,7 @@ struct tm_pool {
unsigned long gets, found_gets;
unsigned long flushs, flushs_found;
unsigned long flush_objs, flush_objs_found;
+ DECL_SENTINEL
};
typedef struct tm_pool pool_t;
@@ -189,16 +206,29 @@ struct tmem_object_node {
typedef struct tmem_object_node objnode_t;
struct tmem_page_descriptor {
- struct list_head global_eph_pages;
- struct list_head client_eph_pages;
- obj_t *obj;
+ union {
+ struct list_head global_eph_pages;
+ struct list_head client_inv_pages;
+ };
+ union {
+ struct list_head client_eph_pages;
+ struct list_head pool_pers_pages;
+ };
+ union {
+ obj_t *obj;
+ uint64_t inv_oid; /* used for invalid list only */
+ };
uint32_t index;
- size_t size; /* 0 == PAGE_SIZE (pfp), else compressed data (cdata) */
+ size_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid,
+ else compressed data (cdata) */
union {
pfp_t *pfp; /* page frame pointer */
char *cdata; /* compressed data */
};
- uint64_t timestamp;
+ union {
+ uint64_t timestamp;
+ uint32_t pool_id; /* used for invalid list only */
+ };
DECL_SENTINEL
};
typedef struct tmem_page_descriptor pgp_t;
@@ -209,6 +239,7 @@ static LIST_HEAD(global_client_list);
static LIST_HEAD(global_pool_list);
static pool_t *global_shared_pools[MAX_GLOBAL_SHARED_POOLS] = { 0 };
+static bool_t global_shared_auth = 0;
static atomic_t client_weight_total = ATOMIC_INIT(0);
static int tmem_initialized = 0;
@@ -217,6 +248,7 @@ static int tmem_initialized = 0;
EXPORT DEFINE_SPINLOCK(tmem_spinlock); /* used iff tmh_lock_all */
EXPORT DEFINE_RWLOCK(tmem_rwlock); /* used iff !tmh_lock_all */
static DEFINE_SPINLOCK(eph_lists_spinlock); /* protects global AND clients */
+static DEFINE_SPINLOCK(pers_lists_spinlock);
#define tmem_spin_lock(_l) do {if (!tmh_lock_all) spin_lock(_l);}while(0)
#define tmem_spin_unlock(_l) do {if (!tmh_lock_all) spin_unlock(_l);}while(0)
@@ -366,36 +398,61 @@ static NOINLINE void pgp_free(pgp_t *pgp, int from_delete)
ASSERT(pgp->obj != NULL);
ASSERT_SENTINEL(pgp->obj,OBJ);
ASSERT_SENTINEL(pgp->obj->pool,POOL);
- ASSERT(list_empty(&pgp->global_eph_pages));
- ASSERT(list_empty(&pgp->client_eph_pages));
+ ASSERT(pgp->obj->pool->client != NULL);
if ( from_delete )
ASSERT(pgp_lookup_in_obj(pgp->obj,pgp->index) == NULL);
ASSERT(pgp->obj->pool != NULL);
pool = pgp->obj->pool;
+ if ( is_ephemeral(pool) )
+ {
+ ASSERT(list_empty(&pgp->global_eph_pages));
+ ASSERT(list_empty(&pgp->client_eph_pages));
+ }
pgp_free_data(pgp, pool);
+ atomic_dec_and_assert(global_pgp_count);
+ atomic_dec_and_assert(pool->pgp_count);
+ pgp->size = -1;
+ if ( is_persistent(pool) && pool->client->live_migrating )
+ {
+ pgp->inv_oid = pgp->obj->oid;
+ pgp->pool_id = pool->pool_id;
+ return;
+ }
+ INVERT_SENTINEL(pgp,PGD);
+ pgp->obj = NULL;
+ pgp->index = -1;
+ tmem_free(pgp,sizeof(pgp_t),pool);
+}
+
+static NOINLINE void pgp_free_from_inv_list(client_t *client, pgp_t *pgp)
+{
+ pool_t *pool = client->pools[pgp->pool_id];
+
+ ASSERT_SENTINEL(pool,POOL);
+ ASSERT_SENTINEL(pgp,PGD);
INVERT_SENTINEL(pgp,PGD);
pgp->obj = NULL;
pgp->index = -1;
- pgp->size = -1;
- atomic_dec_and_assert(global_pgp_count);
- atomic_dec_and_assert(pool->pgp_count);
tmem_free(pgp,sizeof(pgp_t),pool);
}
/* remove the page from appropriate lists but not from parent object */
static void pgp_delist(pgp_t *pgp, bool_t no_eph_lock)
{
+ client_t *client;
+
ASSERT(pgp != NULL);
ASSERT(pgp->obj != NULL);
ASSERT(pgp->obj->pool != NULL);
- ASSERT(pgp->obj->pool->client != NULL);
+ client = pgp->obj->pool->client;
+ ASSERT(client != NULL);
if ( is_ephemeral(pgp->obj->pool) )
{
if ( !no_eph_lock )
tmem_spin_lock(&eph_lists_spinlock);
if ( !list_empty(&pgp->client_eph_pages) )
- pgp->obj->pool->client->eph_count--;
- ASSERT(pgp->obj->pool->client->eph_count >= 0);
+ client->eph_count--;
+ ASSERT(client->eph_count >= 0);
list_del_init(&pgp->client_eph_pages);
if ( !list_empty(&pgp->global_eph_pages) )
global_eph_count--;
@@ -403,6 +460,20 @@ static void pgp_delist(pgp_t *pgp, bool_t no_eph_lock)
list_del_init(&pgp->global_eph_pages);
if ( !no_eph_lock )
tmem_spin_unlock(&eph_lists_spinlock);
+ } else {
+ if ( client->live_migrating )
+ {
+ tmem_spin_lock(&pers_lists_spinlock);
+ list_add_tail(&pgp->client_inv_pages,
+ &client->persistent_invalidated_list);
+ if ( pgp != pgp->obj->pool->cur_pgp )
+ list_del_init(&pgp->pool_pers_pages);
+ tmem_spin_unlock(&pers_lists_spinlock);
+ } else {
+ tmem_spin_lock(&pers_lists_spinlock);
+ list_del_init(&pgp->pool_pers_pages);
+ tmem_spin_unlock(&pers_lists_spinlock);
+ }
}
}
@@ -564,6 +635,7 @@ static NOINLINE void obj_free(obj_t *obj, int no_rebalance)
ASSERT(obj->pgp_count == 0);
pool = obj->pool;
ASSERT(pool != NULL);
+ ASSERT(pool->client != NULL);
ASSERT_WRITELOCK(&pool->pool_rwlock);
if ( obj->tree_root.rnode != NULL ) /* may be a "stump" with no leaves */
radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
@@ -685,11 +757,14 @@ static pool_t * pool_alloc(void)
for (i = 0; i < OBJ_HASH_BUCKETS; i++)
pool->obj_rb_root[i] = RB_ROOT;
INIT_LIST_HEAD(&pool->pool_list);
+ INIT_LIST_HEAD(&pool->persistent_page_list);
+ pool->cur_pgp = NULL;
rwlock_init(&pool->pool_rwlock);
pool->pgp_count_max = pool->obj_count_max = 0;
pool->objnode_count = pool->objnode_count_max = 0;
atomic_set(&pool->pgp_count,0);
- pool->obj_count = 0;
+ pool->obj_count = 0; pool->shared_count = 0;
+ pool->pageshift = PAGE_SHIFT - 12;
pool->good_puts = pool->puts = pool->dup_puts_flushed = 0;
pool->dup_puts_replaced = pool->no_mem_puts = 0;
pool->found_gets = pool->gets = 0;
@@ -805,6 +880,12 @@ static void pool_flush(pool_t *pool, cli_id_t cli_id, bool_t destroy)
is_persistent(pool) ? "persistent" : "ephemeral" ,
is_shared(pool) ? "shared" : "private");
printk("%s=%d pool_id=%d\n", cli_id_str,pool->client->cli_id,pool->pool_id);
+ if ( pool->client->live_migrating )
+ {
+ printk("can't %s pool while %s is live-migrating\n",
+ destroy?"destroy":"flush", client_str);
+ return;
+ }
pool_destroy_objs(pool,0,CLI_ID_NULL);
if ( destroy )
{
@@ -815,10 +896,10 @@ static void pool_flush(pool_t *pool, cli_id_t cli_id, bool_t destroy)
/************ CLIENT MANIPULATION OPERATIONS **************************/
-static client_t *client_create(void)
+static client_t *client_create(cli_id_t cli_id)
{
client_t *client = tmem_malloc(client_t,NULL);
- cli_id_t cli_id = tmh_get_cli_id_from_current();
+ int i;
printk("tmem: initializing tmem capability for %s=%d...",cli_id_str,cli_id);
if ( client == NULL )
@@ -834,15 +915,23 @@ static client_t *client_create(void)
tmem_free(client,sizeof(client_t),NULL);
return NULL;
}
- tmh_set_current_client(client);
+ tmh_set_client_from_id(client,cli_id);
client->cli_id = cli_id;
#ifdef __i386__
client->compress = 0;
#else
client->compress = tmh_compression_enabled();
#endif
+ client->shared_auth_required = tmh_shared_auth();
+ for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
+ client->shared_auth_uuid[i][0] =
+ client->shared_auth_uuid[i][1] = -1L;
+ client->frozen = 0; client->live_migrating = 0;
+ client->weight = 0; client->cap = 0;
list_add_tail(&client->client_list, &global_client_list);
INIT_LIST_HEAD(&client->ephemeral_page_list);
+ INIT_LIST_HEAD(&client->persistent_invalidated_list);
+ client->cur_pgp = NULL;
client->eph_count = client->eph_count_max = 0;
client->total_cycles = 0; client->succ_pers_puts = 0;
client->succ_eph_gets = 0; client->succ_pers_gets = 0;
@@ -887,6 +976,11 @@ static bool_t client_over_quota(client_t *client)
((total*100L) / client->weight) );
}
+static void client_freeze(client_t *client, int freeze)
+{
+ client->frozen = freeze;
+}
+
/************ MEMORY REVOCATION ROUTINES *******************************/
static int tmem_evict(void)
@@ -993,7 +1087,8 @@ static unsigned long tmem_relinquish_npages(unsigned long n)
/************ TMEM CORE OPERATIONS ************************************/
-static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn)
+static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn,
+ void *cva)
{
void *dst, *p;
size_t size;
@@ -1011,7 +1106,7 @@ static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn)
if ( pgp->pfp != NULL )
pgp_free_data(pgp, pgp->obj->pool); /* FIXME... is this right? */
START_CYC_COUNTER(compress);
- ret = tmh_compress_from_client(cmfn, &dst, &size);
+ ret = tmh_compress_from_client(cmfn, &dst, &size, cva);
if ( (ret == -EFAULT) || (ret == 0) )
goto out;
else if ( (size == 0) || (size >= tmem_subpage_maxsize()) )
@@ -1034,7 +1129,7 @@ out:
}
static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn,
- uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len)
+ uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva)
{
pool_t *pool;
obj_t *obj;
@@ -1042,7 +1137,6 @@ static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn,
pgp_t *pgpfound = NULL;
int ret;
- /* if we can successfully manipulate pgp to change out the data, do so */
ASSERT(pgp != NULL);
ASSERT(pgp->pfp != NULL);
ASSERT(pgp->size != -1);
@@ -1052,10 +1146,12 @@ static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn,
pool = obj->pool;
ASSERT(pool != NULL);
client = pool->client;
- if ( len != 0 && tmh_compression_enabled() &&
- client->compress && pgp->size != 0 )
+ if ( client->live_migrating )
+ goto failed_dup; /* no dups allowed when migrating */
+ /* can we successfully manipulate pgp to change out the data? */
+ if ( len != 0 && client->compress && pgp->size != 0 )
{
- ret = do_tmem_put_compress(pgp,cmfn);
+ ret = do_tmem_put_compress(pgp,cmfn,cva);
if ( ret == 1 )
goto done;
else if ( ret == 0 )
@@ -1072,7 +1168,7 @@ copy_uncompressed:
if ( ( pgp->pfp = tmem_page_alloc(pool) ) == NULL )
goto failed_dup;
/* tmh_copy_from_client properly handles len==0 and offsets != 0 */
- ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len);
+ ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,0);
if ( ret == -EFAULT )
goto bad_copy;
pgp->size = 0;
@@ -1115,9 +1211,10 @@ failed_dup:
}
-static NOINLINE int do_tmem_put(pool_t *pool, uint64_t oid, uint32_t index,
+static NOINLINE int do_tmem_put(pool_t *pool,
+ uint64_t oid, uint32_t index,
tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
- uint32_t pfn_offset, uint32_t len)
+ uint32_t pfn_offset, uint32_t len, void *cva)
{
obj_t *obj = NULL, *objfound = NULL, *objnew = NULL;
pgp_t *pgp = NULL, *pgpdel = NULL;
@@ -1131,7 +1228,7 @@ static NOINLINE int do_tmem_put(pool_t *pool, uint64_t oid, uint32_t index,
{
ASSERT_SPINLOCK(&objfound->obj_spinlock);
if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL)
- return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len);
+ return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len,cva);
}
/* no puts allowed into a frozen pool (except dup puts) */
@@ -1162,10 +1259,10 @@ static NOINLINE int do_tmem_put(pool_t *pool, uint64_t oid, uint32_t index,
ASSERT(ret != -EEXIST);
pgp->index = index;
- if ( len != 0 && tmh_compression_enabled() && client->compress )
+ if ( len != 0 && client->compress )
{
ASSERT(pgp->pfp == NULL);
- ret = do_tmem_put_compress(pgp,cmfn);
+ ret = do_tmem_put_compress(pgp,cmfn,cva);
if ( ret == 1 )
goto insert_page;
if ( ret == -ENOMEM )
@@ -1189,7 +1286,7 @@ copy_uncompressed:
goto delete_and_free;
}
/* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */
- ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len);
+ ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,cva);
if ( ret == -EFAULT )
goto bad_copy;
pgp->size = 0;
@@ -1207,6 +1304,11 @@ insert_page:
if (++client->eph_count > client->eph_count_max)
client->eph_count_max = client->eph_count;
tmem_spin_unlock(&eph_lists_spinlock);
+ } else { /* is_persistent */
+ tmem_spin_lock(&pers_lists_spinlock);
+ list_add_tail(&pgp->pool_pers_pages,
+ &pool->persistent_page_list);
+ tmem_spin_unlock(&pers_lists_spinlock);
}
ASSERT( ((objnew==obj)||(objfound==obj)) && (objnew!=objfound));
if ( is_shared(pool) )
@@ -1249,7 +1351,7 @@ ASSERT(0);
static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t index,
tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
- uint32_t pfn_offset, uint32_t len)
+ uint32_t pfn_offset, uint32_t len, void *cva)
{
obj_t *obj;
pgp_t *pgp;
@@ -1279,12 +1381,13 @@ static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t index,
if ( pgp->size != 0 )
{
START_CYC_COUNTER(decompress);
- if ( tmh_decompress_to_client(cmfn, pgp->cdata, pgp->size) == -EFAULT )
+ if ( tmh_decompress_to_client(cmfn, pgp->cdata,
+ pgp->size, cva) == -EFAULT )
goto bad_copy;
END_CYC_COUNTER(decompress);
}
else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
- pfn_offset, len) == -EFAULT)
+ pfn_offset, len, cva) == -EFAULT)
goto bad_copy;
if ( is_ephemeral(pool) )
{
@@ -1398,10 +1501,12 @@ static NOINLINE int do_tmem_destroy_pool(uint32_t pool_id)
return 1;
}
-static NOINLINE int do_tmem_new_pool(uint32_t flags, uint64_t uuid_lo, uint64_t uuid_hi)
+static NOINLINE int do_tmem_new_pool(cli_id_t this_cli_id,
+ uint32_t this_pool_id, uint32_t flags,
+ uint64_t uuid_lo, uint64_t uuid_hi)
{
- client_t *client = tmh_client_from_current();
- cli_id_t cli_id = tmh_get_cli_id_from_current();
+ client_t *client;
+ cli_id_t cli_id;
int persistent = flags & TMEM_POOL_PERSIST;
int shared = flags & TMEM_POOL_SHARED;
int pagebits = (flags >> TMEM_POOL_PAGESIZE_SHIFT)
@@ -1410,12 +1515,22 @@ static NOINLINE int do_tmem_new_pool(uint32_t flags, uint64_t uuid_lo, uint64_t
& TMEM_POOL_VERSION_MASK;
pool_t *pool, *shpool;
int s_poolid, d_poolid, first_unused_s_poolid;
+ int i;
+ if ( this_cli_id == CLI_ID_NULL )
+ {
+ client = tmh_client_from_current();
+ cli_id = tmh_get_cli_id_from_current();
+ } else {
+ if ( (client = tmh_client_from_cli_id(this_cli_id)) == NULL)
+ return -EPERM;
+ cli_id = this_cli_id;
+ }
ASSERT(client != NULL);
printk("tmem: allocating %s-%s tmem pool for %s=%d...",
persistent ? "persistent" : "ephemeral" ,
shared ? "shared" : "private", cli_id_str, cli_id);
- if ( specversion != 0 )
+ if ( specversion != TMEM_SPEC_VERSION )
{
printk("failed... unsupported spec version\n");
return -EPERM;
@@ -1430,15 +1545,36 @@ static NOINLINE int do_tmem_new_pool(uint32_t flags, uint64_t uuid_lo, uint64_t
printk("failed... out of memory\n");
return -ENOMEM;
}
- for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ )
+ if ( this_cli_id != CLI_ID_NULL )
+ {
+ d_poolid = this_pool_id;
+ if ( client->pools[d_poolid] != NULL )
+ return -EPERM;
+ }
+ else for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ )
if ( client->pools[d_poolid] == NULL )
break;
- if ( d_poolid == MAX_POOLS_PER_DOMAIN )
+ if ( d_poolid >= MAX_POOLS_PER_DOMAIN )
{
printk("failed... no more pool slots available for this %s\n",
client_str);
goto fail;
}
+ if ( shared )
+ {
+ if ( uuid_lo == -1L && uuid_hi == -1L )
+ shared = 0;
+ if ( client->shared_auth_required && !global_shared_auth )
+ {
+ for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
+ if ( (client->shared_auth_uuid[i][0] == uuid_lo) &&
+ (client->shared_auth_uuid[i][1] == uuid_hi) )
+ break;
+ if ( i == MAX_GLOBAL_SHARED_POOLS )
+ shared = 0;
+ }
+ }
pool->shared = shared;
pool->client = client;
if ( shared )
@@ -1491,7 +1627,7 @@ fail:
/************ TMEM CONTROL OPERATIONS ************************************/
/* freeze/thaw all pools belonging to client cli_id (all domains if -1) */
-static int tmemc_freeze_pools(int cli_id, int arg)
+static int tmemc_freeze_pools(cli_id_t cli_id, int arg)
{
client_t *client;
bool_t freeze = (arg == TMEMC_FREEZE) ? 1 : 0;
@@ -1502,20 +1638,20 @@ static int tmemc_freeze_pools(int cli_id, int arg)
if ( cli_id == CLI_ID_NULL )
{
list_for_each_entry(client,&global_client_list,client_list)
- client->frozen = freeze;
+ client_freeze(client,freeze);
printk("tmem: all pools %s for all %ss\n",s,client_str);
}
else
{
if ( (client = tmh_client_from_cli_id(cli_id)) == NULL)
return -1;
- client->frozen = freeze;
+ client_freeze(client,freeze);
printk("tmem: all pools %s for %s=%d\n",s,cli_id_str,cli_id);
}
return 0;
}
-static int tmemc_flush_mem(int cli_id, uint32_t kb)
+static int tmemc_flush_mem(cli_id_t cli_id, uint32_t kb)
{
uint32_t npages, flushed_pages, flushed_kb;
@@ -1699,7 +1835,7 @@ static int tmemc_list_global(tmem_cli_va_t buf, int off, uint32_t len,
return sum;
}
-static int tmemc_list(int cli_id, tmem_cli_va_t buf, uint32_t len,
+static int tmemc_list(cli_id_t cli_id, tmem_cli_va_t buf, uint32_t len,
bool_t use_long)
{
client_t *client;
@@ -1717,7 +1853,6 @@ static int tmemc_list(int cli_id, tmem_cli_va_t buf, uint32_t len,
else
off = tmemc_list_client(client, buf, 0, len, use_long);
-
return 0;
}
@@ -1740,6 +1875,9 @@ static int tmemc_set_var_one(client_t *client, uint32_t subop, uint32_t arg1)
printk("tmem: cap set to %d for %s=%d\n",arg1,cli_id_str,cli_id);
break;
case TMEMC_SET_COMPRESS:
+#ifdef __i386__
+ return -1;
+#endif
client->compress = arg1 ? 1 : 0;
printk("tmem: compression %s for %s=%d\n",
arg1 ? "enabled" : "disabled",cli_id_str,cli_id);
@@ -1751,7 +1889,7 @@ static int tmemc_set_var_one(client_t *client, uint32_t subop, uint32_t arg1)
return 0;
}
-static int tmemc_set_var(int cli_id, uint32_t subop, uint32_t arg1)
+static int tmemc_set_var(cli_id_t cli_id, uint32_t subop, uint32_t arg1)
{
client_t *client;
@@ -1765,11 +1903,229 @@ static int tmemc_set_var(int cli_id, uint32_t subop, uint32_t arg1)
return 0;
}
-static int do_tmem_control(uint32_t subop, uint32_t cli_id32,
- uint32_t arg1, uint32_t arg2, tmem_cli_va_t buf)
+static NOINLINE int tmemc_shared_pool_auth(cli_id_t cli_id, uint64_t uuid_lo,
+ uint64_t uuid_hi, bool_t auth)
+{
+ client_t *client;
+ int i, free = -1;
+
+ if ( cli_id == CLI_ID_NULL )
+ {
+ global_shared_auth = auth;
+ return 1;
+ }
+ client = tmh_client_from_cli_id(cli_id);
+ for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
+ {
+ if ( (client->shared_auth_uuid[i][0] == uuid_lo) &&
+ (client->shared_auth_uuid[i][1] == uuid_hi) )
+ {
+ if ( auth == 0 )
+ client->shared_auth_uuid[i][0] =
+ client->shared_auth_uuid[i][1] = -1L;
+ return 1;
+ }
+ if ( (auth == 1) && (client->shared_auth_uuid[i][0] == -1L) &&
+ (client->shared_auth_uuid[i][1] == -1L) && (free == -1) )
+ free = i;
+ }
+ if ( auth == 0 )
+ return 0;
+ if ( auth == 1 && free == -1 )
+ return -ENOMEM;
+ client->shared_auth_uuid[free][0] = uuid_lo;
+ client->shared_auth_uuid[free][1] = uuid_hi;
+ return 1;
+}
+
+static NOINLINE int tmemc_save_subop(int cli_id, uint32_t pool_id,
+ uint32_t subop, tmem_cli_va_t buf, uint32_t arg1)
+{
+ client_t *client = tmh_client_from_cli_id(cli_id);
+ pool_t *pool = (client == NULL) ? NULL : client->pools[pool_id];
+ uint32_t p;
+ uint64_t *uuid;
+ pgp_t *pgp, *pgp2;
+
+ switch(subop)
+ {
+ case TMEMC_SAVE_BEGIN:
+ if ( client == NULL )
+ return 0;
+ for (p = 0; p < MAX_POOLS_PER_DOMAIN; p++)
+ if ( client->pools[p] != NULL )
+ break;
+ if ( p == MAX_POOLS_PER_DOMAIN )
+ return 0;
+ client->was_frozen = client->frozen;
+ client->frozen = 1;
+ if ( arg1 != 0 )
+ client->live_migrating = 1;
+ return 1;
+ case TMEMC_RESTORE_BEGIN:
+ ASSERT(client == NULL);
+ if ( (client = client_create(cli_id)) == NULL )
+ return -1;
+ return 1;
+ case TMEMC_SAVE_GET_VERSION:
+ return TMEM_SPEC_VERSION;
+ case TMEMC_SAVE_GET_MAXPOOLS:
+ return MAX_POOLS_PER_DOMAIN;
+ case TMEMC_SAVE_GET_CLIENT_WEIGHT:
+ return client->weight == -1 ? -2 : client->weight;
+ case TMEMC_SAVE_GET_CLIENT_CAP:
+ return client->cap == -1 ? -2 : client->cap;
+ case TMEMC_SAVE_GET_CLIENT_FLAGS:
+ return (client->compress ? TMEM_CLIENT_COMPRESS : 0 ) |
+ (client->was_frozen ? TMEM_CLIENT_FROZEN : 0 );
+ case TMEMC_SAVE_GET_POOL_FLAGS:
+ if ( pool == NULL )
+ return -1;
+ return (pool->persistent ? TMEM_POOL_PERSIST : 0) |
+ (pool->shared ? TMEM_POOL_SHARED : 0) |
+ (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT);
+ case TMEMC_SAVE_GET_POOL_NPAGES:
+ if ( pool == NULL )
+ return -1;
+ return _atomic_read(pool->pgp_count);
+ case TMEMC_SAVE_GET_POOL_UUID:
+ if ( pool == NULL )
+ return -1;
+ uuid = (uint64_t *)buf.p;
+ *uuid++ = pool->uuid[0];
+ *uuid = pool->uuid[1];
+ return 0;
+ case TMEMC_SAVE_END:
+ client->live_migrating = 0;
+ if ( !list_empty(&client->persistent_invalidated_list) )
+ list_for_each_entry_safe(pgp,pgp2,
+ &client->persistent_invalidated_list, client_inv_pages)
+ pgp_free_from_inv_list(client,pgp);
+ client->frozen = client->was_frozen;
+ return 0;
+ }
+ return -1;
+}
+
+static NOINLINE int tmemc_save_get_next_page(int cli_id, int pool_id,
+ tmem_cli_va_t buf, uint32_t bufsize)
+{
+ client_t *client = tmh_client_from_cli_id(cli_id);
+ pool_t *pool = (client == NULL) ? NULL : client->pools[pool_id];
+ pgp_t *pgp;
+ int ret = 0;
+ struct tmem_handle *h;
+ unsigned int pagesize = 1 << (pool->pageshift+12);
+
+ if ( pool == NULL )
+ return -1;
+ if ( is_ephemeral(pool) )
+ return -1;
+ if ( bufsize < pagesize + sizeof(struct tmem_handle) )
+ return -ENOMEM;
+
+ tmem_spin_lock(&pers_lists_spinlock);
+ if ( list_empty(&pool->persistent_page_list) )
+ {
+ ret = -1;
+ goto out;
+ }
+ /* note: pool->cur_pgp is the pgp last returned by get_next_page */
+ if ( pool->cur_pgp == NULL )
+ {
+ /* process the first one */
+ pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next,
+ pgp_t,pool_pers_pages);
+ } else if ( list_is_last(&pool->cur_pgp->pool_pers_pages,
+ &pool->persistent_page_list) )
+ {
+ /* already processed the last one in the list */
+ ret = -1;
+ goto out;
+ }
+ pgp = list_entry((&pool->cur_pgp->pool_pers_pages)->next,
+ pgp_t,pool_pers_pages);
+ pool->cur_pgp = pgp;
+ h = (struct tmem_handle *)buf.p;
+ h->oid = pgp->obj->oid;
+ h->index = pgp->index;
+ buf.p = (void *)(h+1);
+ ret = do_tmem_get(pool, h->oid, h->index,0,0,0,pagesize,buf.p);
+
+out:
+ tmem_spin_unlock(&pers_lists_spinlock);
+ return ret;
+}
+
+static NOINLINE int tmemc_save_get_next_inv(int cli_id, tmem_cli_va_t buf,
+ uint32_t bufsize)
+{
+ client_t *client = tmh_client_from_cli_id(cli_id);
+ pgp_t *pgp;
+ struct tmem_handle *h;
+ int ret = 0;
+
+ if ( client == NULL )
+ return 0;
+ if ( bufsize < sizeof(struct tmem_handle) )
+ return 0;
+ tmem_spin_lock(&pers_lists_spinlock);
+ if ( list_empty(&client->persistent_invalidated_list) )
+ goto out;
+ if ( client->cur_pgp == NULL )
+ {
+ pgp = list_entry((&client->persistent_invalidated_list)->next,
+ pgp_t,client_inv_pages);
+ client->cur_pgp = pgp;
+ } else if ( list_is_last(&client->cur_pgp->client_inv_pages,
+ &client->persistent_invalidated_list) )
+ {
+ client->cur_pgp = NULL;
+ ret = 0;
+ goto out;
+ } else {
+ pgp = list_entry((&client->cur_pgp->client_inv_pages)->next,
+ pgp_t,client_inv_pages);
+ client->cur_pgp = pgp;
+ }
+ h = (struct tmem_handle *)buf.p;
+ h->pool_id = pgp->pool_id;
+ h->oid = pgp->inv_oid;
+ h->index = pgp->index;
+ ret = 1;
+out:
+ tmem_spin_unlock(&pers_lists_spinlock);
+ return ret;
+}
+
+static int tmemc_restore_put_page(int cli_id, int pool_id, uint64_t oid,
+ uint32_t index, tmem_cli_va_t buf, uint32_t bufsize)
+{
+ client_t *client = tmh_client_from_cli_id(cli_id);
+ pool_t *pool = (client == NULL) ? NULL : client->pools[pool_id];
+
+ if ( pool == NULL )
+ return -1;
+ return do_tmem_put(pool,oid,index,0,0,0,bufsize,buf.p);
+}
+
+static int tmemc_restore_flush_page(int cli_id, int pool_id, uint64_t oid,
+ uint32_t index)
+{
+ client_t *client = tmh_client_from_cli_id(cli_id);
+ pool_t *pool = (client == NULL) ? NULL : client->pools[pool_id];
+
+ if ( pool == NULL )
+ return -1;
+ return do_tmem_flush_page(pool, oid, index);
+}
+
+static NOINLINE int do_tmem_control(struct tmem_op *op)
{
int ret;
- cli_id_t cli_id = (cli_id_t)cli_id32;
+ uint32_t pool_id = op->pool_id;
+ uint32_t subop = op->u.ctrl.subop;
if (!tmh_current_is_privileged())
{
@@ -1781,18 +2137,50 @@ static int do_tmem_control(uint32_t subop, uint32_t cli_id32,
case TMEMC_THAW:
case TMEMC_FREEZE:
case TMEMC_DESTROY:
- ret = tmemc_freeze_pools(cli_id,subop);
+ ret = tmemc_freeze_pools(op->u.ctrl.cli_id,subop);
break;
case TMEMC_FLUSH:
- ret = tmemc_flush_mem(cli_id,arg1);
+ ret = tmemc_flush_mem(op->u.ctrl.cli_id,op->u.ctrl.arg1);
break;
case TMEMC_LIST:
- ret = tmemc_list(cli_id,buf,arg1,arg2);
+ ret = tmemc_list(op->u.ctrl.cli_id,op->u.ctrl.buf,
+ op->u.ctrl.arg1,op->u.ctrl.arg2);
break;
case TMEMC_SET_WEIGHT:
case TMEMC_SET_CAP:
case TMEMC_SET_COMPRESS:
- ret = tmemc_set_var(cli_id,subop,arg1);
+ ret = tmemc_set_var(op->u.ctrl.cli_id,subop,op->u.ctrl.arg1);
+ break;
+ case TMEMC_SAVE_BEGIN:
+ case TMEMC_RESTORE_BEGIN:
+ case TMEMC_SAVE_GET_VERSION:
+ case TMEMC_SAVE_GET_MAXPOOLS:
+ case TMEMC_SAVE_GET_CLIENT_WEIGHT:
+ case TMEMC_SAVE_GET_CLIENT_CAP:
+ case TMEMC_SAVE_GET_CLIENT_FLAGS:
+ case TMEMC_SAVE_GET_POOL_FLAGS:
+ case TMEMC_SAVE_GET_POOL_NPAGES:
+ case TMEMC_SAVE_GET_POOL_UUID:
+ case TMEMC_SAVE_END:
+ ret = tmemc_save_subop(op->u.ctrl.cli_id,pool_id,subop,
+ op->u.ctrl.buf,op->u.ctrl.arg1);
+ break;
+ case TMEMC_SAVE_GET_NEXT_PAGE:
+ ret = tmemc_save_get_next_page(op->u.ctrl.cli_id, pool_id,
+ op->u.ctrl.buf, op->u.ctrl.arg1);
+ break;
+ case TMEMC_SAVE_GET_NEXT_INV:
+ ret = tmemc_save_get_next_inv(op->u.ctrl.cli_id, op->u.ctrl.buf,
+ op->u.ctrl.arg1);
+ break;
+ case TMEMC_RESTORE_PUT_PAGE:
+ ret = tmemc_restore_put_page(op->u.ctrl.cli_id,pool_id,
+ op->u.ctrl.arg3, op->u.ctrl.arg2,
+ op->u.ctrl.buf, op->u.ctrl.arg1);
+ break;
+ case TMEMC_RESTORE_FLUSH_PAGE:
+ ret = tmemc_restore_flush_page(op->u.ctrl.cli_id,pool_id,
+ op->u.ctrl.arg3, op->u.ctrl.arg2);
break;
default:
ret = -1;
@@ -1850,8 +2238,19 @@ EXPORT long do_tmem_op(tmem_cli_op_t uops)
{
tmem_write_lock(&tmem_rwlock);
tmem_write_lock_set = 1;
- rc = do_tmem_control(op.u.ctrl.subop, op.u.ctrl.cli_id,
- op.u.ctrl.arg1, op.u.ctrl.arg2, op.u.ctrl.buf);
+ rc = do_tmem_control(&op);
+ goto out;
+ } else if ( op.cmd == TMEM_AUTH ) {
+ tmem_write_lock(&tmem_rwlock);
+ tmem_write_lock_set = 1;
+ rc = tmemc_shared_pool_auth(op.u.new.arg1,op.u.new.uuid[0],
+ op.u.new.uuid[1],op.u.new.flags);
+ goto out;
+ } else if ( op.cmd == TMEM_RESTORE_NEW ) {
+ tmem_write_lock(&tmem_rwlock);
+ tmem_write_lock_set = 1;
+ rc = do_tmem_new_pool(op.u.new.arg1, op.pool_id, op.u.new.flags,
+ op.u.new.uuid[0], op.u.new.uuid[1]);
goto out;
}
@@ -1860,7 +2259,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uops)
{
tmem_write_lock(&tmem_rwlock);
tmem_write_lock_set = 1;
- if ( (client = client_create()) == NULL )
+ if ( (client = client_create(tmh_get_cli_id_from_current())) == NULL )
{
printk("tmem: can't create tmem structure for %s\n",client_str);
rc = -ENOMEM;
@@ -1896,22 +2295,22 @@ EXPORT long do_tmem_op(tmem_cli_op_t uops)
switch ( op.cmd )
{
case TMEM_NEW_POOL:
- rc = do_tmem_new_pool(op.u.new.flags,
+ rc = do_tmem_new_pool(CLI_ID_NULL, 0, op.u.new.flags,
op.u.new.uuid[0], op.u.new.uuid[1]);
break;
case TMEM_NEW_PAGE:
- rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
- 0, 0, 0);
+ rc = do_tmem_put(pool, op.u.gen.object,
+ op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, NULL);
break;
case TMEM_PUT_PAGE:
- rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
- 0, 0, PAGE_SIZE);
+ rc = do_tmem_put(pool, op.u.gen.object,
+ op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, NULL);
if (rc == 1) succ_put = 1;
else non_succ_put = 1;
break;
case TMEM_GET_PAGE:
rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
- 0, 0, PAGE_SIZE);
+ 0, 0, PAGE_SIZE, 0);
if (rc == 1) succ_get = 1;
else non_succ_get = 1;
break;
@@ -1930,12 +2329,13 @@ EXPORT long do_tmem_op(tmem_cli_op_t uops)
case TMEM_READ:
rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
op.u.gen.tmem_offset, op.u.gen.pfn_offset,
- op.u.gen.len);
+ op.u.gen.len,0);
break;
case TMEM_WRITE:
- rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
+ rc = do_tmem_put(pool, op.u.gen.object,
+ op.u.gen.index, op.u.gen.cmfn,
op.u.gen.tmem_offset, op.u.gen.pfn_offset,
- op.u.gen.len);
+ op.u.gen.len, NULL);
break;
case TMEM_XCHG:
/* need to hold global lock to ensure xchg is atomic */
diff --git a/xen/common/tmem_xen.c b/xen/common/tmem_xen.c
index f813b0e52f..d273ab7cbe 100644
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -20,6 +20,9 @@ boolean_param("tmem", opt_tmem);
EXPORT int opt_tmem_compress = 0;
boolean_param("tmem_compress", opt_tmem_compress);
+EXPORT int opt_tmem_shared_auth = 0;
+boolean_param("tmem_shared_auth", opt_tmem_shared_auth);
+
EXPORT int opt_tmem_lock = 0;
integer_param("tmem_lock", opt_tmem_lock);
@@ -98,14 +101,14 @@ static inline void *cli_mfn_to_va(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn)
EXPORT int tmh_copy_from_client(pfp_t *pfp,
tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
- uint32_t pfn_offset, uint32_t len)
+ uint32_t pfn_offset, uint32_t len, void *cli_va)
{
unsigned long tmem_mfn;
- void *tmem_va, *cli_va = NULL;
+ void *tmem_va;
ASSERT(pfp != NULL);
if ( tmem_offset || pfn_offset || len )
- if ( (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL)
+ if ( (cli_va == NULL) && ((cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) )
return -EFAULT;
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
@@ -123,14 +126,13 @@ EXPORT int tmh_copy_from_client(pfp_t *pfp,
}
EXPORT int tmh_compress_from_client(tmem_cli_mfn_t cmfn,
- void **out_va, size_t *out_len)
+ void **out_va, size_t *out_len, void *cli_va)
{
- void *cli_va;
int ret = 0;
unsigned char *dmem = this_cpu(dstmem);
unsigned char *wmem = this_cpu(workmem);
- if ( (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL)
+ if ( (cli_va == NULL) && (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL)
return -EFAULT;
if ( dmem == NULL || wmem == NULL )
return 0; /* no buffer, so can't compress */
@@ -143,13 +145,16 @@ EXPORT int tmh_compress_from_client(tmem_cli_mfn_t cmfn,
}
EXPORT int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
- uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len)
+ uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cli_va)
{
- unsigned long tmem_mfn, cli_mfn;
- void *tmem_va, *cli_va;
+ unsigned long tmem_mfn, cli_mfn = 0;
+ int mark_dirty = 1;
+ void *tmem_va;
ASSERT(pfp != NULL);
- if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
+ if ( cli_va != NULL )
+ mark_dirty = 0;
+ else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
return -EFAULT;
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
@@ -158,26 +163,35 @@ EXPORT int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
else if ( (tmem_offset+len <= PAGE_SIZE) && (pfn_offset+len <= PAGE_SIZE) )
memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len);
unmap_domain_page(tmem_va);
- unmap_domain_page(cli_va);
- paging_mark_dirty(current->domain,cli_mfn);
+ if ( mark_dirty )
+ {
+ unmap_domain_page(cli_va);
+ paging_mark_dirty(current->domain,cli_mfn);
+ }
mb();
return 1;
}
-EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, size_t size)
+EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va,
+ size_t size, void *cli_va)
{
- unsigned long cli_mfn;
- void *cli_va;
+ unsigned long cli_mfn = 0;
+ int mark_dirty = 1;
size_t out_len = PAGE_SIZE;
int ret;
- if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
+ if ( cli_va != NULL )
+ mark_dirty = 0;
+ else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
return -EFAULT;
ret = lzo1x_decompress_safe(tmem_va, size, cli_va, &out_len);
ASSERT(ret == LZO_E_OK);
ASSERT(out_len == PAGE_SIZE);
- unmap_domain_page(cli_va);
- paging_mark_dirty(current->domain,cli_mfn);
+ if ( mark_dirty )
+ {
+ unmap_domain_page(cli_va);
+ paging_mark_dirty(current->domain,cli_mfn);
+ }
mb();
return 1;
}
diff --git a/xen/include/public/tmem.h b/xen/include/public/tmem.h
index 03e3a40c5d..39f7c31ebd 100644
--- a/xen/include/public/tmem.h
+++ b/xen/include/public/tmem.h
@@ -42,15 +42,36 @@
#define TMEM_WRITE 9
#define TMEM_XCHG 10
+/* Privileged commands to HYPERVISOR_tmem_op() */
+#define TMEM_AUTH 101
+#define TMEM_RESTORE_NEW 102
+
/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */
-#define TMEMC_THAW 0
-#define TMEMC_FREEZE 1
-#define TMEMC_FLUSH 2
-#define TMEMC_DESTROY 3
-#define TMEMC_LIST 4
-#define TMEMC_SET_WEIGHT 5
-#define TMEMC_SET_CAP 6
-#define TMEMC_SET_COMPRESS 7
+#define TMEMC_THAW 0
+#define TMEMC_FREEZE 1
+#define TMEMC_FLUSH 2
+#define TMEMC_DESTROY 3
+#define TMEMC_LIST 4
+#define TMEMC_SET_WEIGHT 5
+#define TMEMC_SET_CAP 6
+#define TMEMC_SET_COMPRESS 7
+#define TMEMC_SHARED_POOL_AUTH 8
+#define TMEMC_SHARED_POOL_DEAUTH 9
+#define TMEMC_SAVE_BEGIN 10
+#define TMEMC_SAVE_GET_VERSION 11
+#define TMEMC_SAVE_GET_MAXPOOLS 12
+#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13
+#define TMEMC_SAVE_GET_CLIENT_CAP 14
+#define TMEMC_SAVE_GET_CLIENT_FLAGS 15
+#define TMEMC_SAVE_GET_POOL_FLAGS 16
+#define TMEMC_SAVE_GET_POOL_NPAGES 17
+#define TMEMC_SAVE_GET_POOL_UUID 18
+#define TMEMC_SAVE_GET_NEXT_PAGE 19
+#define TMEMC_SAVE_GET_NEXT_INV 20
+#define TMEMC_SAVE_END 21
+#define TMEMC_RESTORE_BEGIN 30
+#define TMEMC_RESTORE_PUT_PAGE 32
+#define TMEMC_RESTORE_FLUSH_PAGE 33
/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
#define TMEM_POOL_PERSIST 1
@@ -60,6 +81,10 @@
#define TMEM_POOL_VERSION_SHIFT 24
#define TMEM_POOL_VERSION_MASK 0xff
+/* Bits for client flags (save/restore) */
+#define TMEM_CLIENT_COMPRESS 1
+#define TMEM_CLIENT_FROZEN 2
+
/* Special errno values */
#define EFROZEN 1000
#define EEMPTY 1001
@@ -70,32 +95,41 @@ typedef xen_pfn_t tmem_cli_mfn_t;
typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t;
struct tmem_op {
uint32_t cmd;
- int32_t pool_id; /* private > 0; shared < 0; 0 is invalid */
+ int32_t pool_id;
union {
- struct { /* for cmd == TMEM_NEW_POOL */
+ struct {
uint64_t uuid[2];
uint32_t flags;
- } new;
- struct { /* for cmd == TMEM_CONTROL */
+ uint32_t arg1;
+ } new; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */
+ struct {
uint32_t subop;
uint32_t cli_id;
uint32_t arg1;
uint32_t arg2;
+ uint64_t arg3;
tmem_cli_va_t buf;
- } ctrl;
+ } ctrl; /* for cmd == TMEM_CONTROL */
struct {
uint64_t object;
uint32_t index;
uint32_t tmem_offset;
uint32_t pfn_offset;
uint32_t len;
tmem_cli_mfn_t cmfn; /* client machine page frame */
- } gen;
+ } gen; /* for all other cmd ("generic") */
} u;
};
typedef struct tmem_op tmem_op_t;
DEFINE_XEN_GUEST_HANDLE(tmem_op_t);
+struct tmem_handle {
+ uint32_t pool_id;
+ uint32_t index;
+ uint64_t oid;
+};
+
#endif
#endif /* __XEN_PUBLIC_TMEM_H__ */
diff --git a/xen/include/xen/tmem_xen.h b/xen/include/xen/tmem_xen.h
index 55d5f2152a..8970327563 100644
--- a/xen/include/xen/tmem_xen.h
+++ b/xen/include/xen/tmem_xen.h
@@ -55,6 +55,12 @@ static inline int tmh_compression_enabled(void)
return opt_tmem_compress;
}
+extern int opt_tmem_shared_auth;
+static inline int tmh_shared_auth(void)
+{
+ return opt_tmem_shared_auth;
+}
+
extern int opt_tmem;
static inline int tmh_enabled(void)
{
@@ -271,9 +277,10 @@ static inline tmh_cli_ptr_t *tmh_get_cli_ptr_from_current(void)
return current->domain;
}
-static inline void tmh_set_current_client(struct client *client)
+static inline void tmh_set_client_from_id(struct client *client,cli_id_t cli_id)
{
- current->domain->tmem = client;
+ struct domain *d = get_domain_by_id(cli_id);
+ d->tmem = client;
}
static inline bool_t tmh_current_is_privileged(void)
@@ -301,9 +308,11 @@ static inline int tmh_get_tmemop_from_client(tmem_op_t *op, tmem_cli_op_t uops)
return rc;
switch ( cop.cmd )
{
- case TMEM_NEW_POOL: u = XLAT_tmem_op_u_new; break;
- case TMEM_CONTROL: u = XLAT_tmem_op_u_ctrl; break;
- default: u = XLAT_tmem_op_u_gen; break;
+ case TMEM_NEW_POOL: u = XLAT_tmem_op_u_new; break;
+ case TMEM_CONTROL: u = XLAT_tmem_op_u_ctrl; break;
+ case TMEM_AUTH: u = XLAT_tmem_op_u_new; break;
+ case TMEM_RESTORE_NEW:u = XLAT_tmem_op_u_new; break;
+ default: u = XLAT_tmem_op_u_gen ; break;
}
#define XLAT_tmem_op_HNDL_u_ctrl_buf(_d_, _s_) \
guest_from_compat_handle((_d_)->u.ctrl.buf, (_s_)->u.ctrl.buf)
@@ -326,16 +335,16 @@ static inline void tmh_copy_to_client_buf_offset(tmem_cli_va_t clibuf, int off,
#define tmh_cli_id_str "domid"
#define tmh_client_str "domain"
-extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t);
+extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t,void*);
-extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *);
+extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *,void*);
extern int tmh_copy_from_client(pfp_t *pfp,
tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
- uint32_t pfn_offset, uint32_t len);
+ uint32_t pfn_offset, uint32_t len, void *cva);
extern int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
- uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len);
+ uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva);
#define TMEM_PERF