aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorcl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>2005-06-09 14:40:39 +0000
committercl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>2005-06-09 14:40:39 +0000
commit06940d306a7f4af8e5b25fcdb792d1aaf6176ae1 (patch)
treee35eef7660ae1525c39e2606f06b3c40178a3279
parent34446ff07c752abb53e912fc6ea0b6899613c83f (diff)
parent8b624164abbe4b3ddb274e0b8f31b17cfca17b7a (diff)
downloadxen-06940d306a7f4af8e5b25fcdb792d1aaf6176ae1.tar.gz
xen-06940d306a7f4af8e5b25fcdb792d1aaf6176ae1.tar.bz2
xen-06940d306a7f4af8e5b25fcdb792d1aaf6176ae1.zip
bitkeeper revision 1.1700 (42a854e79oBFuqa_DSY4Lr9IhenUQw)
Merge xenstore changes. Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
-rw-r--r--.rootkeys45
-rw-r--r--BitKeeper/etc/ignore9
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netback/control.c2
-rw-r--r--tools/Makefile2
-rw-r--r--tools/libxc/xc.h4
-rw-r--r--tools/libxc/xc_linux_build.c59
-rw-r--r--tools/python/setup.py16
-rw-r--r--tools/python/xen/lowlevel/xc/xc.c31
-rw-r--r--tools/python/xen/lowlevel/xs/xs.c617
-rw-r--r--tools/python/xen/lowlevel/xu/xu.c3
-rw-r--r--tools/python/xen/util/mac.py11
-rw-r--r--tools/python/xen/web/SrvDir.py23
-rw-r--r--tools/python/xen/xend/PrettyPrint.py21
-rw-r--r--tools/python/xen/xend/XendCheckpoint.py8
-rw-r--r--tools/python/xen/xend/XendDomain.py284
-rw-r--r--tools/python/xen/xend/XendDomainInfo.py704
-rw-r--r--tools/python/xen/xend/XendRoot.py21
-rw-r--r--tools/python/xen/xend/XendVnet.py22
-rw-r--r--tools/python/xen/xend/image.py339
-rw-r--r--tools/python/xen/xend/server/SrvConsole.py2
-rw-r--r--tools/python/xen/xend/server/SrvDaemon.py44
-rw-r--r--tools/python/xen/xend/server/SrvDomain.py38
-rw-r--r--tools/python/xen/xend/server/SrvDomainDir.py2
-rwxr-xr-xtools/python/xen/xend/server/blkif.py39
-rwxr-xr-xtools/python/xen/xend/server/channel.py181
-rwxr-xr-xtools/python/xen/xend/server/console.py14
-rwxr-xr-xtools/python/xen/xend/server/controller.py125
-rwxr-xr-xtools/python/xen/xend/server/netif.py85
-rw-r--r--tools/python/xen/xend/server/params.py36
-rw-r--r--tools/python/xen/xend/server/usbif.py13
-rw-r--r--tools/python/xen/xend/uuid.py65
-rw-r--r--tools/python/xen/xend/xenstore/__init__.py2
-rw-r--r--tools/python/xen/xend/xenstore/xsnode.py382
-rw-r--r--tools/python/xen/xend/xenstore/xsobj.py522
-rw-r--r--tools/python/xen/xend/xenstore/xsresource.py136
-rw-r--r--tools/xenstore/.gdbinit4
-rw-r--r--tools/xenstore/Makefile97
-rw-r--r--tools/xenstore/TODO7
-rw-r--r--tools/xenstore/fake_libxc.c119
-rw-r--r--tools/xenstore/list.h508
-rw-r--r--tools/xenstore/talloc.c1143
-rw-r--r--tools/xenstore/talloc.h134
-rw-r--r--tools/xenstore/talloc_guide.txt569
-rw-r--r--tools/xenstore/testsuite/01simple.sh4
-rw-r--r--tools/xenstore/testsuite/02directory.sh31
-rw-r--r--tools/xenstore/testsuite/03write.sh17
-rw-r--r--tools/xenstore/testsuite/04rm.sh18
-rw-r--r--tools/xenstore/testsuite/05filepermissions.sh49
-rw-r--r--tools/xenstore/testsuite/06dirpermissions.sh61
-rw-r--r--tools/xenstore/testsuite/07watch.sh32
-rw-r--r--tools/xenstore/testsuite/08transaction.sh54
-rw-r--r--tools/xenstore/testsuite/09domain.sh15
-rwxr-xr-xtools/xenstore/testsuite/test.sh44
-rw-r--r--tools/xenstore/utils.c143
-rw-r--r--tools/xenstore/utils.h61
-rw-r--r--tools/xenstore/xenstored.h81
-rw-r--r--tools/xenstore/xenstored_core.c1354
-rw-r--r--tools/xenstore/xenstored_core.h123
-rw-r--r--tools/xenstore/xenstored_domain.c387
-rw-r--r--tools/xenstore/xenstored_domain.h38
-rw-r--r--tools/xenstore/xenstored_test.h37
-rw-r--r--tools/xenstore/xenstored_transaction.c284
-rw-r--r--tools/xenstore/xenstored_transaction.h50
-rw-r--r--tools/xenstore/xenstored_watch.c279
-rw-r--r--tools/xenstore/xenstored_watch.h42
-rw-r--r--tools/xenstore/xs.c551
-rw-r--r--tools/xenstore/xs.h146
-rw-r--r--tools/xenstore/xs_lib.c141
-rw-r--r--tools/xenstore/xs_lib.h63
-rw-r--r--tools/xenstore/xs_random.c1646
-rw-r--r--tools/xenstore/xs_stress.c207
-rw-r--r--tools/xenstore/xs_test.c647
-rw-r--r--xen/include/public/xen.h25
73 files changed, 12292 insertions, 826 deletions
diff --git a/.rootkeys b/.rootkeys
index a5198616a0..6be475bc10 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -864,6 +864,7 @@
40dc4076hGpwa8-sWRN0jtXZeQJuKg tools/python/xen/__init__.py
40dfd40aMOhnw_cQLve9462UR5yYxQ tools/python/xen/lowlevel/__init__.py
3fbd0a42l40lM0IICw2jXbQBVZSdZg tools/python/xen/lowlevel/xc/xc.c
+42a59f20JpCmm9DsCoVZowGafnhBuw tools/python/xen/lowlevel/xs/xs.c
40dc4076St6AmPTmQPrtQ6LGHPxGmw tools/python/xen/lowlevel/xu/__init__.py
40dc4076CwBYRTUQDdbdU1L6KcLgSw tools/python/xen/lowlevel/xu/xu.c
40d8915cyoVA0hJxiBFNymL7YvDaRg tools/python/xen/util/Brctl.py
@@ -871,6 +872,7 @@
4270e4efFg3wHCCxXpA0h6yoMTkeSQ tools/python/xen/util/blkif.py
4055ee4dwy4l0MghZosxoiu6zmhc9Q tools/python/xen/util/console_client.py
40c9c468IienauFHQ_xJIcqnPJ8giQ tools/python/xen/util/ip.py
+42a4a80aiq_AT5whiSw-fKhNhRKITw tools/python/xen/util/mac.py
41dde8b0yuJX-S79w4xJKxBQ-Mhp1A tools/python/xen/util/memmap.py
4288c6fcB1kUAqX0gzU85GGxmamS4Q tools/python/xen/util/process.py
4059c6a0pnxhG8hwSOivXybbGOwuXw tools/python/xen/util/tempfile.py
@@ -908,6 +910,7 @@
40c9c468xzANp6o2D_MeCYwNmOIUsQ tools/python/xen/xend/XendVnet.py
40c9c468x191zetrVlMnExfsQWHxIQ tools/python/xen/xend/__init__.py
40c9c468S2YnCEKmk4ey8XQIST7INg tools/python/xen/xend/encode.py
+42a475165HuglqWwNi2fjqNOIHbIKQ tools/python/xen/xend/image.py
4266169ezWIlXSfY50n6HSoVFbosmw tools/python/xen/xend/scheduler.py
40c9c468IxQabrKJSWs0aEjl-27mRQ tools/python/xen/xend/server/SrvConsole.py
40c9c4689Io5bxfbYIfRiUvsiLX0EQ tools/python/xen/xend/server/SrvConsoleDir.py
@@ -933,6 +936,11 @@
4294a1bf8rMUcddot-B2-pOxORimOg tools/python/xen/xend/server/relocate.py
41ee5e8dq9NtihbL4nWKjuSLOhXPUg tools/python/xen/xend/server/usbif.py
40c9c469LNxLVizOUpOjEaTKKCm8Aw tools/python/xen/xend/sxp.py
+42a48d152jkT7ykQT_LWKnS-ojV_ZA tools/python/xen/xend/uuid.py
+42a5a2c0ik9zrQvwjTUKDVVEQmvO2Q tools/python/xen/xend/xenstore/__init__.py
+42a5a2c04xNCYAUXD0b9IDf4XekXRg tools/python/xen/xend/xenstore/xsnode.py
+42a5a2c0-aP98db2PJIDxQJfTEMZ-A tools/python/xen/xend/xenstore/xsobj.py
+42a5a2c0gxfQiAH_oVTShNPeG0LG2Q tools/python/xen/xend/xenstore/xsresource.py
40d05079aFRp6NQdo5wIh5Ly31c0cg tools/python/xen/xm/__init__.py
40cf2937gKQcATgXKGtNeWb1PDH5nA tools/python/xen/xm/create.py
40f552eariuUSB9TWqCPnDLz5zvxMw tools/python/xen/xm/destroy.py
@@ -1052,6 +1060,43 @@
4292540couq-V0TPwyQ6bspNEWNcvw tools/xcutils/Makefile
42925407VysDb9O06OK_RUzTZxfLoA tools/xcutils/xc_restore.c
42936745WTLYamYsmXm_JGJ72JX-_Q tools/xcutils/xc_save.c
+42a57d97mxMTlPnxBKep6R4ViI5rjg tools/xenstore/.gdbinit
+42a57d97ZEoHuhMAFTuBMlLzA9v_ng tools/xenstore/Makefile
+42a57d97ccA4uY-RxONvIH0P8U0gqg tools/xenstore/TODO
+42a57d972RzmyLgsoH9b8qqk-UjcCA tools/xenstore/fake_libxc.c
+42a57d97IjoPvbIVc4BUzwoKyM0VSw tools/xenstore/list.h
+42a57d97fKgtf0HQLiQkAkVsOvuSyA tools/xenstore/talloc.c
+42a57d98U3p0XP6xzCybTuaVQscUdw tools/xenstore/talloc.h
+42a57d98LFN6Mug-uR4xgAxCE7lwUg tools/xenstore/talloc_guide.txt
+42a57d98S69vKJYwO_WUjoFQZ6KzQg tools/xenstore/testsuite/01simple.sh
+42a57d98BHcFpZz_fXHweylUEUU97Q tools/xenstore/testsuite/02directory.sh
+42a57d98ua4Xeb6pmtbFNTAI833dyw tools/xenstore/testsuite/03write.sh
+42a57d98nbuCUsVT0RJj1zA1JyMDsw tools/xenstore/testsuite/04rm.sh
+42a57d98_ULKHP3_uX1PK2nPMTzWSQ tools/xenstore/testsuite/05filepermissions.sh
+42a57d98YGCLyTDSGmoyFqRqQUlagQ tools/xenstore/testsuite/06dirpermissions.sh
+42a57d98fdO519YyATk4_Zwr1STNfQ tools/xenstore/testsuite/07watch.sh
+42a57d98zZUtvirUMjmHxFphJjmO7Q tools/xenstore/testsuite/08transaction.sh
+42a57d98sn9RbpBgHRv1D99Kt7LwYA tools/xenstore/testsuite/09domain.sh
+42a57d98tSuoFCHnnM2GgENXJrRQmw tools/xenstore/testsuite/test.sh
+42a57d98zxDP2Ti7dTznGROi66rUGw tools/xenstore/utils.c
+42a57d98SDvOYCEjmCjwHSk6390GLA tools/xenstore/utils.h
+42a57d98hFKbOY9D0mCE4H4NDoKr1w tools/xenstore/xenstored.h
+42a57d981KFHLmJ0CjKkn1_gZhYvdw tools/xenstore/xenstored_core.c
+42a57d98bcgE13vYaFxGTusmWbrFDA tools/xenstore/xenstored_core.h
+42a57d98cD9wOFyRYfaEP0QgtqL1Xw tools/xenstore/xenstored_domain.c
+42a57d98noLWvXU8ePbcqvvmu4p2Gw tools/xenstore/xenstored_domain.h
+42a57d98kxHaQ1ApS7RpqmFoEnDmbg tools/xenstore/xenstored_test.h
+42a57d981c9P3aFkWtxWEIRUapt_FQ tools/xenstore/xenstored_transaction.c
+42a57d99pVo__10bbckp_b_rm6i59A tools/xenstore/xenstored_transaction.h
+42a57d99izTIjWfG-IjQAPqYlDWJNg tools/xenstore/xenstored_watch.c
+42a57d99-zLxBjzC7rfj_perV-orUg tools/xenstore/xenstored_watch.h
+42a57d99BnkhISKgCCRcUqhteyuxCw tools/xenstore/xs.c
+42a57d99FyiYSz9AkKKROrRydnA-gQ tools/xenstore/xs.h
+42a57d99SrtsJCDUlKyRPf3EX86A1Q tools/xenstore/xs_lib.c
+42a57d99L2pYeMFyjQ_4Rnb17xTSMg tools/xenstore/xs_lib.h
+42a57d99Kl6Ba8oCHv2fggl7QN9QZA tools/xenstore/xs_random.c
+42a57d99SHYR1lQOD0shuErPDg9NKQ tools/xenstore/xs_stress.c
+42a57d996aBawpkQNOWkNWXD6LrhPg tools/xenstore/xs_test.c
403a3edbrr8RE34gkbR40zep98SXbg tools/xentrace/Makefile
40a107afN60pFdURgBv9KwEzgRl5mQ tools/xentrace/formats
420d52d2_znVbT4JAPIU36vQOme83g tools/xentrace/xenctx.c
diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore
index abb31ab817..33edc625d6 100644
--- a/BitKeeper/etc/ignore
+++ b/BitKeeper/etc/ignore
@@ -94,8 +94,8 @@ tools/cmdline/*
tools/cmdline/xen/*
tools/firmware/*.bin
tools/firmware/*.sym
-tools/firmware/*bios/*bios*.txt
tools/firmware/*/biossums
+tools/firmware/*bios/*bios*.txt
tools/firmware/rombios/BIOS-bochs-latest
tools/firmware/rombios/_rombios_.c
tools/firmware/rombios/rombios.s
@@ -140,6 +140,13 @@ tools/xcs/xcs
tools/xcs/xcsdump
tools/xcutils/xc_restore
tools/xcutils/xc_save
+tools/xenstore/testsuite/tmp/*
+tools/xenstore/xen
+tools/xenstore/xenstored
+tools/xenstore/xenstored_test
+tools/xenstore/xs_random
+tools/xenstore/xs_stress
+tools/xenstore/xs_test
tools/xentrace/xentrace
tools/xfrd/xfrd
xen/BLOG
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c
index 7ffaa2f781..9392d5a3d2 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c
@@ -10,6 +10,8 @@
static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
+ DPRINTK("Received netif backend message, subtype=%d\n", msg->subtype);
+
switch ( msg->subtype )
{
case CMSG_NETIF_BE_CREATE:
diff --git a/tools/Makefile b/tools/Makefile
index 7f578b6880..b122ba465a 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -3,12 +3,14 @@ include $(XEN_ROOT)/tools/Rules.mk
SUBDIRS :=
SUBDIRS += libxc
+# NOTE(review): listed ahead of python because tools/python/setup.py now
+# links against the xenstore library; listed once only.
+SUBDIRS += xenstore
SUBDIRS += misc
SUBDIRS += examples
SUBDIRS += xentrace
SUBDIRS += python
SUBDIRS += xcs
SUBDIRS += xcutils
SUBDIRS += pygrub
SUBDIRS += firmware
diff --git a/tools/libxc/xc.h b/tools/libxc/xc.h
index 3c768f1a4a..27e7845798 100644
--- a/tools/libxc/xc.h
+++ b/tools/libxc/xc.h
@@ -252,7 +252,9 @@ int xc_linux_build(int xc_handle,
const char *cmdline,
unsigned int control_evtchn,
unsigned long flags,
- unsigned int vcpus);
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn);
int
xc_plan9_build (int xc_handle,
diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c
index 012d66ae93..660c30a3f1 100644
--- a/tools/libxc/xc_linux_build.c
+++ b/tools/libxc/xc_linux_build.c
@@ -48,17 +48,18 @@ static int probeimageformat(char *image,
}
static int setup_guest(int xc_handle,
- u32 dom,
- char *image, unsigned long image_size,
- gzFile initrd_gfd, unsigned long initrd_len,
- unsigned long nr_pages,
- unsigned long *pvsi, unsigned long *pvke,
- vcpu_guest_context_t *ctxt,
- const char *cmdline,
- unsigned long shared_info_frame,
- unsigned int control_evtchn,
- unsigned long flags,
- unsigned int vcpus)
+ u32 dom,
+ char *image, unsigned long image_size,
+ gzFile initrd_gfd, unsigned long initrd_len,
+ unsigned long nr_pages,
+ unsigned long *pvsi, unsigned long *pvke,
+ unsigned long *pvss, vcpu_guest_context_t *ctxt,
+ const char *cmdline,
+ unsigned long shared_info_frame,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn, unsigned long *store_mfn)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -91,6 +92,8 @@ static int setup_guest(int xc_handle,
unsigned long vphysmap_end;
unsigned long vstartinfo_start;
unsigned long vstartinfo_end;
+ unsigned long vstoreinfo_start;
+ unsigned long vstoreinfo_end;
unsigned long vstack_start;
unsigned long vstack_end;
unsigned long vpt_start;
@@ -130,7 +133,10 @@ static int setup_guest(int xc_handle,
vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
vstartinfo_start = vpt_end;
vstartinfo_end = vstartinfo_start + PAGE_SIZE;
- vstack_start = vstartinfo_end;
+ /* Place store shared page after startinfo. */
+ vstoreinfo_start = vstartinfo_end;
+ vstoreinfo_end = vstartinfo_end + PAGE_SIZE;
+ vstack_start = vstoreinfo_end;
vstack_end = vstack_start + PAGE_SIZE;
v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
if ( (v_end - vstack_end) < (512UL << 10) )
@@ -161,6 +167,7 @@ static int setup_guest(int xc_handle,
" Phys-Mach map: %p->%p\n"
" Page tables: %p->%p\n"
" Start info: %p->%p\n"
+ " Store page: %p->%p\n"
" Boot stack: %p->%p\n"
" TOTAL: %p->%p\n",
_p(dsi.v_kernstart), _p(dsi.v_kernend),
@@ -168,6 +175,7 @@ static int setup_guest(int xc_handle,
_p(vphysmap_start), _p(vphysmap_end),
_p(vpt_start), _p(vpt_end),
_p(vstartinfo_start), _p(vstartinfo_end),
+ _p(vstoreinfo_start), _p(vstoreinfo_end),
_p(vstack_start), _p(vstack_end),
_p(dsi.v_start), _p(v_end));
printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
@@ -377,6 +385,8 @@ static int setup_guest(int xc_handle,
start_info->nr_pt_frames = nr_pt_pages;
start_info->mfn_list = vphysmap_start;
start_info->domain_controller_evtchn = control_evtchn;
+ start_info->store_page = vstoreinfo_start;
+ start_info->store_evtchn = store_evtchn;
if ( initrd_len != 0 )
{
start_info->mod_start = vinitrd_start;
@@ -386,6 +396,9 @@ static int setup_guest(int xc_handle,
start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
munmap(start_info, PAGE_SIZE);
+ /* Tell our caller where we told domain store page was. */
+ *store_mfn = page_array[((vstoreinfo_start-dsi.v_start)>>PAGE_SHIFT)];
+
/* shared_info page starts its life empty. */
shared_info = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
@@ -407,6 +420,7 @@ static int setup_guest(int xc_handle,
free(page_array);
*pvsi = vstartinfo_start;
+ *pvss = vstack_start;
*pvke = dsi.v_kernentry;
return 0;
@@ -426,7 +440,9 @@ int xc_linux_build(int xc_handle,
const char *cmdline,
unsigned int control_evtchn,
unsigned long flags,
- unsigned int vcpus)
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn)
{
dom0_op_t launch_op, op;
int initrd_fd = -1;
@@ -436,7 +452,7 @@ int xc_linux_build(int xc_handle,
unsigned long nr_pages;
char *image = NULL;
unsigned long image_size, initrd_size=0;
- unsigned long vstartinfo_start, vkern_entry;
+ unsigned long vstartinfo_start, vkern_entry, vstack_start;
if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
{
@@ -493,11 +509,12 @@ int xc_linux_build(int xc_handle,
}
if ( setup_guest(xc_handle, domid, image, image_size,
- initrd_gfd, initrd_size, nr_pages,
- &vstartinfo_start, &vkern_entry,
- ctxt, cmdline,
- op.u.getdomaininfo.shared_info_frame,
- control_evtchn, flags, vcpus) < 0 )
+ initrd_gfd, initrd_size, nr_pages,
+ &vstartinfo_start, &vkern_entry,
+ &vstack_start, ctxt, cmdline,
+ op.u.getdomaininfo.shared_info_frame,
+ control_evtchn, flags, vcpus,
+ store_evtchn, store_mfn) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -528,7 +545,7 @@ int xc_linux_build(int xc_handle,
ctxt->user_regs.ss = FLAT_KERNEL_SS;
ctxt->user_regs.cs = FLAT_KERNEL_CS;
ctxt->user_regs.eip = vkern_entry;
- ctxt->user_regs.esp = vstartinfo_start + 2*PAGE_SIZE;
+ ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
ctxt->user_regs.esi = vstartinfo_start;
ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
@@ -550,7 +567,7 @@ int xc_linux_build(int xc_handle,
/* Ring 1 stack is the initial stack. */
ctxt->kernel_ss = FLAT_KERNEL_SS;
- ctxt->kernel_sp = vstartinfo_start + 2*PAGE_SIZE;
+ ctxt->kernel_sp = vstack_start + PAGE_SIZE;
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
diff --git a/tools/python/setup.py b/tools/python/setup.py
index e6b04f8708..fabe80bd8b 100644
--- a/tools/python/setup.py
+++ b/tools/python/setup.py
@@ -9,13 +9,15 @@ extra_compile_args = [ "-fno-strict-aliasing", "-Wall", "-Werror" ]
include_dirs = [ XEN_ROOT + "/tools/python/xen/lowlevel/xu",
XEN_ROOT + "/tools/libxc",
+ XEN_ROOT + "/tools/xenstore",
XEN_ROOT + "/tools/xcs",
]
library_dirs = [ XEN_ROOT + "/tools/libxc",
+ XEN_ROOT + "/tools/xenstore",
]
-libraries = [ "xc" ]
+libraries = [ "xc", "xenstore" ]
xc = Extension("xc",
extra_compile_args = extra_compile_args,
@@ -30,7 +32,14 @@ xu = Extension("xu",
library_dirs = library_dirs,
libraries = libraries,
sources = [ "xen/lowlevel/xu/xu.c" ])
-
+
+xs = Extension("xs",
+ extra_compile_args = extra_compile_args,
+ include_dirs = include_dirs + [ "xen/lowlevel/xs" ],
+ library_dirs = library_dirs,
+ libraries = libraries,
+ sources = [ "xen/lowlevel/xs/xs.c" ])
+
setup(name = 'xen',
version = '2.0',
description = 'Xen',
@@ -39,11 +48,12 @@ setup(name = 'xen',
'xen.util',
'xen.xend',
'xen.xend.server',
+ 'xen.xend.xenstore',
'xen.xm',
'xen.web',
],
ext_package = "xen.lowlevel",
- ext_modules = [ xc, xu ]
+ ext_modules = [ xc, xu, xs ]
)
os.chdir('logging')
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 013fbe1fcc..13d60be08e 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -14,6 +14,7 @@
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
+
#include "xc_private.h"
#include "linux_boot_params.h"
@@ -259,25 +260,28 @@ static PyObject *pyxc_linux_build(PyObject *self,
{
XcObject *xc = (XcObject *)self;
- u32 dom;
+ u32 dom;
char *image, *ramdisk = NULL, *cmdline = "";
- int control_evtchn, flags = 0, vcpus = 1;
+ int flags = 0, vcpus = 1;
+ int control_evtchn, store_evtchn;
+ unsigned long store_mfn = 0;
- static char *kwd_list[] = { "dom", "control_evtchn",
- "image", "ramdisk", "cmdline", "flags", "vcpus",
- NULL };
+ static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
+ "image", "ramdisk", "cmdline", "flags",
+ "vcpus", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|ssii", kwd_list,
- &dom, &control_evtchn,
- &image, &ramdisk, &cmdline, &flags, &vcpus) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssii", kwd_list,
+ &dom, &control_evtchn, &store_evtchn,
+ &image, &ramdisk, &cmdline, &flags,
+ &vcpus) )
return NULL;
if ( xc_linux_build(xc->xc_handle, dom, image,
- ramdisk, cmdline, control_evtchn, flags, vcpus) != 0 )
+ ramdisk, cmdline, control_evtchn, flags, vcpus,
+ store_evtchn, &store_mfn) != 0 )
return PyErr_SetFromErrno(xc_error);
- Py_INCREF(zero);
- return zero;
+ return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
}
static PyObject *pyxc_plan9_build(PyObject *self,
@@ -834,6 +838,7 @@ static PyMethodDef pyxc_methods[] = {
0, "\n"
"Query the xc control interface file descriptor.\n\n"
"Returns: [int] file descriptor\n" },
+
{ "domain_create",
(PyCFunction)pyxc_domain_create,
METH_VARARGS | METH_KEYWORDS, "\n"
@@ -844,8 +849,8 @@ static PyMethodDef pyxc_methods[] = {
{ "domain_dumpcore",
(PyCFunction)pyxc_domain_dumpcore,
METH_VARARGS | METH_KEYWORDS, "\n"
- "dump core of a domain.\n"
- " dom [int]: Identifier of domain to be paused.\n\n"
+ "Dump core of a domain.\n"
+ " dom [int]: Identifier of domain to dump core of.\n"
" corefile [string]: Name of corefile to be created.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
diff --git a/tools/python/xen/lowlevel/xs/xs.c b/tools/python/xen/lowlevel/xs/xs.c
new file mode 100644
index 0000000000..98d7826809
--- /dev/null
+++ b/tools/python/xen/lowlevel/xs/xs.c
@@ -0,0 +1,617 @@
+#include <Python.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "xs.h"
+
+/** @file
+ * Python interface to the Xen Store Daemon (xs).
+ */
+
+/* Needed for Python versions earlier than 2.3. */
+//#ifndef PyMODINIT_FUNC
+//#define PyMODINIT_FUNC DL_EXPORT(void)
+//#endif
+
+#define PYPKG "xen.lowlevel.xs"
+
+/** Python wrapper round an xs handle.
+ * xh is the underlying xenstore connection; it is set to NULL once the
+ * connection has been closed.
+ */
+typedef struct XsHandle {
+    /* PyObject_HEAD supplies its own trailing semicolon. */
+    PyObject_HEAD
+    struct xs_handle *xh;
+} XsHandle;
+
+/** Fetch the xs handle wrapped by a Python XsHandle object.
+ * Sets a RuntimeError and returns NULL when the wrapper holds no handle.
+ */
+static inline struct xs_handle *xshandle(PyObject *self)
+{
+    XsHandle *wrapper = (XsHandle *)self;
+
+    if (wrapper->xh == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "invalid xenstore daemon handle");
+        return NULL;
+    }
+    return wrapper->xh;
+}
+
+/** Turn an xs integer result into a Python value.
+ * A non-zero val becomes a Python int; zero is treated as failure and
+ * raises RuntimeError built from errno.
+ */
+static inline PyObject *pyvalue_int(int val) {
+    if (val == 0)
+        return PyErr_SetFromErrno(PyExc_RuntimeError);
+    return PyInt_FromLong(val);
+}
+
+/** Turn an xs string result into a Python value.
+ * A non-NULL val becomes a Python string; NULL is treated as failure and
+ * raises RuntimeError built from errno.
+ */
+static inline PyObject *pyvalue_str(char *val) {
+    if (val == NULL)
+        return PyErr_SetFromErrno(PyExc_RuntimeError);
+    return PyString_FromString(val);
+}
+
+/** write(path, data, [create], [excl]): write data to a path.
+ * 'create' and 'excl' select O_CREAT and O_EXCL in the flags handed to
+ * xs_write(). Returns the non-zero xs_write() result as an int; raises
+ * RuntimeError from errno when xs_write() returns 0.
+ */
+static PyObject *xspy_write(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", "data", "create", "excl", NULL };
+    static char *arg_spec = "ss#|ii";
+    char *path = NULL;
+    char *data = NULL;
+    int data_n = 0;
+    int create = 0;
+    int excl = 0;
+    int flags;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &path, &data, &data_n, &create, &excl))
+        return NULL;
+    flags = (create ? O_CREAT : 0) | (excl ? O_EXCL : 0);
+    return pyvalue_int(xs_write(xh, path, data, data_n, flags));
+}
+
+/** read(path): read the data stored at a path.
+ * Returns the data as a Python string of the length reported by
+ * xs_read(); raises RuntimeError from errno when xs_read() returns NULL.
+ */
+static PyObject *xspy_read(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", NULL };
+    static char *arg_spec = "s|";
+    char *path = NULL;
+    char *buf;
+    int buf_n = 0;
+    PyObject *result;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &path))
+        return NULL;
+    buf = xs_read(xh, path, &buf_n);
+    if (buf == NULL)
+        return pyvalue_int(0);
+    result = PyString_FromStringAndSize(buf, buf_n);
+    /* The Python string holds its own copy, so the xs buffer can go. */
+    free(buf);
+    return result;
+}
+
+/** mkdir(path): make a directory.
+ * Returns the non-zero xs_mkdir() result as an int; raises RuntimeError
+ * from errno when xs_mkdir() returns 0.
+ */
+static PyObject *xspy_mkdir(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", NULL };
+    static char *arg_spec = "s|";
+    char *path = NULL;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+        return NULL;
+    return pyvalue_int(xs_mkdir(xh, path));
+}
+
+/** ls(path): list a directory.
+ * Returns a list of strings, one per entry reported by xs_directory().
+ * Raises RuntimeError from errno when xs_directory() returns NULL, and
+ * propagates the Python error if building the list fails.
+ */
+static PyObject *xspy_ls(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", NULL };
+    static char *arg_spec = "s|";
+    char *path = NULL;
+
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+    char **xsval = NULL;
+    int xsval_n = 0;
+    int i;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+        goto exit;
+    xsval = xs_directory(xh, path, &xsval_n);
+    if (!xsval) {
+        val = pyvalue_int(0);
+        goto exit;
+    }
+    val = PyList_New(xsval_n);
+    if (!val)
+        goto exit;
+    for (i = 0; i < xsval_n; i++) {
+        PyObject *item = PyString_FromString(xsval[i]);
+        if (!item) {
+            /* Drop the partly built list; the error is already set. */
+            Py_DECREF(val);
+            val = NULL;
+            goto exit;
+        }
+        /* PyList_SetItem takes over the reference to item. */
+        PyList_SetItem(val, i, item);
+    }
+ exit:
+    /* NOTE(review): assumes xs_directory() returns one malloc()ed block
+     * owned by the caller, as xs_read() does -- confirm against xs.h.
+     */
+    free(xsval);
+    return val;
+}
+
+/** rm(path): remove a path.
+ * Returns the non-zero xs_rm() result as an int; raises RuntimeError
+ * from errno when xs_rm() returns 0.
+ */
+static PyObject *xspy_rm(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", NULL };
+    static char *arg_spec = "s|";
+    char *path = NULL;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+        return NULL;
+    return pyvalue_int(xs_rm(xh, path));
+}
+
+/** get_permissions(path): get the permissions on a node.
+ * Returns a list of dicts with keys "dom", "read", "write", "create" and
+ * "owner"; each flag value is the corresponding XS_PERM_* bit masked out
+ * of the entry (zero when unset). Raises RuntimeError from errno when
+ * xs_get_permissions() returns NULL, and propagates the Python error if
+ * building the result fails.
+ */
+static PyObject *xspy_get_permissions(PyObject *self, PyObject *args,
+                                      PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", NULL };
+    static char *arg_spec = "s|";
+    char *path = NULL;
+
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+    struct xs_permissions *perms = NULL;
+    int perms_n = 0;
+    int i;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+        goto exit;
+    perms = xs_get_permissions(xh, path, &perms_n);
+    if (!perms) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    val = PyList_New(perms_n);
+    if (!val)
+        goto exit;
+    /* Index the array rather than advancing perms, so the base pointer
+     * is still available for free() below.
+     */
+    for (i = 0; i < perms_n; i++) {
+        PyObject *p = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i}",
+                                    "dom",    perms[i].id,
+                                    "read",   (perms[i].perms & XS_PERM_READ),
+                                    "write",  (perms[i].perms & XS_PERM_WRITE),
+                                    "create", (perms[i].perms & XS_PERM_CREATE),
+                                    "owner",  (perms[i].perms & XS_PERM_OWNER));
+        if (!p) {
+            Py_DECREF(val);
+            val = NULL;
+            goto exit;
+        }
+        /* PyList_SetItem takes over the reference to p. */
+        PyList_SetItem(val, i, p);
+    }
+ exit:
+    /* NOTE(review): assumes xs_get_permissions() returns a malloc()ed
+     * array owned by the caller -- confirm against xs.h.
+     */
+    free(perms);
+    return val;
+}
+
+/** set_permissions(path, perms): set the permissions on a node.
+ * perms is a list of dicts with key "dom" and optional keys "read",
+ * "write", "create" and "owner". Only "read" and "write" are applied;
+ * "create" and "owner" are parsed and ignored so that the output of
+ * get_permissions() can be used as input here.
+ * Returns the non-zero xs_set_permissions() result as an int; raises
+ * RuntimeError on bad arguments, on allocation failure, or (from errno)
+ * when xs_set_permissions() returns 0.
+ */
+static PyObject *xspy_set_permissions(PyObject *self, PyObject *args,
+                                      PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", "perms", NULL };
+    static char *arg_spec = "sO";
+    char *path = NULL;
+    PyObject *perms = NULL;
+    static char *perm_names[] = { "dom", "read", "write", "create", "owner",
+                                  NULL };
+    static char *perm_spec = "i|iiii";
+
+    struct xs_handle *xh = xshandle(self);
+    int i, xsval;
+    struct xs_permissions *xsperms = NULL;
+    int xsperms_n = 0;
+    PyObject *tuple0 = NULL;
+    PyObject *val = NULL;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &path, &perms))
+        goto exit;
+    if (!PyList_Check(perms)) {
+        PyErr_SetString(PyExc_RuntimeError, "perms must be a list");
+        goto exit;
+    }
+    xsperms_n = PyList_Size(perms);
+    /* calloc(0, ...) is allowed to return NULL, so always ask for at
+     * least one element: an empty list is not an out-of-memory error.
+     */
+    xsperms = calloc(xsperms_n ? xsperms_n : 1, sizeof(*xsperms));
+    if (!xsperms) {
+        PyErr_SetString(PyExc_RuntimeError, "out of memory");
+        goto exit;
+    }
+    /* Empty positional tuple: each entry is parsed purely by keyword. */
+    tuple0 = PyTuple_New(0);
+    if (!tuple0)
+        goto exit;
+    for (i = 0; i < xsperms_n; i++) {
+        /* Domain the permissions apply to. */
+        int dom = 0;
+        /* Read/write perms. Set these. */
+        int p_read = 0, p_write = 0;
+        /* Create/owner perms. Ignore them.
+         * This is so the output from get_permissions() can be used
+         * as input to set_permissions().
+         */
+        int p_create = 0, p_owner = 0;
+        /* Borrowed reference; no DECREF needed. */
+        PyObject *p = PyList_GetItem(perms, i);
+        if (!p || !PyDict_Check(p)) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "perms entries must be dicts");
+            goto exit;
+        }
+        if (!PyArg_ParseTupleAndKeywords(tuple0, p, perm_spec, perm_names,
+                                         &dom, &p_read, &p_write, &p_create,
+                                         &p_owner))
+            goto exit;
+        xsperms[i].id = dom;
+        if (p_read)
+            xsperms[i].perms |= XS_PERM_READ;
+        if (p_write)
+            xsperms[i].perms |= XS_PERM_WRITE;
+    }
+    xsval = xs_set_permissions(xh, path, xsperms, xsperms_n);
+    val = pyvalue_int(xsval);
+ exit:
+    Py_XDECREF(tuple0);
+    free(xsperms);
+    return val;
+}
+
+/** watch(path, [priority]): register a watch on a path.
+ * priority defaults to 0. Returns the non-zero xs_watch() result as an
+ * int; raises RuntimeError from errno when xs_watch() returns 0.
+ */
+static PyObject *xspy_watch(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", "priority", NULL };
+    static char *arg_spec = "s|i";
+    char *path = NULL;
+    int priority = 0;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &path, &priority))
+        return NULL;
+    return pyvalue_int(xs_watch(xh, path, priority));
+}
+
+/** read_watch(): fetch the string returned by xs_read_watch().
+ * Raises RuntimeError from errno when xs_read_watch() returns NULL.
+ */
+static PyObject *xspy_read_watch(PyObject *self, PyObject *args,
+                                 PyObject *kwds)
+{
+    static char *kwd_spec[] = { NULL };
+    static char *arg_spec = "";
+    char *event;
+    PyObject *result;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+        return NULL;
+    event = xs_read_watch(xh);
+    /* Convert first so errno is still intact on the failure path. */
+    result = pyvalue_str(event);
+    if (event != NULL)
+        free(event);
+    return result;
+}
+
+/** acknowledge_watch(): call xs_acknowledge_watch() on the handle.
+ * Returns its non-zero result as an int; raises RuntimeError from errno
+ * when it returns 0.
+ */
+static PyObject *xspy_acknowledge_watch(PyObject *self, PyObject *args,
+                                        PyObject *kwds)
+{
+    static char *kwd_spec[] = { NULL };
+    static char *arg_spec = "";
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+        return NULL;
+    return pyvalue_int(xs_acknowledge_watch(xh));
+}
+
+/** unwatch(path): remove the watch on a path.
+ * Returns the non-zero xs_unwatch() result as an int; raises
+ * RuntimeError from errno when xs_unwatch() returns 0.
+ */
+static PyObject *xspy_unwatch(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", NULL };
+    static char *arg_spec = "s|";
+    char *path = NULL;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+        return NULL;
+    return pyvalue_int(xs_unwatch(xh, path));
+}
+
+/** transaction_start(path): start a transaction on a path.
+ * Returns the non-zero xs_transaction_start() result as an int; raises
+ * RuntimeError from errno when it returns 0.
+ */
+static PyObject *xspy_transaction_start(PyObject *self, PyObject *args,
+                                        PyObject *kwds)
+{
+    static char *kwd_spec[] = { "path", NULL };
+    static char *arg_spec = "s|";
+    char *path = NULL;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+        return NULL;
+    return pyvalue_int(xs_transaction_start(xh, path));
+}
+
+/** transaction_end([abort]): end the current transaction.
+ * abort defaults to 0 and is passed through to xs_transaction_end().
+ * Returns its non-zero result as an int; raises RuntimeError from errno
+ * when it returns 0.
+ */
+static PyObject *xspy_transaction_end(PyObject *self, PyObject *args,
+                                      PyObject *kwds)
+{
+    static char *kwd_spec[] = { "abort", NULL };
+    static char *arg_spec = "|i";
+    int abort_flag = 0;
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &abort_flag))
+        return NULL;
+    return pyvalue_int(xs_transaction_end(xh, abort_flag));
+}
+
+/** introduce_domain(dom, page, port, path): introduce a domain to the
+ * store daemon.
+ * Returns the non-zero xs_introduce_domain() result as an int; raises
+ * RuntimeError from errno when it returns 0.
+ */
+static PyObject *xspy_introduce_domain(PyObject *self, PyObject *args,
+                                       PyObject *kwds)
+{
+    static char *kwd_spec[] = { "dom", "page", "port", "path", NULL };
+    /* Each format unit must match the C type it stores into: "i" writes
+     * an int and "l" a long. Parse into plain int/long locals and convert
+     * afterwards, rather than storing an int through a pointer to the
+     * narrower domid_t or the wider unsigned long.
+     */
+    static char *arg_spec = "ilis|";
+    int dom_arg = 0;
+    long page_arg = 0;
+    int port_arg = 0;
+    char *path = NULL;
+    domid_t dom;
+    unsigned long page;
+    unsigned int port;
+
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+    int xsval = 0;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &dom_arg, &page_arg, &port_arg, &path))
+        goto exit;
+    dom = (domid_t)dom_arg;
+    page = (unsigned long)page_arg;
+    port = (unsigned int)port_arg;
+    printf("%s> dom=%u page=0x%08lx port=%u path=%s\n", __FUNCTION__,
+           (unsigned int)dom, page, port, path);
+    xsval = xs_introduce_domain(xh, dom, page, port, path);
+    printf("%s> xsval=%d\n", __FUNCTION__, xsval);
+    val = pyvalue_int(xsval);
+ exit:
+    return val;
+}
+
+/** release_domain(dom): release a domain from the store daemon.
+ * Returns the non-zero xs_release_domain() result as an int; raises
+ * RuntimeError from errno when it returns 0.
+ */
+static PyObject *xspy_release_domain(PyObject *self, PyObject *args,
+                                     PyObject *kwds)
+{
+    static char *kwd_spec[] = { "dom", NULL };
+    static char *arg_spec = "i|";
+    /* "i" stores a full int, so parse into an int and narrow to domid_t
+     * afterwards instead of writing through a domid_t pointer.
+     */
+    int dom_arg = 0;
+    domid_t dom;
+
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+    int xsval = 0;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &dom_arg))
+        goto exit;
+    dom = (domid_t)dom_arg;
+    printf("%s> dom=%u\n", __FUNCTION__, (unsigned int)dom);
+    xsval = xs_release_domain(xh, dom);
+    printf("%s> xsval=%d\n", __FUNCTION__, xsval);
+    val = pyvalue_int(xsval);
+ exit:
+    return val;
+}
+
+/** close(): close the connection to the store daemon.
+ * Clears the wrapped handle afterwards, so later calls on this object
+ * fail in xshandle(). Returns the int 1.
+ */
+static PyObject *xspy_close(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { NULL };
+    static char *arg_spec = "";
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+        return NULL;
+    xs_daemon_close(xh);
+    ((XsHandle *)self)->xh = NULL;
+    return pyvalue_int(1);
+}
+
+/** shutdown(): call xs_shutdown() on the handle.
+ * Returns its non-zero result as an int; raises RuntimeError from errno
+ * when it returns 0.
+ */
+static PyObject *xspy_shutdown(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { NULL };
+    static char *arg_spec = "";
+    struct xs_handle *xh = xshandle(self);
+
+    if (!xh)
+        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+        return NULL;
+    return pyvalue_int(xs_shutdown(xh));
+}
+
+/* Expands to the first three PyMethodDef fields for xspy_<name>:
+ * the method name, the C function, and the calling-convention flags.
+ */
+#define XSPY_METH(_name) \
+    #_name, \
+    (PyCFunction) xspy_ ## _name, \
+    (METH_VARARGS | METH_KEYWORDS)
+// mtime
+// ctime
+
+/* Method table for xshandle objects; looked up by xshandle_getattr().
+ * Each docstring lists the keyword arguments the method accepts.
+ */
+static PyMethodDef xshandle_methods[] = {
+    { XSPY_METH(read),
+      "read(path) : read data\n" },
+    { XSPY_METH(write),
+      "write(path, data, [create], [excl]): write data\n" },
+    { XSPY_METH(ls),
+      "ls(path): list directory.\n" },
+    { XSPY_METH(mkdir),
+      "mkdir(path): make a directory.\n" },
+    { XSPY_METH(rm),
+      "rm(path): remove a path (dir must be empty).\n" },
+    { XSPY_METH(get_permissions),
+      "get_permissions(path)\n" },
+    { XSPY_METH(set_permissions),
+      "set_permissions(path, perms)\n" },
+    { XSPY_METH(watch),
+      "watch(path, [priority])\n" },
+    { XSPY_METH(read_watch),
+      "read_watch()\n" },
+    { XSPY_METH(acknowledge_watch),
+      "acknowledge_watch()\n" },
+    { XSPY_METH(unwatch),
+      "unwatch(path)\n" },
+    { XSPY_METH(transaction_start),
+      "transaction_start(path)\n" },
+    { XSPY_METH(transaction_end),
+      "transaction_end([abort])\n" },
+    { XSPY_METH(introduce_domain),
+      "introduce_domain(dom, page, port, path)\n" },
+    { XSPY_METH(release_domain),
+      "release_domain(dom)\n" },
+    { XSPY_METH(close),
+      "close()\n" },
+    { XSPY_METH(shutdown),
+      "shutdown()\n" },
+    { NULL, NULL, 0, NULL }
+};
+
+/** Attribute lookup for xshandle objects.
+ * "fileno" yields xs_fileno() of the wrapped handle, or -1 when there is
+ * no handle. Every other name is resolved through xshandle_methods.
+ */
+static PyObject *xshandle_getattr(PyObject *self, char *name)
+{
+    if (strcmp(name, "fileno") == 0) {
+        /* Read the field directly: xshandle() would leave a RuntimeError
+         * pending, and a non-NULL result must not be returned with an
+         * exception set.
+         */
+        struct xs_handle *xh = ((XsHandle *)self)->xh;
+        return PyInt_FromLong((xh ? xs_fileno(xh) : -1));
+    }
+    return Py_FindMethod(xshandle_methods, self, name);
+}
+
+/** Destructor for xshandle objects.
+ * Closes the daemon connection if it is still open, then frees the
+ * Python object.
+ */
+static void xshandle_dealloc(PyObject *self)
+{
+    XsHandle *wrapper = (XsHandle *)self;
+
+    if (wrapper->xh != NULL) {
+        xs_daemon_close(wrapper->xh);
+        wrapper->xh = NULL;
+    }
+    PyObject_Del(self);
+}
+
+static PyTypeObject xshandle_type = { /* Type object for XsHandle instances; slots are positional. */
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "xshandle", /* tp_name */
+ sizeof(XsHandle), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ xshandle_dealloc, /* tp_dealloc */
+ NULL, /* tp_print */
+ xshandle_getattr, /* tp_getattr */
+ NULL, /* tp_setattr */
+ NULL, /* tp_compare */
+ NULL, /* tp_repr */
+ NULL, /* tp_as_number */
+ NULL, /* tp_as_sequence */
+ NULL, /* tp_as_mapping */
+ NULL /* tp_hash */
+};
+
+/** open([readonly]): open a connection to the xenstore daemon.
+ * readonly defaults to 0; when non-zero the connection is opened with
+ * xs_daemon_open_readonly(). Returns a new xshandle object, or raises
+ * RuntimeError from errno when the connection cannot be opened.
+ */
+static PyObject *xshandle_open(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwd_spec[] = { "readonly", NULL };
+    static char *arg_spec = "|i";
+    int readonly = 0;
+    XsHandle *handle;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &readonly))
+        return NULL;
+
+    handle = PyObject_New(XsHandle, &xshandle_type);
+    if (handle == NULL)
+        return NULL;
+    if (readonly)
+        handle->xh = xs_daemon_open_readonly();
+    else
+        handle->xh = xs_daemon_open();
+    if (handle->xh == NULL) {
+        /* No connection: discard the wrapper and report errno. */
+        PyObject_Del(handle);
+        return pyvalue_int(0);
+    }
+    return (PyObject *)handle;
+}
+
+static PyMethodDef xs_methods[] = { /* Module-level functions, registered by initxs(). */
+ { "open", (PyCFunction)xshandle_open, (METH_VARARGS | METH_KEYWORDS),
+ "Open a connection to the xenstore daemon.\n" },
+ { NULL, NULL, 0, NULL } /* end-of-table sentinel */
+};
+
+/** Module initialisation: registers xs_methods under the name PYPKG. */
+PyMODINIT_FUNC initxs (void)
+{
+    /* The module object is not needed afterwards, so it is not kept. */
+    Py_InitModule(PYPKG, xs_methods);
+}
diff --git a/tools/python/xen/lowlevel/xu/xu.c b/tools/python/xen/lowlevel/xu/xu.c
index c9c5b3873a..359cb71a2d 100644
--- a/tools/python/xen/lowlevel/xu/xu.c
+++ b/tools/python/xen/lowlevel/xu/xu.c
@@ -1370,7 +1370,8 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args, PyObject *kwds)
fail1:
PyObject_Del((PyObject *)xup);
- return NULL;
+ PyErr_SetString(PyExc_ValueError, "cannot create port");
+ return NULL;
}
static PyObject *xu_port_getattr(PyObject *obj, char *name)
diff --git a/tools/python/xen/util/mac.py b/tools/python/xen/util/mac.py
new file mode 100644
index 0000000000..47dffd80d5
--- /dev/null
+++ b/tools/python/xen/util/mac.py
@@ -0,0 +1,11 @@
+
+from string import join, split
+
+def macToString(mac):
+ return ':'.join(map(lambda x: "%02x" % x, mac))
+
+def macFromString(str):
+ mac = [ int(x, 16) for x in str.split(':') ]
+ if len(mac) != 6:
+ raise ValueError("invalid mac: %s" % str)
+ return mac
diff --git a/tools/python/xen/web/SrvDir.py b/tools/python/xen/web/SrvDir.py
index fb9eb14b3c..b168a8ef48 100644
--- a/tools/python/xen/web/SrvDir.py
+++ b/tools/python/xen/web/SrvDir.py
@@ -77,19 +77,16 @@ class SrvDir(SrvBase):
return v
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-type", sxp.mime_type)
- self.ls(req, 1)
- else:
- req.write('<html><head></head><body>')
- self.print_path(req)
- self.ls(req)
- self.form(req)
- req.write('</body></html>')
- return ''
- except Exception, ex:
- self._perform_err(ex, "GET", req)
+ if self.use_sxp(req):
+ req.setHeader("Content-type", sxp.mime_type)
+ self.ls(req, 1)
+ else:
+ req.write('<html><head></head><body>')
+ self.print_path(req)
+ self.ls(req)
+ self.form(req)
+ req.write('</body></html>')
+ return ''
def ls(self, req, use_sxp=0):
url = req.prePathURL()
diff --git a/tools/python/xen/xend/PrettyPrint.py b/tools/python/xen/xend/PrettyPrint.py
index 5fcc6e6d08..a57a3c6b52 100644
--- a/tools/python/xen/xend/PrettyPrint.py
+++ b/tools/python/xen/xend/PrettyPrint.py
@@ -285,15 +285,18 @@ def prettyprint(sxpr, out=sys.stdout, width=80):
sxp.show(sxpr, out=out)
print >> out
-def prettyprintstring(sxp):
- class tmpstr:
- def __init__(self):
- self.str = ""
- def write(self, str):
- self.str = self.str + str
- tmp = tmpstr()
- prettyprint(sxp, out=tmp)
- return tmp.str
+def prettyprintstring(sxpr, width=80):
+ """Prettyprint an SXP form to a string.
+
+ sxpr s-expression
+ width maximum output width
+ """
+ io = StringIO.StringIO()
+ prettyprint(sxpr, out=io, width=width)
+ io.seek(0)
+ val = io.getvalue()
+ io.close()
+ return val
def main():
pin = sxp.Parser()
diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py
index e3908df885..654fb022c5 100644
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -43,7 +43,7 @@ def save(xd, fd, dominfo):
write_exact(fd, config, "could not write guest state file: config")
cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
- dominfo.id]
+ str(dominfo.id)]
log.info("[xc_save] " + join(cmd))
child = xPopen3(cmd, True, -1, [fd, xc.handle()])
@@ -63,10 +63,10 @@ def save(xd, fd, dominfo):
if fd == child.fromchild.fileno():
l = child.fromchild.readline()
if l.rstrip() == "suspend":
- log.info("suspending %s" % dominfo.id)
+ log.info("suspending %d" % dominfo.id)
xd.domain_shutdown(dominfo.id, reason='suspend')
dominfo.state_wait("suspended")
- log.info("suspend %s done" % dominfo.id)
+ log.info("suspend %d done" % dominfo.id)
child.tochild.write("done\n")
child.tochild.flush()
if filter(lambda (fd, event): event & select.POLLHUP, r):
@@ -109,7 +109,7 @@ def restore(xd, fd):
"not a valid guest state file: pfn count out of range")
cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
- dominfo.id, str(nr_pfns)]
+ str(dominfo.id), str(nr_pfns)]
log.info("[xc_restore] " + join(cmd))
child = xPopen3(cmd, True, -1, [fd, xc.handle()])
child.tochild.close()
diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py
index 3fb066327f..ff688f6df1 100644
--- a/tools/python/xen/xend/XendDomain.py
+++ b/tools/python/xen/xend/XendDomain.py
@@ -7,46 +7,42 @@
"""
import errno
import os
-import scheduler
-import string
import sys
-import traceback
import time
+import traceback
import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
-from xen.xend.server import relocate
-import sxp
-import XendRoot; xroot = XendRoot.instance()
-import XendCheckpoint
-import XendDB
-import XendDomainInfo
-import EventServer; eserver = EventServer.instance()
-from XendError import XendError
-from XendLogging import log
-
+from xen.xend import sxp
+from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendCheckpoint
+from xen.xend.XendDomainInfo import XendDomainInfo, shutdown_reason
+from xen.xend import EventServer; eserver = EventServer.instance()
+from xen.xend.XendError import XendError
+from xen.xend.XendLogging import log
+from xen.xend import scheduler
from xen.xend.server import channel
+from xen.xend.server import relocate
+from xen.xend.uuid import getUuid
+from xen.xend.xenstore import XenNode, DBMap
__all__ = [ "XendDomain" ]
SHUTDOWN_TIMEOUT = 30
+class XendDomainDict(dict):
+ def get_by_name(self, name):
+ try:
+ return filter(lambda d: d.name == name, self.values())[0]
+ except IndexError, err:
+ return None
+
class XendDomain:
"""Index of all domains. Singleton.
"""
- """Path to domain database."""
- dbpath = "domain"
-
- class XendDomainDict(dict):
- def get_by_name(self, name):
- try:
- return filter(lambda d: d.name == name, self.values())[0]
- except IndexError, err:
- return None
-
"""Dict of domain info indexed by domain id."""
- domains = XendDomainDict()
+ domains = None
def __init__(self):
# Hack alert. Python does not support mutual imports, but XendDomainInfo
@@ -54,8 +50,8 @@ class XendDomain:
# to import XendDomain from XendDomainInfo causes unbounded recursion.
# So we stuff the XendDomain instance (self) into xroot's components.
xroot.add_component("xen.xend.XendDomain", self)
- # Table of domain info indexed by domain id.
- self.db = XendDB.XendDB(self.dbpath)
+ self.domains = XendDomainDict()
+ self.dbmap = DBMap(db=XenNode("/domain"))
eserver.subscribe('xend.virq', self.onVirq)
self.initial_refresh()
@@ -77,18 +73,16 @@ class XendDomain:
domlist = xc.domain_getinfo()
doms = {}
for d in domlist:
- domid = str(d['dom'])
+ domid = d['dom']
doms[domid] = d
return doms
def xen_domain(self, dom):
"""Get info about a single domain from xc.
Returns None if not found.
+
+ @param dom domain id (int)
"""
- try:
- dom = int(dom)
- except ValueError:
- return None
dominfo = xc.domain_getinfo(dom, 1)
if dominfo == [] or dominfo[0]['dom'] != dom:
dominfo = None
@@ -100,37 +94,36 @@ class XendDomain:
"""Refresh initial domain info from db.
"""
doms = self.xen_domains()
- for config in self.db.fetchall("").values():
- domid = str(sxp.child_value(config, 'id'))
- if domid in doms:
+ self.dbmap.readDB()
+ for domdb in self.dbmap.values():
+ try:
+ domid = int(domdb.id)
+ except:
+ domid = None
+ # XXX if domid in self.domains, then something went wrong
+ if (domid is None) or (domid in self.domains):
+ domdb.delete()
+ elif domid in doms:
try:
- self._new_domain(config, doms[domid])
- self.update_domain(domid)
+ self._new_domain(domdb, doms[domid])
except Exception, ex:
- log.exception("Error recreating domain info: id=%s", domid)
+ log.exception("Error recreating domain info: id=%d", domid)
self._delete_domain(domid)
else:
self._delete_domain(domid)
self.refresh(cleanup=True)
- def sync_domain(self, info):
- """Sync info for a domain to disk.
-
- info domain info
- """
- self.db.save(info.id, info.sxpr())
-
def close(self):
pass
- def _new_domain(self, savedinfo, info):
+ def _new_domain(self, db, info):
"""Create a domain entry from saved info.
- @param savedinfo: saved info from the db
- @param info: domain info from xen
+ @param db: saved info from the db
+ @param info: domain info from xen
@return: domain
"""
- dominfo = XendDomainInfo.vm_recreate(savedinfo, info)
+ dominfo = XendDomainInfo.recreate(db, info)
self.domains[dominfo.id] = dominfo
return dominfo
@@ -144,11 +137,11 @@ class XendDomain:
for i, d in self.domains.items():
if i != d.id:
del self.domains[i]
- self.db.delete(i)
+ self.dbmap.delete(d.uuid)
if info.id in self.domains:
notify = False
self.domains[info.id] = info
- self.sync_domain(info)
+ info.exportToDB(save=True)
if notify:
eserver.inject('xend.domain.create', [info.name, info.id])
@@ -158,12 +151,26 @@ class XendDomain:
@param id: domain id
@param notify: send a domain died event if true
"""
+ try:
+ if self.xen_domain(id):
+ return
+ except:
+ pass
info = self.domains.get(id)
if info:
del self.domains[id]
+ info.cleanup()
+ info.delete()
if notify:
eserver.inject('xend.domain.died', [info.name, info.id])
- self.db.delete(id)
+ # XXX this should not be needed
+ for domdb in self.dbmap.values():
+ try:
+ domid = int(domdb.id)
+ except:
+ domid = None
+ if (domid is None) or (domid == id):
+ domdb.delete()
def reap(self):
"""Look for domains that have crashed or stopped.
@@ -178,22 +185,19 @@ class XendDomain:
not(d['running'] or d['paused'] or d['blocked']))
if dead:
casualties.append(d)
- destroyed = 0
for d in casualties:
- id = str(d['dom'])
- #print 'reap>', id
+ id = d['dom']
dominfo = self.domains.get(id)
name = (dominfo and dominfo.name) or '??'
if dominfo and dominfo.is_terminated():
- #print 'reap> already terminated:', id
continue
- log.debug('XendDomain>reap> domain died name=%s id=%s', name, id)
+ log.debug('XendDomain>reap> domain died name=%s id=%d', name, id)
if d['shutdown']:
- reason = XendDomainInfo.shutdown_reason(d['shutdown_reason'])
- log.debug('XendDomain>reap> shutdown name=%s id=%s reason=%s', name, id, reason)
+ reason = shutdown_reason(d['shutdown_reason'])
+ log.debug('XendDomain>reap> shutdown name=%s id=%d reason=%s', name, id, reason)
if reason in ['suspend']:
if dominfo and dominfo.is_terminated():
- log.debug('XendDomain>reap> Suspended domain died id=%s', id)
+ log.debug('XendDomain>reap> Suspended domain died id=%d', id)
else:
eserver.inject('xend.domain.suspended', [name, id])
if dominfo:
@@ -203,10 +207,9 @@ class XendDomain:
eserver.inject('xend.domain.exit', [name, id, reason])
self.domain_restart_schedule(id, reason)
else:
- if xroot.get_enable_dump() == 'true':
- xc.domain_dumpcore(dom = int(id), corefile = "/var/xen/dump/%s.%s.core"%(name,id))
+ if xroot.get_enable_dump():
+ self.domain_dumpcore(id)
eserver.inject('xend.domain.exit', [name, id, 'crash'])
- destroyed += 1
self.final_domain_destroy(id)
def refresh(self, cleanup=False):
@@ -216,7 +219,7 @@ class XendDomain:
self.reap()
doms = self.xen_domains()
# Add entries for any domains we don't know about.
- for (id, d) in doms.items():
+ for id in doms.keys():
if id not in self.domains:
self.domain_lookup(id)
# Remove entries for domains that no longer exist.
@@ -234,16 +237,7 @@ class XendDomain:
scheduler.now(self.domain_restarts)
def update_domain(self, id):
- """Update the saved info for a domain.
-
- @param id: domain id
- """
- dominfo = self.domains.get(id)
- if dominfo:
- self.sync_domain(dominfo)
-
- def refresh_domain(self, id):
- """Refresh information for a single domain.
+ """Update information for a single domain.
@param id: domain id
"""
@@ -279,8 +273,7 @@ class XendDomain:
@param config: configuration
@return: domain
"""
- dominfo = XendDomainInfo.vm_create(config)
- self._add_domain(dominfo)
+ dominfo = XendDomainInfo.create(self.dbmap, config)
return dominfo
def domain_restart(self, dominfo):
@@ -293,7 +286,6 @@ class XendDomain:
[dominfo.name, dominfo.id, "begin"])
try:
dominfo.restart()
- self._add_domain(dominfo)
log.info('Restarted domain name=%s id=%s', dominfo.name, dominfo.id)
eserver.inject("xend.domain.restart",
[dominfo.name, dominfo.id, "success"])
@@ -309,14 +301,13 @@ class XendDomain:
"""Configure an existing domain. This is intended for internal
use by domain restore and migrate.
- @param id: domain id
@param vmconfig: vm configuration
"""
config = sxp.child_value(vmconfig, 'config')
- dominfo = XendDomainInfo.vm_restore(config)
- self._add_domain(dominfo)
+ uuid = sxp.child_value(vmconfig, 'uuid')
+ dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
return dominfo
-
+
def domain_restore(self, src, progress=False):
"""Restore a domain from file.
@@ -326,9 +317,7 @@ class XendDomain:
try:
fd = os.open(src, os.O_RDONLY)
-
return XendCheckpoint.restore(self, fd)
-
except OSError, ex:
raise XendError("can't read guest state file %s: %s" %
(src, ex[1]))
@@ -339,24 +328,35 @@ class XendDomain:
@param id: domain id
@return: domain object (or None)
"""
- id = str(id)
- self.refresh_domain(id)
+ self.update_domain(id)
return self.domains.get(id)
- def domain_lookup(self, name):
- name = str(name)
- dominfo = self.domains.get_by_name(name) or self.domains.get(name)
- if dominfo:
- return dominfo
- try:
- d = self.xen_domain(name)
- if d:
- log.info("Creating entry for unknown domain: id=%s", name)
- dominfo = XendDomainInfo.vm_recreate(None, d)
- self._add_domain(dominfo)
- return dominfo
- except Exception, ex:
- log.exception("Error creating domain info: id=%s", name)
+ def domain_lookup(self, id):
+ dominfo = self.domains.get(id)
+ if not dominfo:
+ try:
+ info = self.xen_domain(id)
+ if info:
+ uuid = getUuid()
+ log.info(
+ "Creating entry for unknown domain: id=%d uuid=%s",
+ id, uuid)
+ db = self.dbmap.addChild(uuid)
+ dominfo = XendDomainInfo.recreate(db, info)
+ self._add_domain(dominfo)
+ except Exception, ex:
+ log.exception("Error creating domain info: id=%d", id)
+ return dominfo
+
+ def domain_lookup_by_name(self, name):
+ dominfo = self.domains.get_by_name(name)
+ if not dominfo:
+ try:
+ id = int(name)
+ dominfo = self.domain_lookup(id)
+ except ValueError:
+ pass
+ return dominfo
def domain_unpause(self, id):
"""Unpause domain execution.
@@ -366,7 +366,7 @@ class XendDomain:
dominfo = self.domain_lookup(id)
eserver.inject('xend.domain.unpause', [dominfo.name, dominfo.id])
try:
- return xc.domain_unpause(dom=dominfo.dom)
+ return xc.domain_unpause(dom=dominfo.id)
except Exception, ex:
raise XendError(str(ex))
@@ -378,7 +378,7 @@ class XendDomain:
dominfo = self.domain_lookup(id)
eserver.inject('xend.domain.pause', [dominfo.name, dominfo.id])
try:
- return xc.domain_pause(dom=dominfo.dom)
+ return xc.domain_pause(dom=dominfo.id)
except Exception, ex:
raise XendError(str(ex))
@@ -436,7 +436,7 @@ class XendDomain:
@param id: domain id
@param reason: shutdown reason
"""
- log.debug('domain_restart_schedule> %s %s %d', id, reason, force)
+ log.debug('domain_restart_schedule> %d %s %d', id, reason, force)
dominfo = self.domain_lookup(id)
if not dominfo:
return
@@ -484,7 +484,7 @@ class XendDomain:
except:
#todo
try:
- val = xc.domain_destroy(dom=int(id))
+ val = xc.domain_destroy(dom=id)
except Exception, ex:
raise XendError(str(ex))
return val
@@ -553,7 +553,7 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
try:
- return xc.domain_pincpu(int(dominfo.id), vcpu, cpumap)
+ return xc.domain_pincpu(dominfo.id, vcpu, cpumap)
except Exception, ex:
raise XendError(str(ex))
@@ -562,7 +562,7 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
try:
- return xc.bvtsched_domain_set(dom=dominfo.dom, mcuadv=mcuadv,
+ return xc.bvtsched_domain_set(dom=dominfo.id, mcuadv=mcuadv,
warpback=warpback, warpvalue=warpvalue,
warpl=warpl, warpu=warpu)
except Exception, ex:
@@ -573,7 +573,7 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
try:
- return xc.bvtsched_domain_get(dominfo.dom)
+ return xc.bvtsched_domain_get(dominfo.id)
except Exception, ex:
raise XendError(str(ex))
@@ -581,20 +581,21 @@ class XendDomain:
def domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
"""Set Simple EDF scheduler parameters for a domain.
"""
- dominfo = self.domain_lookup(id)
+ dominfo = self.domain_lookup(id)
try:
- return xc.sedf_domain_set(dominfo.dom, period, slice, latency, extratime, weight)
+ return xc.sedf_domain_set(dominfo.id, period, slice, latency, extratime, weight)
except Exception, ex:
raise XendError(str(ex))
def domain_cpu_sedf_get(self, id):
- """Get Atropos scheduler parameters for a domain.
+ """Get Simple EDF scheduler parameters for a domain.
"""
dominfo = self.domain_lookup(id)
try:
- return xc.sedf_domain_get(dominfo.dom)
+ return xc.sedf_domain_get(dominfo.id)
except Exception, ex:
raise XendError(str(ex))
+
def domain_device_create(self, id, devconfig):
"""Create a new device for a domain.
@@ -603,44 +604,44 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
val = dominfo.device_create(devconfig)
- self.update_domain(dominfo.id)
+ dominfo.exportToDB()
return val
- def domain_device_configure(self, id, devconfig, idx):
+ def domain_device_configure(self, id, devconfig, devid):
"""Configure an existing device for a domain.
@param id: domain id
@param devconfig: device configuration
- @param idx: device index
+ @param devid: device id
@return: updated device configuration
"""
dominfo = self.domain_lookup(id)
- val = dominfo.device_configure(devconfig, idx)
- self.update_domain(dominfo.id)
+ val = dominfo.device_configure(devconfig, devid)
+ dominfo.exportToDB()
return val
- def domain_device_refresh(self, id, type, idx):
+ def domain_device_refresh(self, id, type, devid):
"""Refresh a device.
@param id: domain id
- @param idx: device index
+ @param devid: device id
@param type: device type
"""
dominfo = self.domain_lookup(id)
- val = dominfo.device_refresh(type, idx)
- self.update_domain(dominfo.id)
+ val = dominfo.device_refresh(type, devid)
+ dominfo.exportToDB()
return val
- def domain_device_destroy(self, id, type, idx):
+ def domain_device_destroy(self, id, type, devid):
"""Destroy a device.
@param id: domain id
- @param idx: device index
+ @param devid: device id
@param type: device type
"""
dominfo = self.domain_lookup(id)
- val = dominfo.device_destroy(type, idx)
- self.update_domain(dominfo.id)
+ val = dominfo.device_destroy(type, devid)
+ dominfo.exportToDB()
return val
def domain_devtype_ls(self, id, type):
@@ -653,22 +654,22 @@ class XendDomain:
dominfo = self.domain_lookup(id)
return dominfo.getDeviceSxprs(type)
- def domain_devtype_get(self, id, type, idx):
+ def domain_devtype_get(self, id, type, devid):
"""Get a device from a domain.
-
+
@param id: domain
@param type: device type
- @param idx: device index
+ @param devid: device id
@return: device object (or None)
"""
dominfo = self.domain_lookup(id)
- return dominfo.getDeviceByIndex(type, idx)
+ return dominfo.getDevice(type, devid)
def domain_vif_limit_set(self, id, vif, credit, period):
"""Limit the vif's transmission rate
"""
dominfo = self.domain_lookup(id)
- dev = dominfo.getDeviceById('vif', vif)
+ dev = dominfo.getDevice('vif', vif)
if not dev:
raise XendError("invalid vif")
return dev.setCreditLimit(credit, period)
@@ -681,30 +682,47 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
try:
- return xc.shadow_control(dominfo.dom, op)
+ return xc.shadow_control(dominfo.id, op)
except Exception, ex:
raise XendError(str(ex))
def domain_maxmem_set(self, id, mem):
"""Set the memory limit for a domain.
- @param dom: domain
+ @param id: domain
@param mem: memory limit (in MB)
@return: 0 on success, -1 on error
"""
dominfo = self.domain_lookup(id)
maxmem = int(mem) * 1024
try:
- return xc.domain_setmaxmem(dominfo.dom, maxmem_kb = maxmem)
+ return xc.domain_setmaxmem(dominfo.id, maxmem_kb = maxmem)
except Exception, ex:
raise XendError(str(ex))
- def domain_mem_target_set(self, id, target):
+ def domain_mem_target_set(self, id, mem):
+ """Set the memory target for a domain.
+
+ @param id: domain
+ @param mem: memory target (in MB)
+ @return: 0 on success, -1 on error
+ """
dominfo = self.domain_lookup(id)
- return dominfo.mem_target_set(target)
-
+ return dominfo.mem_target_set(mem)
+ def domain_dumpcore(self, id):
+ """Save a core dump for a crashed domain.
+ @param id: domain
+ """
+ dominfo = self.domain_lookup(id)
+ corefile = "/var/xen/dump/%s.%s.core"% (dominfo.name, dominfo.id)
+ try:
+ xc.domain_dumpcore(dom=dominfo.id, corefile=corefile)
+ except Exception, ex:
+ log.warning("Dumpcore failed, id=%s name=%s: %s",
+ dominfo.id, dominfo.name, ex)
+
def instance():
"""Singleton constructor. Use this instead of the class constructor.
"""
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index 0dc1aae79c..16415d78a7 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -14,21 +14,23 @@ import time
import threading
import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
-import xen.util.ip
-from xen.xend.server import channel, controller
+from xen.util.ip import check_subnet, get_current_ipgw
from xen.util.blkif import blkdev_uname_to_file
-from server.channel import channelFactory
-import server.SrvDaemon; xend = server.SrvDaemon.instance()
-from server import messages
+from xen.xend.server import controller
+from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance()
+from xen.xend.server import messages
+from xen.xend.server.channel import EventChannel, channelFactory
+from xen.xend import sxp
+from xen.xend.PrettyPrint import prettyprintstring
from xen.xend.XendBootloader import bootloader
-import sxp
-from XendLogging import log
+from xen.xend.XendLogging import log
from XendError import XendError, VmError
-from XendRoot import get_component
+from xen.xend.XendRoot import get_component
-from PrettyPrint import prettyprintstring
+from xen.xend.uuid import getUuid
+from xen.xend.xenstore import DBVar
"""Flag for a block device backend domain."""
SIF_BLK_BE_DOMAIN = (1<<4)
@@ -45,11 +47,16 @@ DOMAIN_REBOOT = 1
"""Shutdown code for suspend."""
DOMAIN_SUSPEND = 2
+"""Shutdown code for crash."""
+DOMAIN_CRASH = 3
+
"""Map shutdown codes to strings."""
shutdown_reasons = {
DOMAIN_POWEROFF: "poweroff",
DOMAIN_REBOOT : "reboot",
- DOMAIN_SUSPEND : "suspend" }
+ DOMAIN_SUSPEND : "suspend",
+ DOMAIN_CRASH : "crash",
+ }
"""Map shutdown reasons to the message type to use.
"""
@@ -81,7 +88,7 @@ STATE_VM_SUSPENDED = "suspended"
def domain_exists(name):
# See comment in XendDomain constructor.
xd = get_component('xen.xend.XendDomain')
- return xd.domain_lookup(name)
+ return xd.domain_lookup_by_name(name)
def shutdown_reason(code):
"""Get a shutdown reason from a code.
@@ -110,25 +117,6 @@ def get_config_handler(name):
"""
return config_handlers.get(name)
-"""Table of handlers for virtual machine images.
-Indexed by image type.
-"""
-image_handlers = {}
-
-def add_image_handler(name, h):
- """Add a handler for an image type
- @param name: image type
- @param h: handler: fn(config, name, memory, image)
- """
- image_handlers[name] = h
-
-def get_image_handler(name):
- """Get the handler for an image type.
- @param name: image type
- @return: handler or None
- """
- return image_handlers.get(name)
-
"""Table of handlers for devices.
Indexed by device type.
"""
@@ -139,61 +127,6 @@ def add_device_handler(name, type):
def get_device_handler(name):
return device_handlers[name]
-
-
-def vm_create(config):
- """Create a VM from a configuration.
- If a vm has been partially created and there is an error it
- is destroyed.
-
- @param config configuration
- @raise: VmError for invalid configuration
- """
- vm = XendDomainInfo()
- vm.construct(config)
- return vm
-
-def vm_restore(config):
- """Create a domain and a VM object to do a restore.
-
- @param config: domain configuration
- """
- vm = XendDomainInfo()
- dom = xc.domain_create()
- vm.dom_construct(dom, config)
- return vm
-
-def vm_recreate(savedinfo, info):
- """Create the VM object for an existing domain.
-
- @param savedinfo: saved info from the domain DB
- @type savedinfo: sxpr
- @param info: domain info from xc
- @type info: xc domain dict
- """
- log.debug('savedinfo=' + prettyprintstring(savedinfo))
- log.debug('info=' + str(info))
- vm = XendDomainInfo()
- vm.recreate = True
- vm.savedinfo = savedinfo
- vm.setdom(info['dom'])
- vm.memory = info['mem_kb']/1024
- start_time = sxp.child_value(savedinfo, 'start_time')
- if start_time is not None:
- vm.start_time = float(start_time)
- vm.restart_state = sxp.child_value(savedinfo, 'restart_state')
- vm.restart_count = int(sxp.child_value(savedinfo, 'restart_count', 0))
- restart_time = sxp.child_value(savedinfo, 'restart_time')
- if restart_time is not None:
- vm.restart_time = float(restart_time)
- config = sxp.child_value(savedinfo, 'config')
- if config:
- vm.construct(config)
- else:
- vm.name = sxp.child_value(savedinfo, 'name', "Domain-%d" % info['dom'])
- vm.recreate = False
- vm.savedinfo = None
- return vm
def dom_get(dom):
"""Get info from xen for an existing domain.
@@ -213,25 +146,104 @@ class XendDomainInfo:
"""
MINIMUM_RESTART_TIME = 20
- def __init__(self):
+    def create(cls, parentdb, config):
+        """Create a VM from a configuration.
+
+        A new uuid is generated and a child db node with that name is
+        added to parentdb for the VM.
+
+        @param parentdb:  parent db
+        @param config    configuration
+        @raise: VmError for invalid configuration
+        @return: vm
+        """
+        uuid = getUuid()
+        db = parentdb.addChild(uuid)
+        vm = cls(db)
+        vm.construct(config)
+        vm.saveDB(sync=True)
+        return vm
+
+    create = classmethod(create)
+
+    def recreate(cls, db, info):
+        """Create the VM object for an existing domain.
+
+        Saved fields are read back from db first; the domain id and memory
+        size are then taken from info. If a saved config was found the VM
+        is constructed from it, otherwise it is only given a default name.
+
+        @param db:        domain db
+        @param info:      domain info from xc
+        @return: vm
+        """
+        dom = info['dom']
+        vm = cls(db)
+        db.readDB()
+        vm.importFromDB()
+        config = vm.config
+        log.debug('info=' + str(info))
+        log.debug('config=' + prettyprintstring(config))
+
+        vm.setdom(dom)
+        vm.memory = info['mem_kb']/1024
+
+        if config:
+            try:
+                # Instance flag (set in __init__); createDevice() passes it
+                # on to the device controllers as recreate=.
+                vm.recreate = True
+                vm.construct(config)
+            finally:
+                # Reset the flag even if construct() raises.
+                vm.recreate = False
+        else:
+            # No saved config: name the VM after its domain id.
+            vm.setName("Domain-%d" % dom)
+
+        vm.exportToDB(save=True)
+        return vm
+
+    recreate = classmethod(recreate)
+
+ def restore(cls, parentdb, config, uuid=None):
+ """Create a domain and a VM object to do a restore.
+
+ @param parentdb: parent db
+ @param config: domain configuration
+ @param uuid: uuid to use
+ """
+ db = parentdb.addChild(uuid)
+ vm = cls(db)
+ dom = xc.domain_create()
+ vm.setdom(dom)
+ vm.dom_construct(vm.id, config)
+ vm.saveDB(sync=True)
+ return vm
+
+ restore = classmethod(restore)
+
+    # Fields passed to the db by exportToDB() and importFromDB().
+    # NOTE(review): 'id' is declared as 'str' here while setdom() stores an
+    # int - confirm the conversion DBVar applies on import.
+    __exports__ = [
+        DBVar('id', ty='str'),
+        DBVar('name', ty='str'),
+        DBVar('uuid', ty='str'),
+        DBVar('config', ty='sxpr'),
+        DBVar('start_time', ty='float'),
+        DBVar('state', ty='str'),
+        DBVar('store_mfn', ty='long'),
+        DBVar('restart_mode', ty='str'),
+        DBVar('restart_state', ty='str'),
+        DBVar('restart_time', ty='float'),
+        DBVar('restart_count', ty='int'),
+        ]
+
+ def __init__(self, db):
+ self.db = db
+ self.uuid = db.getName()
+
self.recreate = 0
self.restore = 0
+
self.config = None
self.id = None
- self.dom = None
self.cpu_weight = 1
self.start_time = None
self.name = None
self.memory = None
self.image = None
- self.ramdisk = None
- self.cmdline = None
self.channel = None
+ self.store_channel = None
+ self.store_mfn = None
self.controllers = {}
- self.configs = []
-
self.info = None
self.blkif_backend = False
self.netif_backend = False
@@ -249,22 +261,39 @@ class XendDomainInfo:
self.restart_count = 0
self.console_port = None
- self.savedinfo = None
- self.image_handler = None
- self.is_vmx = False
self.vcpus = 1
self.bootloader = None
+    def setDB(self, db):
+        """Replace the db node used by this VM.
+
+        @param db: domain db
+        """
+        self.db = db
+
+    def saveDB(self, save=False, sync=False):
+        """Save the VM's db node; save and sync are passed through to the db."""
+        self.db.saveDB(save=save, sync=sync)
+
+    def exportToDB(self, save=False, sync=False):
+        """Export the fields listed in __exports__ to the VM's db node.
+
+        The channel and the store channel, when present, are first saved
+        into the "channel" and "store_channel" child nodes. save and sync
+        are passed through to the db.
+        """
+        if self.channel:
+            self.channel.saveToDB(self.db.addChild("channel"))
+        if self.store_channel:
+            self.store_channel.saveToDB(self.db.addChild("store_channel"))
+        self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
+
+    def importFromDB(self):
+        """Import the fields listed in __exports__ from the VM's db node."""
+        self.db.importFromDB(self, fields=self.__exports__)
+
def setdom(self, dom):
"""Set the domain id.
@param dom: domain id
"""
- self.dom = int(dom)
- self.id = str(dom)
+ self.id = int(dom)
+ #self.db.id = self.id
def getDomain(self):
- return self.dom
+ return self.id
+
+    def setName(self, name):
+        """Set the domain name on this object and on its db node.
+
+        @param name: domain name
+        """
+        self.name = name
+        self.db.name = self.name
def getName(self):
return self.name
@@ -272,6 +301,9 @@ class XendDomainInfo:
def getChannel(self):
return self.channel
+    def getStoreChannel(self):
+        """Return the store channel (None until one has been set)."""
+        return self.store_channel
+
def update(self, info):
"""Update with info from xc.domain_getinfo().
"""
@@ -284,6 +316,7 @@ class XendDomainInfo:
self.state = state
self.state_updated.notifyAll()
self.state_updated.release()
+ self.saveDB()
def state_wait(self, state):
self.state_updated.acquire()
@@ -293,14 +326,12 @@ class XendDomainInfo:
def __str__(self):
s = "domain"
- s += " id=" + self.id
+ s += " id=" + str(self.id)
s += " name=" + self.name
s += " memory=" + str(self.memory)
console = self.getConsole()
if console:
s += " console=" + str(console.console_port)
- if self.image:
- s += " image=" + self.image
s += ""
return s
@@ -327,9 +358,10 @@ class XendDomainInfo:
self.controllers[type] = ctrl
return ctrl
- def createDevice(self, type, devconfig, recreate=False):
+ def createDevice(self, type, devconfig, change=False):
ctrl = self.findDeviceController(type)
- return ctrl.createDevice(devconfig, recreate=self.recreate)
+ return ctrl.createDevice(devconfig, recreate=self.recreate,
+ change=change)
def configureDevice(self, type, id, devconfig):
ctrl = self.getDeviceController(type)
@@ -343,30 +375,14 @@ class XendDomainInfo:
ctrl = self.getDeviceController(type)
return ctrl.deleteDevice(id)
- def getDevice(self, type, id):
+ def getDevice(self, type, id, error=True):
ctrl = self.getDeviceController(type)
- return ctrl.getDevice(id)
+ return ctrl.getDevice(id, error=error)
- def getDeviceByIndex(self, type, idx):
- ctrl = self.getDeviceController(type)
- return ctrl.getDeviceByIndex(idx)
-
- def getDeviceConfig(self, type, id):
- ctrl = self.getDeviceController(type)
- return ctrl.getDeviceConfig(id)
-
def getDeviceIds(self, type):
ctrl = self.getDeviceController(type)
return ctrl.getDeviceIds()
- def getDeviceIndexes(self, type):
- ctrl = self.getDeviceController(type)
- return ctrl.getDeviceIndexes()
-
- def getDeviceConfigs(self, type):
- ctrl = self.getDeviceController(type)
- return ctrl.getDeviceConfigs()
-
def getDeviceSxprs(self, type):
ctrl = self.getDeviceController(type)
return ctrl.getDeviceSxprs()
@@ -376,7 +392,8 @@ class XendDomainInfo:
['id', self.id],
['name', self.name],
['memory', self.memory] ]
-
+ if self.uuid:
+ sxpr.append(['uuid', self.uuid])
if self.info:
sxpr.append(['maxmem', self.info['maxmem_kb']/1024 ])
run = (self.info['running'] and 'r') or '-'
@@ -403,6 +420,8 @@ class XendDomainInfo:
if self.channel:
sxpr.append(self.channel.sxpr())
+ if self.store_channel:
+ sxpr.append(self.store_channel.sxpr())
console = self.getConsole()
if console:
sxpr.append(console.sxpr())
@@ -454,7 +473,7 @@ class XendDomainInfo:
return
if dominfo.is_terminated():
return
- if not self.dom or (dominfo.dom != self.dom):
+ if not self.id or (dominfo.id != self.id):
raise VmError('vm name clash: ' + name)
def construct(self, config):
@@ -467,10 +486,10 @@ class XendDomainInfo:
self.config = config
try:
# Initial domain create.
- self.name = sxp.child_value(config, 'name')
+ self.setName(sxp.child_value(config, 'name'))
self.check_name(self.name)
+ self.init_image()
self.configure_cpus(config)
- self.find_image_handler()
self.init_domain()
self.register_domain()
self.configure_bootloader()
@@ -481,6 +500,7 @@ class XendDomainInfo:
self.configure_restart()
self.construct_image()
self.configure()
+ self.exportToDB()
except Exception, ex:
# Catch errors, cleanup and re-raise.
print 'Domain construction error:', ex
@@ -492,6 +512,7 @@ class XendDomainInfo:
def register_domain(self):
xd = get_component('xen.xend.XendDomain')
xd._add_domain(self)
+ self.exportToDB()
def configure_cpus(self, config):
try:
@@ -502,8 +523,8 @@ class XendDomainInfo:
if self.memory is None:
raise VmError('missing memory size')
cpu = sxp.child_value(config, 'cpu')
- if self.recreate and self.dom and cpu is not None and int(cpu) >= 0:
- xc.domain_pincpu(self.dom, 0, 1<<int(cpu))
+ if self.recreate and self.id and cpu is not None and int(cpu) >= 0:
+ xc.domain_pincpu(self.id, 0, 1<<int(cpu))
try:
image = sxp.child_value(self.config, 'image')
vcpus = sxp.child_value(image, 'vcpus')
@@ -512,89 +533,50 @@ class XendDomainInfo:
except:
raise VmError('invalid vcpus value')
- def find_image_handler(self):
- """Construct the boot image for the domain.
-
- @return vm
+ def init_image(self):
+ """Create boot image handler for the domain.
"""
image = sxp.child_value(self.config, 'image')
if image is None:
raise VmError('missing image')
- image_name = sxp.name(image)
- if image_name is None:
- raise VmError('missing image name')
- if image_name == "vmx":
- self.is_vmx = True
- image_handler = get_image_handler(image_name)
- if image_handler is None:
- raise VmError('unknown image type: ' + image_name)
- self.image_handler = image_handler
- return self
+ self.image = ImageHandler.create(self, image)
def construct_image(self):
- image = sxp.child_value(self.config, 'image')
- self.image_handler(self, image)
- return self
-
- def config_devices(self, name):
- """Get a list of the 'device' nodes of a given type from the config.
-
- @param name: device type
- @type name: string
- @return: device configs
- @rtype: list
- """
- devices = []
- for d in sxp.children(self.config, 'device'):
- dev = sxp.child0(d)
- if dev is None: continue
- if name == sxp.name(dev):
- devices.append(dev)
- return devices
-
- def get_device_savedinfo(self, type, index):
- val = None
- if self.savedinfo is None:
- return val
- devices = sxp.child(self.savedinfo, 'devices')
- if devices is None:
- return val
- index = str(index)
- for d in sxp.children(devices, type):
- dindex = sxp.child_value(d, 'index')
- if dindex is None: continue
- if str(dindex) == index:
- val = d
- break
- return val
-
- def get_device_recreate(self, type, index):
- return self.get_device_savedinfo(type, index) or self.recreate
-
- def add_config(self, val):
- """Add configuration data to a virtual machine.
-
- @param val: data to add
+ """Construct the boot image for the domain.
"""
- self.configs.append(val)
-
- def destroy(self):
- """Completely destroy the vm.
+ self.create_channel()
+ self.image.createImage()
+ self.image.exportToDB()
+ #if self.store_channel:
+ # self.db.introduceDomain(self.id,
+ # self.store_mfn,
+ # self.store_channel)
+
+ def delete(self):
+ """Delete the vm's db.
"""
- self.cleanup()
- return self.destroy_domain()
+ if self.dom_get(self.id):
+ return
+ self.id = None
+ self.saveDB(sync=True)
+ try:
+ # Todo: eventually we will have to wait for devices to signal
+ # destruction before we can delete the db.
+ if self.db:
+ self.db.delete()
+ except Exception, ex:
+ log.warning("error in domain db delete: %s", ex)
+ pass
def destroy_domain(self):
"""Destroy the vm's domain.
The domain will not finally go away unless all vm
devices have been released.
"""
- if self.channel:
- self.channel.close()
- self.channel = None
- if self.dom is None: return 0
+ if self.id is None:
+ return
try:
- return xc.domain_destroy(dom=self.dom)
+ xc.domain_destroy(dom=self.id)
except Exception, err:
log.exception("Domain destroy failed: %s", self.name)
@@ -603,6 +585,37 @@ class XendDomainInfo:
"""
self.state = STATE_VM_TERMINATED
self.release_devices()
+ if self.channel:
+ try:
+ self.channel.close()
+ self.channel = None
+ except:
+ pass
+ if self.store_channel:
+ try:
+ self.store_channel.close()
+ self.store_channel = None
+ except:
+ pass
+ #try:
+ # self.db.releaseDomain(self.id)
+ #except Exception, ex:
+ # log.warning("error in domain release on xenstore: %s", ex)
+ # pass
+ if self.image:
+ try:
+ self.image.destroy()
+ self.image = None
+ except:
+ pass
+
+ def destroy(self):
+ """Cleanup vm and destroy domain.
+ """
+ self.cleanup()
+ self.destroy_domain()
+ self.saveDB()
+ return 0
def is_terminated(self):
"""Check if a domain has been terminated.
@@ -616,20 +629,13 @@ class XendDomainInfo:
for ctrl in self.getDeviceControllers():
if ctrl.isDestroyed(): continue
ctrl.destroyController(reboot=reboot)
- if not reboot:
- self.configs = []
def show(self):
"""Print virtual machine info.
"""
- print "[VM dom=%d name=%s memory=%d" % (self.dom, self.name, self.memory)
+ print "[VM dom=%d name=%s memory=%d" % (self.id, self.name, self.memory)
print "image:"
sxp.show(self.image)
- print
- for val in self.configs:
- print "config:"
- sxp.show(val)
- print
print "]"
def init_domain(self):
@@ -639,107 +645,42 @@ class XendDomainInfo:
return
if self.start_time is None:
self.start_time = time.time()
- if self.restore:
- return
- dom = self.dom or 0
- memory = self.memory
try:
cpu = int(sxp.child_value(self.config, 'cpu', '-1'))
except:
raise VmError('invalid cpu')
- cpu_weight = self.cpu_weight
- memory = memory * 1024 + self.pgtable_size(memory)
- dom = xc.domain_create(dom= dom)
- if self.bootloader:
- try:
- if kernel: os.unlink(kernel)
- if ramdisk: os.unlink(ramdisk)
- except OSError, e:
- log.warning('unable to unlink kernel/ramdisk: %s' %(e,))
-
- if dom <= 0:
- raise VmError('Creating domain failed: name=%s memory=%d'
- % (self.name, memory))
- xc.domain_setcpuweight(dom, cpu_weight)
- xc.domain_setmaxmem(dom, memory)
- xc.domain_memory_increase_reservation(dom, memory)
- if cpu != -1:
- xc.domain_pincpu(dom, 0, 1<<int(cpu))
- log.debug('init_domain> Created domain=%d name=%s memory=%d', dom, self.name, memory)
- self.setdom(dom)
-
- def build_domain(self, ostype, kernel, ramdisk, cmdline, memmap):
- """Build the domain boot image.
- """
- if self.recreate or self.restore: return
- if not os.path.isfile(kernel):
- raise VmError('Kernel image does not exist: %s' % kernel)
- if ramdisk and not os.path.isfile(ramdisk):
- raise VmError('Kernel ramdisk does not exist: %s' % ramdisk)
- if len(cmdline) >= 256:
- log.warning('kernel cmdline too long, domain %d', self.dom)
- dom = self.dom
- buildfn = getattr(xc, '%s_build' % ostype)
- flags = 0
- if self.netif_backend: flags |= SIF_NET_BE_DOMAIN
- if self.blkif_backend: flags |= SIF_BLK_BE_DOMAIN
- #todo generalise this
- if ostype == "vmx":
- log.debug('building vmx domain')
- err = buildfn(dom = dom,
- image = kernel,
- control_evtchn = 0,
- memsize = self.memory,
- memmap = memmap,
- cmdline = cmdline,
- ramdisk = ramdisk,
- flags = flags)
- else:
- log.debug('building dom with %d vcpus', self.vcpus)
- err = buildfn(dom = dom,
- image = kernel,
- control_evtchn = self.channel.getRemotePort(),
- cmdline = cmdline,
- ramdisk = ramdisk,
- flags = flags,
- vcpus = self.vcpus)
- if err != 0:
- raise VmError('Building domain failed: type=%s dom=%d err=%d'
- % (ostype, dom, err))
-
- def create_domain(self, ostype, kernel, ramdisk, cmdline, memmap=''):
- """Create a domain. Builds the image but does not configure it.
+ dom = self.image.initDomain(self.id, self.memory, cpu, self.cpu_weight)
+ log.debug('init_domain> Created domain=%d name=%s memory=%d',
+ dom, self.name, self.memory)
+ if not self.restore:
+ self.setdom(dom)
- @param ostype: OS type
- @param kernel: kernel image
- @param ramdisk: kernel ramdisk
- @param cmdline: kernel commandline
+ def openChannel(self, key, local, remote):
+ """Create a channel to the domain.
+ If saved info is available recreate the channel.
+
+ @param key db key for the saved data (if any)
+ @param local default local port
+ @param remote default remote port
"""
-
- self.create_channel()
- self.build_domain(ostype, kernel, ramdisk, cmdline, memmap)
- self.image = kernel
- self.ramdisk = ramdisk
- self.cmdline = cmdline
-
+ db = self.db.addChild(key)
+ chan = channelFactory().restoreFromDB(db, self.id, local, remote)
+ #todo: save here?
+ #chan.saveToDB(db)
+ return chan
+
+ def eventChannel(self, key):
+ db = self.db.addChild(key)
+ return EventChannel.restoreFromDB(db, 0, self.id)
+
def create_channel(self):
- """Create the control channel to the domain.
- If saved info is available recreate the channel using the saved ports.
+ """Create the channels to the domain.
"""
- local = 0
- remote = 1
- if self.savedinfo:
- info = sxp.child(self.savedinfo, "channel")
- if info:
- local = int(sxp.child_value(info, "local_port", 0))
- remote = int(sxp.child_value(info, "remote_port", 1))
- self.channel = channelFactory().openChannel(self.dom,
- local_port=local,
- remote_port=remote)
+ self.channel = self.openChannel("channel", 0, 1)
+ self.store_channel = self.eventChannel("store_channel")
def create_configured_devices(self):
devices = sxp.children(self.config, 'device')
- indexes = {}
for d in devices:
dev_config = sxp.child0(d)
if dev_config is None:
@@ -748,13 +689,7 @@ class XendDomainInfo:
ctrl_type = get_device_handler(dev_type)
if ctrl_type is None:
raise VmError('unknown device type: ' + dev_type)
- # Keep track of device indexes by type, so we can fish
- # out saved info for recreation.
- idx = indexes.get(dev_type, -1)
- idx += 1
- indexes[ctrl_type] = idx
- recreate = self.get_device_recreate(dev_type, idx)
- self.createDevice(ctrl_type, dev_config, recreate=recreate)
+ self.createDevice(ctrl_type, dev_config)
def create_devices(self):
"""Create the devices for a vm.
@@ -766,43 +701,6 @@ class XendDomainInfo:
ctrl.initController(reboot=True)
else:
self.create_configured_devices()
- if self.is_vmx:
- self.create_vmx_model()
-
- def create_vmx_model(self):
- #todo: remove special case for vmx
- device_model = sxp.child_value(self.config, 'device_model')
- if not device_model:
- raise VmError("vmx: missing device model")
- device_config = sxp.child_value(self.config, 'device_config')
- if not device_config:
- raise VmError("vmx: missing device config")
- #todo: self.memory?
- memory = sxp.child_value(self.config, "memory")
- # Create an event channel
- device_channel = channel.eventChannel(0, self.dom)
- # see if a vncviewer was specified
- # XXX RN: bit of a hack. should unify this, maybe stick in config space
- vncconnect=""
- image = sxp.child_value(self.config, "image")
- args = sxp.child_value(image, "args")
- if args:
- arg_list = string.split(args)
- for arg in arg_list:
- al = string.split(arg, '=')
- if al[0] == "VNC_VIEWER":
- vncconnect=" -v %s" % al[1]
- break
-
- # Execute device model.
- #todo: Error handling
- # XXX RN: note that the order of args matter!
- os.system(device_model
- + " -f %s" % device_config
- + vncconnect
- + " -d %d" % self.dom
- + " -p %d" % device_channel['port1']
- + " -m %s" % memory)
def device_create(self, dev_config):
"""Create a new device.
@@ -814,16 +712,14 @@ class XendDomainInfo:
self.config.append(['device', dev.getConfig()])
return dev.sxpr()
- def device_configure(self, dev_config, idx):
+ def device_configure(self, dev_config, id):
"""Configure an existing device.
@param dev_config: device configuration
- @param idx: device index
+ @param id: device id
"""
type = sxp.name(dev_config)
- dev = self.getDeviceByIndex(type, idx)
- if not dev:
- raise VmError('invalid device: %s %s' % (type, idx))
+ dev = self.getDevice(type, id)
old_config = dev.getConfig()
new_config = dev.configure(dev_config, change=True)
# Patch new config into vm config.
@@ -833,26 +729,22 @@ class XendDomainInfo:
self.config[old_index] = new_full_config
return new_config
- def device_refresh(self, type, idx):
+ def device_refresh(self, type, id):
"""Refresh a device.
@param type: device type
- @param idx: device index
+ @param id: device id
"""
- dev = self.getDeviceByIndex(type, idx)
- if not dev:
- raise VmError('invalid device: %s %s' % (type, idx))
+ dev = self.getDevice(type, id)
dev.refresh()
- def device_delete(self, type, idx):
+ def device_delete(self, type, id):
"""Destroy and remove a device.
@param type: device type
- @param idx: device index
+ @param id: device id
"""
- dev = self.getDeviceByIndex(type, idx)
- if not dev:
- raise VmError('invalid device: %s %s' % (type, idx))
+ dev = self.getDevice(type, id)
dev_config = dev.getConfig()
if dev_config:
self.config.remove(['device', dev_config])
@@ -861,9 +753,7 @@ class XendDomainInfo:
def configure_bootloader(self):
"""Configure boot loader.
"""
- bl = sxp.child_value(self.config, "bootloader")
- if bl is not None:
- self.bootloader = bl
+ self.bootloader = sxp.child_value(self.config, "bootloader")
def configure_console(self):
"""Configure the vm console port.
@@ -946,6 +836,7 @@ class XendDomainInfo:
if self.bootloader:
self.config = self.bootloader_config()
self.construct(self.config)
+ self.saveDB()
finally:
self.restart_state = None
@@ -1051,19 +942,6 @@ class XendDomainInfo:
log.warning("Unknown config field %s", field_name)
index[field_name] = field_index + 1
- def pgtable_size(self, memory):
- """Return the size of memory needed for 1:1 page tables for physical
- mode.
-
- @param memory: size in MB
- @return size in KB
- """
- if self.is_vmx:
- # Logic x86-32 specific.
- # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
- return (1 + ((memory + 3) >> 2)) * 4
- return 0
-
def mem_target_set(self, target):
"""Set domain memory target in pages.
"""
@@ -1090,83 +968,6 @@ class XendDomainInfo:
return 0
return timeout - (time.time() - self.shutdown_pending['start'])
-def vm_image_linux(vm, image):
- """Create a VM for a linux image.
-
- @param name: vm name
- @param memory: vm memory
- @param image: image config
- @return: vm
- """
- kernel = sxp.child_value(image, "kernel")
- cmdline = ""
- ip = sxp.child_value(image, "ip", None)
- if ip:
- cmdline += " ip=" + ip
- root = sxp.child_value(image, "root")
- if root:
- cmdline += " root=" + root
- args = sxp.child_value(image, "args")
- if args:
- cmdline += " " + args
- ramdisk = sxp.child_value(image, "ramdisk", '')
- log.debug("creating linux domain with cmdline: %s" %(cmdline,))
- vm.create_domain("linux", kernel, ramdisk, cmdline)
- return vm
-
-def vm_image_plan9(vm, image):
- """Create a VM for a Plan 9 image.
-
- name vm name
- memory vm memory
- image image config
-
- returns vm
- """
- kernel = sxp.child_value(image, "kernel")
- cmdline = ""
- ip = sxp.child_value(image, "ip", "dhcp")
- if ip:
- cmdline += "ip=" + ip
- root = sxp.child_value(image, "root")
- if root:
- cmdline += "root=" + root
- args = sxp.child_value(image, "args")
- if args:
- cmdline += " " + args
- ramdisk = sxp.child_value(image, "ramdisk", '')
- log.debug("creating plan9 domain with cmdline: %s" %(cmdline,))
- vm.create_domain("plan9", kernel, ramdisk, cmdline)
- return vm
-
-def vm_image_vmx(vm, image):
- """Create a VM for the VMX environment.
-
- @param name: vm name
- @param memory: vm memory
- @param image: image config
- @return: vm
- """
- kernel = sxp.child_value(image, "kernel")
- cmdline = ""
- ip = sxp.child_value(image, "ip", "dhcp")
- if ip:
- cmdline += " ip=" + ip
- root = sxp.child_value(image, "root")
- if root:
- cmdline += " root=" + root
- args = sxp.child_value(image, "args")
- if args:
- cmdline += " " + args
- ramdisk = sxp.child_value(image, "ramdisk", '')
- memmap = sxp.child_value(vm.config, "memmap", '')
- memmap = sxp.parse(open(memmap))[0]
- from xen.util.memmap import memmap_parse
- memmap = memmap_parse(memmap)
- log.debug("creating vmx domain with cmdline: %s" %(cmdline,))
- vm.create_domain("vmx", kernel, ramdisk, cmdline, memmap)
- return vm
-
def vm_field_ignore(vm, config, val, index):
"""Dummy config field handler used for fields with built-in handling.
@@ -1192,13 +993,20 @@ def vm_field_maxmem(vm, config, val, index):
maxmem = int(maxmem)
except:
raise VmError("invalid maxmem: " + str(maxmem))
- xc.domain_setmaxmem(vm.dom, maxmem_kb = maxmem * 1024)
+ xc.domain_setmaxmem(vm.id, maxmem_kb = maxmem * 1024)
#============================================================================
# Register image handlers.
-add_image_handler('linux', vm_image_linux)
-add_image_handler('plan9', vm_image_plan9)
-add_image_handler('vmx', vm_image_vmx)
+from image import \
+ addImageHandlerClass, \
+ ImageHandler, \
+ LinuxImageHandler, \
+ Plan9ImageHandler, \
+ VmxImageHandler
+
+addImageHandlerClass(LinuxImageHandler)
+addImageHandlerClass(Plan9ImageHandler)
+addImageHandlerClass(VmxImageHandler)
# Ignore the fields we already handle.
add_config_handler('name', vm_field_ignore)
diff --git a/tools/python/xen/xend/XendRoot.py b/tools/python/xen/xend/XendRoot.py
index d1bd503f8a..045a5a5fa4 100644
--- a/tools/python/xen/xend/XendRoot.py
+++ b/tools/python/xen/xend/XendRoot.py
@@ -25,9 +25,6 @@ import sxp
class XendRoot:
"""Root of the management classes."""
- """Default path to the root of the database."""
- dbroot_default = "/var/lib/xen/xend-db"
-
"""Default path to the config file."""
config_default = "/etc/xen/xend-config.sxp"
@@ -82,7 +79,6 @@ class XendRoot:
components = {}
def __init__(self):
- self.dbroot = None
self.config_path = None
self.config = None
self.logging = None
@@ -171,13 +167,15 @@ class XendRoot:
def configure(self):
self.set_config()
self.configure_logger()
- self.dbroot = self.get_config_value("dbroot", self.dbroot_default)
def configure_logger(self):
logfile = self.get_config_value("logfile", self.logfile_default)
loglevel = self.get_config_value("loglevel", self.loglevel_default)
self.logging = XendLogging(logfile, level=loglevel)
- #self.logging.addLogStderr()
+
+ from xen.xend.server import params
+ if params.XEND_DEBUG:
+ self.logging.addLogStderr()
def get_logging(self):
"""Get the XendLogging instance.
@@ -189,11 +187,6 @@ class XendRoot:
"""
return self.logging and self.logging.getLogger()
- def get_dbroot(self):
- """Get the path to the database root.
- """
- return self.dbroot
-
def set_config(self):
"""If the config file exists, read it. If not, ignore it.
@@ -241,9 +234,9 @@ class XendRoot:
def get_config_bool(self, name, val=None):
v = self.get_config_value(name, val)
- if v in ['yes', '1', 'on', 1, True]:
+ if v in ['yes', '1', 'on', 'true', 1, True]:
return True
- if v in ['no', '0', 'off', 0, False]:
+ if v in ['no', '0', 'off', 'false', 0, False]:
return False
raise XendError("invalid xend config %s: expected bool: %s" % (name, v))
@@ -325,7 +318,7 @@ class XendRoot:
return self.get_config_value('network-script', 'network')
def get_enable_dump(self):
- return self.get_config_value('enable-dump', 'false')
+ return self.get_config_bool('enable-dump', 'no')
def get_vif_bridge(self):
return self.get_config_value('vif-bridge', 'xen-br0')
diff --git a/tools/python/xen/xend/XendVnet.py b/tools/python/xen/xend/XendVnet.py
index d95fd204aa..3614127c49 100644
--- a/tools/python/xen/xend/XendVnet.py
+++ b/tools/python/xen/xend/XendVnet.py
@@ -4,11 +4,10 @@
"""
from xen.util import Brctl
-
-import sxp
-import XendDB
-from XendError import XendError
-from XendLogging import log
+from xen.xend import sxp
+from xen.xend.XendError import XendError
+from xen.xend.XendLogging import log
+from xen.xend.xenstore import XenNode, DBMap
def vnet_cmd(cmd):
out = None
@@ -63,14 +62,15 @@ class XendVnet:
"""Index of all vnets. Singleton.
"""
- dbpath = "vnet"
+ dbpath = "/vnet"
def __init__(self):
# Table of vnet info indexed by vnet id.
self.vnet = {}
- self.db = XendDB.XendDB(self.dbpath)
- vnets = self.db.fetchall("")
- for config in vnets.values():
+ self.dbmap = DBMap(db=XenNode(self.dbpath))
+ self.dbmap.readDB()
+ for vnetdb in self.dbmap.values():
+ config = vnetdb.config
info = XendVnetInfo(config)
self.vnet[info.id] = info
try:
@@ -115,7 +115,7 @@ class XendVnet:
"""
info = XendVnetInfo(config)
self.vnet[info.id] = info
- self.db.save(info.id, info.sxpr())
+ self.dbmap["%s/config" % info.id] = info.sxpr()
info.configure()
def vnet_delete(self, id):
@@ -126,7 +126,7 @@ class XendVnet:
info = self.vnet_get(id)
if info:
del self.vnet[id]
- self.db.delete(id)
+ self.dbmap.delete(id)
info.delete()
def instance():
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
new file mode 100644
index 0000000000..e0d70581bf
--- /dev/null
+++ b/tools/python/xen/xend/image.py
@@ -0,0 +1,339 @@
+import os
+
+import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+from xen.xend import sxp
+from xen.xend.XendError import VmError
+from xen.xend.XendLogging import log
+from xen.xend.xenstore import DBVar
+
+class ImageHandler:
+ """Abstract base class for image handlers.
+
+ initDomain() is called to initialise the domain memory.
+
+ createImage() is called to configure and build the domain from its
+ kernel image and ramdisk etc.
+
+ The method buildDomain() is used to build the domain, and must be
+ defined in a subclass. Usually this is the only method that needs
+ defining in a subclass.
+
+ The method createDeviceModel() is called to create the domain device
+ model if it needs one. The default is to do nothing.
+
+ The method destroy() is called when the domain is destroyed.
+ The default is to do nothing.
+
+ """
+
+ #======================================================================
+ # Class vars and methods.
+
+ """Table of image handler classes for virtual machine images.
+ Indexed by image type.
+ """
+ imageHandlerClasses = {}
+
+ def addImageHandlerClass(cls, h):
+ """Add a handler class for an image type
+ @param h: handler: ImageHandler subclass
+ """
+ cls.imageHandlerClasses[h.ostype] = h
+
+ addImageHandlerClass = classmethod(addImageHandlerClass)
+
+ def findImageHandlerClass(cls, image):
+ """Find the image handler class for an image config.
+
+ @param image config
+ @return ImageHandler subclass (raises VmError if the image type is missing or unknown)
+ """
+ ty = sxp.name(image)
+ if ty is None:
+ raise VmError('missing image type')
+ imageClass = cls.imageHandlerClasses.get(ty)
+ if imageClass is None:
+ raise VmError('unknown image type: ' + ty)
+ return imageClass
+
+ findImageHandlerClass = classmethod(findImageHandlerClass)
+
+ def create(cls, vm, image):
+ """Create an image handler for a vm.
+
+ @param vm vm
+ @param image image config
+ @return ImageHandler instance
+ """
+ imageClass = cls.findImageHandlerClass(image)
+ return imageClass(vm, image)
+
+ create = classmethod(create)
+
+ #======================================================================
+ # Instance vars and methods.
+
+ db = None
+ ostype = None
+
+ config = None
+ kernel = None
+ ramdisk = None
+ cmdline = None
+ flags = 0
+
+ __exports__ = [
+ DBVar('ostype', ty='str'),
+ DBVar('config', ty='sxpr'),
+ DBVar('kernel', ty='str'),
+ DBVar('ramdisk', ty='str'),
+ DBVar('cmdline', ty='str'),
+ DBVar('flags', ty='int'),
+ ]
+
+ def __init__(self, vm, config):
+ self.vm = vm
+ self.db = vm.db.addChild('/image')
+ self.config = config
+
+ def exportToDB(self, save=False):
+ self.db.exportToDB(self, fields=self.__exports__, save=save)
+
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
+
+ def unlink(self, f):
+ if not f: return
+ try:
+ os.unlink(f)
+ except OSError, ex:
+ log.warning("error removing bootloader file '%s': %s", f, ex)
+
+ def initDomain(self, dom, memory, cpu, cpu_weight):
+ """Initial domain create.
+
+ @return domain id
+ """
+
+ mem_kb = self.getDomainMemory(memory)
+ if not self.vm.restore:
+ dom = xc.domain_create(dom = dom or 0)
+ # If a bootloader was used, unlink its files here. But should this go after buildDomain()?
+ if self.vm.bootloader:
+ self.unlink(self.kernel)
+ self.unlink(self.ramdisk)
+ if dom <= 0:
+ raise VmError('Creating domain failed: name=%s' % self.vm.name)
+ log.debug("initDomain: cpu=%d mem_kb=%d dom=%d", cpu, mem_kb, dom)
+ # xc.domain_setuuid(dom, uuid)
+ xc.domain_setcpuweight(dom, cpu_weight)
+ xc.domain_setmaxmem(dom, mem_kb)
+ xc.domain_memory_increase_reservation(dom, mem_kb)
+ if cpu != -1:
+ xc.domain_pincpu(dom, 0, 1<<int(cpu))
+ return dom
+
+ def createImage(self):
+ """Entry point to create domain memory image.
+ Override in subclass if needed.
+ """
+ self.configure()
+ self.createDomain()
+
+ def configure(self):
+ """Config actions common to all unix-like domains."""
+ self.kernel = sxp.child_value(self.config, "kernel")
+ self.cmdline = ""
+ ip = sxp.child_value(self.config, "ip", None)
+ if ip:
+ self.cmdline += " ip=" + ip
+ root = sxp.child_value(self.config, "root")
+ if root:
+ self.cmdline += " root=" + root
+ args = sxp.child_value(self.config, "args")
+ if args:
+ self.cmdline += " " + args
+ self.ramdisk = sxp.child_value(self.config, "ramdisk", '')
+
+ def createDomain(self):
+ """Build the domain boot image.
+ """
+ # Set params and call buildDomain().
+ self.flags = 0
+ if self.vm.netif_backend: self.flags |= SIF_NET_BE_DOMAIN
+ if self.vm.blkif_backend: self.flags |= SIF_BLK_BE_DOMAIN
+
+ if self.vm.recreate or self.vm.restore:
+ return
+ if not os.path.isfile(self.kernel):
+ raise VmError('Kernel image does not exist: %s' % self.kernel)
+ if self.ramdisk and not os.path.isfile(self.ramdisk):
+ raise VmError('Kernel ramdisk does not exist: %s' % self.ramdisk)
+ if len(self.cmdline) >= 256:
+ log.warning('kernel cmdline too long, domain %d', self.vm.getDomain())
+
+ log.info("buildDomain os=%s dom=%d vcpus=%d", self.ostype,
+ self.vm.getDomain(), self.vm.vcpus)
+ err = self.buildDomain()
+ if err != 0:
+ raise VmError('Building domain failed: ostype=%s dom=%d err=%d'
+ % (self.ostype, self.vm.getDomain(), err))
+
+ def getDomainMemory(self, mem_mb):
+ """Memory (in KB) the domain will need for mem_mb (in MB)."""
+ return mem_mb * 1024
+
+ def buildDomain(self):
+ """Build the domain. Define in subclass."""
+ raise NotImplementedError()
+
+ def createDeviceModel(self):
+ """Create device model for the domain (define in subclass if needed)."""
+ pass
+
+ def destroy(self):
+ """Extra cleanup on domain destroy (define in subclass if needed)."""
+ pass
+
+addImageHandlerClass = ImageHandler.addImageHandlerClass
+
+class LinuxImageHandler(ImageHandler):
+
+ ostype = "linux"
+
+ def buildDomain(self):
+ if self.vm.store_channel:
+ store_evtchn = self.vm.store_channel.port2
+ else:
+ store_evtchn = 0
+ ret = xc.linux_build(dom = self.vm.getDomain(),
+ image = self.kernel,
+ control_evtchn = self.vm.channel.getRemotePort(),
+ store_evtchn = store_evtchn,
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ flags = self.flags,
+ vcpus = self.vm.vcpus)
+ if isinstance(ret, dict):
+ self.vm.store_mfn = ret.get('store_mfn')
+ return 0
+ return ret
+
+class Plan9ImageHandler(ImageHandler):
+
+ ostype = "plan9"
+
+ def buildDomain(self):
+ return xc.plan9_build(dom = self.vm.getDomain(),
+ image = self.kernel,
+ control_evtchn = self.vm.channel.getRemotePort(),
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ flags = self.flags,
+ vcpus = self.vm.vcpus)
+
+class VmxImageHandler(ImageHandler):
+
+ __exports__ = ImageHandler.__exports__ + [
+ DBVar('memmap', ty='str'),
+ DBVar('memmap_value', ty='sxpr'),
+ # device channel?
+ ]
+
+ ostype = "vmx"
+ memmap = None
+ memmap_value = None
+ device_channel = None
+
+ def createImage(self):
+ """Create a VM for the VMX environment.
+ """
+ self.configure()
+ self.parseMemmap()
+ self.createDomain()
+
+ def buildDomain(self):
+ return xc.vmx_build(dom = self.vm.getDomain(),
+ image = self.kernel,
+ control_evtchn = 0,
+ memsize = self.vm.memory,
+ memmap = self.memmap_value,
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ flags = self.flags)
+
+ def parseMemmap(self):
+ self.memmap = sxp.child_value(self.vm.config, "memmap")
+ if self.memmap is None:
+ raise VmError("missing memmap")
+ memmap = sxp.parse(open(self.memmap))[0]
+ from xen.util.memmap import memmap_parse
+ self.memmap_value = memmap_parse(memmap)
+
+ def createDeviceModel_old(self):
+ device_model = sxp.child_value(self.vm.config, 'device_model')
+ if not device_model:
+ raise VmError("vmx: missing device model")
+ device_config = sxp.child_value(self.vm.config, 'device_config')
+ if not device_config:
+ raise VmError("vmx: missing device config")
+ # Create an event channel.
+ self.device_channel = channel.eventChannel(0, self.vm.getDomain())
+ # Execute device model.
+ #todo: Error handling
+ os.system(device_model
+ + " -f %s" % device_config
+ + " -d %d" % self.vm.getDomain()
+ + " -p %d" % self.device_channel['port1']
+ + " -m %s" % self.vm.memory)
+
+ def createDeviceModel(self):
+ device_model = sxp.child_value(self.vm.config, 'device_model')
+ if not device_model:
+ raise VmError("vmx: missing device model")
+ device_config = sxp.child_value(self.vm.config, 'device_config')
+ if not device_config:
+ raise VmError("vmx: missing device config")
+ # Create an event channel
+ self.device_channel = channel.eventChannel(0, self.vm.getDomain())
+ # Execute device model.
+ #todo: Error handling
+ # XXX RN: note that the order of args matters!
+ os.system(device_model
+ + " -f %s" % device_config
+ + self.vncParams()
+ + " -d %d" % self.vm.getDomain()
+ + " -p %d" % self.device_channel['port1']
+ + " -m %s" % self.vm.memory)
+
+ def vncParams(self):
+ # see if a vncviewer was specified
+ # XXX RN: bit of a hack. should unify this, maybe stick in config space
+ vncconnect=""
+ image = self.config
+ args = sxp.child_value(image, "args")
+ if args:
+ arg_list = string.split(args)
+ for arg in arg_list:
+ al = string.split(arg, '=')
+ if al[0] == "VNC_VIEWER":
+ vncconnect=" -v %s" % al[1]
+ break
+ return vncconnect
+
+ def destroy(self):
+ channel.eventChannelClose(self.device_channel)
+
+ def getDomainMemory(self, mem_mb):
+ return (mem_mb * 1024) + self.getPageTableSize(mem_mb)
+
+ def getPageTableSize(self, mem_mb):
+ """Return the size of memory needed for 1:1 page tables for physical
+ mode.
+
+ @param mem_mb: size in MB
+ @return size in KB
+ """
+ # Logic x86-32 specific.
+ # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
+ return (1 + ((mem_mb + 3) >> 2)) * 4
diff --git a/tools/python/xen/xend/server/SrvConsole.py b/tools/python/xen/xend/server/SrvConsole.py
index 233f62b968..f147f2810b 100644
--- a/tools/python/xen/xend/server/SrvConsole.py
+++ b/tools/python/xen/xend/server/SrvConsole.py
@@ -30,7 +30,7 @@ class SrvConsole(SrvDir):
#self.ls()
req.write('<p>%s</p>' % self.info)
req.write('<p><a href="%s">Connect to domain %d</a></p>'
- % (self.info.uri(), self.info.dom))
+ % (self.info.uri(), self.info.id))
self.form(req)
req.write('</body></html>')
diff --git a/tools/python/xen/xend/server/SrvDaemon.py b/tools/python/xen/xend/server/SrvDaemon.py
index 061aa3dba7..133df206b9 100644
--- a/tools/python/xen/xend/server/SrvDaemon.py
+++ b/tools/python/xen/xend/server/SrvDaemon.py
@@ -32,9 +32,6 @@ import event
import relocate
from params import *
-DAEMONIZE = 0
-DEBUG = 1
-
class Daemon:
"""The xend daemon.
"""
@@ -128,9 +125,13 @@ class Daemon:
def cleanup_xend(self, kill=False):
return self.cleanup_process(XEND_PID_FILE, "xend", kill)
+ def cleanup_xenstored(self, kill=False):
+ return self.cleanup_process(XENSTORED_PID_FILE, "xenstored", kill)
+
def cleanup(self, kill=False):
self.cleanup_xend(kill=kill)
-
+ #self.cleanup_xenstored(kill=kill)
+
def status(self):
"""Returns the status of the xend daemon.
The return value is defined by the LSB:
@@ -166,8 +167,29 @@ class Daemon:
pidfile.close()
return pid
+ def start_xenstored(self):
+ """Fork and exec xenstored, writing its pid to XENSTORED_PID_FILE.
+ """
+ def mkdirs(p):
+ try:
+ os.makedirs(p)
+ except:
+ pass
+ mkdirs(XENSTORED_RUN_DIR)
+ mkdirs(XENSTORED_LIB_DIR)
+
+ pid = self.fork_pid(XENSTORED_PID_FILE)
+ if pid:
+ # Parent
+ log.info("Started xenstored, pid=%d", pid)
+ else:
+ # Child
+ if XEND_DAEMONIZE and (not XENSTORED_DEBUG):
+ self.daemonize()
+ os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork")
+
def daemonize(self):
- if not DAEMONIZE: return
+ if not XEND_DAEMONIZE: return
# Detach from TTY.
os.setsid()
@@ -177,16 +199,16 @@ class Daemon:
os.close(0)
os.close(1)
os.close(2)
- if DEBUG:
+ if XEND_DEBUG:
os.open('/dev/null', os.O_RDONLY)
# XXX KAF: Why doesn't this capture output from C extensions that
# fprintf(stdout) or fprintf(stderr) ??
- os.open('/var/log/xend-debug.log', os.O_WRONLY|os.O_CREAT)
+ os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
os.dup(1)
else:
os.open('/dev/null', os.O_RDWR)
os.dup(0)
- os.open('/var/log/xend-debug.log', os.O_WRONLY|os.O_CREAT)
+ os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
def start(self, trace=0):
@@ -196,11 +218,15 @@ class Daemon:
4 Insufficient privileges
"""
xend_pid = self.cleanup_xend()
+ xenstored_pid = self.cleanup_xenstored()
if self.set_user():
return 4
os.chdir("/")
+ if xenstored_pid == 0:
+ self.start_xenstored()
+
if xend_pid > 0:
# Trying to run an already-running service is a success.
return 0
@@ -308,7 +334,7 @@ class Daemon:
servers.start()
except Exception, ex:
print >>sys.stderr, 'Exception starting xend:', ex
- if DEBUG:
+ if XEND_DEBUG:
traceback.print_exc()
log.exception("Exception starting xend")
self.exit(1)
diff --git a/tools/python/xen/xend/server/SrvDomain.py b/tools/python/xen/xend/server/SrvDomain.py
index c9cf4fe603..255e6157bf 100644
--- a/tools/python/xen/xend/server/SrvDomain.py
+++ b/tools/python/xen/xend/server/SrvDomain.py
@@ -28,19 +28,19 @@ class SrvDomain(SrvDir):
fn = FormFn(self.xd.domain_configure,
[['dom', 'int'],
['config', 'sxpr']])
- return fn(req.args, {'dom': self.dom.dom})
+ return fn(req.args, {'dom': self.dom.id})
def op_unpause(self, op, req):
- val = self.xd.domain_unpause(self.dom.name)
+ val = self.xd.domain_unpause(self.dom.id)
return val
def op_pause(self, op, req):
- val = self.xd.domain_pause(self.dom.name)
+ val = self.xd.domain_pause(self.dom.id)
return val
def op_shutdown(self, op, req):
fn = FormFn(self.xd.domain_shutdown,
- [['dom', 'str'],
+ [['dom', 'int'],
['reason', 'str'],
['key', 'int']])
val = fn(req.args, {'dom': self.dom.id})
@@ -50,7 +50,7 @@ class SrvDomain(SrvDir):
def op_destroy(self, op, req):
fn = FormFn(self.xd.domain_destroy,
- [['dom', 'str'],
+ [['dom', 'int'],
['reason', 'str']])
val = fn(req.args, {'dom': self.dom.id})
req.setHeader("Location", "%s/.." % req.prePathURL())
@@ -61,7 +61,7 @@ class SrvDomain(SrvDir):
def do_save(self, op, req):
fn = FormFn(self.xd.domain_save,
- [['dom', 'str'],
+ [['dom', 'int'],
['file', 'str']])
val = fn(req.args, {'dom': self.dom.id})
return 0
@@ -71,7 +71,7 @@ class SrvDomain(SrvDir):
def do_migrate(self, op, req):
fn = FormFn(self.xd.domain_migrate,
- [['dom', 'str'],
+ [['dom', 'int'],
['destination', 'str'],
['live', 'int'],
['resource', 'int']])
@@ -79,7 +79,7 @@ class SrvDomain(SrvDir):
def op_pincpu(self, op, req):
fn = FormFn(self.xd.domain_pincpu,
- [['dom', 'str'],
+ [['dom', 'int'],
['vcpu', 'int'],
['cpumap', 'int']])
val = fn(req.args, {'dom': self.dom.id})
@@ -87,7 +87,7 @@ class SrvDomain(SrvDir):
def op_cpu_bvt_set(self, op, req):
fn = FormFn(self.xd.domain_cpu_bvt_set,
- [['dom', 'str'],
+ [['dom', 'int'],
['mcuadv', 'int'],
['warpback', 'int'],
['warpvalue', 'int'],
@@ -99,7 +99,7 @@ class SrvDomain(SrvDir):
def op_cpu_sedf_set(self, op, req):
fn = FormFn(self.xd.domain_cpu_sedf_set,
- [['dom', 'str'],
+ [['dom', 'int'],
['period', 'int'],
['slice', 'int'],
['latency', 'int'],
@@ -110,28 +110,28 @@ class SrvDomain(SrvDir):
def op_maxmem_set(self, op, req):
fn = FormFn(self.xd.domain_maxmem_set,
- [['dom', 'str'],
+ [['dom', 'int'],
['memory', 'int']])
val = fn(req.args, {'dom': self.dom.id})
return val
def op_mem_target_set(self, op, req):
fn = FormFn(self.xd.domain_mem_target_set,
- [['dom', 'str'],
+ [['dom', 'int'],
['target', 'int']])
val = fn(req.args, {'dom': self.dom.id})
return val
def op_devices(self, op, req):
fn = FormFn(self.xd.domain_devtype_ls,
- [['dom', 'str'],
+ [['dom', 'int'],
['type', 'str']])
val = fn(req.args, {'dom': self.dom.id})
return val
def op_device(self, op, req):
fn = FormFn(self.xd.domain_devtype_get,
- [['dom', 'str'],
+ [['dom', 'int'],
['type', 'str'],
['idx', 'int']])
val = fn(req.args, {'dom': self.dom.id})
@@ -142,14 +142,14 @@ class SrvDomain(SrvDir):
def op_device_create(self, op, req):
fn = FormFn(self.xd.domain_device_create,
- [['dom', 'str'],
+ [['dom', 'int'],
['config', 'sxpr']])
val = fn(req.args, {'dom': self.dom.id})
return val
def op_device_refresh(self, op, req):
fn = FormFn(self.xd.domain_device_refresh,
- [['dom', 'str'],
+ [['dom', 'int'],
['type', 'str'],
['idx', 'str']])
val = fn(req.args, {'dom': self.dom.id})
@@ -157,7 +157,7 @@ class SrvDomain(SrvDir):
def op_device_destroy(self, op, req):
fn = FormFn(self.xd.domain_device_destroy,
- [['dom', 'str'],
+ [['dom', 'int'],
['type', 'str'],
['idx', 'str']])
val = fn(req.args, {'dom': self.dom.id})
@@ -165,7 +165,7 @@ class SrvDomain(SrvDir):
def op_device_configure(self, op, req):
fn = FormFn(self.xd.domain_device_configure,
- [['dom', 'str'],
+ [['dom', 'int'],
['config', 'sxpr'],
['idx', 'str']])
val = fn(req.args, {'dom': self.dom.id})
@@ -173,7 +173,7 @@ class SrvDomain(SrvDir):
def op_vif_limit_set(self, op, req):
fn = FormFn(self.xd.domain_vif_limit_set,
- [['dom', 'str'],
+ [['dom', 'int'],
['vif', 'int'],
['credit', 'int'],
['period', 'int']])
diff --git a/tools/python/xen/xend/server/SrvDomainDir.py b/tools/python/xen/xend/server/SrvDomainDir.py
index e10561ee45..d6f6291716 100644
--- a/tools/python/xen/xend/server/SrvDomainDir.py
+++ b/tools/python/xen/xend/server/SrvDomainDir.py
@@ -24,7 +24,7 @@ class SrvDomainDir(SrvDir):
def domain(self, x):
val = None
- dom = self.xd.domain_lookup(x)
+ dom = self.xd.domain_lookup_by_name(x)
if not dom:
raise XendError('No such domain ' + str(x))
val = SrvDomain(dom)
diff --git a/tools/python/xen/xend/server/blkif.py b/tools/python/xen/xend/server/blkif.py
index 5a179c23a0..75a76e8bda 100755
--- a/tools/python/xen/xend/server/blkif.py
+++ b/tools/python/xen/xend/server/blkif.py
@@ -5,14 +5,15 @@ import string
from xen.util import blkif
from xen.xend.XendError import XendError, VmError
-from xen.xend import XendRoot
+from xen.xend.XendRoot import get_component
from xen.xend.XendLogging import log
from xen.xend import sxp
from xen.xend import Blkctl
+from xen.xend.xenstore import DBVar
-import channel
-from controller import CtrlMsgRcvr, Dev, DevController
-from messages import *
+from xen.xend.server import channel
+from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
+from xen.xend.server.messages import *
class BlkifBackend:
""" Handler for the 'back-end' channel to a block device driver domain
@@ -56,7 +57,7 @@ class BlkifBackend:
def openEvtchn(self):
self.evtchn = channel.eventChannel(self.backendDomain, self.frontendDomain)
-
+
def getEventChannelBackend(self):
val = 0
if self.evtchn:
@@ -158,6 +159,18 @@ class BlkDev(Dev):
"""Info record for a block device.
"""
+ __exports__ = Dev.__exports__ + [
+ DBVar('dev', ty='str'),
+ DBVar('vdev', ty='int'),
+ DBVar('mode', ty='str'),
+ DBVar('viftype', ty='str'),
+ DBVar('params', ty='str'),
+ DBVar('node', ty='str'),
+ DBVar('device', ty='long'),
+ DBVar('start_sector', ty='long'),
+ DBVar('nr_sectors', ty='long'),
+ ]
+
def __init__(self, controller, id, config, recreate=False):
Dev.__init__(self, controller, id, config, recreate=recreate)
self.dev = None
@@ -206,7 +219,8 @@ class BlkDev(Dev):
raise VmError('vbd: Device not found: %s' % self.dev)
try:
- self.backendDomain = int(sxp.child_value(config, 'backend', '0'))
+ xd = get_component('xen.xend.XendDomain')
+ self.backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).id
except:
raise XendError('invalid backend domain')
@@ -214,8 +228,7 @@ class BlkDev(Dev):
def attach(self, recreate=False, change=False):
if recreate:
- node = sxp.child_value(recreate, 'node')
- self.setNode(node)
+ pass
else:
node = Blkctl.block('bind', self.type, self.params)
self.setNode(node)
@@ -263,7 +276,7 @@ class BlkDev(Dev):
def check_mounted(self, name):
mode = blkif.mount_mode(name)
- xd = XendRoot.get_component('xen.xend.XendDomain')
+ xd = get_component('xen.xend.XendDomain')
for vm in xd.list():
ctrl = vm.getDeviceController(self.getType(), error=False)
if (not ctrl): continue
@@ -292,14 +305,14 @@ class BlkDev(Dev):
val.append(['uname', self.uname])
if self.node:
val.append(['node', self.node])
- val.append(['index', self.getIndex()])
return val
def getBackend(self):
return self.controller.getBackend(self.backendDomain)
def refresh(self):
- log.debug("Refreshing vbd domain=%d id=%s", self.frontendDomain, self.id)
+ log.debug("Refreshing vbd domain=%d id=%s", self.frontendDomain,
+ self.id)
self.interfaceChanged()
def destroy(self, change=False, reboot=False):
@@ -308,7 +321,8 @@ class BlkDev(Dev):
@param change: change flag
"""
self.destroyed = True
- log.debug("Destroying vbd domain=%d id=%s", self.frontendDomain, self.id)
+ log.debug("Destroying vbd domain=%d id=%s", self.frontendDomain,
+ self.id)
self.send_be_vbd_destroy()
if change:
self.interfaceChanged()
@@ -445,5 +459,4 @@ class BlkifController(DevController):
log.error("Exception connecting backend: %s", ex)
else:
log.error('interface connect on unknown interface: id=%d', id)
-
diff --git a/tools/python/xen/xend/server/channel.py b/tools/python/xen/xend/server/channel.py
index e2b2043e66..00f451a7b8 100755
--- a/tools/python/xen/xend/server/channel.py
+++ b/tools/python/xen/xend/server/channel.py
@@ -14,52 +14,129 @@ DEBUG = 0
RESPONSE_TIMEOUT = 20.0
-def eventChannel(dom1, dom2):
- """Create an event channel between domains.
- The returned dict contains dom1, dom2, port1 and port2 on success.
+class EventChannel(dict):
+ """An event channel between domains.
+ """
+
+ def interdomain(cls, dom1, dom2, port1=0, port2=0):
+ """Create an event channel between domains.
+
+ @return EventChannel (None on error)
+ """
+ v = xc.evtchn_bind_interdomain(dom1=dom1, dom2=dom2,
+ port1=port1, port2=port2)
+ if v:
+ v = cls(dom1, dom2, v)
+ return v
+
+ interdomain = classmethod(interdomain)
+
+ def restoreFromDB(cls, db, dom1, dom2, port1=0, port2=0):
+ """Create an event channel using db info if available.
+ Inverse to saveToDB().
+
+ @param db db
+ @param dom1
+ @param dom2
+ @param port1
+ @param port2
+ """
+ try:
+ dom1 = int(db['dom1'])
+ except: pass
+ try:
+ dom2 = int(db['dom2'])
+ except: pass
+ try:
+ port1 = int(db['port1'])
+ except: pass
+ try:
+ port2 = int(db['port2'])
+ except: pass
+ evtchn = cls.interdomain(dom1, dom2, port1=port1, port2=port2)
+ return evtchn
+
+ restoreFromDB = classmethod(restoreFromDB)
+
+ def __init__(self, dom1, dom2, d):
+ d['dom1'] = dom1
+ d['dom2'] = dom2
+ self.update(d)
+ self.dom1 = dom1
+ self.dom2 = dom2
+ self.port1 = d.get('port1')
+ self.port2 = d.get('port2')
+
+ def close(self):
+ """Close the event channel.
+ """
+ def evtchn_close(dom, port):
+ try:
+ xc.evtchn_close(dom=dom, port=port)
+ except Exception, ex:
+ pass
+
+ if DEBUG:
+ print 'EventChannel>close>', self
+ evtchn_close(self.dom1, self.port1)
+ evtchn_close(self.dom2, self.port2)
+
+ def saveToDB(self, db):
+ """Save the event channel to the db so it can be restored later,
+ using restoreFromDB() on the class.
+
+ @param db db
+ """
+ db['dom1'] = str(self.dom1)
+ db['dom2'] = str(self.dom2)
+ db['port1'] = str(self.port1)
+ db['port2'] = str(self.port2)
+ db.saveDB()
+
+ def sxpr(self):
+ return ['event-channel',
+ ['dom1', self.dom1 ],
+ ['port1', self.port1 ],
+ ['dom2', self.dom2 ],
+ ['port2', self.port2 ]
+ ]
+
+ def __repr__(self):
+ return ("<EventChannel dom1:%d:%d dom2:%d:%d>"
+ % (self.dom1, self.port1, self.dom2, self.port2))
- @return dict (empty on error)
+def eventChannel(dom1, dom2, port1=0, port2=0):
+ """Create an event channel between domains.
+
+ @return EventChannel (None on error)
"""
- evtchn = xc.evtchn_bind_interdomain(dom1=dom1, dom2=dom2)
- if evtchn:
- evtchn['dom1'] = dom1
- evtchn['dom2'] = dom2
- return evtchn
+ return EventChannel.interdomain(dom1, dom2, port1=port1, port2=port2)
def eventChannelClose(evtchn):
- """Close an event channel that was opened by eventChannel().
+ """Close an event channel.
"""
- def evtchn_close(dom, port):
- if (dom is None) or (port is None): return
- try:
- xc.evtchn_close(dom=dom, port=port)
- except Exception, ex:
- pass
-
if not evtchn: return
- if DEBUG:
- print 'eventChannelClose>', evtchn
- evtchn_close(evtchn.get('dom1'), evtchn.get('port1'))
- evtchn_close(evtchn.get('dom2'), evtchn.get('port2'))
-
+ evtchn.close()
class ChannelFactory:
- """Factory for creating channels.
+ """Factory for creating control channels.
Maintains a table of channels.
"""
""" Channels indexed by index. """
- channels = {}
+ channels = None
thread = None
notifier = None
"""Map of ports to the virq they signal."""
- virqPorts = {}
+ virqPorts = None
def __init__(self):
"""Constructor - do not use. Use the channelFactory function."""
+ self.channels = {}
+ self.virqPorts = {}
self.notifier = xu.notifier()
# Register interest in virqs.
self.bind_virq(xen.lowlevel.xc.VIRQ_DOM_EXC)
@@ -70,10 +147,6 @@ class ChannelFactory:
self.virqPorts[port] = virq
log.info("Virq %s on port %s", virq, port)
- def virq(self):
- log.error("virq")
- self.notifier.virq_send(self.virqPort)
-
def start(self):
"""Fork a thread to read messages.
"""
@@ -182,9 +255,13 @@ class ChannelFactory:
return None
def openChannel(self, dom, local_port=0, remote_port=0):
- return (self.findChannel(dom, local_port=local_port, remote_port=remote_port)
- or
- self.newChannel(dom, local_port, remote_port))
+ chan = self.findChannel(dom, local_port=local_port,
+ remote_port=remote_port)
+ if chan:
+ return chan
+ chan = self.newChannel(dom, local_port, remote_port)
+ return chan
+
def createPort(self, dom, local_port=0, remote_port=0):
"""Create a port for a channel to the given domain.
@@ -203,8 +280,31 @@ class ChannelFactory:
@type remote: int
@return: port object
"""
- return xu.port(dom, local_port=int(local_port),
- remote_port=int(remote_port))
+ return xu.port(dom, local_port=local_port, remote_port=remote_port)
+
+ def restoreFromDB(self, db, dom, local, remote):
+ """Create a channel using ports restored from the db (if available).
+ Otherwise use the given ports. This is the inverse operation to
+ saveToDB() on a channel.
+
+ @param db db
+ @param dom domain the channel connects to
+ @param local default local port
+ @param remote default remote port
+ """
+ try:
+ local_port = int(db['local_port'])
+ except:
+ local_port = local
+ try:
+ remote_port = int(db['remote_port'])
+ except:
+ remote_port = remote
+ try:
+ chan = self.openChannel(dom, local_port, remote_port)
+ except:
+ return None
+ return chan
def channelFactory():
"""Singleton constructor for the channel factory.
@@ -218,7 +318,7 @@ def channelFactory():
return inst
class Channel:
- """Chanel to a domain.
+ """Control channel to a domain.
Maintains a list of device handlers to dispatch requests to, based
on the request type.
"""
@@ -239,6 +339,17 @@ class Channel:
# Make sure the port will deliver all the messages.
self.port.register(TYPE_WILDCARD)
+ def saveToDB(self, db):
+ """Save the channel ports to the db so the channel can be restored later,
+ using restoreFromDB() on the factory.
+
+ @param db db
+ """
+ if self.closed: return
+ db['local_port'] = str(self.getLocalPort())
+ db['remote_port'] = str(self.getRemotePort())
+ db.saveDB()
+
def getKey(self):
"""Get the channel key.
"""
diff --git a/tools/python/xen/xend/server/console.py b/tools/python/xen/xend/server/console.py
index f3dade883b..743ace4aec 100755
--- a/tools/python/xen/xend/server/console.py
+++ b/tools/python/xen/xend/server/console.py
@@ -13,10 +13,11 @@ from xen.xend import EventServer; eserver = EventServer.instance()
from xen.xend.XendLogging import log
from xen.xend import XendRoot; xroot = XendRoot.instance()
from xen.xend import sxp
+from xen.xend.xenstore import DBVar
-from controller import CtrlMsgRcvr, Dev, DevController
-from messages import *
-from params import *
+from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
+from xen.xend.server.messages import *
+from xen.xend.server.params import *
class ConsoleProtocol(protocol.Protocol):
"""Asynchronous handler for a console socket.
@@ -76,6 +77,12 @@ class ConsoleDev(Dev, protocol.ServerFactory):
STATUS_CONNECTED = 'connected'
STATUS_LISTENING = 'listening'
+ __exports__ = Dev.__exports__ + [
+ DBVar('status', ty='str'),
+ #DBVar('listening', ty='str'),
+ DBVar('console_port', ty='int'),
+ ]
+
def __init__(self, controller, id, config, recreate=False):
Dev.__init__(self, controller, id, config)
self.lock = threading.RLock()
@@ -129,7 +136,6 @@ class ConsoleDev(Dev, protocol.ServerFactory):
val.append(['local_port', self.getLocalPort() ])
val.append(['remote_port', self.getRemotePort() ])
val.append(['console_port', self.console_port ])
- val.append(['index', self.getIndex()])
if self.addr:
val.append(['connected', self.addr[0], self.addr[1]])
finally:
diff --git a/tools/python/xen/xend/server/controller.py b/tools/python/xen/xend/server/controller.py
index 9205b2778e..d1e19efee1 100755
--- a/tools/python/xen/xend/server/controller.py
+++ b/tools/python/xen/xend/server/controller.py
@@ -4,7 +4,8 @@ for a domain.
"""
from xen.xend.XendError import XendError
-from messages import msgTypeName, printMsg, getMessageType
+from xen.xend.xenstore import DBVar
+from xen.xend.server.messages import msgTypeName, printMsg, getMessageType
DEBUG = 0
@@ -115,18 +116,18 @@ class DevControllerTable:
def getDevControllerClass(self, type):
return self.controllerClasses.get(type)
- def addDevControllerClass(self, klass):
- self.controllerClasses[klass.getType()] = klass
+ def addDevControllerClass(self, cls):
+ self.controllerClasses[cls.getType()] = cls
def delDevControllerClass(self, type):
if type in self.controllerClasses:
del self.controllerClasses[type]
def createDevController(self, type, vm, recreate=False):
- klass = self.getDevControllerClass(type)
- if not klass:
+ cls = self.getDevControllerClass(type)
+ if not cls:
raise XendError("unknown device type: " + type)
- return klass.createDevController(vm, recreate=recreate)
+ return cls.createDevController(vm, recreate=recreate)
def getDevControllerTable():
"""Singleton constructor for the controller table.
@@ -138,11 +139,11 @@ def getDevControllerTable():
devControllerTable = DevControllerTable()
return devControllerTable
-def addDevControllerClass(name, klass):
+def addDevControllerClass(name, cls):
"""Add a device controller class to the controller table.
"""
- klass.name = name
- getDevControllerTable().addDevControllerClass(klass)
+ cls.type = name
+ getDevControllerTable().addDevControllerClass(cls)
def createDevController(name, vm, recreate=False):
return getDevControllerTable().createDevController(name, vm, recreate=recreate)
@@ -155,29 +156,57 @@ class DevController:
"""
- name = None
+ # State:
+ # controller/<type> : for controller
+ # device/<type>/<id> : for each device
- def createDevController(klass, vm, recreate=False):
+ def createDevController(cls, vm, recreate=False):
"""Class method to create a dev controller.
"""
- ctrl = klass(vm, recreate=recreate)
+ ctrl = cls(vm, recreate=recreate)
ctrl.initController(recreate=recreate)
+ ctrl.exportToDB()
return ctrl
createDevController = classmethod(createDevController)
- def getType(klass):
- return klass.name
+ def getType(cls):
+ return cls.type
getType = classmethod(getType)
+ __exports__ = [
+ DBVar('type', 'str'),
+ DBVar('destroyed', 'bool'),
+ ]
+
+ # Set when registered.
+ type = None
+
def __init__(self, vm, recreate=False):
self.destroyed = False
self.vm = vm
+ self.db = self.getDB()
self.deviceId = 0
self.devices = {}
self.device_order = []
+ def getDB(self):
+ """Get the db node to use for a controller.
+ """
+ return self.vm.db.addChild("/controller/%s" % self.getType())
+
+ def getDevDB(self, id):
+ """Get the db node to use for a device.
+ """
+ return self.vm.db.addChild("/device/%s/%s" % (self.getType(), id))
+
+ def exportToDB(self, save=False):
+ self.db.exportToDB(self, fields=self.__exports__, save=save)
+
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
+
def getDevControllerType(self):
return self.dctype
@@ -229,18 +258,19 @@ class DevController:
i.e. it is being added at runtime rather than when the domain is created.
"""
dev = self.newDevice(self.nextDeviceId(), config, recreate=recreate)
+ if self.vm.recreate:
+ dev.importFromDB()
dev.init(recreate=recreate)
self.addDevice(dev)
- idx = self.getDeviceIndex(dev)
- recreate = self.vm.get_device_recreate(self.getType(), idx)
+ if not recreate:
+ dev.exportToDB()
dev.attach(recreate=recreate, change=change)
+ dev.exportToDB()
def configureDevice(self, id, config, change=False):
"""Reconfigure an existing device.
May be defined in subclass."""
- dev = self.getDevice(id)
- if not dev:
- raise XendError("invalid device id: " + id)
+ dev = self.getDevice(id, error=True)
dev.configure(config, change=change)
def destroyDevice(self, id, change=False, reboot=False):
@@ -251,9 +281,7 @@ class DevController:
The device is not deleted, since it may be recreated later.
"""
- dev = self.getDevice(id)
- if not dev:
- raise XendError("invalid device id: " + id)
+ dev = self.getDevice(id, error=True)
dev.destroy(change=change, reboot=reboot)
return dev
@@ -278,24 +306,15 @@ class DevController:
def isDestroyed(self):
return self.destroyed
- def getDevice(self, id):
- return self.devices.get(id)
-
- def getDeviceByIndex(self, idx):
- if 0 <= idx < len(self.device_order):
- return self.device_order[idx]
- else:
- return None
-
- def getDeviceIndex(self, dev):
- return self.device_order.index(dev)
+ def getDevice(self, id, error=False):
+ dev = self.devices.get(id)
+ if error and not dev:
+ raise XendError("invalid device id: " + id)
+ return dev
def getDeviceIds(self):
return [ dev.getId() for dev in self.device_order ]
- def getDeviceIndexes(self):
- return range(0, len(self.device_order))
-
def getDevices(self):
return self.device_order
@@ -353,11 +372,42 @@ class Dev:
@type controller: DevController
"""
+ # ./status : need 2: actual and requested?
+ # down-down: initial.
+ # up-up: fully up.
+ # down-up: down requested, still up. Watch front and back, when both
+ # down go to down-down. But what if one (or both) is not connected?
+ # Still have front/back trees with status? Watch front/status, back/status?
+ # up-down: up requested, still down.
+ # Back-end watches ./status, front/status
+ # Front-end watches ./status, back/status
+ # i.e. each watches the other 2.
+ # Each is status/request status/actual?
+ #
+ # backend?
+ # frontend?
+
+ __exports__ = [
+ DBVar('id', ty='int'),
+ DBVar('type', ty='str'),
+ DBVar('config', ty='sxpr'),
+ DBVar('destroyed', ty='bool'),
+ ]
+
def __init__(self, controller, id, config, recreate=False):
self.controller = controller
self.id = id
self.config = config
self.destroyed = False
+ self.type = self.getType()
+
+ self.db = controller.getDevDB(id)
+
+ def exportToDB(self, save=False):
+ self.db.exportToDB(self, fields=self.__exports__, save=save)
+
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
def getDomain(self):
return self.controller.getDomain()
@@ -380,9 +430,6 @@ class Dev:
def getId(self):
return self.id
- def getIndex(self):
- return self.controller.getDeviceIndex(self)
-
def getConfig(self):
return self.config
diff --git a/tools/python/xen/xend/server/netif.py b/tools/python/xen/xend/server/netif.py
index 9d2dbc4f63..0a49842522 100755
--- a/tools/python/xen/xend/server/netif.py
+++ b/tools/python/xen/xend/server/netif.py
@@ -4,21 +4,75 @@
import random
+from xen.util.mac import macFromString, macToString
+
from xen.xend import sxp
from xen.xend import Vifctl
from xen.xend.XendError import XendError, VmError
from xen.xend.XendLogging import log
from xen.xend import XendVnet
from xen.xend.XendRoot import get_component
+from xen.xend.xenstore import DBVar
-import channel
-from controller import CtrlMsgRcvr, Dev, DevController
-from messages import *
+from xen.xend.server import channel
+from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
+from xen.xend.server.messages import *
class NetDev(Dev):
"""A network device.
"""
+ # State:
+ # inherited +
+ # ./config
+ # ./mac
+ # ./be_mac
+ # ./bridge
+ # ./script
+ # ./ipaddr ?
+ #
+ # ./credit
+ # ./period
+ #
+ # ./vifctl: up/down?
+ # ./vifname
+ #
+ #
+ # Poss should have no backend state here - except for ref to backend's own tree
+ # for the device? And a status - the one we want.
+ # ./back/dom
+ # ./back/devid - id for back-end (netif_handle) - same as front/devid
+ # ./back/id - backend id (if more than one b/e per domain)
+ # ./back/status
+ # ./back/tx_shmem_frame - actually these belong in back-end state
+ # ./back/rx_shmem_frame
+ #
+ # ./front/dom
+ # ./front/devid
+ # ./front/status - need 2: one for requested, one for actual? Or drive from dev status
+ # and this is front status only.
+ # ./front/tx_shmem_frame
+ # ./front/rx_shmem_frame
+ #
+ # ./evtchn/front - here or in front/back?
+ # ./evtchn/back
+ # ./evtchn/status ?
+ # At present created by dev: but should be created unbound by front/back
+ # separately and then bound (by back)?
+
+ __exports__ = Dev.__exports__ + [
+ DBVar('config', ty='sxpr'),
+ DBVar('mac', ty='mac'),
+ DBVar('be_mac', ty='mac'),
+ DBVar('bridge', ty='str'),
+ DBVar('script', ty='str'),
+ #DBVar('ipaddr'),
+ DBVar('credit', ty='int'),
+ DBVar('period', ty='int'),
+ DBVar('vifname', ty='str'),
+ DBVar('evtchn'), #todo: export fields (renamed)
+ ]
+
def __init__(self, controller, id, config, recreate=False):
Dev.__init__(self, controller, id, config, recreate=recreate)
self.vif = int(self.id)
@@ -49,15 +103,19 @@ class NetDev(Dev):
def _get_config_mac(self, config):
vmac = sxp.child_value(config, 'mac')
if not vmac: return None
- mac = [ int(x, 16) for x in vmac.split(':') ]
- if len(mac) != 6: raise XendError("invalid mac: %s" % vmac)
+ try:
+ mac = macFromString(vmac)
+ except:
+ raise XendError("invalid mac: %s" % vmac)
return mac
def _get_config_be_mac(self, config):
vmac = sxp.child_value(config, 'be_mac')
if not vmac: return None
- mac = [ int(x, 16) for x in vmac.split(':') ]
- if len(mac) != 6: raise XendError("invalid backend mac: %s" % vmac)
+ try:
+ mac = macFromString(vmac)
+ except:
+ raise XendError("invalid backend mac: %s" % vmac)
return mac
def _get_config_ipaddr(self, config):
@@ -102,7 +160,7 @@ class NetDev(Dev):
else:
#todo: Code below will fail on xend restart when backend is not domain 0.
xd = get_component('xen.xend.XendDomain')
- self.backendDomain = int(xd.domain_lookup(sxp.child_value(config, 'backend', '0')).id)
+ self.backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).id
except:
raise XendError('invalid backend domain')
return self.config
@@ -127,13 +185,13 @@ class NetDev(Dev):
ipaddr = self._get_config_ipaddr(config)
xd = get_component('xen.xend.XendDomain')
- backendDomain = str(xd.domain_lookup(sxp.child_value(config, 'backend', '0')).id)
+ backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).id
if (mac is not None) and (mac != self.mac):
raise XendError("cannot change mac")
if (be_mac is not None) and (be_mac != self.be_mac):
raise XendError("cannot change backend mac")
- if (backendDomain is not None) and (backendDomain != str(self.backendDomain)):
+ if (backendDomain is not None) and (backendDomain != self.backendDomain):
raise XendError("cannot change backend")
if (bridge is not None) and (bridge != self.bridge):
changes['bridge'] = bridge
@@ -199,7 +257,6 @@ class NetDev(Dev):
val.append(['evtchn',
self.evtchn['port1'],
self.evtchn['port2']])
- val.append(['index', self.getIndex()])
return val
def get_vifname(self):
@@ -213,12 +270,12 @@ class NetDev(Dev):
def get_mac(self):
"""Get the MAC address as a string.
"""
- return ':'.join(map(lambda x: "%02x" % x, self.mac))
+ return macToString(self.mac)
def get_be_mac(self):
"""Get the backend MAC address as a string.
"""
- return ':'.join(map(lambda x: "%02x" % x, self.be_mac))
+ return macToString(self.be_mac)
def vifctl_params(self, vmname=None):
"""Get the parameters to pass to vifctl.
@@ -230,7 +287,7 @@ class NetDev(Dev):
vm = xd.domain_lookup(dom)
vmname = vm.name
except:
- vmname = 'DOM%d' % dom
+ vmname = 'Domain-%d' % dom
return { 'domain': vmname,
'vif' : self.get_vifname(),
'mac' : self.get_mac(),
diff --git a/tools/python/xen/xend/server/params.py b/tools/python/xen/xend/server/params.py
index 5c7fdf7bad..2565c2dfcd 100644
--- a/tools/python/xen/xend/server/params.py
+++ b/tools/python/xen/xend/server/params.py
@@ -1,6 +1,34 @@
-# The following parameters could be placed in a configuration file.
-XEND_PID_FILE = '/var/run/xend.pid'
-XEND_TRACE_FILE = '/var/log/xend.trace'
+import os
+
+def getenv(var, val, conv=None):
+ """Get a value from the environment, with optional conversion.
-XEND_USER = 'root'
+ @param var name of environment variable
+ @param val default value
+ @param conv conversion function to apply to env value
+ @return converted value or default
+ """
+ try:
+ v = os.getenv(var)
+ if v is None:
+ v = val
+ else:
+ print var, '=', v
+ if conv:
+ v = conv(v)
+ except:
+ v = val
+ return v
+
+# The following parameters could be placed in a configuration file.
+XEND_PID_FILE = '/var/run/xend.pid'
+XEND_TRACE_FILE = '/var/log/xend.trace'
+XEND_DEBUG_LOG = '/var/log/xend-debug.log'
+XEND_USER = 'root'
+XEND_DEBUG = getenv("XEND_DEBUG", 0, conv=int)
+XEND_DAEMONIZE = getenv("XEND_DAEMONIZE", not XEND_DEBUG, conv=int)
+XENSTORED_PID_FILE = '/var/run/xenstored.pid'
+XENSTORED_RUN_DIR = '/var/run/xenstored'
+XENSTORED_LIB_DIR = '/var/lib/xenstored'
+XENSTORED_DEBUG = getenv("XENSTORED_DEBUG", 0, conv=int)
diff --git a/tools/python/xen/xend/server/usbif.py b/tools/python/xen/xend/server/usbif.py
index 9535fdd202..d366985740 100644
--- a/tools/python/xen/xend/server/usbif.py
+++ b/tools/python/xen/xend/server/usbif.py
@@ -7,10 +7,11 @@
from xen.xend import sxp
from xen.xend.XendLogging import log
from xen.xend.XendError import XendError
+from xen.xend.xenstore import DBVar
-import channel
-from controller import Dev, DevController
-from messages import *
+from xen.xend.server import channel
+from xen.xend.server.controller import Dev, DevController
+from xen.xend.server.messages import *
class UsbBackend:
"""Handler for the 'back-end' channel to a USB device driver domain
@@ -141,6 +142,11 @@ class UsbBackend:
class UsbDev(Dev):
+
+ __exports__ = Dev.__exports__ + [
+ DBVar('port', ty='int'),
+ DBVar('path', ty='str'),
+ ]
def __init__(self, controller, id, config, recreate=False):
Dev.__init__(self, controller, id, config, recreate=recreate)
@@ -186,7 +192,6 @@ class UsbDev(Dev):
['port', self.port],
['path', self.path],
]
- val.append(['index', self.getIndex()])
return val
def getBackend(self):
diff --git a/tools/python/xen/xend/uuid.py b/tools/python/xen/xend/uuid.py
new file mode 100644
index 0000000000..096fef7f9f
--- /dev/null
+++ b/tools/python/xen/xend/uuid.py
@@ -0,0 +1,65 @@
+"""Universal(ly) Unique Identifiers (UUIDs).
+"""
+import commands
+import random
+
+def uuidgen(random=True):
+ """Generate a UUID using the command uuidgen.
+
+ If random is true (default) generates a random uuid.
+ If random is false generates a time-based uuid.
+ """
+ cmd = "uuidgen"
+ if random:
+ cmd += " -r"
+ else:
+ cmd += " -t"
+ return commands.getoutput(cmd)
+
+class UuidFactoryUuidgen:
+
+ """A uuid factory using uuidgen."""
+
+ def __init__(self):
+ pass
+
+ def getUuid(self):
+ return uuidgen()
+
+class UuidFactoryRandom:
+
+ """A random uuid factory."""
+
+ def __init__(self):
+ f = file("/dev/urandom", "r")
+ seed = f.read(16)
+ f.close()
+ self.rand = random.Random(seed)
+
+ def randBytes(self, n):
+ return [ self.rand.randint(0, 255) for i in range(0, n) ]
+
+ def getUuid(self):
+ bytes = self.randBytes(16)
+ # Encode the variant.
+ bytes[6] = (bytes[6] & 0x0f) | 0x40
+ bytes[8] = (bytes[8] & 0x3f) | 0x80
+ f = "%02x"
+ return ( "-".join([f*4, f*2, f*2, f*2, f*6]) % tuple(bytes) )
+
+def getFactory():
+ """Get the factory to use for creating uuids.
+ This is so it's easy to change the uuid factory.
+ For example, for testing we might want repeatable uuids
+ rather than the random ones we normally use.
+ """
+ global uuidFactory
+ try:
+ uuidFactory
+ except:
+ #uuidFactory = UuidFactoryUuidgen()
+ uuidFactory = UuidFactoryRandom()
+ return uuidFactory
+
+def getUuid():
+ return getFactory().getUuid()
diff --git a/tools/python/xen/xend/xenstore/__init__.py b/tools/python/xen/xend/xenstore/__init__.py
new file mode 100644
index 0000000000..6772d2ceca
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/__init__.py
@@ -0,0 +1,2 @@
+from xsnode import *
+from xsobj import *
diff --git a/tools/python/xen/xend/xenstore/xsnode.py b/tools/python/xen/xend/xenstore/xsnode.py
new file mode 100644
index 0000000000..ae770219ab
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/xsnode.py
@@ -0,0 +1,382 @@
+import errno
+import os
+import os.path
+import select
+import sys
+import time
+
+from xen.lowlevel import xs
+from xen.xend import sxp
+from xen.xend.PrettyPrint import prettyprint
+
+SELECT_TIMEOUT = 2.0
+
+def getEventPath(event):
+ return os.path.join("/_event", event)
+
+def getEventIdPath(event):
+ return os.path.join(eventPath(event), "@eid")
+
+class Subscription:
+
+ def __init__(self, event, fn, id):
+ self.event = event
+ self.watcher = None
+ self.fn = fn
+ self.id = id
+
+ def watch(self, watcher):
+ self.watcher = watcher
+ watcher.addSubs(self)
+
+ def unwatch(self):
+ watcher = self.watcher
+ if watcher:
+ self.watcher = None
+ watcher.delSubs(self)
+
+ def notify(self, event):
+ try:
+ self.fn(event, id)
+ except SystemExitException:
+ raise
+ except:
+ pass
+
+class Watcher:
+
+ def __init__(self, store, event):
+ self.path = getEventPath(event)
+ self.eidPath = getEventIdPath(event)
+ store.mkdirs(self.path)
+ if not store.exists(self.eidPath):
+ store.writeInt(self.eidPath, 0)
+ self.xs = None
+ self.subs = []
+
+ def __getattr__(self, k, v):
+ if k == "fileno":
+ if self.xs:
+ return self.xs.fileno
+ else:
+ return -1
+ else:
+ return self.__dict__.get(k, v)
+
+ def addSubs(self, subs):
+ self.subs.append(subs)
+ self.watch()
+
+ def delSubs(self, subs):
+ self.subs.remove(subs)
+ if len(self.subs) == 0:
+ self.unwatch()
+
+ def getEvent(self):
+ return self.event
+
+ def watch(self):
+ if self.xs: return
+ self.xs = xs.open()
+ self.xs.watch(path)
+
+ def unwatch(self):
+ if self.xs:
+ self.xs.unwatch(self.path)
+ self.xs.close()
+ self.xs = None
+
+ def watching(self):
+ return self.xs is not None
+
+ def getNotification(self):
+ p = self.xs.read_watch()
+ self.xs.acknowledge_watch()
+ eid = self.xs.readInt(self.eidPath)
+ return p
+
+ def notify(self, subs):
+ p = self.getNotification()
+ for s in subs:
+ s.notify(p)
+
+class XenStore:
+
+ def __init__(self):
+ self.xs = None
+ #self.xs = xs.open()
+ self.subscription = {}
+ self.subscription_id = 0
+ self.events = {}
+ self.write("/", "")
+
+ def getxs(self):
+ if self.xs is None:
+ ex = None
+ for i in range(0,20):
+ try:
+ self.xs = xs.open()
+ ex = None
+ break
+ except Exception, ex:
+ print >>stderr, "Exception connecting to xsdaemon:", ex
+ print >>stderr, "Trying again..."
+ time.sleep(1)
+ else:
+ raise ex
+
+ #todo would like to reconnect if xs conn closes (e.g. daemon restart).
+ return self.xs
+
+ def dump(self, path="/", out=sys.stdout):
+ print 'dump>', path
+ val = ['node']
+ val.append(['path', path])
+## perms = ['perms']
+## for p in self.getPerms(path):
+## l = ['perm']
+## l.append('dom', p.get['dom'])
+## for k in ['read', 'write', 'create', 'owner']:
+## v = p.get(k)
+## l.append([k, v])
+## perms.append(l)
+## val.append(perms)
+ data = self.read(path)
+ if data:
+ val.append(['data', data])
+ children = ['children']
+ for x in self.lsPaths(path):
+ print 'dump>', 'child=', x
+ children.append(self.dump(x))
+ if len(children) > 1:
+ val.append(children)
+ prettyprint(val, out=out)
+ return val
+
+ def getPerms(self, path):
+ return self.getxs().get_permissions(path)
+
+ def ls(self, path="/"):
+ return self.getxs().ls(path)
+
+ def lsPaths(self, path="/"):
+ return [ os.path.join(path, x) for x in self.ls(path) ]
+
+ def lsr(self, path="/", list=None):
+ if list is None:
+ list = []
+ list.append(path)
+ for x in self.lsPaths(path):
+ list.append(x)
+ self.lsr(x, list=list)
+ return list
+
+ def rm(self, path):
+ try:
+ #for x in self.lsPaths():
+ # self.getxs().rm(x)
+ self.getxs().rm(path)
+ except:
+ pass
+
+ def exists(self, path):
+ try:
+ self.getxs().ls(path)
+ return True
+ except RuntimeError, ex:
+ if ex.args[0] == errno.ENOENT:
+ return False
+ else:
+ raise
+
+ def mkdirs(self, path):
+ if self.exists(path):
+ return
+ elts = path.split("/")
+ p = "/"
+ for x in elts:
+ if x == "": continue
+ p = os.path.join(p, x)
+ if not self.exists(p):
+ self.getxs().write(p, "", create=True)
+
+ def read(self, path):
+ try:
+ return self.getxs().read(path)
+ except RuntimeError, ex:
+ if ex.args[0] == errno.EISDIR:
+ return None
+ else:
+ raise
+
+ def create(self, path, excl=False):
+ self.write(path, "", create=True, excl=excl)
+
+ def write(self, path, data, create=True, excl=False):
+ self.mkdirs(path)
+ self.getxs().write(path, data, create=create, excl=excl)
+
+ def begin(self, path):
+ self.getxs().begin_transaction(path)
+
+ def commit(self, abandon=False):
+ self.getxs().end_transaction(abort=abandon)
+
+ def subscribe(self, event, fn):
+ watcher = self.watchEvent(event)
+ self.subscription_id += 1
+ subs = Subscription(event, fn, self.subscription_id)
+ self.subscription[subs.id] = subs
+ subs.watch(watcher)
+ return subs.id
+
+ def unsubscribe(self, sid):
+ s = self.subscription.get(sid)
+ if not s: return
+ del self.subscription[s.id]
+ s.unwatch()
+ unwatchEvent(s.event)
+
+ def sendEvent(self, event, data):
+ eventPath = getEventPath(event)
+ eidPath = getEventIdPath(event)
+ try:
+ self.begin(eventPath)
+ self.mkdirs(eventPath)
+ if self.exists(eidPath):
+ eid = self.readInt(eidPath)
+ eid += 1
+ else:
+ eid = 1
+ self.writeInt(eidPath, eid)
+ self.write(os.path.join(eventPath, str(eid)), data)
+ finally:
+ self.commit()
+
+ def watchEvent(self, event):
+ if event in self.events:
+ return
+ watcher = Watcher(event)
+ self.watchers[watcher.getEvent()] = watcher
+ self.watchStart()
+ return watcher
+
+ def unwatchEvent(self, event):
+ watcher = self.watchers.get(event)
+ if not watcher:
+ return
+ if not watcher.watching():
+ del self.watchers[event]
+
+ def watchStart(self):
+ if self.watchThread: return
+
+ def watchMain(self):
+ try:
+ while True:
+ if self.watchThread is None: return
+ if not self.events:
+ return
+ rd = self.watchers.values()
+ try:
+ (rd, wr, er) = select.select(rd, [], [], SELECT_TIMEOUT)
+ for watcher in rd:
+ watcher.notify()
+ except socket.error, ex:
+ if ex.args[0] in (EAGAIN, EINTR):
+ pass
+ else:
+ raise
+ finally:
+ self.watchThread = None
+
+ def introduceDomain(self, dom, page, evtchn, path):
+ self.getxs().introduce_domain(dom, page, evtchn.port1, path)
+
+ def releaseDomain(self, dom):
+ self.getxs().release_domain(dom)
+
+def getXenStore():
+ global xenstore
+ try:
+ return xenstore
+ except:
+ xenstore = XenStore()
+ return xenstore
+
+class XenNode:
+
+ def __init__(self, path="/", create=True):
+ self.store = getXenStore()
+ self.path = path
+ if not self.store.exists(path):
+ if create:
+ self.store.create(path)
+ else:
+ raise ValueError("path does not exist: '%s'" % path)
+
+ def relPath(self, path=""):
+ if not path:
+ return self.path
+ if path and path.startswith("/"):
+ path = path[1:]
+ return os.path.join(self.path, path)
+
+ def delete(self, path=""):
+ self.store.rm(self.relPath(path))
+
+ def exists(self, path=""):
+ return self.store.exists(self.relPath(path))
+
+ def getNode(self, path="", create=True):
+ if path == "":
+ return self
+ else:
+ return XenNode(self.relPath(path=path), create=create)
+
+ getChild = getNode
+
+ def getData(self, path=""):
+ path = self.relPath(path)
+ try:
+ return self.store.read(path)
+ except:
+ return None
+
+ def setData(self, data, path=""):
+ path = self.relPath(path)
+ #print 'XenNode>setData>', 'path=', path, 'data=', data
+ return self.store.write(path, data)
+
+ def getLock(self):
+ return None
+
+ def lock(self, lockid):
+ return None
+
+ def unlock(self, lockid):
+ return None
+
+ def deleteChild(self, name):
+ self.delete(name)
+
+ def deleteChildren(self):
+ for name in self.ls():
+ self.deleteChild(name)
+
+ def getChildren(self):
+ return [ self.getNode(name) for name in self.ls() ]
+
+ def ls(self):
+ return self.store.ls(self.path)
+
+ def introduceDomain(self, dom, page, evtchn, path):
+ self.store.introduceDomain(dom, page, evtchn, path)
+
+ def releaseDomain(self, dom):
+ self.store.releaseDomain(dom)
+
+ def __repr__(self):
+ return "<XenNode %s>" % self.path
+
+
diff --git a/tools/python/xen/xend/xenstore/xsobj.py b/tools/python/xen/xend/xenstore/xsobj.py
new file mode 100644
index 0000000000..b1c9a4f1d1
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/xsobj.py
@@ -0,0 +1,522 @@
+import string
+import types
+
+from xen.xend import sxp
+from xsnode import XenNode
+from xen.util.mac import macToString, macFromString
+
+VALID_KEY_CHARS = string.ascii_letters + string.digits + "_-@"
+
+def hasAttr(obj, attr):
+ if isinstance(obj, dict):
+ return obj.contains(attr)
+ else:
+ return hasattr(obj, attr)
+
+def getAttr(obj, attr):
+ if isinstance(obj, dict):
+ return dict.get(attr)
+ else:
+ return getattr(obj, attr, None)
+
+def setAttr(obj, attr, val):
+ if isinstance(obj, dict):
+ dict[attr] = val
+ else:
+ setattr(obj, attr, val)
+
+class DBConverter:
+ """Conversion of values to and from strings in xenstore.
+ """
+
+ converters = {}
+
+ def checkType(cls, ty):
+ if ty is None or ty in cls.converters:
+ return
+ raise ValueError("invalid converter type: '%s'" % ty)
+
+ checkType = classmethod(checkType)
+
+ def getConverter(cls, ty=None):
+ if ty is None:
+ ty = "str"
+ conv = cls.converters.get(ty)
+ if not conv:
+ raise ValueError("no converter for type: '%s'" % ty)
+ return conv
+
+ getConverter = classmethod(getConverter)
+
+ def convertToDB(cls, val, ty=None):
+ return cls.getConverter(ty).toDB(val)
+
+ convertToDB = classmethod(convertToDB)
+
+ def convertFromDB(cls, val, ty=None):
+ return cls.getConverter(ty).fromDB(val)
+
+ convertFromDB = classmethod(convertFromDB)
+
+ # Must define in subclass.
+ name = None
+
+ def __init__(self):
+ self.register()
+
+ def register(self):
+ if not self.name:
+ raise ValueError("invalid converter name: '%s'" % self.name)
+ self.converters[self.name] = self
+
+ def toDB(self, val):
+ raise NotImplementedError()
+
+ def fromDB(self, val):
+ raise NotImplementedError()
+
+class StrConverter(DBConverter):
+
+ name = "str"
+
+ def toDB(self, val):
+ # Convert True/False to 1/0, otherwise they convert to
+ # 'True' and 'False' rather than '1' and '0', even though
+ # isinstance(True/False, int) is true.
+ if isinstance(val, bool):
+ val = int(val)
+ return str(val)
+
+ def fromDB(self, data):
+ return data
+
+StrConverter()
+
+class BoolConverter(DBConverter):
+
+ name = "bool"
+
+ def toDB(self, val):
+ return str(int(bool(val)))
+
+ def fromDB(self, data):
+ return bool(int(data))
+
+BoolConverter()
+
+class SxprConverter(DBConverter):
+
+ name = "sxpr"
+
+ def toDB(self, val):
+ return sxp.to_string(val)
+
+ def fromDB(self, data):
+ return sxp.from_string(data)
+
+SxprConverter()
+
+class IntConverter(DBConverter):
+
+ name = "int"
+
+ def toDB(self, val):
+ return str(int(val))
+
+ def fromDB(self, data):
+ return int(data)
+
+IntConverter()
+
+class FloatConverter(DBConverter):
+
+ name = "float"
+
+ def toDB(self, val):
+ return str(float(val))
+
+ def fromDB(self, data):
+ return float(data)
+
+FloatConverter()
+
+class LongConverter(DBConverter):
+
+ name = "long"
+
+ def toDB(self, val):
+ return str(long(val))
+
+ def fromDB(self, data):
+ return long(data)
+
+LongConverter()
+
+class MacConverter(DBConverter):
+
+ name = "mac"
+
+ def toDB(self, val):
+ return macToString(val)
+
+ def fromDB(self, data):
+ return macFromString(data)
+
+MacConverter()
+
+class DBVar:
+
+ def __init__(self, var, ty=None, path=None):
+ DBConverter.checkType(ty)
+ if path is None:
+ path = var
+ self.var = var
+ self.ty = ty
+ self.path = path
+ varpath = filter(bool, self.var.split())
+ self.attrpath = varpath[:-1]
+ self.attr = varpath[-1]
+
+ def exportToDB(self, db, obj):
+ self.setDB(db, self.getObj(obj))
+
+ def importFromDB(self, db, obj):
+ self.setObj(obj, self.getDB(db))
+
+ def getObj(self, obj):
+ o = obj
+ for x in self.attrpath:
+ o = getAttr(o, x)
+ if o is None:
+ return None
+ return getAttr(o, self.attr)
+
+ def setObj(self, obj, val):
+ o = obj
+ for x in self.attrpath:
+ o = getAttr(o, x)
+ # Don't set obj attr if val is None.
+ if val is None and hasAttr(o, self.attr):
+ return
+ setAttr(o, self.attr, val)
+
+ def getDB(self, db):
+ try:
+ data = getattr(db, self.path)
+ except AttributeError:
+ return None
+ return DBConverter.convertFromDB(data, ty=self.ty)
+
+ def setDB(self, db, val):
+ # Don't set in db if val is None.
+ #print 'DBVar>setDB>', self.path, 'val=', val
+ if val is None:
+ return
+ data = DBConverter.convertToDB(val, ty=self.ty)
+ #print 'DBVar>setDB>', self.path, 'data=', data
+ setattr(db, self.path, data)
+
+
+class DBMap(dict):
+ """A persistent map. Extends dict with persistence.
+ Set and get values using the usual map syntax:
+
+ m[k], m.get(k)
+ m[k] = v
+
+ Also supports being treated as an object with attributes.
+ When 'k' is a legal identifier you may also use
+
+ m.k, getattr(m, k)
+ m.k = v, setattr(m, k)
+ k in m, hasattr(m, k)
+
+ When setting you can pass in a normal value, for example
+
+ m.x = 3
+
+ Getting works too:
+
+ m.x ==> 3
+
+ while m['x'] will return the map for x.
+
+ m['x'].getData() ==> 3
+
+ To get values from subdirs use get() to get the subdir first:
+
+ get(m, 'foo').x
+ m['foo'].x
+
+ instead of m.foo.x, because m.foo will return the data for field foo,
+ not the directory.
+
+ You can assign values into a subdir by passing a map:
+
+ m.foo = {'x': 1, 'y':2 }
+
+ You can also use paths as keys:
+
+ m['foo/x'] = 1
+
+ sets field x in subdir foo.
+
+ """
+
+ __db__ = None
+ __data__ = None
+ __perms__ = None
+ __parent__ = None
+ __name__ = ""
+
+ __transaction__ = False
+
+ # True if value set since saved (or never saved).
+ __dirty__ = True
+
+ def __init__(self, parent=None, name="", db=None):
+ if parent is None:
+ self.__name__ = name
+ else:
+ if not isinstance(parent, DBMap):
+ raise ValueError("invalid parent")
+ self.__parent__ = parent
+ self.__name__ = name
+ db = self.__parent__.getChildDB(name)
+ self.setDB(db)
+
+ def getName(self):
+ return self.__name__
+
+ def getPath(self):
+ return self.__db__ and self.__db__.relPath()
+
+ def introduceDomain(self, dom, page, evtchn, path=None):
+ db = self.__db__
+ if path is None:
+ path = db.relPath()
+ print 'DBMap>introduceDomain>', dom, page, evtchn, path
+ try:
+ db.introduceDomain(dom, page, evtchn, path)
+ except Exception, ex:
+ import traceback
+ traceback.print_exc()
+ print 'DBMap>introduceDomain>', ex
+ pass # todo: don't ignore
+
+ def releaseDomain(self, dom):
+ db = self.__db__
+ print 'DBMap>releaseDomain>', dom
+ try:
+ db.releaseDomain(dom)
+ except Exception, ex:
+ import traceback
+ traceback.print_exc()
+ print 'DBMap>releaseDomain>', ex
+ pass # todo: don't ignore
+
+ def transactionBegin(self):
+ # Begin a transaction.
+ pass
+
+ def transactionCommit(self):
+ # Commit writes to db.
+ pass
+
+ def transactionFail(self):
+ # Fail a transaction.
+ # We have changed values, what do we do?
+ pass
+
+ def watch(self, fn):
+ pass
+
+ def unwatch(self, watch):
+ pass
+
+ def checkName(self, k):
+ if k == "":
+ raise ValueError("invalid key, empty string")
+ for c in k:
+ if c in VALID_KEY_CHARS: continue
+ raise ValueError("invalid key char '%s'" % c)
+
+ def _setData(self, v):
+ #print 'DBMap>_setData>', self.getPath(), 'data=', v
+ if v != self.__data__:
+ self.__dirty__ = True
+ self.__data__ = v
+
+ def setData(self, v):
+ if isinstance(v, dict):
+ for (key, val) in v.items():
+ self[key] = val
+ else:
+ self._setData(v)
+
+ def getData(self):
+ return self.__data__
+
+ def _set(self, k, v):
+ dict.__setitem__(self, k, v)
+
+ def _get(self, k):
+ try:
+ return dict.__getitem__(self, k)
+ except:
+ return None
+
+ def _del(self, k, v):
+ try:
+ dict.__delitem__(self, k)
+ except:
+ pass
+
+ def _contains(self, k):
+ return dict.__contains__(self, k)
+
+ def __setitem__(self, k, v, save=False):
+ node = self.addChild(k)
+ node.setData(v)
+ if save:
+ node.saveDB()
+
+ def __getitem__(self, k):
+ if self._contains(k):
+ v = self._get(k)
+ else:
+ v = self.readChildDB(k)
+ self._set(k, v)
+ return v
+
+ def __delitem__(self, k):
+ self._del(k)
+ self.deleteChildDB(k)
+
+ def __repr__(self):
+ if len(self):
+ return dict.__repr__(self)
+ else:
+ return repr(self.__data__)
+
+ def __setattr__(self, k, v):
+ if k.startswith("__"):
+ object.__setattr__(self, k, v)
+ else:
+ self.__setitem__(k, v, save=True)
+ return v
+
+ def __getattr__(self, k):
+ if k.startswith("__"):
+ v = object.__getattr__(self, k)
+ else:
+ try:
+ v = self.__getitem__(k).getData()
+ except LookupError, ex:
+ raise AttributeError(ex.args)
+ return v
+
+ def __delattr__(self, k):
+ return self.__delitem__(k)
+
+ def delete(self):
+ dict.clear(self)
+ self.__data__ = None
+ if self.__db__:
+ self.__db__.delete()
+
+ def clear(self):
+ dict.clear(self)
+ if self.__db__:
+ self.__db__.deleteChildren()
+
+ def getChild(self, k):
+ return self._get(k)
+
+ def getChildDB(self, k):
+ self.checkName(k)
+ return self.__db__ and self.__db__.getChild(k)
+
+ def deleteChildDB(self, k):
+ if self.__db__:
+ self.__db__.deleteChild(k)
+
+ def _addChild(self, k):
+ kid = self._get(k)
+ if kid is None:
+ kid = DBMap(parent=self, name=k, db=self.getChildDB(k))
+ self._set(k, kid)
+ return kid
+
+ def addChild(self, path):
+ l = path.split("/")
+ n = self
+ for x in l:
+ if x == "": continue
+ n = n._addChild(x)
+ return n
+
+ def setDB(self, db):
+ if (db is not None) and not isinstance(db, XenNode):
+ raise ValueError("invalid db")
+ self.__db__ = db
+ for (k, v) in self.items():
+ if v is None: continue
+ if isinstance(v, DBMap):
+ v._setDB(self.addChild(k), restore)
+
+ def readDB(self):
+ if self.__db__ is None:
+ return
+ self.__data__ = self.__db__.getData()
+ for k in self.__db__.ls():
+ n = self.addChild(k)
+ n.readDB()
+ self.__dirty__ = False
+
+ def readChildDB(self, k):
+ if self.__db__ and (k in self.__db__.ls()):
+ n = self.addChild(k)
+ n.readDB()
+ raise LookupError("invalid key '%s'" % k)
+
+ def saveDB(self, sync=False, save=False):
+ """Save unsaved data to db.
+ If save or sync is true, saves whether dirty or not.
+ If sync is true, removes db entries not in the map.
+ """
+
+ if self.__db__ is None:
+ #print 'DBMap>saveDB>',self.getPath(), 'no db'
+ return
+ # Write data.
+ #print 'DBMap>saveDB>', self.getPath(), 'dirty=', self.__dirty__, 'data=', self.__data__
+ if ((self.__data__ is not None)
+ and (sync or save or self.__dirty__)):
+ self.__db__.setData(self.__data__)
+ self.__dirty__ = False
+ else:
+ #print 'DBMap>saveDB>', self.getPath(), 'not written'
+ pass
+ # Write children.
+ for (name, node) in self.items():
+ if not isinstance(node, DBMap): continue
+ node.saveDB(sync=sync, save=save)
+ # Remove db nodes not in children.
+ if sync:
+ for name in self.__db__.ls():
+ if name not in self:
+ self.__db__.delete(name)
+
+ def importFromDB(self, obj, fields):
+ """Set fields in obj from db fields.
+ """
+ for f in fields:
+ f.importFromDB(self, obj)
+
+ def exportToDB(self, obj, fields, save=False, sync=False):
+ """Set fields in db from obj fields.
+ """
+ for f in fields:
+ f.exportToDB(self, obj)
+ self.saveDB(save=save, sync=sync)
diff --git a/tools/python/xen/xend/xenstore/xsresource.py b/tools/python/xen/xend/xenstore/xsresource.py
new file mode 100644
index 0000000000..37011bdea3
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/xsresource.py
@@ -0,0 +1,136 @@
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@hp.com>
+#============================================================================
+# HTTP interface onto xenstore (read-only).
+# Mainly intended for testing.
+
+import os
+import os.path
+
+from xen.web.httpserver import HttpServer, UnixHttpServer
+from xen.web.SrvBase import SrvBase
+from xen.web.SrvDir import SrvDir
+from xen.xend.Args import FormFn
+from xen.xend.xenstore import XenNode
+
+def pathurl(req):
+ url = req.prePathURL()
+ if not url.endswith('/'):
+ url += '/'
+ return url
+
+def writelist(req, l):
+ req.write('(')
+ for k in l:
+ req.write(' ' + k)
+ req.write(')')
+
+def lsData(dbnode, req, url):
+ v = dbnode.getData()
+ if v is None:
+ req.write('<p>No data')
+ else:
+ req.write('<p>Data: <pre>')
+ req.write(str(v))
+ req.write('</pre>')
+ v = dbnode.getLock()
+ if v is None:
+ req.write("<p>Unlocked")
+ else:
+ req.write("<p>Lock = %s" % v)
+
+def lsChildren(dbnode, req, url):
+ l = dbnode.ls()
+ if l:
+ req.write('<p>Children: <ul>')
+ for key in l:
+ child = dbnode.getChild(key)
+ data = child.getData()
+ if data is None: data = ""
+ req.write('<li><a href="%(url)s%(key)s">%(key)s</a> %(data)s</li>'
+ % { "url": url, "key": key, "data": data })
+ req.write('</ul>')
+ else:
+ req.write('<p>No children')
+
+
+class DBDataResource(SrvBase):
+ """Resource for the node data.
+ """
+
+ def __init__(self, dbnode):
+ SrvBase.__init__(self)
+ self.dbnode = dbnode
+
+ def render_GET(self, req):
+ req.write('<html><head></head><body>')
+ self.print_path(req)
+ req.write("<pre>")
+ req.write(self.getData() or self.getNoData())
+ req.write("</pre>")
+ req.write('</body></html>')
+
+ def getContentType(self):
+ # Use content-type from metadata.
+ return "text/plain"
+
+ def getData(self):
+ v = self.dbnode.getData()
+ if v is None: return v
+ return str(v)
+
+ def getNoData(self):
+ return ""
+
+class DBNodeResource(SrvDir):
+ """Resource for a DB node.
+ """
+
+ def __init__(self, dbnode):
+ SrvDir.__init__(self)
+ self.dbnode = dbnode
+
+ def get(self, x):
+ val = None
+ if x == "__data__":
+ val = DBDataResource(self.dbnode)
+ else:
+ if self.dbnode.exists(x):
+ child = self.dbnode.getChild(x, create=False)
+ else:
+ child = None
+ if child is not None:
+ val = DBNodeResource(child)
+ return val
+
+ def render_POST(self, req):
+ return self.perform(req)
+
+ def ls(self, req, use_sxp=0):
+ if use_sxp:
+ writelist(req, self.dbnode.getChildren())
+ else:
+ url = pathurl(req)
+ req.write("<fieldset>")
+ lsData(self.dbnode, req, url)
+ lsChildren(self.dbnode, req, url)
+ req.write("</fieldset>")
+
+ def form(self, req):
+ url = req.prePathURL()
+ pass
+
+class DBRootResource(DBNodeResource):
+ """Resource for the root of a DB.
+ """
+
+ def __init__(self):
+ DBNodeResource.__init__(self, XenNode())
+
+def main(argv):
+ root = SrvDir()
+ root.putChild('xenstore', DBRootResource())
+ interface = ''
+ port = 8003
+ server = HttpServer(root=root, interface=interface, port=port)
+ server.run()
diff --git a/tools/xenstore/.gdbinit b/tools/xenstore/.gdbinit
new file mode 100644
index 0000000000..9a71b20ac4
--- /dev/null
+++ b/tools/xenstore/.gdbinit
@@ -0,0 +1,4 @@
+set environment XENSTORED_RUNDIR=testsuite/tmp
+set environment XENSTORED_ROOTDIR=testsuite/tmp
+handle SIGUSR1 noprint nostop
+handle SIGPIPE noprint nostop
diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile
new file mode 100644
index 0000000000..cd4a7b3079
--- /dev/null
+++ b/tools/xenstore/Makefile
@@ -0,0 +1,97 @@
+XEN_ROOT=../..
+# This does something wrong to TARGET_ARCH.
+#include $(XEN_ROOT)/tools/Rules.mk
+LIBDIR = lib
+XEN_LIBXC = $(XEN_ROOT)/tools/libxc
+
+INSTALL = install
+INSTALL_DATA = $(INSTALL) -m0644
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+PROFILE=#-pg
+BASECFLAGS=-Wall -W -g
+# Make gcc generate dependencies.
+BASECFLAGS += -Wp,-MD,.$(@F).d
+PROG_DEP = .*.d
+#BASECFLAGS+= -O3 $(PROFILE)
+#BASECFLAGS+= -I$(XEN_ROOT)/tools
+BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc
+BASECFLAGS+= -I$(XEN_ROOT)/xen/include/public
+BASECFLAGS+= -I.
+
+CFLAGS+=$(BASECFLAGS)
+LDFLAGS=$(PROFILE) -L$(XEN_LIBXC)
+TESTDIR=`pwd`/testsuite/tmp
+TESTFLAGS=-DTESTING
+TESTENV=XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)
+
+all: xen xenstored libxenstore.a
+
+testcode: xen xs_test xenstored_test xs_random
+
+xen:
+ ln -sf $(XEN_ROOT)/xen/include/public $@
+
+xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@
+
+xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+xs_test: xs_test.o xs_lib.o utils.o
+xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o
+xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
+
+xs_test.o xs_stress.o xenstored_core_test.o xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS)
+
+xenstored_%_test.o: xenstored_%.c
+ $(COMPILE.c) -o $@ $<
+
+xs_test_lib.o: xs.c
+ $(COMPILE.c) -o $@ $<
+
+talloc_test.o: talloc.c
+ $(COMPILE.c) -o $@ $<
+
+libxenstore.a: libxenstore.a(xs.o) libxenstore.a(xs_lib.o)
+
+clean: testsuite-clean
+ rm -f *.o *.a xs_test xenstored xenstored_test xs_random xs_stress xen
+ -$(RM) $(PROG_DEP)
+
+check: testsuite-run randomcheck stresstest
+
+testsuite-run: xen xenstored_test xs_test
+ $(TESTENV) testsuite/test.sh
+
+testsuite-clean:
+ rm -rf $(TESTDIR)
+
+# Seed shared by the three xs_random runs below.  It is expanded once (:=)
+# so every run gets the same value, and it appears on each echoed command
+# line so a failing run can be repeated by hand (e.g. without --fast).
+# Override with "make randomcheck RANDSEED=<n>".
+RANDSEED := $(shell date +%s)
+randomcheck: xs_random xenstored_test
+	$(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED)
+	$(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED)
+	$(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED)
+
+stresstest: xs_stress xenstored_test
+ rm -rf $(TESTDIR)/store
+ export $(TESTENV); PID=`./xenstored_test --output-pid`; ./xs_stress 10000; ret=$$?; kill $$PID; exit $$ret
+
+TAGS:
+ etags `find . -name '*.[ch]'`
+
+tarball: clean
+ cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/
+
+install: xenstored libxenstore.a
+ $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
+ $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
+ $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxenstore.a $(DESTDIR)/usr/$(LIBDIR)
+
+-include $(PROG_DEP)
diff --git a/tools/xenstore/TODO b/tools/xenstore/TODO
new file mode 100644
index 0000000000..9e22afe536
--- /dev/null
+++ b/tools/xenstore/TODO
@@ -0,0 +1,7 @@
+TODO in no particular order. Some of these will never be done. There
+are omissions of important but necessary things. It is up to the
+reader to fill in the blanks.
+
+- Remove calls to system() from daemon
+- Timeout failed watch responses
+- Timeout blocking transactions
diff --git a/tools/xenstore/fake_libxc.c b/tools/xenstore/fake_libxc.c
new file mode 100644
index 0000000000..decfb4001d
--- /dev/null
+++ b/tools/xenstore/fake_libxc.c
@@ -0,0 +1,119 @@
+/*
+ Fake libxc which doesn't require hypervisor but talks to xs_test.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <assert.h>
+#include <signal.h>
+#include "utils.h"
+#include "xenstored_core.h"
+#include "xenstored_domain.h"
+#include "xenstored_test.h"
+
+static int sigfd;
+static int xs_test_pid;
+static u16 port;
+
+/* The event channel maps to a signal, shared page to an mmapped file. */
+int xc_evtchn_send(int xc_handle __attribute__((unused)), int local_port)
+{
+ assert(local_port == port);
+ if (kill(xs_test_pid, SIGUSR2) != 0)
+ barf_perror("fake event channel failed");
+ return 0;
+}
+
+/*
+ * Fake mapping of a foreign page: maps `size` bytes from the start of the
+ * file descriptor passed as xc_handle, shared, with protection `prot`.
+ * `dom` and `mfn` are ignored.
+ *
+ * Also performs the handshake through the mapped buffer: reads an int at
+ * byte offset 32 into xs_test_pid and an int at offset 36 into port (a
+ * u16, so the value is truncated), then stores our own pid at offset 32.
+ *
+ * Returns the mapping, or NULL if mmap() fails.
+ */
+void *xc_map_foreign_range(int xc_handle, u32 dom __attribute__((unused)),
+ int size, int prot,
+ unsigned long mfn __attribute__((unused)))
+{
+ void *ret;
+
+ ret = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
+ if (ret == MAP_FAILED)
+ return NULL;
+
+ /* xs_test tells us pid and port by putting it in buffer, we reply. */
+ /* Note: arithmetic on void * relies on a compiler extension. */
+ xs_test_pid = *(int *)(ret + 32);
+ port = *(int *)(ret + 36);
+ *(int *)(ret + 32) = getpid();
+ return ret;
+}
+
+int xc_interface_open(void)
+{
+ int fd;
+ char page[getpagesize()];
+
+ fd = open("/tmp/xcmap", O_RDWR|O_CREAT|O_TRUNC, 0600);
+ if (fd < 0)
+ return fd;
+
+ memset(page, 0, sizeof(page));
+ if (!write_all(fd, page, sizeof(page)))
+ barf_perror("Failed to write /tmp/xcmap page");
+
+ return fd;
+}
+
+int xc_interface_close(int xc_handle)
+{
+ close(xc_handle);
+ return 0;
+}
+
+static void send_to_fd(int signo __attribute__((unused)))
+{
+ int saved_errno = errno;
+ write(sigfd, &port, sizeof(port));
+ errno = saved_errno;
+}
+
+void fake_block_events(void)
+{
+ signal(SIGUSR2, SIG_IGN);
+}
+
+void fake_ack_event(void)
+{
+ signal(SIGUSR2, send_to_fd);
+}
+
+int fake_open_eventchn(void)
+{
+ int fds[2];
+
+ if (pipe(fds) != 0)
+ return -1;
+
+ if (signal(SIGUSR2, send_to_fd) == SIG_ERR) {
+ int saved_errno = errno;
+ close(fds[0]);
+ close(fds[1]);
+ errno = saved_errno;
+ return -1;
+ }
+ sigfd = fds[1];
+ return fds[0];
+}
diff --git a/tools/xenstore/list.h b/tools/xenstore/list.h
new file mode 100644
index 0000000000..eb35293d7f
--- /dev/null
+++ b/tools/xenstore/list.h
@@ -0,0 +1,508 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+/* Taken from Linux kernel code, but de-kernelized for userspace. */
+#include <stddef.h>
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *) 0x00100100)
+#define LIST_POISON2 ((void *) 0x00200200)
+
+/**
+ * container_of - map a pointer to a member back to its enclosing struct
+ * @ptr: pointer to the member.
+ * @type: type of the enclosing struct.
+ * @member: name of the member within @type.
+ *
+ * The typed temporary __mptr lets the compiler check @ptr against the
+ * member's type.  Uses the GCC statement-expression and typeof extensions.
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/**
+ * list_top - get the first entry of a list, or NULL if the list is empty
+ * @head: the list head.
+ * @type: the type of the struct the list_head is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * @head is evaluated exactly once (it is copied into _head).
+ */
+#define list_top(head, type, member) \
+({ \
+ struct list_head *_head = (head); \
+ list_empty(_head) ? NULL : list_entry(_head->next, type, member); \
+})
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add_rcu(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ new->next = next;
+ new->prev = prev;
+ next->prev = new;
+ prev->next = new;
+}
+
+/**
+ * list_add_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add_rcu(struct list_head *new, struct list_head *head)
+{
+ __list_add_rcu(new, head, head->next);
+}
+
+/**
+ * list_add_tail_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail_rcu(struct list_head *new, struct list_head *head)
+{
+ __list_add_rcu(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_rcu - deletes entry from list without re-initialization
+ * @entry: the element to delete from the list.
+ *
+ * Note: list_empty on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the list.
+ */
+static inline void list_del_rcu(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+ struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(struct list_head *head)
+{
+ return head->next == head;
+}
+
+/*
+ * Link every entry of @list in between @head and @head->next.
+ *
+ * The fields of @list itself are not written, so @list still points
+ * into the spliced entries afterwards.  list_splice() and
+ * list_splice_init() only call this for a non-empty @list.
+ */
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list)) {
+ __list_splice(list, head);
+ INIT_LIST_HEAD(list);
+ }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+
+/**
+ * list_for_each_entry_continue - iterate over list of given type
+ * continuing after existing point
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL)
+
+/* Non-zero if @h has a NULL pprev, the state INIT_HLIST_NODE() leaves a node in. */
+static __inline__ int hlist_unhashed(struct hlist_node *h)
+{
+ return !h->pprev;
+}
+
+/* Non-zero if hash list @h has no first entry. */
+static __inline__ int hlist_empty(struct hlist_head *h)
+{
+ return !h->first;
+}
+
+/*
+ * Unlink @n: the pointer that referred to @n (*pprev) is made to refer
+ * to @n's successor, and the successor's pprev is updated if there is a
+ * successor.  @n's own fields are left as they were.
+ */
+static __inline__ void __hlist_del(struct hlist_node *n)
+{
+ struct hlist_node *next = n->next;
+ struct hlist_node **pprev = n->pprev;
+ *pprev = next;
+ if (next)
+ next->pprev = pprev;
+}
+
+/*
+ * Unlink @n and poison both of its pointers.  Because pprev becomes
+ * LIST_POISON2 rather than NULL, hlist_unhashed(@n) is not true afterwards.
+ */
+static __inline__ void hlist_del(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_del_rcu - deletes entry from hash list without re-initialization
+ * @entry: the element to delete from the hash list.
+ *
+ * Note: list_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ */
+static inline void hlist_del_rcu(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->pprev = LIST_POISON2;
+}
+
+/*
+ * Unlink @n if it is on a list (pprev set) and reset it with
+ * INIT_HLIST_NODE(); does nothing when pprev is already NULL.
+ */
+static __inline__ void hlist_del_init(struct hlist_node *n)
+{
+	if (!n->pprev)
+		return;
+
+	__hlist_del(n);
+	INIT_HLIST_NODE(n);
+}
+
+#define hlist_del_rcu_init hlist_del_init
+
+/* Insert @n as the first entry of hash list @h. */
+static __inline__ void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+ n->pprev = &h->first;
+}
+
+/*
+ * Insert @n as the first entry of hash list @h.  Ends in the same state
+ * as hlist_add_head(), but both fields of @n are filled in before @n is
+ * stored into @h->first.  No memory barrier is issued in this version.
+ */
+static __inline__ void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ n->pprev = &h->first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+}
+
+/*
+ * Insert @n immediately before @next.
+ * @next must be != NULL and must already be linked: its pprev is copied
+ * into @n and then written through.
+ */
+static __inline__ void hlist_add_before(struct hlist_node *n, struct hlist_node *next)
+{
+ n->pprev = next->pprev;
+ n->next = next;
+ next->pprev = &n->next;
+ *(n->pprev) = n;
+}
+
+/*
+ * Insert @next immediately after @n.
+ * @n: the node already in the list.
+ * @next: the new node; its fields need not be initialised beforehand.
+ *
+ * The new node's pprev must point at @n->next, and the old successor
+ * (if any) must have its pprev redirected to the new node's next field.
+ * Nothing may be written through @next->pprev before it has been set.
+ */
+static __inline__ void hlist_add_after(struct hlist_node *n,
+				       struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	if (next->next)
+		next->next->pprev = &next->next;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+/**
+ * hlist_for_each - iterate over a hash list
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @head: the &struct hlist_head to walk.
+ *
+ * Cannot easily do prefetch unfortunately.
+ */
+#define hlist_for_each(pos, head) \
+ for (pos = (head)->first; pos; pos = pos->next)
+
+/**
+ * hlist_for_each_safe - iterate over a hash list safe against removal of @pos
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @n: another &struct hlist_node; holds pos->next, read before the loop body runs.
+ * @head: the &struct hlist_head to walk.
+ */
+#define hlist_for_each_safe(pos, n, head) \
+ for (pos = (head)->first; n = pos ? pos->next : 0, pos; \
+ pos = n)
+
+/**
+ * hlist_for_each_entry - iterate over list of given type
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member) \
+ for (pos = (pos)->next; \
+ pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from existing point
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member) \
+ for (; pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @n: another &struct hlist_node to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ n = pos->next; 1; }) && \
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = n)
+
+#endif
diff --git a/tools/xenstore/talloc.c b/tools/xenstore/talloc.c
new file mode 100644
index 0000000000..8e93c28fe3
--- /dev/null
+++ b/tools/xenstore/talloc.c
@@ -0,0 +1,1143 @@
+/*
+ Samba Unix SMB/CIFS implementation.
+
+ Samba trivial allocation library - new interface
+
+ NOTE: Please read talloc_guide.txt for full documentation
+
+ Copyright (C) Andrew Tridgell 2004
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/*
+ inspired by http://swapped.cc/halloc/
+*/
+
+
+#ifdef _SAMBA_BUILD_
+#include "includes.h"
+#if ((SAMBA_VERSION_MAJOR==3)&&(SAMBA_VERSION_MINOR<9))
+/* This is to circumvent SAMBA3's paranoid malloc checker. Here in this file
+ * we trust ourselves... */
+#ifdef malloc
+#undef malloc
+#endif
+#ifdef realloc
+#undef realloc
+#endif
+#endif
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include "talloc.h"
+/* assume a modern system */
+#define HAVE_VA_COPY
+#endif
+
+/* use this to force every realloc to change the pointer, to stress test
+ code that might not cope */
+#ifdef TESTING
+#define ALWAYS_REALLOC 1
+void *test_malloc(size_t size);
+#define malloc test_malloc
+#endif
+
+#define MAX_TALLOC_SIZE 0x10000000
+#define TALLOC_MAGIC 0xe814ec4f
+#define TALLOC_MAGIC_FREE 0x7faebef3
+#define TALLOC_MAGIC_REFERENCE ((const char *)1)
+
+/* by default we abort when given a bad pointer (such as when talloc_free() is called
+ on a pointer that came from malloc() */
+#ifndef TALLOC_ABORT
+#define TALLOC_ABORT(reason) abort()
+#endif
+
+#ifndef discard_const_p
+#if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T)
+# define discard_const_p(type, ptr) ((type *)((intptr_t)(ptr)))
+#else
+# define discard_const_p(type, ptr) ((type *)(ptr))
+#endif
+#endif
+
+/* this null_context is only used if talloc_enable_leak_report() or
+ talloc_enable_leak_report_full() is called, otherwise it remains
+ NULL
+*/
+static const void *null_context;
+static void *cleanup_context;
+static int (*malloc_fail_handler)(void *);
+static void *malloc_fail_data;
+
+struct talloc_reference_handle {
+ struct talloc_reference_handle *next, *prev;
+ void *ptr;
+};
+
+typedef int (*talloc_destructor_t)(void *);
+
+struct talloc_chunk {
+ struct talloc_chunk *next, *prev;
+ struct talloc_chunk *parent, *child;
+ struct talloc_reference_handle *refs;
+ size_t size;
+ unsigned magic;
+ talloc_destructor_t destructor;
+ const char *name;
+};
+
+/*
+  return the chunk header that sits immediately before a talloc pointer.
+  TALLOC_ABORT() is invoked if the header's magic is not TALLOC_MAGIC,
+  with a distinct reason when it is TALLOC_MAGIC_FREE.
+*/
+static struct talloc_chunk *talloc_chunk_from_ptr(const void *ptr)
+{
+	struct talloc_chunk *hdr = discard_const_p(struct talloc_chunk, ptr)-1;
+
+	if (hdr->magic == TALLOC_MAGIC) {
+		return hdr;
+	}
+
+	if (hdr->magic == TALLOC_MAGIC_FREE) {
+		TALLOC_ABORT("Bad talloc magic value - double free");
+	} else {
+		TALLOC_ABORT("Bad talloc magic value - unknown value");
+	}
+
+	return hdr;
+}
+
+/* hook into the front of the list */
+#define _TLIST_ADD(list, p) \
+do { \
+ if (!(list)) { \
+ (list) = (p); \
+ (p)->next = (p)->prev = NULL; \
+ } else { \
+ (list)->prev = (p); \
+ (p)->next = (list); \
+ (p)->prev = NULL; \
+ (list) = (p); \
+ }\
+} while (0)
+
+/* remove an element from a list - element doesn't have to be in list. */
+#define _TLIST_REMOVE(list, p) \
+do { \
+ if ((p) == (list)) { \
+ (list) = (p)->next; \
+ if (list) (list)->prev = NULL; \
+ } else { \
+ if ((p)->prev) (p)->prev->next = (p)->next; \
+ if ((p)->next) (p)->next->prev = (p)->prev; \
+ } \
+ if ((p) && ((p) != (list))) (p)->next = (p)->prev = NULL; \
+} while (0)
+
+
+/*
+  return the parent chunk of a pointer, or NULL if it has none.
+
+  Only the chunk at the head of a child list keeps its parent pointer
+  (_talloc() and talloc_steal() clear it on the previous head when they
+  push a new child), so walk back to the head of the sibling list first.
+*/
+static struct talloc_chunk *talloc_parent_chunk(const void *ptr)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ while (tc->prev) tc=tc->prev;
+ return tc->parent;
+}
+
+/*
+  return the parent of a talloc pointer, or NULL if the pointer has no
+  parent chunk
+*/
+void *talloc_parent(const void *ptr)
+{
+	struct talloc_chunk *tc = talloc_parent_chunk(ptr);
+
+	/* a parentless pointer must yield NULL, not the address one
+	   chunk header past a NULL chunk pointer */
+	if (tc == NULL) {
+		return NULL;
+	}
+	return (void *)(tc+1);
+}
+
+/*
+  Allocate a bit of memory as a child of an existing pointer.
+
+  A NULL context is replaced by null_context. Returns NULL if size is
+  MAX_TALLOC_SIZE or more, or if malloc() fails and either no
+  malloc_fail_handler is set, the handler returns zero, or the single
+  retry after the handler fails as well.
+*/
+void *_talloc(const void *context, size_t size)
+{
+	struct talloc_chunk *tc;
+	struct talloc_chunk *parent;
+
+	if (context == NULL) {
+		context = null_context;
+	}
+
+	if (size >= MAX_TALLOC_SIZE) {
+		return NULL;
+	}
+
+	tc = malloc(sizeof(*tc)+size);
+	if (tc == NULL && malloc_fail_handler
+	    && malloc_fail_handler(malloc_fail_data)) {
+		/* handler returned non-zero: retry the allocation once */
+		tc = malloc(sizeof(*tc)+size);
+	}
+	if (tc == NULL) {
+		return NULL;
+	}
+
+	tc->size = size;
+	tc->magic = TALLOC_MAGIC;
+	tc->destructor = NULL;
+	tc->child = NULL;
+	tc->name = NULL;
+	tc->refs = NULL;
+
+	if (!context) {
+		tc->next = tc->prev = tc->parent = NULL;
+		return (void *)(tc+1);
+	}
+
+	parent = talloc_chunk_from_ptr(context);
+	tc->parent = parent;
+
+	/* only the head of the child list keeps a parent pointer */
+	if (parent->child) {
+		parent->child->parent = NULL;
+	}
+	_TLIST_ADD(parent->child, tc);
+
+	return (void *)(tc+1);
+}
+
+
+/*
+ setup a destructor to be called on free of a pointer
+ the destructor should return 0 on success, or -1 on failure.
+ if the destructor fails then the free is failed, and the memory can
+ be continued to be used
+*/
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *))
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ tc->destructor = destructor;
+}
+
+/*
+ increase the reference count on a piece of memory.
+*/
+void talloc_increase_ref_count(const void *ptr)
+{
+ talloc_reference(null_context, ptr);
+}
+
+/*
+  helper for talloc_reference(): the destructor installed on every
+  reference handle. It unhooks the handle from the refs list of the chunk
+  it refers to and then calls talloc_free() on the handle.
+
+  talloc_free() sets a chunk's destructor to (talloc_destructor_t)-1 while
+  that destructor runs; in that case the nested talloc_free(handle) below
+  returns -1 and the outer talloc_free() finishes the job. When called
+  directly (talloc_free() on a pointer that still has references) the
+  handle's destructor is cleared first, so talloc_free(handle) does not
+  call back into this function.
+*/
+static int talloc_reference_destructor(void *ptr)
+{
+ struct talloc_reference_handle *handle = ptr;
+ struct talloc_chunk *tc1 = talloc_chunk_from_ptr(ptr);
+ struct talloc_chunk *tc2 = talloc_chunk_from_ptr(handle->ptr);
+ if (tc1->destructor != (talloc_destructor_t)-1) {
+ tc1->destructor = NULL;
+ }
+ _TLIST_REMOVE(tc2->refs, handle);
+ talloc_free(handle);
+ return 0;
+}
+
+/*
+  make a secondary reference to a pointer, hanging off the given context.
+  the pointer remains valid until both the original caller and this given
+  context are freed.
+
+  the major use for this is when two different structures need to
+  reference the same underlying data, and you want to be able to free the
+  two instances separately, and in either order
+
+  returns ptr, or NULL if ptr is NULL or the reference handle could not
+  be allocated
+*/
+void *talloc_reference(const void *context, const void *ptr)
+{
+	struct talloc_reference_handle *handle;
+	struct talloc_chunk *target;
+
+	if (ptr == NULL) {
+		return NULL;
+	}
+
+	target = talloc_chunk_from_ptr(ptr);
+
+	handle = talloc_named_const(context, sizeof(*handle), TALLOC_MAGIC_REFERENCE);
+	if (handle == NULL) {
+		return NULL;
+	}
+
+	/* the destructor hangs off the handle rather than the main
+	   context, which leaves the caller free to set up their own
+	   destructor on the context */
+	talloc_set_destructor(handle, talloc_reference_destructor);
+	handle->ptr = discard_const_p(void, ptr);
+	_TLIST_ADD(target->refs, handle);
+
+	return handle->ptr;
+}
+
+/*
+  remove a secondary reference to a pointer. This undoes what
+  talloc_reference() has done. The context and pointer arguments
+  must match those given to a talloc_reference()
+
+  returns 0 if a matching reference handle was found and removed,
+  -1 if ptr has no reference owned by context
+*/
+static int talloc_unreference(const void *context, const void *ptr)
+{
+	struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+	struct talloc_reference_handle *ref;
+
+	if (context == NULL) {
+		context = null_context;
+	}
+
+	/* look for the first handle whose parent is the given context */
+	for (ref = tc->refs; ref != NULL; ref = ref->next) {
+		struct talloc_chunk *owner = talloc_parent_chunk(ref);
+		int both_null = (owner == NULL && context == NULL);
+
+		if (both_null || owner+1 == context) {
+			break;
+		}
+	}
+
+	if (ref == NULL) {
+		return -1;
+	}
+
+	/* clear the destructor before freeing: the handle is unhooked
+	   from the refs list right here instead */
+	talloc_set_destructor(ref, NULL);
+	_TLIST_REMOVE(tc->refs, ref);
+	talloc_free(ref);
+	return 0;
+}
+
+/*
+ remove a specific parent context from a pointer. This is a more
+ controlled variant of talloc_free()
+
+ returns -1 if ptr is NULL or if context is neither a reference owner
+ nor the parent of ptr. Otherwise returns 0, except that when context is
+ the parent and ptr has no references the result of talloc_free(ptr) is
+ returned.
+*/
+int talloc_unlink(const void *context, void *ptr)
+{
+ struct talloc_chunk *tc_p, *new_p;
+ void *new_parent;
+
+ if (ptr == NULL) {
+ return -1;
+ }
+
+ if (context == NULL) {
+ context = null_context;
+ }
+
+ /* if context merely holds a reference to ptr, dropping it is all we do */
+ if (talloc_unreference(context, ptr) == 0) {
+ return 0;
+ }
+
+ /* otherwise context has to be the actual parent of ptr */
+ if (context == NULL) {
+ if (talloc_parent_chunk(ptr) != NULL) {
+ return -1;
+ }
+ } else {
+ if (talloc_chunk_from_ptr(context) != talloc_parent_chunk(ptr)) {
+ return -1;
+ }
+ }
+
+ tc_p = talloc_chunk_from_ptr(ptr);
+
+ /* no references left: unlinking the parent is a plain free */
+ if (tc_p->refs == NULL) {
+ return talloc_free(ptr);
+ }
+
+ /* the owner of the first reference handle becomes the new parent */
+ new_p = talloc_parent_chunk(tc_p->refs);
+ if (new_p) {
+ new_parent = new_p+1;
+ } else {
+ new_parent = NULL;
+ }
+
+ /* that owner's reference handle is replaced by real parentage below */
+ if (talloc_unreference(new_parent, ptr) != 0) {
+ return -1;
+ }
+
+ talloc_steal(new_parent, ptr);
+
+ return 0;
+}
+
+/*
+ add a name to an existing pointer - va_list version
+*/
+static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+
+static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ tc->name = talloc_vasprintf(ptr, fmt, ap);
+ if (tc->name) {
+ talloc_set_name_const(tc->name, ".name");
+ }
+}
+
+/*
+ add a name to an existing pointer
+*/
+void talloc_set_name(const void *ptr, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+}
+
+/*
+ more efficient way to add a name to a pointer - the name must point to a
+ true string constant
+*/
+void talloc_set_name_const(const void *ptr, const char *name)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ tc->name = name;
+}
+
+/*
+ create a named talloc pointer. Any talloc pointer can be named, and
+ talloc_named() operates just like talloc() except that it allows you
+ to name the pointer.
+*/
+void *talloc_named(const void *context, size_t size, const char *fmt, ...)
+{
+ va_list ap;
+ void *ptr;
+
+ ptr = _talloc(context, size);
+ if (ptr == NULL) return NULL;
+
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+
+ return ptr;
+}
+
+/*
+  create a named talloc pointer from a constant name. This operates like
+  talloc_named() except that the name pointer is stored as given (via
+  talloc_set_name_const()) instead of being formatted into a new string.
+
+  returns NULL if the allocation fails
+*/
+void *talloc_named_const(const void *context, size_t size, const char *name)
+{
+	void *ptr = _talloc(context, size);
+
+	if (ptr != NULL) {
+		talloc_set_name_const(ptr, name);
+	}
+
+	return ptr;
+}
+
+/*
+  return the name of a talloc ptr: ".reference" for a reference handle,
+  the stored name if one is set, or "UNNAMED" otherwise
+*/
+const char *talloc_get_name(const void *ptr)
+{
+	const char *name = talloc_chunk_from_ptr(ptr)->name;
+
+	if (name == TALLOC_MAGIC_REFERENCE) {
+		return ".reference";
+	}
+	return name ? name : "UNNAMED";
+}
+
+
+/*
+  check if a pointer has the given name. If it does, return the pointer,
+  otherwise return NULL. A NULL ptr also gives NULL.
+*/
+void *talloc_check_name(const void *ptr, const char *name)
+{
+	const char *pname;
+
+	if (ptr == NULL) {
+		return NULL;
+	}
+
+	pname = talloc_get_name(ptr);
+
+	/* identical pointers match without needing a string compare */
+	if (pname != name && strcmp(pname, name) != 0) {
+		return NULL;
+	}
+	return discard_const_p(void, ptr);
+}
+
+
+/*
+ this is for compatibility with older versions of talloc
+*/
+void *talloc_init(const char *fmt, ...)
+{
+ va_list ap;
+ void *ptr;
+
+ ptr = _talloc(NULL, 0);
+ if (ptr == NULL) return NULL;
+
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+
+ return ptr;
+}
+
+/*
+ this is a replacement for the Samba3 talloc_destroy_pool functionality. It
+ should probably not be used in new code. It's in here to keep the talloc
+ code consistent across Samba 3 and 4.
+
+ calls talloc_free() on each child of ptr in turn; a child for which
+ talloc_free() returns -1 is moved to another parent with talloc_steal()
+ so that the loop always makes progress. A NULL ptr is a no-op.
+*/
+void talloc_free_children(void *ptr)
+{
+ struct talloc_chunk *tc;
+
+ if (ptr == NULL) {
+ return;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ while (tc->child) {
+ /* we need to work out who will own an abandoned child
+ if it cannot be freed. In priority order, the first
+ choice is owner of any remaining reference to this
+ pointer, the second choice is our parent, and the
+ final choice is the null context. */
+ void *child = tc->child+1;
+ const void *new_parent = null_context;
+ /* the reference owner is looked up before talloc_free(), which
+ drops the first reference handle when the child has references */
+ if (tc->child->refs) {
+ struct talloc_chunk *p = talloc_parent_chunk(tc->child->refs);
+ if (p) new_parent = p+1;
+ }
+ if (talloc_free(child) == -1) {
+ if (new_parent == null_context) {
+ struct talloc_chunk *p = talloc_parent_chunk(ptr);
+ if (p) new_parent = p+1;
+ }
+ talloc_steal(new_parent, child);
+ }
+ }
+}
+
+/*
+ free a talloc pointer. This also frees all child pointers of this
+ pointer recursively
+
+ return 0 if the memory is actually freed, otherwise -1. The memory
+ will not be freed if ptr is NULL, if the pointer still has references
+ (one reference is dropped instead), if its destructor is already
+ running, or if the destructor returns -1
+*/
+int talloc_free(void *ptr)
+{
+ struct talloc_chunk *tc;
+
+ if (ptr == NULL) {
+ return -1;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ /* still referenced: remove the first reference handle and keep the memory */
+ if (tc->refs) {
+ talloc_reference_destructor(tc->refs);
+ return -1;
+ }
+
+ if (tc->destructor) {
+ talloc_destructor_t d = tc->destructor;
+ /* -1 marks a destructor that is currently running: refuse to recurse */
+ if (d == (talloc_destructor_t)-1) {
+ return -1;
+ }
+ tc->destructor = (talloc_destructor_t)-1;
+ if (d(ptr) == -1) {
+ /* destructor vetoed the free: put it back for next time */
+ tc->destructor = d;
+ return -1;
+ }
+ tc->destructor = NULL;
+ }
+
+ talloc_free_children(ptr);
+
+ if (tc->parent) {
+ _TLIST_REMOVE(tc->parent->child, tc);
+ /* the new head of the child list takes over the parent pointer */
+ if (tc->parent->child) {
+ tc->parent->child->parent = tc->parent;
+ }
+ } else {
+ if (tc->prev) tc->prev->next = tc->next;
+ if (tc->next) tc->next->prev = tc->prev;
+ }
+
+ /* mark the header so a later use of ptr is reported as a double free */
+ tc->magic = TALLOC_MAGIC_FREE;
+
+ free(tc);
+ return 0;
+}
+
+
+
+/*
+  A talloc version of realloc. The context argument is only used if
+  ptr is NULL
+
+  size == 0 calls talloc_free(ptr) and returns NULL. NULL is also
+  returned, with ptr left usable, if size is MAX_TALLOC_SIZE or more, if
+  ptr has references, or if no memory could be obtained even after the
+  malloc_fail_handler (if any) was given a chance. On success the
+  (possibly moved) pointer is returned and its name is set to name.
+*/
+void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name)
+{
+	struct talloc_chunk *tc;
+	void *new_ptr;
+
+	/* size zero is equivalent to free() */
+	if (size == 0) {
+		talloc_free(ptr);
+		return NULL;
+	}
+
+	if (size >= MAX_TALLOC_SIZE) {
+		return NULL;
+	}
+
+	/* realloc(NULL) is equivalent to malloc() */
+	if (ptr == NULL) {
+		return talloc_named_const(context, size, name);
+	}
+
+	tc = talloc_chunk_from_ptr(ptr);
+
+	/* don't allow realloc on referenced pointers */
+	if (tc->refs) {
+		return NULL;
+	}
+
+	/* by resetting magic we catch users of the old memory */
+	tc->magic = TALLOC_MAGIC_FREE;
+
+#if ALWAYS_REALLOC
+	new_ptr = malloc(size + sizeof(*tc));
+	if (!new_ptr) {
+		/* the chunk is still live: restore its magic before the
+		   handler runs */
+		tc->magic = TALLOC_MAGIC;
+		if (malloc_fail_handler)
+			if (malloc_fail_handler(malloc_fail_data))
+				new_ptr = malloc(size + sizeof(*tc));
+	}
+	if (new_ptr) {
+		/* when shrinking, copy only as much payload as the new
+		   block can hold */
+		size_t keep = tc->size < size ? tc->size : size;
+		memcpy(new_ptr, tc, keep + sizeof(*tc));
+		free(tc);
+	}
+#else
+	new_ptr = realloc(tc, size + sizeof(*tc));
+	if (!new_ptr) {
+		/* realloc failed, so tc is still live: restore its magic
+		   before the handler runs */
+		tc->magic = TALLOC_MAGIC;
+		if (malloc_fail_handler)
+			if (malloc_fail_handler(malloc_fail_data))
+				new_ptr = realloc(tc, size + sizeof(*tc));
+	}
+#endif
+	if (!new_ptr) {
+		tc->magic = TALLOC_MAGIC;
+		return NULL;
+	}
+
+	/* the chunk may have moved: repoint every neighbour at it */
+	tc = new_ptr;
+	tc->magic = TALLOC_MAGIC;
+	if (tc->parent) {
+		tc->parent->child = new_ptr;
+	}
+	if (tc->child) {
+		tc->child->parent = new_ptr;
+	}
+
+	if (tc->prev) {
+		tc->prev->next = tc;
+	}
+	if (tc->next) {
+		tc->next->prev = tc;
+	}
+
+	tc->size = size;
+	talloc_set_name_const(tc+1, name);
+
+	return (void *)(tc+1);
+}
+
+/*
+  move a lump of memory from one talloc context to another and return
+  ptr. A NULL new_ctx is replaced by null_context; if that is NULL too,
+  ptr ends up with no parent. Stealing a pointer onto itself changes
+  nothing.
+  passing NULL as ptr will always return NULL with no side effects.
+*/
+void *talloc_steal(const void *new_ctx, const void *ptr)
+{
+	struct talloc_chunk *tc, *new_tc = NULL;
+
+	if (!ptr) {
+		return NULL;
+	}
+
+	if (new_ctx == NULL) {
+		new_ctx = null_context;
+	}
+
+	tc = talloc_chunk_from_ptr(ptr);
+
+	if (new_ctx != NULL) {
+		new_tc = talloc_chunk_from_ptr(new_ctx);
+		if (tc == new_tc) {
+			return discard_const_p(void, ptr);
+		}
+	}
+
+	/* detach ptr from its current parent or sibling list */
+	if (tc->parent) {
+		_TLIST_REMOVE(tc->parent->child, tc);
+		if (tc->parent->child) {
+			tc->parent->child->parent = tc->parent;
+		}
+	} else {
+		if (tc->prev) tc->prev->next = tc->next;
+		if (tc->next) tc->next->prev = tc->prev;
+	}
+
+	if (new_ctx == NULL) {
+		/* no new owner: ptr becomes a parentless chunk */
+		tc->parent = tc->next = tc->prev = NULL;
+		return discard_const_p(void, ptr);
+	}
+
+	/* push ptr on the front of the new parent's child list; the old
+	   head gives up its parent pointer */
+	tc->parent = new_tc;
+	if (new_tc->child) new_tc->child->parent = NULL;
+	_TLIST_ADD(new_tc->child, tc);
+
+	return discard_const_p(void, ptr);
+}
+
+/*
+  return the total size of a talloc pool (subtree): the size of ptr's own
+  chunk plus, recursively, that of every child. A NULL ptr means
+  null_context; 0 is returned if that is NULL as well.
+*/
+off_t talloc_total_size(const void *ptr)
+{
+	struct talloc_chunk *tc, *child;
+	off_t total;
+
+	if (ptr == NULL) {
+		ptr = null_context;
+	}
+	if (ptr == NULL) {
+		return 0;
+	}
+
+	tc = talloc_chunk_from_ptr(ptr);
+	total = tc->size;
+
+	child = tc->child;
+	while (child) {
+		total += talloc_total_size(child+1);
+		child = child->next;
+	}
+	return total;
+}
+
+/*
+  return the total number of blocks in a talloc pool (subtree): one for
+  ptr itself plus, recursively, the blocks of every child. A NULL ptr
+  means null_context; 0 is returned if that is NULL as well.
+*/
+off_t talloc_total_blocks(const void *ptr)
+{
+	struct talloc_chunk *tc, *child;
+	off_t total = 1;
+
+	if (ptr == NULL) {
+		ptr = null_context;
+	}
+	if (ptr == NULL) {
+		return 0;
+	}
+
+	tc = talloc_chunk_from_ptr(ptr);
+
+	child = tc->child;
+	while (child) {
+		total += talloc_total_blocks(child+1);
+		child = child->next;
+	}
+	return total;
+}
+
+/*
+  Count the reference handles chained off a pointer's refs list.
+*/
+static int talloc_reference_count(const void *ptr)
+{
+    struct talloc_reference_handle *handle;
+    int count = 0;
+
+    handle = talloc_chunk_from_ptr(ptr)->refs;
+    while (handle != NULL) {
+        count++;
+        handle = handle->next;
+    }
+
+    return count;
+}
+
+/*
+  Print one line per child of ptr to f, indented by depth*4 columns, and
+  recurse into each child with depth+1.  Children whose name is
+  TALLOC_MAGIC_REFERENCE are printed as a reference to the name of the
+  pointer held in their handle and are not descended into.
+*/
+void talloc_report_depth(const void *ptr, FILE *f, int depth)
+{
+    struct talloc_chunk *parent = talloc_chunk_from_ptr(ptr);
+    struct talloc_chunk *kid;
+
+    for (kid = parent->child; kid != NULL; kid = kid->next) {
+        const char *label;
+
+        if (kid->name == TALLOC_MAGIC_REFERENCE) {
+            struct talloc_reference_handle *handle = (void *)(kid+1);
+
+            label = talloc_get_name(handle->ptr);
+            fprintf(f, "%*sreference to: %s\n", depth*4, "", label);
+            continue;
+        }
+
+        label = talloc_get_name(kid+1);
+        fprintf(f, "%*s%-30s contains %6lu bytes in %3lu blocks (ref %d)\n",
+            depth*4, "",
+            label,
+            (unsigned long)talloc_total_size(kid+1),
+            (unsigned long)talloc_total_blocks(kid+1),
+            talloc_reference_count(kid+1));
+        talloc_report_depth(kid+1, f, depth+1);
+    }
+}
+
+/*
+  Write a header line with the totals for ptr, followed by the complete
+  tree below it, to f and flush the stream.  A NULL ptr selects
+  null_context; if that is NULL too, nothing is printed.
+*/
+void talloc_report_full(const void *ptr, FILE *f)
+{
+    unsigned long bytes, blocks;
+
+    if (ptr == NULL) {
+        ptr = null_context;
+    }
+    if (ptr == NULL) {
+        return;
+    }
+
+    bytes = (unsigned long)talloc_total_size(ptr);
+    blocks = (unsigned long)talloc_total_blocks(ptr);
+
+    fprintf(f,"full talloc report on '%s' (total %lu bytes in %lu blocks)\n",
+        talloc_get_name(ptr), bytes, blocks);
+
+    talloc_report_depth(ptr, f, 1);
+    fflush(f);
+}
+
+/*
+  Write a summary for ptr to f: a header line with the overall totals and
+  then one line for each immediate child.  The stream is flushed at the
+  end.  A NULL ptr selects null_context; if that is NULL too, nothing is
+  printed.
+*/
+void talloc_report(const void *ptr, FILE *f)
+{
+    struct talloc_chunk *kid;
+
+    if (ptr == NULL) {
+        ptr = null_context;
+    }
+    if (ptr == NULL) {
+        return;
+    }
+
+    fprintf(f,"talloc report on '%s' (total %lu bytes in %lu blocks)\n",
+        talloc_get_name(ptr),
+        (unsigned long)talloc_total_size(ptr),
+        (unsigned long)talloc_total_blocks(ptr));
+
+    kid = talloc_chunk_from_ptr(ptr)->child;
+    while (kid != NULL) {
+        fprintf(f, "\t%-30s contains %6lu bytes in %3lu blocks\n",
+            talloc_get_name(kid+1),
+            (unsigned long)talloc_total_size(kid+1),
+            (unsigned long)talloc_total_blocks(kid+1));
+        kid = kid->next;
+    }
+
+    fflush(f);
+}
+
+/*
+  Summary report on null_context to stderr; silent when the total size
+  below it is zero.
+*/
+static void talloc_report_null(void)
+{
+    if (talloc_total_size(null_context) == 0) {
+        return;
+    }
+    talloc_report(null_context, stderr);
+}
+
+/*
+  Full tree report on null_context to stderr; silent when the total size
+  below it is zero.
+*/
+static void talloc_report_null_full(void)
+{
+    if (talloc_total_size(null_context) == 0) {
+        return;
+    }
+    talloc_report_full(null_context, stderr);
+}
+
+/*
+  Create the "null_context" chunk if it does not exist yet.  Calling this
+  again once null_context is set does nothing.
+*/
+void talloc_enable_null_tracking(void)
+{
+    if (null_context != NULL) {
+        return;
+    }
+    null_context = talloc_named_const(NULL, 0, "null_context");
+}
+
+/*
+  Turn on null-context tracking and register talloc_report_null() to run
+  at process exit.
+*/
+void talloc_enable_leak_report(void)
+{
+    talloc_enable_null_tracking();
+
+    /* summary of whatever is still hanging off the null context */
+    atexit(talloc_report_null);
+}
+
+/*
+  Turn on null-context tracking and register talloc_report_null_full() to
+  run at process exit.
+*/
+void talloc_enable_leak_report_full(void)
+{
+    talloc_enable_null_tracking();
+
+    /* full tree of whatever is still hanging off the null context */
+    atexit(talloc_report_null_full);
+}
+
+/*
+  Allocate size bytes under ctx with the given name and clear them.
+  Returns NULL if the allocation fails.
+*/
+void *_talloc_zero(const void *ctx, size_t size, const char *name)
+{
+    void *mem = talloc_named_const(ctx, size, name);
+
+    if (mem == NULL) {
+        return NULL;
+    }
+
+    memset(mem, 0, size);
+    return mem;
+}
+
+
+/*
+  Allocate size bytes under t with the given name and fill them with a
+  copy of the size bytes at p.  Returns NULL if the allocation fails.
+*/
+void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name)
+{
+    void *copy = talloc_named_const(t, size, name);
+
+    if (copy == NULL) {
+        return NULL;
+    }
+
+    memcpy(copy, p, size);
+    return copy;
+}
+
+/*
+  Duplicate a NUL-terminated string under context t.  The new chunk is
+  named after its own contents.  Returns NULL when p is NULL or when the
+  allocation fails.
+*/
+char *talloc_strdup(const void *t, const char *p)
+{
+    char *dup;
+
+    if (p == NULL) {
+        return NULL;
+    }
+
+    /* +1 so that the terminating NUL is copied along */
+    dup = talloc_memdup(t, p, strlen(p) + 1);
+    if (dup == NULL) {
+        return NULL;
+    }
+
+    talloc_set_name_const(dup, dup);
+    return dup;
+}
+
+/*
+  strndup with a talloc: copy at most n characters of p into a new,
+  NUL-terminated chunk under context t.  The new chunk is named after its
+  own contents.
+
+  Returns NULL when p is NULL (matching talloc_strdup) or when the
+  allocation fails.
+*/
+char *talloc_strndup(const void *t, const char *p, size_t n)
+{
+    size_t len;
+    char *ret;
+
+    if (!p) {
+        return NULL;
+    }
+
+    /* test the bound before touching p[len], so that a buffer of exactly
+       n bytes without a terminator is never read past its end */
+    for (len=0; len<n && p[len]; len++) ;
+
+    ret = _talloc(t, len + 1);
+    if (!ret) { return NULL; }
+    memcpy(ret, p, len);
+    ret[len] = 0;
+    talloc_set_name_const(ret, ret);
+    return ret;
+}
+
+/*
+  VA_COPY(dest, src) duplicates a va_list so the same arguments can be
+  walked more than once.  It maps to va_copy() when HAVE_VA_COPY is
+  defined, to __va_copy() when HAVE___VA_COPY is defined, and otherwise
+  falls back to a plain assignment.
+  NOTE(review): the assignment fallback only compiles where va_list is an
+  assignable type - confirm for any new target platform.
+*/
+#ifndef VA_COPY
+#ifdef HAVE_VA_COPY
+#define VA_COPY(dest, src) va_copy(dest, src)
+#elif defined(HAVE___VA_COPY)
+#define VA_COPY(dest, src) __va_copy(dest, src)
+#else
+#define VA_COPY(dest, src) (dest) = (src)
+#endif
+#endif
+
+/*
+  vasprintf with a talloc: format fmt/ap into a newly allocated string
+  under context t.  The new chunk is named after its own contents.
+
+  Returns NULL if vsnprintf() reports an error or the allocation fails.
+  The caller's ap is only ever copied, never consumed.
+*/
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap)
+{
+    int len;
+    char *ret;
+    va_list ap2;
+
+    /* first pass: measure the formatted length */
+    VA_COPY(ap2, ap);
+    len = vsnprintf(NULL, 0, fmt, ap2);
+    va_end(ap2);
+
+    if (len < 0) {
+        /* a negative length must not be turned into an allocation size */
+        return NULL;
+    }
+
+    ret = _talloc(t, len+1);
+    if (ret) {
+        /* second pass: format for real into the new buffer */
+        VA_COPY(ap2, ap);
+        vsnprintf(ret, len+1, fmt, ap2);
+        va_end(ap2);
+        talloc_set_name_const(ret, ret);
+    }
+
+    return ret;
+}
+
+
+/*
+  printf-style front end to talloc_vasprintf(): format the arguments into
+  a newly allocated string under context t and return it (NULL if
+  talloc_vasprintf() returns NULL).
+ */
+char *talloc_asprintf(const void *t, const char *fmt, ...)
+{
+    char *formatted;
+    va_list args;
+
+    va_start(args, fmt);
+    formatted = talloc_vasprintf(t, fmt, args);
+    va_end(args);
+
+    return formatted;
+}
+
+
+/**
+ * Realloc @p s to append the formatted result of @p fmt and @p ap,
+ * and return @p s, which may have moved.  Good for gradually
+ * accumulating output into a string buffer.
+ *
+ * The existing string length is taken from the chunk size of @p s minus
+ * one, so @p s is expected to occupy its chunk exactly, including the
+ * terminating NUL.
+ *
+ * A NULL @p s behaves like talloc_vasprintf(NULL, fmt, ap).  If
+ * vsnprintf() reports an error, @p s is returned unchanged.  NULL is
+ * returned if the reallocation fails.
+ **/
+
+static char *talloc_vasprintf_append(char *s, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+
+static char *talloc_vasprintf_append(char *s, const char *fmt, va_list ap)
+{
+    struct talloc_chunk *tc;
+    int len, s_len;
+    va_list ap2;
+
+    if (s == NULL) {
+        return talloc_vasprintf(NULL, fmt, ap);
+    }
+
+    tc = talloc_chunk_from_ptr(s);
+
+    /* a zero-sized chunk holds no string; treat it as empty instead of
+       computing an offset of -1 */
+    s_len = tc->size ? tc->size - 1 : 0;
+
+    /* first pass: measure how much will be appended */
+    VA_COPY(ap2, ap);
+    len = vsnprintf(NULL, 0, fmt, ap2);
+    va_end(ap2);
+
+    if (len < 0) {
+        /* formatting failed: hand back the untouched string so that the
+           usual s = append(s, ...) pattern does not lose it */
+        return s;
+    }
+
+    s = talloc_realloc(NULL, s, char, s_len + len+1);
+    if (!s) return NULL;
+
+    /* second pass: write the new text over the old terminator */
+    VA_COPY(ap2, ap);
+    vsnprintf(s+s_len, len+1, fmt, ap2);
+    va_end(ap2);
+
+    talloc_set_name_const(s, s);
+
+    return s;
+}
+
+/*
+  printf-style front end to talloc_vasprintf_append(): append the
+  formatted text to s and return the resulting string, which may live at
+  a different address than s did.
+ */
+char *talloc_asprintf_append(char *s, const char *fmt, ...)
+{
+    char *grown;
+    va_list args;
+
+    va_start(args, fmt);
+    grown = talloc_vasprintf_append(s, fmt, args);
+    va_end(args);
+
+    return grown;
+}
+
+/*
+  alloc an array of count elements of el_size bytes each under ctx,
+  checking for integer overflow in the array size.
+
+  Returns NULL when count reaches MAX_TALLOC_SIZE/el_size or when the
+  allocation fails.  An el_size of 0 skips the limit check (it would
+  divide by zero) and requests a zero-byte chunk.
+*/
+void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name)
+{
+    if (el_size != 0 && count >= MAX_TALLOC_SIZE/el_size) {
+        return NULL;
+    }
+    return talloc_named_const(ctx, el_size * count, name);
+}
+
+/*
+  alloc a zeroed array of count elements of el_size bytes each under ctx,
+  checking for integer overflow in the array size.
+
+  Returns NULL when count reaches MAX_TALLOC_SIZE/el_size or when the
+  allocation fails.  An el_size of 0 skips the limit check (it would
+  divide by zero) and requests a zero-byte chunk.
+*/
+void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name)
+{
+    if (el_size != 0 && count >= MAX_TALLOC_SIZE/el_size) {
+        return NULL;
+    }
+    return _talloc_zero(ctx, el_size * count, name);
+}
+
+
+/*
+  realloc an array to count elements of el_size bytes each, checking for
+  integer overflow in the array size.
+
+  Returns NULL without calling _talloc_realloc() when count reaches
+  MAX_TALLOC_SIZE/el_size.  An el_size of 0 skips the limit check (it
+  would divide by zero) and passes a size of 0 to _talloc_realloc().
+*/
+void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name)
+{
+    if (el_size != 0 && count >= MAX_TALLOC_SIZE/el_size) {
+        return NULL;
+    }
+    return _talloc_realloc(ctx, ptr, el_size * count, name);
+}
+
+/*
+  Plain-function form of talloc realloc, suitable for handing to code
+  that wants a realloc-style callback.  Forwards to _talloc_realloc()
+  with a NULL name.
+*/
+void *talloc_realloc_fn(const void *context, void *ptr, size_t size)
+{
+    void *resized = _talloc_realloc(context, ptr, size, NULL);
+
+    return resized;
+}
+
+
+/*
+  atexit handler registered by talloc_autofree_context(): free
+  cleanup_context and reset the pointer to NULL.
+*/
+static void talloc_autofree(void)
+{
+    void *doomed = cleanup_context;
+
+    talloc_free(doomed);
+    cleanup_context = NULL;
+}
+
+/*
+  Return the shared "autofree_context".  While cleanup_context is NULL, a
+  call creates the context and registers talloc_autofree() with atexit()
+  so that it is freed when the process exits.  Keeping long-lived
+  allocations under this context reduces noise in leak reports.
+*/
+void *talloc_autofree_context(void)
+{
+    if (cleanup_context != NULL) {
+        return cleanup_context;
+    }
+
+    cleanup_context = talloc_named_const(NULL, 0, "autofree_context");
+    atexit(talloc_autofree);
+
+    return cleanup_context;
+}
+
+/*
+  Return the size recorded in the chunk header of a talloc pointer, or 0
+  when context is NULL.
+*/
+size_t talloc_get_size(const void *context)
+{
+    if (context == NULL) {
+        return 0;
+    }
+
+    return talloc_chunk_from_ptr(context)->size;
+}
+
+/*
+  Install handler/data as the new malloc_fail_handler/malloc_fail_data
+  pair and return the handler that was installed before.
+*/
+talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *handler,
+                                             void *data)
+{
+    talloc_fail_handler *previous;
+
+    previous = malloc_fail_handler;
+
+    malloc_fail_data = data;
+    malloc_fail_handler = handler;
+
+    return previous;
+}
diff --git a/tools/xenstore/talloc.h b/tools/xenstore/talloc.h
new file mode 100644
index 0000000000..39bcb53fb7
--- /dev/null
+++ b/tools/xenstore/talloc.h
@@ -0,0 +1,134 @@
+#ifndef _TALLOC_H_
+#define _TALLOC_H_
+/*
+ Unix SMB/CIFS implementation.
+ Samba temporary memory allocation functions
+
+ Copyright (C) Andrew Tridgell 2004-2005
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/* this is only needed for compatibility with the old talloc */
+typedef void TALLOC_CTX;
+
+/*
+ this uses a little trick to allow __LINE__ to be stringified
+*/
+#define _STRING_LINE_(s) #s
+#define _STRING_LINE2_(s) _STRING_LINE_(s)
+#define __LINESTR__ _STRING_LINE2_(__LINE__)
+#define __location__ __FILE__ ":" __LINESTR__
+
+#ifndef TALLOC_DEPRECATED
+#define TALLOC_DEPRECATED 0
+#endif
+
+/* useful macros for creating type checked pointers */
+/* talloc(): one object of the given type; the chunk is named after the type */
+#define talloc(ctx, type) (type *)talloc_named_const(ctx, sizeof(type), #type)
+/* talloc_size(): untyped allocation; the chunk is named after the calling file:line */
+#define talloc_size(ctx, size) talloc_named_const(ctx, size, __location__)
+
+/* talloc_new(): zero-length child context named "talloc_new: file:line" */
+#define talloc_new(ctx) talloc_named_const(ctx, 0, "talloc_new: " __location__)
+
+/* zero-initialised variants of talloc() and talloc_size() */
+#define talloc_zero(ctx, type) (type *)_talloc_zero(ctx, sizeof(type), #type)
+#define talloc_zero_size(ctx, size) _talloc_zero(ctx, size, __location__)
+
+/* array allocation; count is passed on as a separate argument, not pre-multiplied */
+#define talloc_zero_array(ctx, type, count) (type *)_talloc_zero_array(ctx, sizeof(type), count, #type)
+#define talloc_array(ctx, type, count) (type *)_talloc_array(ctx, sizeof(type), count, #type)
+#define talloc_array_size(ctx, size, count) _talloc_array(ctx, size, count, __location__)
+
+/* resizing: typed (count elements of type) and untyped (size bytes) */
+#define talloc_realloc(ctx, p, type, count) (type *)_talloc_realloc_array(ctx, p, sizeof(type), count, #type)
+#define talloc_realloc_size(ctx, ptr, size) _talloc_realloc(ctx, ptr, size, __location__)
+
+/* copy of size bytes from p; the chunk is named after the calling file:line */
+#define talloc_memdup(t, p, size) _talloc_memdup(t, p, size, __location__)
+
+/* NOTE(review): realloc_array() is not declared in this header - these
+   wrappers presumably rely on a definition elsewhere; confirm before use */
+#define malloc_p(type) (type *)malloc(sizeof(type))
+#define malloc_array_p(type, count) (type *)realloc_array(NULL, sizeof(type), count)
+#define realloc_p(p, type, count) (type *)realloc_array(p, sizeof(type), count)
+
+/* NOTE(review): data_blob_named() and data_blob_talloc_named() are not
+   declared in this header - confirm they exist in this tree before use */
+#define data_blob(ptr, size) data_blob_named(ptr, size, "DATA_BLOB: "__location__)
+#define data_blob_talloc(ctx, ptr, size) data_blob_talloc_named(ctx, ptr, size, "DATA_BLOB: "__location__)
+#define data_blob_dup_talloc(ctx, blob) data_blob_talloc_named(ctx, (blob)->data, (blob)->length, "DATA_BLOB: "__location__)
+
+/* use the stringified type name as a run-time type tag on a chunk */
+#define talloc_set_type(ptr, type) talloc_set_name_const(ptr, #type)
+#define talloc_get_type(ptr, type) (type *)talloc_check_name(ptr, #type)
+
+
+#if TALLOC_DEPRECATED
+#define talloc_zero_p(ctx, type) talloc_zero(ctx, type)
+#define talloc_p(ctx, type) talloc(ctx, type)
+#define talloc_array_p(ctx, type, count) talloc_array(ctx, type, count)
+#define talloc_realloc_p(ctx, p, type, count) talloc_realloc(ctx, p, type, count)
+#define talloc_destroy(ctx) talloc_free(ctx)
+#endif
+
+#ifndef PRINTF_ATTRIBUTE
+#if (__GNUC__ >= 3)
+/** Use gcc attribute to check printf fns. a1 is the 1-based index of
+ * the parameter containing the format, and a2 the index of the first
+ * argument. Note that some gcc 2.x versions don't handle this
+ * properly **/
+#define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (__printf__, a1, a2)))
+#else
+#define PRINTF_ATTRIBUTE(a1, a2)
+#endif
+#endif
+
+
+/* The following definitions come from talloc.c */
+void *_talloc(const void *context, size_t size);
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *));
+void talloc_increase_ref_count(const void *ptr);
+void *talloc_reference(const void *context, const void *ptr);
+int talloc_unlink(const void *context, void *ptr);
+void talloc_set_name(const void *ptr, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+void talloc_set_name_const(const void *ptr, const char *name);
+void *talloc_named(const void *context, size_t size,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
+void *talloc_named_const(const void *context, size_t size, const char *name);
+const char *talloc_get_name(const void *ptr);
+void *talloc_check_name(const void *ptr, const char *name);
+void talloc_report_depth(const void *ptr, FILE *f, int depth);
+void *talloc_parent(const void *ptr);
+void *talloc_init(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2);
+int talloc_free(void *ptr);
+void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name);
+void *talloc_steal(const void *new_ctx, const void *ptr);
+off_t talloc_total_size(const void *ptr);
+off_t talloc_total_blocks(const void *ptr);
+void talloc_report_full(const void *ptr, FILE *f);
+void talloc_report(const void *ptr, FILE *f);
+void talloc_enable_null_tracking(void);
+void talloc_enable_leak_report(void);
+void talloc_enable_leak_report_full(void);
+void *_talloc_zero(const void *ctx, size_t size, const char *name);
+void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name);
+char *talloc_strdup(const void *t, const char *p);
+char *talloc_strndup(const void *t, const char *p, size_t n);
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+char *talloc_asprintf(const void *t, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+char *talloc_asprintf_append(char *s,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name);
+void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name);
+void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name);
+void *talloc_realloc_fn(const void *context, void *ptr, size_t size);
+void *talloc_autofree_context(void);
+size_t talloc_get_size(const void *ctx);
+
+typedef int talloc_fail_handler(void *);
+talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *, void *);
+#endif
+
diff --git a/tools/xenstore/talloc_guide.txt b/tools/xenstore/talloc_guide.txt
new file mode 100644
index 0000000000..c23ac77cad
--- /dev/null
+++ b/tools/xenstore/talloc_guide.txt
@@ -0,0 +1,569 @@
+Using talloc in Samba4
+----------------------
+
+Andrew Tridgell
+September 2004
+
+The most current version of this document is available at
+ http://samba.org/ftp/unpacked/samba4/source/lib/talloc/talloc_guide.txt
+
+If you are used to talloc from Samba3 then please read this carefully,
+as talloc has changed a lot.
+
+The new talloc is a hierarchical, reference counted memory pool system
+with destructors. Quite a mouthful really, but not too bad once you
+get used to it.
+
+Perhaps the biggest change from Samba3 is that there is no distinction
+between a "talloc context" and a "talloc pointer". Any pointer
+returned from talloc() is itself a valid talloc context. This means
+you can do this:
+
+ struct foo *X = talloc(mem_ctx, struct foo);
+ X->name = talloc_strdup(X, "foo");
+
+and the pointer X->name would be a "child" of the talloc context "X"
+which is itself a child of mem_ctx. So if you do talloc_free(mem_ctx)
+then it is all destroyed, whereas if you do talloc_free(X) then just X
+and X->name are destroyed, and if you do talloc_free(X->name) then
+just the name element of X is destroyed.
+
+If you think about this, then what this effectively gives you is an
+n-ary tree, where you can free any part of the tree with
+talloc_free().
+
+If you find this confusing, then I suggest you run the testsuite to
+watch talloc in action. You may also like to add your own tests to
+testsuite.c to clarify how some particular situation is handled.
+
+
+Performance
+-----------
+
+All the additional features of talloc() over malloc() do come at a
+price. We have a simple performance test in Samba4 that measures
+talloc() versus malloc() performance, and it seems that talloc() is
+about 10% slower than malloc() on my x86 Debian Linux box. For Samba,
+the great reduction in code complexity that we get by using talloc
+makes this worthwhile, especially as the total overhead of
+talloc/malloc in Samba is already quite small.
+
+
+talloc API
+----------
+
+The following is a complete guide to the talloc API. Read it all at
+least twice.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc(const void *context, type);
+
+The talloc() macro is the core of the talloc library. It takes a
+memory context and a type, and returns a pointer to a new area of
+memory of the given type.
+
+The returned pointer is itself a talloc context, so you can use it as
+the context argument to more calls to talloc if you wish.
+
+The returned pointer is a "child" of the supplied context. This means
+that if you talloc_free() the context then the new child disappears as
+well. Alternatively you can free just the child.
+
+The context argument to talloc() can be NULL, in which case a new top
+level context is created.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_size(const void *context, size_t size);
+
+The function talloc_size() should be used when you don't have a
+convenient type to pass to talloc(). Unlike talloc(), it is not type
+safe (as it returns a void *), so you are on your own for type checking.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_free(void *ptr);
+
+The talloc_free() function frees a piece of talloc memory, and all its
+children. You can call talloc_free() on any pointer returned by
+talloc().
+
+The return value of talloc_free() indicates success or failure, with 0
+returned for success and -1 for failure. The only possible failure
+condition is if the pointer had a destructor attached to it and the
+destructor returned -1. See talloc_set_destructor() for details on
+destructors.
+
+If this pointer has an additional parent when talloc_free() is called
+then the memory is not actually released, but instead the most
+recently established parent is destroyed. See talloc_reference() for
+details on establishing additional parents.
+
+For more control on which parent is removed, see talloc_unlink()
+
+talloc_free() operates recursively on its children.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_free_children(void *ptr);
+
+The talloc_free_children() function walks along the list of all children of a
+talloc context and talloc_free()s only the children, not the context
+itself.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_reference(const void *context, const void *ptr);
+
+The talloc_reference() function makes "context" an additional parent
+of "ptr".
+
+The return value of talloc_reference() is always the original pointer
+"ptr", unless talloc ran out of memory in creating the reference in
+which case it will return NULL (each additional reference consumes
+around 48 bytes of memory on intel x86 platforms).
+
+If "ptr" is NULL, then the function is a no-op, and simply returns NULL.
+
+After creating a reference you can free it in one of the following
+ways:
+
+ - you can talloc_free() any parent of the original pointer. That
+ will reduce the number of parents of this pointer by 1, and will
+ cause this pointer to be freed if it runs out of parents.
+
+ - you can talloc_free() the pointer itself. That will destroy the
+ most recently established parent to the pointer and leave the
+ pointer as a child of its current parent.
+
+For more control on which parent to remove, see talloc_unlink()
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_unlink(const void *context, void *ptr);
+
+The talloc_unlink() function removes a specific parent from ptr. The
+context passed must either be a context used in talloc_reference()
+with this pointer, or must be a direct parent of ptr.
+
+Note that if the parent has already been removed using talloc_free()
+then this function will fail and will return -1. Likewise, if "ptr"
+is NULL, then the function will make no modifications and return -1.
+
+Usually you can just use talloc_free() instead of talloc_unlink(), but
+sometimes it is useful to have the additional control on which parent
+is removed.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *));
+
+The function talloc_set_destructor() sets the "destructor" for the
+pointer "ptr". A destructor is a function that is called when the
+memory used by a pointer is about to be released. The destructor
+receives the pointer as an argument, and should return 0 for success
+and -1 for failure.
+
+The destructor can do anything it wants to, including freeing other
+pieces of memory. A common use for destructors is to clean up
+operating system resources (such as open file descriptors) contained
+in the structure the destructor is placed on.
+
+You can only place one destructor on a pointer. If you need more than
+one destructor then you can create a zero-length child of the pointer
+and place an additional destructor on that.
+
+To remove a destructor call talloc_set_destructor() with NULL for the
+destructor.
+
+If your destructor attempts to talloc_free() the pointer that it is
+the destructor for then talloc_free() will return -1 and the free will
+be ignored. This would be a pointless operation anyway, as the
+destructor is only called when the memory is just about to go away.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_increase_ref_count(const void *ptr);
+
+The talloc_increase_ref_count(ptr) function is exactly equivalent to:
+
+ talloc_reference(NULL, ptr);
+
+You can use either syntax, depending on which you think is clearer in
+your code.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_name(const void *ptr, const char *fmt, ...);
+
+Each talloc pointer has a "name". The name is used principally for
+debugging purposes, although it is also possible to set and get the
+name on a pointer as a way of "marking" pointers in your code.
+
+The main use for names on pointers is for "talloc reports". See
+talloc_report() and talloc_report_full() for details. Also see
+talloc_enable_leak_report() and talloc_enable_leak_report_full().
+
+The talloc_set_name() function allocates memory as a child of the
+pointer. It is logically equivalent to:
+ talloc_set_name_const(ptr, talloc_asprintf(ptr, fmt, ...));
+
+Note that multiple calls to talloc_set_name() will allocate more
+memory without releasing the name. All of the memory is released when
+the ptr is freed using talloc_free().
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_name_const(const void *ptr, const char *name);
+
+The function talloc_set_name_const() is just like talloc_set_name(),
+but it takes a string constant, and is much faster. It is extensively
+used by the "auto naming" macros, such as talloc_p().
+
+This function does not allocate any memory. It just copies the
+supplied pointer into the internal representation of the talloc
+ptr. This means you must not pass a name pointer to memory that will
+disappear before the ptr is freed with talloc_free().
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_named(const void *context, size_t size, const char *fmt, ...);
+
+The talloc_named() function creates a named talloc pointer. It is
+equivalent to:
+
+ ptr = talloc_size(context, size);
+ talloc_set_name(ptr, fmt, ....);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_named_const(const void *context, size_t size, const char *name);
+
+This is equivalent to:
+
+ ptr = talloc_size(context, size);
+ talloc_set_name_const(ptr, name);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+const char *talloc_get_name(const void *ptr);
+
+This returns the current name for the given talloc pointer. See
+talloc_set_name() for details.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_init(const char *fmt, ...);
+
+This function creates a zero length named talloc context as a top
+level context. It is equivalent to:
+
+ talloc_named(NULL, 0, fmt, ...);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_new(void *ctx);
+
+This is a utility macro that creates a new memory context hanging
+off an existing context, automatically naming it "talloc_new: __location__"
+where __location__ is the source line it is called from. It is
+particularly useful for creating a new temporary working context.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_realloc(const void *context, void *ptr, type, count);
+
+The talloc_realloc() macro changes the size of a talloc
+pointer. The "count" argument is the number of elements of type "type"
+that you want the resulting pointer to hold.
+
+talloc_realloc() has the following equivalences:
+
+ talloc_realloc(context, NULL, type, 1) ==> talloc(context, type);
+ talloc_realloc(context, NULL, type, N) ==> talloc_array(context, type, N);
+ talloc_realloc(context, ptr, type, 0) ==> talloc_free(ptr);
+
+The "context" argument is only used if "ptr" is NULL, otherwise it
+is ignored.
+
+talloc_realloc() returns the new pointer, or NULL on failure. The call
+will fail either due to a lack of memory, or because the pointer has
+more than one parent (see talloc_reference()).
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_realloc_size(const void *context, void *ptr, size_t size);
+
+The talloc_realloc_size() function is useful when the type is not
+known so the typesafe talloc_realloc() cannot be used.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_steal(const void *new_ctx, const void *ptr);
+
+The talloc_steal() function changes the parent context of a talloc
+pointer. It is typically used when the context that the pointer is
+currently a child of is going to be freed and you wish to keep the
+memory for a longer time.
+
+The talloc_steal() function returns the pointer that you pass it. It
+does not have any failure modes.
+
+NOTE: It is possible to produce loops in the parent/child relationship
+if you are not careful with talloc_steal(). No guarantees are provided
+as to your sanity or the safety of your data if you do this.
+
+talloc_steal(new_ctx, NULL) will return NULL with no side effects.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+off_t talloc_total_size(const void *ptr);
+
+The talloc_total_size() function returns the total size in bytes used
+by this pointer and all child pointers. Mostly useful for debugging.
+
+Passing NULL is allowed, but it will only give a meaningful result if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+off_t talloc_total_blocks(const void *ptr);
+
+The talloc_total_blocks() function returns the total memory block
+count used by this pointer and all child pointers. Mostly useful for
+debugging.
+
+Passing NULL is allowed, but it will only give a meaningful result if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_report(const void *ptr, FILE *f);
+
+The talloc_report() function prints a summary report of all memory
+used by ptr. One line of report is printed for each immediate child of
+ptr, showing the total memory and number of blocks used by that child.
+
+You can pass NULL for the pointer, in which case a report is printed
+for the top level memory context, but only if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_report_full(const void *ptr, FILE *f);
+
+This provides a more detailed report than talloc_report(). It will
+recursively print the entire tree of memory referenced by the
+pointer. References in the tree are shown by giving the name of the
+pointer that is referenced.
+
+You can pass NULL for the pointer, in which case a report is printed
+for the top level memory context, but only if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_leak_report(void);
+
+This enables calling of talloc_report(NULL, stderr) when the program
+exits. In Samba4 this is enabled by using the --leak-report command
+line option.
+
+For it to be useful, this function must be called before any other
+talloc function as it establishes a "null context" that acts as the
+top of the tree. If you don't call this function first then passing
+NULL to talloc_report() or talloc_report_full() won't give you the
+full tree printout.
+
+Here is a typical talloc report:
+
+talloc report on 'null_context' (total 267 bytes in 15 blocks)
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ iconv(UTF8,CP850) contains 42 bytes in 2 blocks
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ iconv(CP850,UTF8) contains 42 bytes in 2 blocks
+ iconv(UTF8,UTF-16LE) contains 45 bytes in 2 blocks
+ iconv(UTF-16LE,UTF8) contains 45 bytes in 2 blocks
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_leak_report_full(void);
+
+This enables calling of talloc_report_full(NULL, stderr) when the
+program exits. In Samba4 this is enabled by using the
+--leak-report-full command line option.
+
+For it to be useful, this function must be called before any other
+talloc function as it establishes a "null context" that acts as the
+top of the tree. If you don't call this function first then passing
+NULL to talloc_report() or talloc_report_full() won't give you the
+full tree printout.
+
+Here is a typical full report:
+
+full talloc report on 'root' (total 18 bytes in 8 blocks)
+ p1 contains 18 bytes in 7 blocks (ref 0)
+ r1 contains 13 bytes in 2 blocks (ref 0)
+ reference to: p2
+ p2 contains 1 bytes in 1 blocks (ref 1)
+ x3 contains 1 bytes in 1 blocks (ref 0)
+ x2 contains 1 bytes in 1 blocks (ref 0)
+ x1 contains 1 bytes in 1 blocks (ref 0)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_null_tracking(void);
+
+This enables tracking of the NULL memory context without enabling leak
+reporting on exit. Useful for when you want to do your own leak
+reporting call via talloc_report_full(NULL, f);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_zero(const void *ctx, type);
+
+The talloc_zero() macro is equivalent to:
+
+ ptr = talloc(ctx, type);
+ if (ptr) memset(ptr, 0, sizeof(type));
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_zero_size(const void *ctx, size_t size)
+
+The talloc_zero_size() function is useful when you don't have a known type.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_memdup(const void *ctx, const void *p, size_t size);
+
+The talloc_memdup() function is equivalent to:
+
+ ptr = talloc_size(ctx, size);
+ if (ptr) memcpy(ptr, p, size);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_strdup(const void *ctx, const char *p);
+
+The talloc_strdup() function is equivalent to:
+
+ ptr = talloc_size(ctx, strlen(p)+1);
+ if (ptr) memcpy(ptr, p, strlen(p)+1);
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_strndup(const void *t, const char *p, size_t n);
+
+The talloc_strndup() function is the talloc equivalent of the C
+library function strndup()
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap);
+
+The talloc_vasprintf() function is the talloc equivalent of the C
+library function vasprintf()
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_asprintf(const void *t, const char *fmt, ...);
+
+The talloc_asprintf() function is the talloc equivalent of the C
+library function asprintf()
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_asprintf_append(char *s, const char *fmt, ...);
+
+The talloc_asprintf_append() function appends the given formatted
+string to the given string.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_array(const void *ctx, type, uint_t count);
+
+The talloc_array() macro is equivalent to:
+
+ (type *)talloc_size(ctx, sizeof(type) * count);
+
+except that it provides integer overflow protection for the multiply,
+returning NULL if the multiply overflows.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_array_size(const void *ctx, size_t size, uint_t count);
+
+The talloc_array_size() function is useful when the type is not
+known. It operates in the same way as talloc_array(), but takes a size
+instead of a type.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_realloc_fn(const void *ctx, void *ptr, size_t size);
+
+This is a non-macro version of talloc_realloc(), which is useful
+as libraries sometimes want a realloc function pointer. A realloc()
+implementation encapsulates the functionality of malloc(), free() and
+realloc() in one call, which is why it is useful to be able to pass
+around a single function pointer.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_autofree_context(void);
+
+This is a handy utility function that returns a talloc context
+which will be automatically freed on program exit. This can be used
+to reduce the noise in memory leak reports.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_check_name(const void *ptr, const char *name);
+
+This function checks if a pointer has the specified name. If it does
+then the pointer is returned. If it doesn't then NULL is returned.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_get_type(const void *ptr, type);
+
+This macro allows you to do type checking on talloc pointers. It is
+particularly useful for void* private pointers. It is equivalent to
+this:
+
+ (type *)talloc_check_name(ptr, #type)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+talloc_set_type(const void *ptr, type);
+
+This macro allows you to force the name of a pointer to be a
+particular type. This can be used in conjunction with
+talloc_get_type() to do type checking on void* pointers.
+
+It is equivalent to this:
+ talloc_set_name_const(ptr, #type)
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+talloc_get_size(const void *ctx);
+
+This function lets you know the amount of memory allocated so far by
+this context. It does NOT account for subcontext memory.
+This can be used to calculate the size of an array.
+
diff --git a/tools/xenstore/testsuite/01simple.sh b/tools/xenstore/testsuite/01simple.sh
new file mode 100644
index 0000000000..9b1eb8f5c3
--- /dev/null
+++ b/tools/xenstore/testsuite/01simple.sh
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# Create an entry, read it.
+[ "`echo -e 'write /test create contents\nread /test' | ./xs_test 2>&1`" = "contents" ]
diff --git a/tools/xenstore/testsuite/02directory.sh b/tools/xenstore/testsuite/02directory.sh
new file mode 100644
index 0000000000..f63ef1ff3d
--- /dev/null
+++ b/tools/xenstore/testsuite/02directory.sh
@@ -0,0 +1,31 @@
+#! /bin/sh
+
+# Root directory has nothing in it.
+[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "" ]
+
+# Create a file.
+[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
+
+# Directory shows it.
+[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "test" ]
+
+# Make a new directory.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+
+# Check it's there.
+DIR="`echo -e 'dir /' | ./xs_test 2>&1`"
+[ "$DIR" = "test
+dir" ] || [ "$DIR" = "dir
+test" ]
+
+# Check it's empty.
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "" ]
+
+# Create a file, check it exists.
+[ "`echo -e 'write /dir/test2 create contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "test2" ]
+[ "`echo -e 'read /dir/test2' | ./xs_test 2>&1`" = "contents2" ]
+
+# Creating dir over the top should fail.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ]
+[ "`echo -e 'mkdir /dir/test2' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ]
diff --git a/tools/xenstore/testsuite/03write.sh b/tools/xenstore/testsuite/03write.sh
new file mode 100644
index 0000000000..cf5f897c54
--- /dev/null
+++ b/tools/xenstore/testsuite/03write.sh
@@ -0,0 +1,17 @@
+#! /bin/sh
+
+# Write without create fails.
+[ "`echo -e 'write /test none contents' | ./xs_test 2>&1`" = "FATAL: write: No such file or directory" ]
+
+# Exclusive write succeeds
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents" ]
+
+# Exclusive write fails to overwrite.
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "FATAL: write: File exists" ]
+
+# Non-exclusive overwrite succeeds.
+[ "`echo -e 'write /test none contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ]
+[ "`echo -e 'write /test create contents3' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents3" ]
diff --git a/tools/xenstore/testsuite/04rm.sh b/tools/xenstore/testsuite/04rm.sh
new file mode 100644
index 0000000000..abadd6110a
--- /dev/null
+++ b/tools/xenstore/testsuite/04rm.sh
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+# Removing a non-existent entry fails.
+[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ]
+[ "`echo -e 'rm /dir/test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ]
+
+# Create file and remove it
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "" ]
+
+# Create directory and remove it.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ]
+
+# Create directory, create file, remove all.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'write /dir/test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ]
diff --git a/tools/xenstore/testsuite/05filepermissions.sh b/tools/xenstore/testsuite/05filepermissions.sh
new file mode 100644
index 0000000000..9d9043f191
--- /dev/null
+++ b/tools/xenstore/testsuite/05filepermissions.sh
@@ -0,0 +1,49 @@
+#! /bin/sh
+
+# Fail to get perms on non-existent file.
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ]
+[ "`echo -e 'getperm /dir/test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ]
+
+# Create file: we own it, no one has access.
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "0 NONE" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone read access to file.
+[ "`echo -e 'setperm /test 0 READ' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone write access to file.
+[ "`echo -e 'setperm /test 0 WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ]
+
+# Grant everyone both read and write access.
+[ "`echo -e 'setperm /test 0 READ/WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ/WRITE" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents2" ]
+[ "`echo -e 'setid 1\nwrite /test none contents3' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ]
+
+# Change so that user 1 owns it, no one else can do anything.
+[ "`echo -e 'setperm /test 1 NONE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ]
+[ "`echo -e 'setid 1\nwrite /test none contents4' | ./xs_test 2>&1`" = "" ]
+
+# User 2 can do nothing.
+[ "`echo -e 'setid 2\nsetperm /test 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ]
+[ "`echo -e 'setid 2\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 2\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /test none contents4' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Tools can always access things.
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents4" ]
+[ "`echo -e 'write /test none contents5' | ./xs_test 2>&1`" = "" ]
diff --git a/tools/xenstore/testsuite/06dirpermissions.sh b/tools/xenstore/testsuite/06dirpermissions.sh
new file mode 100644
index 0000000000..922a794f04
--- /dev/null
+++ b/tools/xenstore/testsuite/06dirpermissions.sh
@@ -0,0 +1,61 @@
+#! /bin/sh
+
+# Root directory: owned by tool, everyone has read access.
+[ "`echo -e 'getperm /' | ./xs_test 2>&1`" = "0 READ" ]
+
+# Create directory: we own it, no one has access.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "0 NONE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 1\nread /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone read access to directory.
+[ "`echo -e 'setperm /dir 0 READ' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone write access to directory.
+[ "`echo -e 'setperm /dir 0 WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /dir/test' | ./xs_test 2>&1`" = "contents" ]
+
+# Grant everyone both read and write access.
+[ "`echo -e 'setperm /dir 0 READ/WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ/WRITE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "test" ]
+[ "`echo -e 'setid 1\nwrite /dir/test2 create contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nread /dir/test2' | ./xs_test 2>&1`" = "contents" ]
+
+# Change so that user 1 owns it, no one else can do anything.
+[ "`echo -e 'setperm /dir 1 NONE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1 | sort`" = "test
+test2" ]
+[ "`echo -e 'setid 1\nwrite /dir/test3 create contents' | ./xs_test 2>&1`" = "" ]
+
+# User 2 can do nothing. Can't even tell if file exists.
+[ "`echo -e 'setid 2\nsetperm /dir 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ]
+[ "`echo -e 'setid 2\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 2\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test3' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test4' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Tools can always access things.
+[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1 | sort`" = "test
+test2
+test3" ]
+[ "`echo -e 'write /dir/test4 create contents' | ./xs_test 2>&1`" = "" ]
+
diff --git a/tools/xenstore/testsuite/07watch.sh b/tools/xenstore/testsuite/07watch.sh
new file mode 100644
index 0000000000..bedce6ad5b
--- /dev/null
+++ b/tools/xenstore/testsuite/07watch.sh
@@ -0,0 +1,32 @@
+#! /bin/sh
+
+# Watch something, write to it, check watch has fired.
+[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
+
+[ "`echo -e '1 watch /test 100\n2 write /test create contents2\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/test" ]
+
+# Check that reads don't set it off.
+[ "`echo -e '1 watch /test 100\n2 read /test\n1 waitwatch' | ./xs_test 2>&1`" = "2:contents2
+1:waitwatch timeout" ]
+
+# mkdir, setperm and rm should fire the watch (this also tests watching dirs).
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e '1 watch /dir 100\n2 mkdir /dir/newdir\n1 waitwatch\n1 ackwatch\n2 setperm /dir/newdir 0 READ\n1 waitwatch\n1 ackwatch\n2 rm /dir/newdir\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/newdir
+1:/dir/newdir
+1:/dir/newdir" ]
+
+# ignore watches while doing commands, should work.
+[ "`echo -e 'watch /dir 100\nwrite /dir/test create contents\nread /dir/test\nwaitwatch\nackwatch' | ./xs_test 2>&1`" = "contents
+/dir/test" ]
+
+# Watch priority test.
+[ "`echo -e '1 watch /dir 1\n3 watch /dir 3\n2 watch /dir 2\nwrite /dir/test create contents\n3 waitwatch\n3 ackwatch\n2 waitwatch\n2 ackwatch\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "3:/dir/test
+2:/dir/test
+1:/dir/test" ]
+
+# If one dies (without acking), the other should still get ack.
+[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 waitwatch\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "2:/dir/test
+1:/dir/test" ]
+
+# If one dies (without reading at all), the other should still get ack.
+[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/test" ]
diff --git a/tools/xenstore/testsuite/08transaction.sh b/tools/xenstore/testsuite/08transaction.sh
new file mode 100644
index 0000000000..2c23ed2496
--- /dev/null
+++ b/tools/xenstore/testsuite/08transaction.sh
@@ -0,0 +1,54 @@
+#! /bin/sh
+# Test transactions.
+
+# Simple transaction: create a file inside transaction.
+[ "`echo -e '1 start /
+1 write /entry1 create contents
+2 dir /
+1 dir /
+1 commit
+2 read /entry1' | ./xs_test`" = "1:entry1
+2:contents" ]
+echo rm /entry1 | ./xs_test
+
+# Create a file and abort transaction.
+[ "`echo -e '1 start /
+1 write /entry1 create contents
+2 dir /
+1 dir /
+1 abort
+2 dir /' | ./xs_test`" = "1:entry1" ]
+
+echo write /entry1 create contents | ./xs_test
+# Delete in transaction, commit
+[ "`echo -e '1 start /
+1 rm /entry1
+2 dir /
+1 dir /
+1 commit
+2 dir /' | ./xs_test`" = "2:entry1" ]
+
+# Delete in transaction, abort.
+echo write /entry1 create contents | ./xs_test
+[ "`echo -e '1 start /
+1 rm /entry1
+2 dir /
+1 dir /
+1 abort
+2 dir /' | ./xs_test`" = "2:entry1
+2:entry1" ]
+
+# Transactions can take as long as they want...
+[ "`echo -e 'start /
+sleep 1
+rm /entry1
+commit
+dir /' | ./xs_test`" = "" ]
+
+# ... as long as no one is waiting.
+[ "`echo -e '1 start /
+2 mkdir /dir
+1 mkdir /dir
+1 dir /
+1 commit' | ./xs_test 2>&1`" = "1:dir
+FATAL: 1: commit: Connection timed out" ]
diff --git a/tools/xenstore/testsuite/09domain.sh b/tools/xenstore/testsuite/09domain.sh
new file mode 100644
index 0000000000..9208dda0ec
--- /dev/null
+++ b/tools/xenstore/testsuite/09domain.sh
@@ -0,0 +1,15 @@
+#! /bin/sh
+# Test domain communication.
+
+# Create a domain, write an entry.
+[ "`echo -e 'introduce 1 100 7 /my/home
+1 write /entry1 create contents
+dir /' | ./xs_test 2>&1`" = "handle is 1
+entry1" ]
+
+# Release that domain.
+[ "`echo -e 'release 1' | ./xs_test`" = "" ]
+
+# Introduce and release by same connection.
+[ "`echo -e 'introduce 1 100 7 /my/home
+release 1' | ./xs_test 2>&1`" = "handle is 1" ]
diff --git a/tools/xenstore/testsuite/test.sh b/tools/xenstore/testsuite/test.sh
new file mode 100755
index 0000000000..5718e84a15
--- /dev/null
+++ b/tools/xenstore/testsuite/test.sh
@@ -0,0 +1,44 @@
+#! /bin/sh
+
+set -e
+set -m
+
+run_test()
+{
+ rm -rf $XENSTORED_ROOTDIR
+ mkdir $XENSTORED_ROOTDIR
+# Weird failures with this.
+ if type valgrind >/dev/null 2>&1; then
+ valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --no-fork 3>testsuite/tmp/vgout > /tmp/pid &
+ while [ ! -s /tmp/pid ]; do sleep 0; done
+ PID=`cat /tmp/pid`
+ rm /tmp/pid
+ else
+ PID=`./xenstored_test --output-pid`
+ fi
+ if sh -e $2 $1; then
+ if [ -s testsuite/tmp/vgout ]; then
+ kill $PID
+ echo VALGRIND errors:
+ cat testsuite/tmp/vgout
+ return 1
+ fi
+ echo shutdown | ./xs_test
+ return 0
+ else
+ # In case daemon is wedged.
+ kill $PID
+ sleep 1
+ return 1
+ fi
+}
+
+for f in testsuite/[0-9]*.sh; do
+ if run_test $f; then
+ echo Test $f passed...
+ else
+ echo Test $f failed, running verbosely...
+ run_test $f -x
+ exit 1
+ fi
+done
diff --git a/tools/xenstore/utils.c b/tools/xenstore/utils.c
new file mode 100644
index 0000000000..2345021f70
--- /dev/null
+++ b/tools/xenstore/utils.c
@@ -0,0 +1,143 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <signal.h>
+
+#include "utils.h"
+
+/*
+ * printf-style logging helper.
+ *
+ * The first call tries to open /dev/console for writing; if that fails,
+ * stderr is used for that call and the open is retried on the next one.
+ * A successfully opened stream is cached for all later calls.  Output
+ * is flushed after every message.
+ */
+void xprintf(const char *fmt, ...)
+{
+	static FILE *stream;
+	va_list ap;
+
+	if (stream == NULL) {
+		stream = fopen("/dev/console", "w");
+		if (stream == NULL)
+			stream = stderr;
+	}
+
+	va_start(ap, fmt);
+	vfprintf(stream, fmt, ap);
+	va_end(ap);
+
+	fflush(stream);
+}
+
+/*
+ * Report a fatal error and terminate.
+ *
+ * Prints "FATAL: ", the printf-style formatted message and a newline
+ * through xprintf(), then calls exit(1).  Never returns.
+ */
+void barf(const char *fmt, ...)
+{
+	char *str;
+	va_list arglist;
+
+	xprintf("FATAL: ");
+
+	va_start(arglist, fmt);
+	/* When vasprintf() fails the contents of str are undefined, so it
+	 * must be neither printed nor freed. */
+	if (vasprintf(&str, fmt, arglist) < 0)
+		str = NULL;
+	va_end(arglist);
+
+	/* Fall back to the raw format string if formatting failed. */
+	xprintf("%s\n", str ? str : fmt);
+	free(str);
+	exit(1);
+}
+
+/*
+ * Report a fatal error together with the current errno text, then
+ * terminate.
+ *
+ * Prints "FATAL: <formatted message>: <strerror(errno)>" and a newline
+ * through xprintf(), then calls exit(1).  Never returns.
+ */
+void barf_perror(const char *fmt, ...)
+{
+	char *str;
+	/* Capture errno first: the calls below may overwrite it. */
+	int err = errno;
+	va_list arglist;
+
+	xprintf("FATAL: ");
+
+	va_start(arglist, fmt);
+	/* When vasprintf() fails the contents of str are undefined, so it
+	 * must be neither printed nor freed. */
+	if (vasprintf(&str, fmt, arglist) < 0)
+		str = NULL;
+	va_end(arglist);
+
+	/* Fall back to the raw format string if formatting failed. */
+	xprintf("%s: %s\n", str ? str : fmt, strerror(err));
+	free(str);
+	exit(1);
+}
+
+/*
+ * Resize ptr to hold num elements of size bytes each.
+ *
+ * Returns NULL, leaving ptr untouched, when num is at or above
+ * SIZE_MAX/size (the byte count would overflow or be implausibly
+ * large).  Otherwise returns the result of realloc_nofail().
+ */
+void *_realloc_array(void *ptr, size_t size, size_t num)
+{
+	/* A zero element size cannot overflow, and must not reach the
+	 * division below. */
+	if (size != 0 && num >= SIZE_MAX/size)
+		return NULL;
+	return realloc_nofail(ptr, size * num);
+}
+
+/*
+ * realloc() wrapper that never returns NULL: on allocation failure it
+ * reports the requested size through barf(), which exits the process.
+ */
+void *realloc_nofail(void *ptr, size_t size)
+{
+	/* realloc(ptr, 0) is allowed to free ptr and return NULL, which is
+	 * not an out-of-memory condition.  Always ask for at least one byte
+	 * so that a NULL result unambiguously means failure. */
+	ptr = realloc(ptr, size ? size : 1);
+	if (ptr)
+		return ptr;
+	barf("realloc of %zu failed", size);
+}
+
+/*
+ * malloc() wrapper that never returns NULL: on allocation failure it
+ * reports the requested size through barf(), which exits the process.
+ */
+void *malloc_nofail(size_t size)
+{
+	/* malloc(0) may return NULL on some platforms without being out of
+	 * memory; request at least one byte so that NULL always means
+	 * failure. */
+	void *ptr = malloc(size ? size : 1);
+	if (ptr)
+		return ptr;
+	barf("malloc of %zu failed", size);
+}
+
+/* Stevens. */
+/*
+ * Turn the calling process into a daemon.
+ *
+ * Forks (the parent exits with status 0), makes the child a session
+ * leader, changes directory to "/", clears the umask and points
+ * stdin/stdout/stderr at /dev/null.  Any failure is fatal and reported
+ * through barf_perror().
+ */
+void daemonize(void)
+{
+	pid_t pid;
+	int devnull;
+
+	/* Separate from our parent via fork, so init inherits us. */
+	if ((pid = fork()) < 0)
+		barf_perror("Failed to fork daemon");
+	if (pid != 0)
+		exit(0);
+
+	/* Session leader so ^C doesn't whack us. */
+	setsid();
+	/* Move off any mount points we might be in. */
+	if (chdir("/") < 0)
+		barf_perror("Failed to chdir to /");
+	/* Discard our parent's old-fashioned umask prejudices. */
+	umask(0);
+
+	/*
+	 * Redirect the standard descriptors to /dev/null instead of merely
+	 * closing them: if 0-2 were left free, the next open()/socket()
+	 * would reuse them and stray stdio output (e.g. the stderr fallback
+	 * in xprintf) would be written into an unrelated file or socket.
+	 * This is done last so the errors above are still visible.
+	 */
+	devnull = open("/dev/null", O_RDWR);
+	if (devnull < 0)
+		barf_perror("Failed to open /dev/null");
+	dup2(devnull, STDIN_FILENO);
+	dup2(devnull, STDOUT_FILENO);
+	dup2(devnull, STDERR_FILENO);
+	/* Keep devnull if it is itself one of the standard descriptors. */
+	if (devnull > STDERR_FILENO)
+		close(devnull);
+}
+
+
+/* This version adds one byte (for nul term) */
+/*
+ * Read an entire file into a freshly allocated buffer.
+ *
+ * filename: path to read, or "-" to read from (a dup of) standard input.
+ * size:     set to the number of bytes read; left untouched if the file
+ *           cannot be opened.
+ *
+ * Returns a malloc()ed buffer holding the contents followed by a
+ * terminating nul byte, or NULL if the file cannot be opened, a read
+ * fails, or memory runs out (errno describes the failure).  Release the
+ * buffer with release_file().
+ */
+void *grab_file(const char *filename, unsigned long *size)
+{
+	size_t max = 16384;
+	ssize_t ret;
+	int fd, saved_errno;
+	char *buffer;
+
+	if (streq(filename, "-"))
+		fd = dup(STDIN_FILENO);
+	else
+		fd = open(filename, O_RDONLY, 0);
+
+	if (fd < 0)
+		return NULL;
+
+	*size = 0;
+	/* One extra byte is always reserved for the nul terminator. */
+	buffer = malloc(max + 1);
+	if (!buffer) {
+		saved_errno = ENOMEM;
+		goto fail;
+	}
+
+	while ((ret = read(fd, buffer + *size, max - *size)) > 0) {
+		*size += ret;
+		if (*size == max) {
+			char *bigger;
+
+			/* Double the capacity, refusing to overflow size_t. */
+			if (max > (SIZE_MAX - 1) / 2) {
+				saved_errno = ENOMEM;
+				goto fail;
+			}
+			/* Go through a temporary so the old buffer can still
+			 * be freed if realloc() fails. */
+			bigger = realloc(buffer, max * 2 + 1);
+			if (!bigger) {
+				saved_errno = ENOMEM;
+				goto fail;
+			}
+			buffer = bigger;
+			max *= 2;
+		}
+	}
+	if (ret < 0) {
+		/* Preserve read()'s errno across free()/close(). */
+		saved_errno = errno;
+		goto fail;
+	}
+
+	buffer[*size] = '\0';
+	close(fd);
+	return buffer;
+
+fail:
+	free(buffer);
+	close(fd);
+	errno = saved_errno;
+	return NULL;
+}
+
+/*
+ * Free a buffer passed in as data (see grab_file()).  The size argument
+ * is accepted but not used.
+ */
+void release_file(void *data, unsigned long size __attribute__((unused)))
+{
+ free(data);
+}
diff --git a/tools/xenstore/utils.h b/tools/xenstore/utils.h
new file mode 100644
index 0000000000..a84f19a22a
--- /dev/null
+++ b/tools/xenstore/utils.h
@@ -0,0 +1,61 @@
+#ifndef _UTILS_H
+#define _UTILS_H
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+
+/* Is A == B ? */
+#define streq(a,b) (strcmp((a),(b)) == 0)
+
+/* Does A start with B ? */
+#define strstarts(a,b) (strncmp((a),(b),strlen(b)) == 0)
+
+/* Does A end in B ?  True when the tail of a is exactly the string b. */
+static inline bool strends(const char *a, const char *b)
+{
+	size_t alen = strlen(a);
+	size_t blen = strlen(b);
+
+	/* A suffix can never be longer than the string it ends. */
+	return alen >= blen && streq(a + alen - blen, b);
+}
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define ___stringify(x) #x
+#define __stringify(x) ___stringify(x)
+
+/* Convenient wrappers for malloc and realloc. Use them. */
+#define new(type) ((type *)malloc_nofail(sizeof(type)))
+#define new_array(type, num) realloc_array((type *)0, (num))
+#define realloc_array(ptr, num) ((__typeof__(ptr))_realloc_array((ptr), sizeof((*ptr)), (num)))
+
+void *malloc_nofail(size_t size);
+void *realloc_nofail(void *ptr, size_t size);
+void *_realloc_array(void *ptr, size_t size, size_t num);
+
+void barf(const char *fmt, ...) __attribute__((noreturn));
+void barf_perror(const char *fmt, ...) __attribute__((noreturn));
+
+/* This version adds one byte (for nul term) */
+void *grab_file(const char *filename, unsigned long *size);
+void release_file(void *data, unsigned long size);
+
+/* For writing daemons, based on Stevens. */
+void daemonize(void);
+
+/* Signal handling: returns fd to listen on. */
+int signal_to_fd(int signal);
+void close_signal(int fd);
+
+void xprintf(const char *fmt, ...);
+
+#define eprintf(_fmt, _args...) xprintf("[ERR] %s" _fmt, __FUNCTION__, ##_args)
+#define iprintf(_fmt, _args...) xprintf("[INF] %s" _fmt, __FUNCTION__, ##_args)
+
+#ifdef DEBUG
+#define dprintf(_fmt, _args...) xprintf("[DBG] %s" _fmt, __FUNCTION__, ##_args)
+#else
+#define dprintf(_fmt, _args...) ((void)0)
+#endif
+
+#endif /* _UTILS_H */
diff --git a/tools/xenstore/xenstored.h b/tools/xenstore/xenstored.h
new file mode 100644
index 0000000000..784ec987a8
--- /dev/null
+++ b/tools/xenstore/xenstored.h
@@ -0,0 +1,81 @@
+/*
+ Simple prototype Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_H
+#define _XENSTORED_H
+
+/*
+ * Message type codes.  Presumably the values carried in the 'type'
+ * field of struct xsd_sockmsg -- TODO confirm against the users of
+ * this header.  Values are assigned by position, so reordering or
+ * inserting entries changes every later number.
+ */
+enum xsd_sockmsg_type
+{
+ XS_DEBUG,
+ XS_SHUTDOWN,
+ XS_DIRECTORY,
+ XS_READ,
+ XS_GET_PERMS,
+ XS_WATCH,
+ XS_WATCH_ACK,
+ XS_UNWATCH,
+ XS_TRANSACTION_START,
+ XS_TRANSACTION_END,
+ /* Alias with the same value as XS_TRANSACTION_END; it does not consume
+  * a number of its own.  NOTE(review): the name suggests it marks the
+  * last type allowed on a read-only connection -- confirm. */
+ XS_OP_READ_ONLY = XS_TRANSACTION_END,
+ XS_INTRODUCE,
+ XS_RELEASE,
+ XS_GETDOMAINPATH,
+ XS_WRITE,
+ XS_MKDIR,
+ XS_RM,
+ XS_SET_PERMS,
+ XS_WATCH_EVENT,
+ XS_ERROR,
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors
+{
+ int errnum;
+ const char *errstring;
+};
+#define XSD_ERROR(x) { x, #x }
+static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
+ XSD_ERROR(EINVAL),
+ XSD_ERROR(EACCES),
+ XSD_ERROR(EEXIST),
+ XSD_ERROR(EISDIR),
+ XSD_ERROR(ENOENT),
+ XSD_ERROR(ENOMEM),
+ XSD_ERROR(ENOSPC),
+ XSD_ERROR(EIO),
+ XSD_ERROR(ENOTEMPTY),
+ XSD_ERROR(ENOSYS),
+ XSD_ERROR(EROFS),
+ XSD_ERROR(EBUSY),
+ XSD_ERROR(ETIMEDOUT),
+ XSD_ERROR(EISCONN),
+};
+struct xsd_sockmsg
+{
+ u32 type;
+ u32 len; /* Length of data following this. */
+
+ /* Generally followed by nul-terminated string(s). */
+};
+
+#endif /* _XENSTORED_H */
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
new file mode 100644
index 0000000000..9d15848463
--- /dev/null
+++ b/tools/xenstore/xenstored_core.c
@@ -0,0 +1,1354 @@
+/*
+ Simple prototype Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <errno.h>
+#include <dirent.h>
+#include <getopt.h>
+#include <signal.h>
+#include <assert.h>
+#include <setjmp.h>
+
+//#define DEBUG
+#include "utils.h"
+#include "list.h"
+#include "talloc.h"
+#include "xs_lib.h"
+#include "xenstored.h"
+#include "xenstored_core.h"
+#include "xenstored_watch.h"
+#include "xenstored_transaction.h"
+#include "xenstored_domain.h"
+
+/* Set by the 'V' option in main(): trace each message read and written. */
+static bool verbose;
+/* Every live connection: new_connection() appends, destroy_conn() unlinks. */
+static LIST_HEAD(connections);
+
+#ifdef TESTING
+static bool failtest = false;
+
+/* We override talloc's malloc. */
+/* TESTING builds only: behaves like malloc(), except that while failtest is
+ * set it returns NULL for roughly one call in 32. */
+void *test_malloc(size_t size)
+{
+ /* Fail about 1 allocation in 32. (An earlier note here said "1 in 20",
+ * which does not match the modulus below.) */
+ if (failtest && (random() % 32) == 0)
+ return NULL;
+ return malloc(size);
+}
+
+/* Signal handler that switches failure injection back off; main() installs
+ * it for SIGUSR1 in TESTING builds. The signal number is ignored. */
+static void stop_failtest(int signum __attribute__((unused)))
+{
+ failtest = false;
+}
+
+/* Need these before we #define away write_all/mkdir in testing.h */
+/* TESTING stand-in for write_all(): while failtest is set, roughly one call
+ * in 8 writes only a random-length prefix of the data and then reports
+ * failure with errno == ENOSPC. Otherwise defers to the real write_all(). */
+bool test_write_all(int fd, void *contents, unsigned int len);
+bool test_write_all(int fd, void *contents, unsigned int len)
+{
+ if (failtest && (random() % 8) == 0) {
+ /* Shorten the write to simulate running out of space part-way;
+ * len == 0 is left alone to avoid a modulo by zero. */
+ if (len)
+ len = random() % len;
+ /* Result deliberately ignored: we report failure either way. */
+ write(fd, contents, len);
+ errno = ENOSPC;
+ return false;
+ }
+ return write_all(fd, contents, len);
+}
+
+int test_mkdir(const char *dir, int perms);
+/* TESTING stand-in for mkdir(): while failtest is set, roughly one call in
+ * 8 fails with ENOSPC without touching the filesystem; every other call is
+ * forwarded to the real mkdir(). */
+int test_mkdir(const char *dir, int perms)
+{
+	/* random() is only consulted while failure injection is enabled. */
+	bool inject = failtest && (random() % 8) == 0;
+
+	if (!inject)
+		return mkdir(dir, perms);
+
+	errno = ENOSPC;
+	return -1;
+}
+#endif /* TESTING */
+
+#include "xenstored_test.h"
+
+/* FIXME: Ideally, this should never be called. Some can be eliminated. */
+/* Something is horribly wrong: shutdown immediately. */
+/* Formats the printf-style message, reports it together with the connection
+ * id (-1 when conn is NULL) and strerror() of the errno seen on entry, first
+ * via eprintf() and then via syslog(), and terminates with _exit(2).
+ * Never returns. */
+void __attribute__((noreturn)) corrupt(struct connection *conn,
+ const char *fmt, ...)
+{
+ va_list arglist;
+ char *str;
+ /* Captured first: the calls below may overwrite errno. */
+ int saved_errno = errno;
+
+ va_start(arglist, fmt);
+ str = talloc_vasprintf(NULL, fmt, arglist);
+ va_end(arglist);
+
+ eprintf("xenstored corruption: connection id %i: err %s: %s",
+ conn ? (int)conn->id : -1, strerror(saved_errno), str);
+#ifdef TESTING
+ /* Allow them to attach debugger. */
+ sleep(30);
+#endif
+ /* NOTE(review): LOG_DAEMON is a facility; with no level OR'ed in, the
+ * priority's level bits are 0, i.e. LOG_EMERG. Confirm that is the
+ * intended severity. */
+ syslog(LOG_DAEMON,
+ "xenstored corruption: connection id %i: err %s: %s",
+ conn ? (int)conn->id : -1, strerror(saved_errno), str);
+ _exit(2);
+}
+
+/* Push bytes to the peer of @conn.
+ *
+ * Connections accepted on the read-only socket are created with
+ * conn->write == NULL (check_node_perms() uses that as its "read-only"
+ * marker), yet replies and errors are still queued for them.  Fall back to
+ * a plain write(2) on the socket rather than calling through a NULL
+ * function pointer. */
+static int conn_write(struct connection *conn, const void *data,
+		      unsigned int len)
+{
+	if (conn->write)
+		return conn->write(conn, data, len);
+	return write(conn->fd, data, len);
+}
+
+/* Make progress on the message queued in conn->out: first the header, then
+ * the payload, resuming from out->used after a short write.
+ *
+ * Returns false if the underlying write failed (callers drop the
+ * connection), true otherwise -- including when the message is only partly
+ * written.  Once the whole message is out, conn->out is cleared, the buffer
+ * is freed unless it is a watch event still awaiting its ack, and the next
+ * queued event (if any) is scheduled. */
+static bool write_message(struct connection *conn)
+{
+	int ret;
+	struct buffered_data *out = conn->out;
+
+	if (out->inhdr) {
+		if (verbose)
+			xprintf("Writing msg %i out to %p\n",
+				out->hdr.msg.type, conn);
+		ret = conn_write(conn, out->hdr.raw + out->used,
+				 sizeof(out->hdr) - out->used);
+		if (ret < 0)
+			return false;
+
+		out->used += ret;
+		if (out->used < sizeof(out->hdr))
+			return true;
+
+		/* Header complete: restart the offset for the payload. */
+		out->inhdr = false;
+		out->used = 0;
+
+		/* Second write might block if non-zero. */
+		if (out->hdr.msg.len)
+			return true;
+	}
+
+	if (verbose)
+		xprintf("Writing data len %i out to %p\n",
+			out->hdr.msg.len, conn);
+	ret = conn_write(conn, out->buffer + out->used,
+			 out->hdr.msg.len - out->used);
+
+	if (ret < 0)
+		return false;
+
+	out->used += ret;
+	if (out->used != out->hdr.msg.len)
+		return true;
+
+	conn->out = NULL;
+
+	/* If this was an event, we wait for ack, otherwise we're done. */
+	if (!is_watch_event(conn, out))
+		talloc_free(out);
+
+	queue_next_event(conn);
+	return true;
+}
+
+/* talloc destructor for a connection (installed by new_connection()).
+ * For non-domain connections it makes a non-blocking attempt to flush
+ * pending output and closes the fd; in all cases it unlinks the connection
+ * from the global list. Always returns 0. */
+static int destroy_conn(void *_conn)
+{
+ struct connection *conn = _conn;
+
+ /* Flush outgoing if possible, but don't block. */
+ if (!conn->domain) {
+ fd_set set;
+ struct timeval none;
+
+ FD_ZERO(&set);
+ FD_SET(conn->fd, &set);
+ /* Zero timeout: select() only polls for writability. */
+ none.tv_sec = none.tv_usec = 0;
+
+ /* Keep writing while output remains and the fd is writable;
+ * give up on the first write error. */
+ while (conn->out
+ && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
+ if (!write_message(conn))
+ break;
+ close(conn->fd);
+ }
+ list_del(&conn->list);
+ return 0;
+}
+
+/* Rebuild the select() sets for one pass of the main loop.
+ *
+ * The two listening sockets and the event fd are always watched for input.
+ * Each non-domain connection is watched for input unless it is blocked on a
+ * transaction, and for output when it has a message queued.
+ * Returns the highest fd seen (the caller passes this + 1 to select()). */
+static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
+			  int event_fd)
+{
+	struct connection *conn;
+	int highest = sock;
+
+	FD_ZERO(inset);
+	FD_ZERO(outset);
+
+	/* The three descriptors that are always of interest. */
+	FD_SET(sock, inset);
+	FD_SET(ro_sock, inset);
+	FD_SET(event_fd, inset);
+	if (highest < ro_sock)
+		highest = ro_sock;
+	if (highest < event_fd)
+		highest = event_fd;
+
+	list_for_each_entry(conn, &connections, list) {
+		/* Domain connections are skipped here. */
+		if (conn->domain)
+			continue;
+		if (highest < conn->fd)
+			highest = conn->fd;
+		if (conn->out)
+			FD_SET(conn->fd, outset);
+		if (!conn->blocked)
+			FD_SET(conn->fd, inset);
+	}
+	return highest;
+}
+
+/* Read everything from a talloc_open'ed fd.
+ *
+ * The buffer is a talloc child of @fd, starts at 4 bytes and doubles each
+ * time it fills.  *size receives the number of bytes read.  Returns the
+ * buffer, or NULL if read() reported an error. */
+static void *read_all(int *fd, unsigned int *size)
+{
+	unsigned int capacity = 4;
+	void *buf = talloc_size(fd, capacity);
+	int got;
+
+	*size = 0;
+	for (;;) {
+		got = read(*fd, buf + *size, capacity - *size);
+		if (got <= 0)
+			break;
+
+		*size += got;
+		if (*size == capacity) {
+			capacity *= 2;
+			buf = talloc_realloc_size(fd, buf, capacity);
+		}
+	}
+	return got < 0 ? NULL : buf;
+}
+
+/* talloc destructor for a heap-allocated int holding a file descriptor:
+ * closes the descriptor.  Always returns 0. */
+static int destroy_fd(void *_fd)
+{
+	close(*(int *)_fd);
+	return 0;
+}
+
+/* Return a pointer to an fd, self-closing and attached to this pathname.
+ *
+ * The int is a talloc child of @pathname; destroy_fd() closes the
+ * descriptor when it is freed.  If open() fails, returns NULL with errno
+ * left as open() set it. */
+static int *talloc_open(const char *pathname, int flags, int mode)
+{
+	int *fd = talloc(pathname, int);
+	int open_errno;
+
+	*fd = open(pathname, flags, mode);
+	if (*fd >= 0) {
+		talloc_set_destructor(fd, destroy_fd);
+		return fd;
+	}
+
+	/* talloc_free() may disturb errno, so carry it across. */
+	open_errno = errno;
+	talloc_free(fd);
+	errno = open_errno;
+	return NULL;
+}
+
+/* Is child a subnode of parent, or equal? */
+bool is_child(const char *child, const char *parent)
+{
+	size_t plen;
+
+	/* "/" would have to be "" for the prefix test below to work, which
+	 * would be a usability nightmare, so the root is special-cased:
+	 * everything is below it. */
+	if (streq(parent, "/"))
+		return true;
+
+	plen = strlen(parent);
+	if (strncmp(child, parent, plen) != 0)
+		return false;
+
+	/* A bare prefix match is not enough: the prefix must end on a path
+	 * component boundary. */
+	switch (child[plen]) {
+	case '/':
+	case '\0':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Map a node name to its directory under xs_daemon_store().  The result is
+ * talloc'ed off @node.  Answer never ends in /. */
+char *node_dir_outside_transaction(const char *node)
+{
+	if (!streq(node, "/"))
+		return talloc_asprintf(node, "%s%s", xs_daemon_store(), node);
+
+	/* The root node is the store directory itself. */
+	return talloc_strdup(node, xs_daemon_store());
+}
+
+/* Directory backing @node: the transaction's copy when @trans is set and
+ * covers the node, otherwise the live store. */
+static char *node_dir(struct transaction *trans, const char *node)
+{
+	if (trans && within_transaction(trans, node))
+		return node_dir_inside_transaction(trans, node);
+	return node_dir_outside_transaction(node);
+}
+
+/* Path of the ".data" file inside @node's directory, talloc'ed off @node. */
+static char *node_datafile(struct transaction *trans, const char *node)
+{
+	char *dir = node_dir(trans, node);
+
+	return talloc_asprintf(node, "%s/.data", dir);
+}
+
+/* Path of the ".perms" file inside @node's directory, talloc'ed off @node. */
+static char *node_permfile(struct transaction *trans, const char *node)
+{
+	char *dir = node_dir(trans, node);
+
+	return talloc_asprintf(node, "%s/.perms", dir);
+}
+
+/* Allocate an empty message buffer as a talloc child of @ctx: positioned at
+ * the start of the header, with no payload storage yet. */
+struct buffered_data *new_buffer(void *ctx)
+{
+	struct buffered_data *buf = talloc(ctx, struct buffered_data);
+
+	buf->buffer = NULL;
+	buf->used = 0;
+	buf->inhdr = true;
+	return buf;
+}
+
+/* Return length of string (including nul) at this offset.
+ * Returns 0 when the offset is at or past the end of the data, or when no
+ * nul terminator is found before the end. */
+unsigned int get_string(const struct buffered_data *data, unsigned int offset)
+{
+	const char *start, *end;
+
+	if (offset >= data->used)
+		return 0;
+
+	start = data->buffer + offset;
+	end = memchr(start, 0, data->used - offset);
+	return end ? end - start + 1 : 0;
+}
+
+/* Break input into vectors, return the number, fill in up to num of them.
+ * The return value counts every terminated string found, even beyond @num;
+ * vec[] entries point into data->buffer. */
+unsigned int get_strings(struct buffered_data *data,
+			 char *vec[], unsigned int num)
+{
+	unsigned int count = 0, pos = 0, len;
+
+	for (len = get_string(data, pos); len != 0;
+	     len = get_string(data, pos)) {
+		if (count < num)
+			vec[count] = data->buffer + pos;
+		pos += len;
+		count++;
+	}
+	return count;
+}
+
+/* Queue a message of the given type, carrying a copy of @len bytes of
+ * @data, for sending to @conn. */
+/* Returns "false", meaning "connection is not blocked". */
+bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len)
+{
+ struct buffered_data *bdata;
+
+ /* The buffer hangs off the connection and the payload hangs off the
+ * buffer, so freeing the parent also releases the child. */
+ bdata = new_buffer(conn);
+ bdata->buffer = talloc_array(bdata, char, len);
+
+ bdata->hdr.msg.type = type;
+ bdata->hdr.msg.len = len;
+ memcpy(bdata->buffer, data, len);
+
+ /* There might be an event going out now. Queue behind it. */
+ if (conn->out) {
+ /* Only a watch event may already be in flight, and at most one
+ * reply can be parked behind it. */
+ assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
+ assert(!conn->waiting_reply);
+ conn->waiting_reply = bdata;
+ } else
+ conn->out = bdata;
+ return false;
+}
+
+/* Some routines (write, mkdir, etc) just need a non-error return */
+/* Replies with message type @type and the payload "OK" (nul included).
+ * Returns what send_reply() returns. */
+bool send_ack(struct connection *conn, enum xsd_sockmsg_type type)
+{
+ return send_reply(conn, type, "OK", sizeof("OK"));
+}
+
+/* Reply to @conn with an XS_ERROR message whose payload is the symbolic
+ * name of @error (nul included), looked up in xsd_errors[].  An errno that
+ * is not in the table is treated as fatal via corrupt(). */
+bool send_error(struct connection *conn, int error)
+{
+	unsigned int i = 0;
+
+	while (xsd_errors[i].errnum != error) {
+		/* Reached the last entry without a match. */
+		if (i == ARRAY_SIZE(xsd_errors) - 1)
+			corrupt(conn, "Unknown error %i (%s)", error,
+				strerror(error));
+		i++;
+	}
+
+	return send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
+			  strlen(xsd_errors[i].errstring) + 1);
+}
+
+/* True if @node consists solely of ASCII letters, digits and "-/_@".
+ * The empty string passes. */
+static bool valid_chars(const char *node)
+{
+	/* Nodes can have lots of crap. */
+	size_t accepted = strspn(node,
+				 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+				 "abcdefghijklmnopqrstuvwxyz"
+				 "0123456789-/_@");
+
+	/* Valid iff the accepted prefix runs all the way to the nul. */
+	return node[accepted] == '\0';
+}
+
+/* A node name is valid when it starts with '/', does not end in '/'
+ * (except for the root "/" itself), has no empty component ("//") and uses
+ * only the characters valid_chars() allows. */
+static bool is_valid_nodename(const char *node)
+{
+	/* Must start in /. */
+	if (!strstarts(node, "/"))
+		return false;
+
+	/* Cannot end in / (unless it's just "/"). */
+	if (!streq(node, "/") && strends(node, "/"))
+		return false;
+
+	/* No double //, and nothing outside the permitted alphabet. */
+	return strstr(node, "//") == NULL && valid_chars(node);
+}
+
+/* We expect one arg in the input: return NULL otherwise.
+ *
+ * "One arg" means the payload is exactly one nul-terminated string.  An
+ * empty payload must be rejected explicitly: get_string() returns 0 for it,
+ * which equals in->used, and the zero-length buffer would otherwise be
+ * handed to callers that treat it as a C string. */
+static const char *onearg(struct buffered_data *in)
+{
+	if (in->used == 0 || get_string(in, 0) != in->used)
+		return NULL;
+	return in->buffer;
+}
+
+/* Read and parse the .perms file of @node as seen from @transaction.
+ * On success stores the entry count in *num and returns an array talloc'ed
+ * off @node. A file that cannot be parsed is treated as fatal (corrupt()). */
+/* If it fails, returns NULL and sets errno. */
+static struct xs_permissions *get_perms(struct transaction *transaction,
+ const char *node, unsigned int *num)
+{
+ unsigned int size;
+ char *strings;
+ struct xs_permissions *ret;
+ int *fd;
+
+ fd = talloc_open(node_permfile(transaction, node), O_RDONLY, 0);
+ if (!fd)
+ return NULL;
+ strings = read_all(fd, &size);
+ if (!strings)
+ return NULL;
+
+ /* One permission entry per string in the file. */
+ *num = count_strings(strings, size);
+ ret = talloc_array(node, struct xs_permissions, *num);
+ if (!strings_to_perms(ret, *num, strings))
+ corrupt(NULL, "Permissions corrupt for %s", node);
+
+ return ret;
+}
+
+/* Serialise @num permission entries into consecutive nul-terminated
+ * strings in one buffer talloc'ed off @node; *len receives the total byte
+ * count. Returns NULL if perm_to_string() rejects an entry. With num == 0
+ * the loop never runs, so the result is also NULL (and *len is 0). */
+static char *perms_to_strings(const char *node,
+ struct xs_permissions *perms, unsigned int num,
+ unsigned int *len)
+{
+ unsigned int i;
+ char *strings = NULL;
+ /* NOTE(review): sized for the decimal domid plus one more byte;
+ * confirm this covers everything perm_to_string() writes. */
+ char buffer[MAX_STRLEN(domid_t) + 1];
+
+ for (*len = 0, i = 0; i < num; i++) {
+ if (!perm_to_string(&perms[i], buffer))
+ return NULL;
+
+ /* Grow by this entry plus its nul, then append it. */
+ strings = talloc_realloc(node, strings, char,
+ *len + strlen(buffer) + 1);
+ strcpy(strings + *len, buffer);
+ *len += strlen(buffer) + 1;
+ }
+ return strings;
+}
+
+/* Destroy this, and its children, and its children's children. */
+/* @path is a filesystem path string. The signature matches a talloc
+ * destructor and it is installed as one on path strings elsewhere in this
+ * file. Always returns 0; failures other than "already gone" are fatal
+ * via corrupt(). */
+int destroy_path(void *path)
+{
+ DIR *dir;
+ struct dirent *dirent;
+
+ dir = opendir(path);
+ if (!dir) {
+ /* Not openable as a directory: try removing it as a file. A
+ * path that no longer exists counts as success. */
+ if (unlink(path) == 0 || errno == ENOENT)
+ return 0;
+ corrupt(NULL, "Destroying path %s", path);
+ }
+
+ while ((dirent = readdir(dir)) != NULL) {
+ /* Room for "path" + '/' + name + nul. */
+ char fullpath[strlen(path) + 1 + strlen(dirent->d_name) + 1];
+ sprintf(fullpath, "%s/%s", (char *)path, dirent->d_name);
+ if (!streq(dirent->d_name,".") && !streq(dirent->d_name,".."))
+ destroy_path(fullpath);
+ }
+ closedir(dir);
+ if (rmdir(path) != 0)
+ corrupt(NULL, "Destroying directory %s", path);
+ return 0;
+}
+
+/* Create a self-destructing temporary file */
+/* Creates "<path>.tmp" exclusively, writes @len bytes of @contents to it
+ * and returns its name (talloc'ed off @path). The name carries a
+ * destroy_path() destructor, so freeing it removes the file unless
+ * commit_tempfile() has cleared the destructor. Returns NULL if the file
+ * cannot be created or written. */
+static char *tempfile(const char *path, void *contents, unsigned int len)
+{
+ int *fd;
+ char *tmppath = talloc_asprintf(path, "%s.tmp", path);
+
+ fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd)
+ return NULL;
+ /* From here on the file exists, so arrange for its removal. */
+ talloc_set_destructor(tmppath, destroy_path);
+ if (!write_all(*fd, contents, len))
+ return NULL;
+
+ return tmppath;
+}
+
+/* We assume rename() doesn't fail on moves in same dir. */
+/* Renames @path to the same name with everything from its last '.' onwards
+ * removed, then clears the destructor on @path so the renamed file is not
+ * deleted. A failing rename() is fatal via corrupt().
+ * NOTE(review): assumes @path contains a '.' (as names from tempfile()
+ * do); strrchr() would return NULL otherwise. */
+static void commit_tempfile(const char *path)
+{
+ char realname[strlen(path) + 1];
+ unsigned int len = strrchr(path, '.') - path;
+
+ memcpy(realname, path, len);
+ realname[len] = '\0';
+ if (rename(path, realname) != 0)
+ corrupt(NULL, "Committing %s", realname);
+ talloc_set_destructor(path, NULL);
+}
+
+/* Replace the permissions file of @node (as seen from @transaction) with
+ * the @num entries in @perms. The new contents go to a temporary file that
+ * is then renamed over the old one. Returns false if the permissions
+ * cannot be serialised or the temporary file cannot be written. */
+static bool set_perms(struct transaction *transaction,
+ const char *node,
+ struct xs_permissions *perms, unsigned int num)
+{
+ unsigned int len;
+ char *permpath, *strings;
+
+ strings = perms_to_strings(node, perms, num, &len);
+ if (!strings)
+ return false;
+
+ /* Create then move. */
+ permpath = tempfile(node_permfile(transaction, node), strings, len);
+ if (!permpath)
+ return false;
+
+ commit_tempfile(permpath);
+ return true;
+}
+
+/* Return the name of @node's parent as a new string talloc'ed off @node.
+ * A node directly below the root (and the root itself) yields "/". */
+static char *get_parent(const char *node)
+{
+	/* Search from node + 1 so the leading '/' is never the match. */
+	char *slash = strrchr(node + 1, '/');
+
+	if (!slash)
+		return talloc_strdup(node, "/");
+
+	/* The precision argument of "%.*s" must be an int; the raw pointer
+	 * difference is a ptrdiff_t, which is wider than int on LP64
+	 * targets and would be misread by the varargs machinery. */
+	return talloc_asprintf(node, "%.*s", (int)(slash - node), node);
+}
+
+/* Work out what domain @id may do given a node's permission list.
+ * perms[0] names the owner and its .perms field doubles as the default for
+ * any id without an entry of its own. */
+static enum xs_perm_type perm_for_id(domid_t id,
+				     struct xs_permissions *perms,
+				     unsigned int num)
+{
+	unsigned int idx = 1;
+
+	/* Owners and tools get it all... */
+	if (id == 0 || id == perms[0].id)
+		return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_CREATE|XS_PERM_OWNER;
+
+	/* ...everyone else gets their explicit entry, if there is one... */
+	while (idx < num) {
+		if (perms[idx].id == id)
+			return perms[idx].perms;
+		idx++;
+	}
+
+	/* ...or the default stored in the owner's entry. */
+	return perms[0].perms;
+}
+
+/* We have a weird permissions system. You can allow someone into a
+ * specific node without allowing it in the parents. If it's going to
+ * fail, however, we don't want the errno to indicate any information
+ * about the node. */
+/* Given the errno an operation on @node is about to fail with, return the
+ * errno to report: ENOMEM is passed through; otherwise, if the connection
+ * lacks read permission on the nearest ancestor that has a permissions
+ * file, EACCES replaces it. */
+static int check_with_parents(struct connection *conn, const char *node,
+ int errnum)
+{
+ struct xs_permissions *perms;
+ unsigned int num;
+
+ /* We always tell them about memory failures. */
+ if (errnum == ENOMEM)
+ return errnum;
+
+ /* Walk upwards until some ancestor yields permissions or we have
+ * tried the root. */
+ do {
+ node = get_parent(node);
+ perms = get_perms(conn->transaction, node, &num);
+ if (perms)
+ break;
+ } while (!streq(node, "/"));
+
+ /* No permission at root? We're in trouble. */
+ if (!perms)
+ corrupt(conn, "No permissions file at root");
+
+ if (!(perm_for_id(conn->id, perms, num) & XS_PERM_READ))
+ return EACCES;
+
+ return errnum;
+}
+
+/* Check whether @conn may perform an operation needing @perm on @node.
+ *
+ * Returns true if allowed.  Otherwise returns false with errno set:
+ * EINVAL for a missing or malformed node name, EROFS for a write-type
+ * request on a connection with no write function, and otherwise the
+ * underlying error after filtering through check_with_parents() so that it
+ * does not leak information about nodes the caller cannot see. */
+bool check_node_perms(struct connection *conn, const char *node,
+		      enum xs_perm_type perm)
+{
+	struct xs_permissions *perms;
+	unsigned int num;
+
+	if (!node || !is_valid_nodename(node)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if ((perm & XS_PERM_WRITE) && !conn->write) {
+		errno = EROFS;
+		return false;
+	}
+
+	perms = get_perms(conn->transaction, node, &num);
+
+	/* No permissions.  If we want to create it and
+	 * it doesn't exist, check parent directory. */
+	if (!perms && errno == ENOENT && (perm & XS_PERM_CREATE)) {
+		char *parent = get_parent(node);
+
+		if (!parent)
+			return false;
+		perms = get_perms(conn->transaction, parent, &num);
+	}
+
+	if (!perms) {
+		errno = check_with_parents(conn, node, errno);
+		return false;
+	}
+
+	if (!(perm_for_id(conn->id, perms, num) & perm)) {
+		errno = check_with_parents(conn, node, EACCES);
+		return false;
+	}
+
+	return true;
+}
+
+/* Handle XS_DIRECTORY: reply with the names of @node's children as
+ * consecutive nul-terminated strings.  Directory entries containing
+ * characters outside the node-name alphabet (which covers ".", ".." and the
+ * ".data"/".perms" files) are left out.  On failure an error reply is sent
+ * instead.  Returns what send_reply()/send_error() return. */
+static bool send_directory(struct connection *conn, const char *node)
+{
+	char *path, *reply;
+	unsigned int reply_len = 0;
+	DIR *dir;
+	struct dirent *dirent;
+
+	if (!check_node_perms(conn, node, XS_PERM_READ))
+		return send_error(conn, errno);
+
+	/* Allocate only once @node is known to be valid.  A malformed request
+	 * reaches here with node == NULL, and allocating "off" NULL would
+	 * park the block on talloc's null context, where nothing frees it. */
+	reply = talloc_strdup(node, "");
+
+	path = node_dir(conn->transaction, node);
+	dir = opendir(path);
+	if (!dir)
+		return send_error(conn, errno);
+
+	while ((dirent = readdir(dir)) != NULL) {
+		int len = strlen(dirent->d_name) + 1;
+
+		if (!valid_chars(dirent->d_name))
+			continue;
+
+		/* Append this name, nul included. */
+		reply = talloc_realloc(path, reply, char, reply_len + len);
+		strcpy(reply + reply_len, dirent->d_name);
+		reply_len += len;
+	}
+	closedir(dir);
+
+	return send_reply(conn, XS_DIRECTORY, reply, reply_len);
+}
+
+/* Handle XS_READ: reply with the contents of @node's data file, or with an
+ * error. Requires read permission on the node. */
+static bool do_read(struct connection *conn, const char *node)
+{
+ char *value;
+ unsigned int size;
+ int *fd;
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
+ if (!fd) {
+ /* Data file doesn't exist? We call that a directory */
+ if (errno == ENOENT)
+ errno = EISDIR;
+ return send_error(conn, errno);
+ }
+
+ value = read_all(fd, &size);
+ if (!value)
+ return send_error(conn, errno);
+
+ return send_reply(conn, XS_READ, value, size);
+}
+
+/* Create a new directory. Optionally put data in it (if data != NULL) */
+/* The directory gets a permissions file naming the connection's id as
+ * owner with XS_PERM_NONE for everyone else. Returns false on failure; a
+ * directory that was already created is then removed again when its path
+ * string is freed. */
+static bool new_directory(struct connection *conn,
+ const char *node, void *data, unsigned int datalen)
+{
+ struct xs_permissions perms;
+ char *permstr;
+ unsigned int len;
+ int *fd;
+ char *dir = node_dir(conn->transaction, node);
+
+ if (mkdir(dir, 0750) != 0)
+ return false;
+
+ /* Set destructor so we clean up if necessary. */
+ talloc_set_destructor(dir, destroy_path);
+
+ /* Default permissions: we own it, no one else has permission. */
+ perms.id = conn->id;
+ perms.perms = XS_PERM_NONE;
+
+ permstr = perms_to_strings(dir, &perms, 1, &len);
+ fd = talloc_open(node_permfile(conn->transaction, node),
+ O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd || !write_all(*fd, permstr, len))
+ return false;
+
+ if (data) {
+ char *datapath = node_datafile(conn->transaction, node);
+
+ fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd || !write_all(*fd, data, datalen))
+ return false;
+ }
+
+ /* Finished! */
+ /* Success: disarm the cleanup so the directory stays. */
+ talloc_set_destructor(dir, NULL);
+ return true;
+}
+
+/* path, flags, data... */
+/* Handle XS_WRITE. The payload is a node name, a flags string (one of the
+ * XS_WRITE_* values) and then the raw data, which may contain nul bytes.
+ * Returns true if the request is blocked behind another transaction and
+ * must be retried later; false once a reply (ack or error) is queued. */
+static bool do_write(struct connection *conn, struct buffered_data *in)
+{
+ unsigned int offset, datalen;
+ char *vec[2];
+ char *node, *tmppath;
+ enum xs_perm_type mode;
+ struct stat st;
+
+ /* Extra "strings" can be created by binary data. */
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
+ return send_error(conn, EINVAL);
+
+ node = vec[0];
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ /* Data starts after the two leading strings and their nuls. */
+ offset = strlen(vec[0]) + strlen(vec[1]) + 2;
+ datalen = in->used - offset;
+
+ if (streq(vec[1], XS_WRITE_NONE))
+ mode = XS_PERM_WRITE;
+ else if (streq(vec[1], XS_WRITE_CREATE))
+ mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
+ mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ else
+ return send_error(conn, EINVAL);
+
+ if (!check_node_perms(conn, node, mode))
+ return send_error(conn, errno);
+
+ if (lstat(node_dir(conn->transaction, node), &st) != 0) {
+ /* Does not exist... */
+ if (errno != ENOENT)
+ return send_error(conn, errno);
+
+ /* Not going to create it? */
+ if (!(mode & XS_PERM_CREATE))
+ return send_error(conn, ENOENT);
+
+ if (!new_directory(conn, node, in->buffer + offset, datalen))
+ return send_error(conn, errno);
+ } else {
+ /* Exists... */
+ if (streq(vec[1], XS_WRITE_CREATE_EXCL))
+ return send_error(conn, EEXIST);
+
+ /* Write the new data beside the old, then rename it over. */
+ tmppath = tempfile(node_datafile(conn->transaction, node),
+ in->buffer + offset, datalen);
+ if (!tmppath)
+ return send_error(conn, errno);
+
+ commit_tempfile(tmppath);
+ }
+
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_WRITE);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+/* Handle XS_MKDIR: create @node as an empty directory node. Returns true
+ * if blocked behind another transaction (retry later), false once a reply
+ * is queued. */
+static bool do_mkdir(struct connection *conn, const char *node)
+{
+ if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE))
+ return send_error(conn, errno);
+
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ if (!new_directory(conn, node, NULL, 0))
+ return send_error(conn, errno);
+
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_MKDIR);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+/* Handle XS_RM: remove @node and everything below it. The root cannot be
+ * removed. Returns true if blocked behind another transaction (retry
+ * later), false once a reply is queued. */
+static bool do_rm(struct connection *conn, const char *node)
+{
+ char *tmppath, *path;
+
+ if (!check_node_perms(conn, node, XS_PERM_WRITE))
+ return send_error(conn, errno);
+
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ if (streq(node, "/"))
+ return send_error(conn, EINVAL);
+
+ /* We move the directory to temporary name, destructor cleans up. */
+ path = node_dir(conn->transaction, node);
+ tmppath = talloc_asprintf(node, "%s.tmp", path);
+ /* Armed before the rename; destroy_path() treats a path that does not
+ * exist as success, so a failed rename is harmless. */
+ talloc_set_destructor(tmppath, destroy_path);
+
+ if (rename(path, tmppath) != 0)
+ return send_error(conn, errno);
+
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_RM);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+/* Handle XS_GET_PERMS: reply with @node's permission list in string form,
+ * or with an error. Requires read permission on the node. */
+static bool do_get_perms(struct connection *conn, const char *node)
+{
+ struct xs_permissions *perms;
+ char *strings;
+ unsigned int len, num;
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ perms = get_perms(conn->transaction, node, &num);
+ if (!perms)
+ return send_error(conn, errno);
+
+ strings = perms_to_strings(node, perms, num, &len);
+ if (!strings)
+ return send_error(conn, errno);
+
+ return send_reply(conn, XS_GET_PERMS, strings, len);
+}
+
+/* Handle XS_SET_PERMS.  The payload is the node name followed by one or
+ * more permission strings.  Only the node's owner (or the tools) may do
+ * this.
+ *
+ * Returns true if the request is blocked behind another transaction and
+ * must be retried later; false once a reply (ack or error) is queued.
+ *
+ * @in is left untouched: when transaction_block() makes us return true the
+ * very same buffer is processed again later, so it must still describe the
+ * whole payload (in->buffer paired with in->used). */
+static bool do_set_perms(struct connection *conn, struct buffered_data *in)
+{
+	unsigned int num;
+	char *node, *permstrings;
+	struct xs_permissions *perms;
+
+	num = count_strings(in->buffer, in->used);
+	if (num < 2)
+		return send_error(conn, EINVAL);
+
+	/* First arg is node name; the permission strings follow its nul. */
+	node = in->buffer;
+	permstrings = in->buffer + strlen(node) + 1;
+	num--;
+
+	if (!within_transaction(conn->transaction, node))
+		return send_error(conn, EROFS);
+
+	if (transaction_block(conn, node))
+		return true;
+
+	/* We must own node to do this (tools can do this too). */
+	if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER))
+		return send_error(conn, errno);
+
+	perms = talloc_array(node, struct xs_permissions, num);
+	if (!strings_to_perms(perms, num, permstrings))
+		return send_error(conn, errno);
+
+	if (!set_perms(conn->transaction, node, perms, num))
+		return send_error(conn, errno);
+	add_change_node(conn->transaction, node);
+	send_ack(conn, XS_SET_PERMS);
+	fire_watches(conn->transaction, node);
+	return false;
+}
+
+/* Process "in" for conn: "in" will vanish after this conversation, so
+ * we can talloc off it for temporary variables. May free "conn".
+ * Returns true if can't complete due to block.
+ */
+/* Dispatches on the message type in the header. Handlers that take a
+ * single string get onearg(in), which is NULL when the payload is not
+ * exactly one nul-terminated string. */
+static bool process_message(struct connection *conn, struct buffered_data *in)
+{
+ switch (in->hdr.msg.type) {
+ case XS_DIRECTORY:
+ return send_directory(conn, onearg(in));
+
+ case XS_READ:
+ return do_read(conn, onearg(in));
+
+ case XS_WRITE:
+ return do_write(conn, in);
+
+ case XS_MKDIR:
+ return do_mkdir(conn, onearg(in));
+
+ case XS_RM:
+ return do_rm(conn, onearg(in));
+
+ case XS_GET_PERMS:
+ return do_get_perms(conn, onearg(in));
+
+ case XS_SET_PERMS:
+ return do_set_perms(conn, in);
+
+ case XS_SHUTDOWN:
+ send_ack(conn, XS_SHUTDOWN);
+ /* Everything hangs off auto-free context, freed at exit. */
+ exit(0);
+
+#ifdef TESTING
+ case XS_DEBUG: {
+ /* For testing, we allow them to set id. */
+ if (streq(in->buffer, "setid")) {
+ /* The new id is the string after the command word. */
+ conn->id = atoi(in->buffer + get_string(in, 0));
+ send_ack(conn, XS_DEBUG);
+ } else if (streq(in->buffer, "failtest")) {
+ /* An optional second string seeds random(). */
+ if (get_string(in, 0) < in->used)
+ srandom(atoi(in->buffer + get_string(in, 0)));
+ send_ack(conn, XS_DEBUG);
+ failtest = true;
+ }
+ /* Any other debug command gets no reply at all. */
+ return false;
+ }
+#endif /* TESTING */
+
+ case XS_WATCH:
+ return do_watch(conn, in);
+
+ case XS_WATCH_ACK:
+ return do_watch_ack(conn);
+
+ case XS_UNWATCH:
+ return do_unwatch(conn, onearg(in));
+
+ case XS_TRANSACTION_START:
+ return do_transaction_start(conn, onearg(in));
+
+ case XS_TRANSACTION_END:
+ return do_transaction_end(conn, onearg(in));
+
+ case XS_INTRODUCE:
+ return do_introduce(conn, in);
+
+ case XS_RELEASE:
+ return do_release(conn, onearg(in));
+
+ case XS_GETDOMAINPATH:
+ return do_get_domain_path(conn, onearg(in));
+
+ /* XS_WATCH_EVENT is rejected like any unknown type when it arrives
+ * as a request (as are XS_ERROR, and XS_DEBUG outside TESTING). */
+ case XS_WATCH_EVENT:
+ default:
+ eprintf("Client unknown operation %i", in->hdr.msg.type);
+ send_error(conn, ENOSYS);
+ return false;
+ }
+}
+
+/* talloc failure handler: @data points at the jmp_buf registered together
+ * with this handler; jump back to its setjmp() with value 1. Does not
+ * return. */
+static int out_of_mem(void *data)
+{
+ longjmp(*(jmp_buf *)data, 1);
+}
+
+/* Process the complete message sitting in conn->in.
+ *
+ * A talloc allocation failure anywhere below longjmp()s back here and the
+ * connection is dropped.  If the handler reports that the request is
+ * blocked behind a transaction, the message is put back as conn->in so it
+ * can be retried later; otherwise it is freed. */
+static void consider_message(struct connection *conn)
+{
+	/* Assigned after setjmp() and read again after a longjmp() (at
+	 * "end:"), so it has to be volatile: otherwise its value is
+	 * indeterminate on the longjmp path and we would leak the message
+	 * or free a stale pointer. */
+	struct buffered_data *volatile in = NULL;
+	enum xsd_sockmsg_type type = conn->in->hdr.msg.type;
+	jmp_buf talloc_fail;
+
+	/* For simplicity, we kill the connection on OOM. */
+	talloc_set_fail_handler(out_of_mem, &talloc_fail);
+	if (setjmp(talloc_fail)) {
+		talloc_free(conn);
+		goto end;
+	}
+
+	if (verbose)
+		xprintf("Got message %i len %i from %p\n",
+			type, conn->in->hdr.msg.len, conn);
+
+	/* We might get a command while waiting for an ack: this means
+	 * the other end discarded it: we will re-transmit. */
+	if (type != XS_WATCH_ACK)
+		reset_watch_event(conn);
+
+	/* Careful: process_message may free connection.  We detach
+	 * "in" beforehand and allocate the new buffer to avoid
+	 * touching conn after process_message.
+	 */
+	in = talloc_steal(talloc_autofree_context(), conn->in);
+	conn->in = new_buffer(conn);
+	if (process_message(conn, in)) {
+		/* Blocked by transaction: queue for re-xmit. */
+		talloc_free(conn->in);
+		conn->in = in;
+		in = NULL;
+	}
+
+end:
+	talloc_free(in);
+	talloc_set_fail_handler(NULL, NULL);
+	/* Leak check: anything on the null context besides the autofree
+	 * context itself is reported. */
+	if (talloc_total_blocks(NULL)
+	    != talloc_total_blocks(talloc_autofree_context()) + 1)
+		talloc_report_full(NULL, stderr);
+}
+
+/* Pull more of the current request out of @conn, one read per call: first
+ * the fixed header, then the payload.  When the payload is complete the
+ * message is handed to consider_message().
+ *
+ * Errors in reading or allocating here mean we get out of sync, so we
+ * drop the whole client connection.  The same goes for a header announcing
+ * more than PATH_MAX bytes of payload. */
+void handle_input(struct connection *conn)
+{
+	int bytes;
+	struct buffered_data *in;
+
+	assert(!conn->blocked);
+	in = conn->in;
+
+	/* Not finished header yet? */
+	if (in->inhdr) {
+		bytes = conn->read(conn, in->hdr.raw + in->used,
+				   sizeof(in->hdr) - in->used);
+		if (bytes <= 0)
+			goto bad_client;
+		in->used += bytes;
+		if (in->used != sizeof(in->hdr))
+			return;
+
+		if (in->hdr.msg.len > PATH_MAX) {
+			/* syslog() priorities are facility|level.  A bare
+			 * LOG_DAEMON leaves the level bits at 0, which is
+			 * LOG_EMERG -- far too loud for a misbehaving
+			 * client -- so name the level explicitly. */
+			syslog(LOG_DAEMON | LOG_ERR,
+			       "Client tried to feed us %i",
+			       in->hdr.msg.len);
+			goto bad_client;
+		}
+
+		in->buffer = talloc_array(in, char, in->hdr.msg.len);
+		if (!in->buffer)
+			goto bad_client;
+		/* Header done: "used" now counts payload bytes. */
+		in->used = 0;
+		in->inhdr = false;
+		return;
+	}
+
+	bytes = conn->read(conn, in->buffer + in->used,
+			   in->hdr.msg.len - in->used);
+	if (bytes < 0)
+		goto bad_client;
+
+	in->used += bytes;
+	if (in->used != in->hdr.msg.len)
+		return;
+
+	consider_message(conn);
+	return;
+
+bad_client:
+	/* Kill it. */
+	talloc_free(conn);
+}
+
+/* Make progress on @conn's queued output; a failed write drops the
+ * connection. */
+void handle_output(struct connection *conn)
+{
+ if (!write_message(conn))
+ talloc_free(conn);
+}
+
+/* If a transaction has ended, see if we can unblock any connections. */
+/* Each blocked connection whose node is no longer covered by a transaction
+ * has its pending message processed again. Afterwards the first connection
+ * in the list is moved to the tail. */
+static void unblock_connections(void)
+{
+ struct connection *i, *tmp;
+
+ /* NOTE(review): the _safe iterator only tolerates removal of the
+ * current entry; consider_message() is documented as possibly freeing
+ * connections -- confirm it can never free the one cached in tmp. */
+ list_for_each_entry_safe(i, tmp, &connections, list) {
+ if (!i->blocked)
+ continue;
+
+ if (!transaction_covering_node(i->blocked)) {
+ talloc_free(i->blocked);
+ i->blocked = NULL;
+ consider_message(i);
+ }
+ }
+
+ /* To balance bias, move first entry to end. */
+ if (!list_empty(&connections)) {
+ i = list_top(&connections, struct connection, list);
+ list_del(&i->list);
+ list_add_tail(&i->list, &connections);
+ }
+}
+
+/* Allocate a connection that uses @write / @read for its I/O, give it an
+ * empty input buffer and append it to the global connection list.  The
+ * caller fills in fd/domain afterwards.  destroy_conn() runs when the
+ * connection is freed.  Returns NULL if memory runs out. */
+struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
+{
+	struct connection *new;
+	jmp_buf talloc_fail;
+
+	new = talloc(talloc_autofree_context(), struct connection);
+	if (!new)
+		return NULL;
+
+	/* "blocked" is a char * (the node we wait on), so NULL, not false. */
+	new->blocked = NULL;
+	new->out = new->waiting_reply = NULL;
+	new->event = NULL;
+	new->fd = -1;
+	new->id = 0;
+	new->domain = NULL;
+	new->transaction = NULL;
+	new->write = write;
+	new->read = read;
+
+	talloc_set_fail_handler(out_of_mem, &talloc_fail);
+	if (setjmp(talloc_fail)) {
+		/* Uninstall the handler before leaving: it points at a
+		 * jmp_buf in this stack frame, and a later allocation
+		 * failure must not longjmp() into a dead frame. */
+		talloc_set_fail_handler(NULL, NULL);
+		talloc_free(new);
+		return NULL;
+	}
+	new->in = new_buffer(new);
+	talloc_set_fail_handler(NULL, NULL);
+
+	/* Only a fully built connection goes on the list and gets the
+	 * destructor (which unlinks it again). */
+	list_add_tail(&new->list, &connections);
+	talloc_set_destructor(new, destroy_conn);
+	return new;
+}
+
+/* connwritefn_t for socket connections: a single write(2) on conn->fd.
+ * Returns write()'s result unchanged. */
+static int writefd(struct connection *conn, const void *data, unsigned int len)
+{
+ return write(conn->fd, data, len);
+}
+
+/* connreadfn_t for socket connections: a single read(2) on conn->fd.
+ *
+ * Returns the number of bytes read, or -1 on error.  End-of-file is
+ * reported as an error too (errno = ECONNRESET): read() returns 0 once the
+ * peer has closed the socket, and select() keeps flagging such a socket as
+ * readable, so passing the 0 through would leave a caller that only checks
+ * for negative results polling a dead connection forever.  A request for
+ * zero bytes legitimately returns 0. */
+static int readfd(struct connection *conn, void *data, unsigned int len)
+{
+	int rc = read(conn->fd, data, len);
+
+	if (rc == 0 && len != 0) {
+		errno = ECONNRESET;
+		return -1;
+	}
+	return rc;
+}
+
+/* Accept one pending connection on listening socket @sock and register it.
+ * When @canwrite is false the connection is created with a NULL write
+ * function, which check_node_perms() treats as "read-only". Failures are
+ * silent: the client is simply not served. */
+static void accept_connection(int sock, bool canwrite)
+{
+ int fd;
+ struct connection *conn;
+
+ fd = accept(sock, NULL, NULL);
+ if (fd < 0)
+ return;
+
+ /* NOTE(review): the write function is also what write_message() calls
+ * to send replies -- confirm read-only connections never reach it with
+ * a NULL pointer. */
+ conn = new_connection(canwrite ? writefd : NULL, readfd);
+ if (conn)
+ conn->fd = fd;
+ else
+ close(fd);
+}
+
+/* Calc timespan from now to absolute time.
+ * On entry *tv is an absolute time; on return it holds the time remaining
+ * until then, or zero if that moment has already passed. */
+static void time_relative_to_now(struct timeval *tv)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+	if (timercmp(&now, tv, >)) {
+		timerclear(tv);
+		return;
+	}
+
+	/* Borrow a second when the microsecond field would go negative. */
+	if (tv->tv_usec < now.tv_usec) {
+		tv->tv_usec += 1000000;
+		tv->tv_sec -= 1;
+	}
+	tv->tv_sec -= now.tv_sec;
+	tv->tv_usec -= now.tv_usec;
+}
+
+/* Long options for getopt_long(). None takes an argument; each makes
+ * getopt_long() return the letter that main()'s switch handles. */
+static struct option options[] = { { "no-fork", 0, NULL, 'N' },
+ { "verbose", 0, NULL, 'V' },
+ { "output-pid", 0, NULL, 'P' },
+ { NULL, 0, NULL, 0 } };
+
+/* Daemon entry point: parse options, create and bind the read-write and
+ * read-only unix sockets, initialise the store directory on first run,
+ * hook up the hypervisor event channel, optionally daemonize, then serve
+ * requests forever from a select() loop. */
+int main(int argc, char *argv[])
+{
+	int opt, *sock, *ro_sock, event_fd, max, tmpout;
+	struct sockaddr_un addr;
+	fd_set inset, outset;
+	bool dofork = true;
+	bool outputpid = false;
+
+	/* The short-option string has to list every letter the switch
+	 * handles, otherwise -N and -P are rejected even though --no-fork
+	 * and --output-pid work.  "D" is kept so that invocations which
+	 * already pass -D keep parsing. */
+	while ((opt = getopt_long(argc, argv, "DNPV", options, NULL)) != -1) {
+		switch (opt) {
+		case 'N':
+			dofork = false;
+			break;
+		case 'V':
+			verbose = true;
+			break;
+		case 'P':
+			outputpid = true;
+			break;
+		}
+	}
+	if (optind != argc)
+		barf("%s: No arguments desired", argv[0]);
+
+	talloc_enable_leak_report_full();
+
+	/* Create sockets for them to listen to. */
+	sock = talloc(talloc_autofree_context(), int);
+	*sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (*sock < 0)
+		barf_perror("Could not create socket");
+	ro_sock = talloc(talloc_autofree_context(), int);
+	*ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (*ro_sock < 0)
+		barf_perror("Could not create socket");
+	talloc_set_destructor(sock, destroy_fd);
+	talloc_set_destructor(ro_sock, destroy_fd);
+
+	/* Don't kill us with SIGPIPE. */
+	signal(SIGPIPE, SIG_IGN);
+
+	/* FIXME: Be more sophisticated, don't mug running daemon. */
+	unlink(xs_daemon_socket());
+	unlink(xs_daemon_socket_ro());
+
+	/* sun_path is a small fixed-size array: refuse paths that would
+	 * overflow it instead of letting strcpy() scribble past the end. */
+	if (strlen(xs_daemon_socket()) >= sizeof(addr.sun_path))
+		barf("Socket path %s too long", xs_daemon_socket());
+	if (strlen(xs_daemon_socket_ro()) >= sizeof(addr.sun_path))
+		barf("Socket path %s too long", xs_daemon_socket_ro());
+
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, xs_daemon_socket());
+	if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
+		barf_perror("Could not bind socket to %s", xs_daemon_socket());
+	strcpy(addr.sun_path, xs_daemon_socket_ro());
+	if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
+		barf_perror("Could not bind socket to %s",
+			    xs_daemon_socket_ro());
+	if (chmod(xs_daemon_socket(), 0600) != 0
+	    || chmod(xs_daemon_socket_ro(), 0660) != 0)
+		barf_perror("Could not chmod sockets");
+
+	if (listen(*sock, 1) != 0
+	    || listen(*ro_sock, 1) != 0)
+		barf_perror("Could not listen on sockets");
+
+	/* If we're the first, create .perms file for root. */
+	if (mkdir(xs_daemon_store(), 0750) == 0) {
+		struct xs_permissions perms;
+		char *root = talloc_strdup(talloc_autofree_context(), "/");
+
+		perms.id = 0;
+		perms.perms = XS_PERM_READ;
+		if (!set_perms(NULL, root, &perms, 1))
+			barf_perror("Could not create permissions in root");
+		talloc_free(root);
+		mkdir(xs_daemon_transactions(), 0750);
+	} else if (errno != EEXIST)
+		barf_perror("Could not create root %s", xs_daemon_store());
+
+	/* Listen to hypervisor. */
+	event_fd = domain_init();
+
+	/* Debugging: daemonize() closes standard fds, so dup here. */
+	tmpout = dup(STDOUT_FILENO);
+	if (dofork) {
+		openlog("xenstored", 0, LOG_DAEMON);
+		daemonize();
+	}
+
+	/* Printed after daemonize() so that it is the daemon's pid. */
+	if (outputpid) {
+		char buffer[20];
+		sprintf(buffer, "%i\n", getpid());
+		write(tmpout, buffer, strlen(buffer));
+	}
+	close(tmpout);
+
+#ifdef TESTING
+	signal(SIGUSR1, stop_failtest);
+#endif
+
+	/* Get ready to listen to the tools. */
+	max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
+
+	/* Main loop. */
+	for (;;) {
+		struct connection *i;
+		struct timeval *tvp = NULL, tv;
+
+		/* Sleep at most until the earliest transaction timeout,
+		 * or indefinitely if there is none. */
+		timerclear(&tv);
+		shortest_transaction_timeout(&tv);
+		if (timerisset(&tv)) {
+			time_relative_to_now(&tv);
+			tvp = &tv;
+		}
+
+		if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
+			if (errno == EINTR)
+				continue;
+			barf_perror("Select failed");
+		}
+
+		if (FD_ISSET(*sock, &inset))
+			accept_connection(*sock, true);
+
+		if (FD_ISSET(*ro_sock, &inset))
+			accept_connection(*ro_sock, false);
+
+		if (FD_ISSET(event_fd, &inset))
+			handle_event(event_fd);
+
+		list_for_each_entry(i, &connections, list) {
+			if (i->domain)
+				continue;
+
+			/* Operations can delete themselves or others
+			 * (xs_release): list is not safe after input,
+			 * so break. */
+			if (FD_ISSET(i->fd, &inset)) {
+				handle_input(i);
+				break;
+			}
+			if (FD_ISSET(i->fd, &outset)) {
+				handle_output(i);
+				break;
+			}
+		}
+
+		if (tvp)
+			check_transaction_timeout();
+
+		/* If transactions ended, we might be able to do more work. */
+		unblock_connections();
+
+		max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd);
+	}
+}
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
new file mode 100644
index 0000000000..fe6eec8f72
--- /dev/null
+++ b/tools/xenstore/xenstored_core.h
@@ -0,0 +1,123 @@
+/*
+ Internal interfaces for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_INTERNAL_H
+#define _XENSTORED_INTERNAL_H
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include "xs_lib.h"
+#include "xenstored.h"
+#include "list.h"
+
+/* One message in flight (incoming or outgoing): a fixed-size header
+ * followed by a separately allocated body. */
+struct buffered_data
+{
+ /* Are we still doing the header? */
+ bool inhdr;
+ /* How far are we? Reset to 0 (with inhdr = true) to restart a message. */
+ unsigned int used;
+ /* The header, viewable either as a structure or as raw bytes. */
+ union {
+ struct xsd_sockmsg msg;
+ char raw[sizeof(struct xsd_sockmsg)];
+ } hdr;
+ /* The actual data. */
+ char *buffer;
+};
+
+struct connection;
+typedef int connwritefn_t(struct connection *, const void *, unsigned int);
+typedef int connreadfn_t(struct connection *, void *, unsigned int);
+
+/* State kept for every client of the daemon. The main loop only polls
+ * "fd" for connections whose "domain" is NULL; connections with a domain
+ * are driven from handle_event() instead. */
+struct connection
+{
+ struct list_head list;
+
+ /* The file descriptor we came in on. */
+ int fd;
+
+ /* Who am I? 0 for socket connections. */
+ domid_t id;
+
+ /* Are we blocked waiting for a transaction to end? Contains node. */
+ char *blocked;
+
+ /* Our current event. If all used, we're waiting for ack. */
+ struct watch_event *event;
+
+ /* Buffered incoming data. */
+ struct buffered_data *in;
+
+ /* Buffered output data */
+ struct buffered_data *out;
+
+ /* If we had a watch fire outgoing when we needed to reply... */
+ struct buffered_data *waiting_reply;
+
+ /* My transaction, if any. */
+ struct transaction *transaction;
+
+ /* The domain I'm associated with, if any. */
+ struct domain *domain;
+
+ /* Methods for communicating over this connection: write can be NULL */
+ connwritefn_t *write;
+ connreadfn_t *read;
+};
+
+/* Return length of string (including nul) at this offset. */
+unsigned int get_string(const struct buffered_data *data,
+ unsigned int offset);
+
+/* Break input into vectors, return the number, fill in up to num of them. */
+unsigned int get_strings(struct buffered_data *data,
+ char *vec[], unsigned int num);
+
+/* Is child node a child or equal to parent node? */
+bool is_child(const char *child, const char *parent);
+
+/* Create a new buffer with lifetime of context. */
+struct buffered_data *new_buffer(void *ctx);
+
+bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len);
+
+/* Some routines (write, mkdir, etc) just need a non-error return */
+bool send_ack(struct connection *conn, enum xsd_sockmsg_type type);
+
+/* Send an error: error is usually "errno". */
+bool send_error(struct connection *conn, int error);
+
+/* Check permissions on this node. */
+bool check_node_perms(struct connection *conn, const char *node,
+ enum xs_perm_type perm);
+
+/* Path to this node outside transaction. */
+char *node_dir_outside_transaction(const char *node);
+
+/* Fail due to excessive corruption, capitalist pigdogs! */
+void __attribute__((noreturn)) corrupt(struct connection *conn,
+ const char *fmt, ...);
+
+struct connection *new_connection(connwritefn_t *write, connreadfn_t *read);
+
+void handle_input(struct connection *conn);
+void handle_output(struct connection *conn);
+
+/* Convenient talloc-style destructor for paths. */
+int destroy_path(void *path);
+#endif /* _XENSTORED_INTERNAL_H */
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
new file mode 100644
index 0000000000..bcc0a64967
--- /dev/null
+++ b/tools/xenstore/xenstored_domain.c
@@ -0,0 +1,387 @@
+/*
+ Domain communications for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <linux/ioctl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+//#define DEBUG
+#include "utils.h"
+#include "talloc.h"
+#include "xenstored_core.h"
+#include "xenstored_domain.h"
+#include "xenstored_test.h"
+
+static int *xc_handle;
+static int eventchn_fd;
+static unsigned int ringbuf_datasize;
+
+/* Per-domain state, created by do_introduce() and torn down by
+ * destroy_domain() when the owning talloc context is freed. */
+struct domain
+{
+ /* Entry in the global "domains" list. */
+ struct list_head list;
+
+ /* The id of this domain */
+ domid_t domid;
+
+ /* Event channel port */
+ u16 port;
+
+ /* Domain path in store. */
+ char *path;
+
+ /* Shared page. */
+ void *page;
+
+ /* Input and output ringbuffer heads. */
+ struct ringbuf_head *input, *output;
+
+ /* The connection associated with this. */
+ struct connection *conn;
+
+};
+
+static LIST_HEAD(domains);
+
+/* Record which connection speaks for this domain. */
+void domain_set_conn(struct domain *domain, struct connection *conn)
+{
+	domain->conn = conn;
+}
+
+/* Header at the start of each ring; the data bytes follow it directly.
+ * Both indices are offsets into buf and must stay below ringbuf_datasize
+ * (see check_buffer()). */
+struct ringbuf_head
+{
+ u32 write; /* Next place to write to */
+ u32 read; /* Next place to read from */
+ u8 flags;
+ char buf[0];
+} __attribute__((packed));
+
+#define EVENTCHN_BIND _IO('E', 2)
+#define EVENTCHN_UNBIND _IO('E', 3)
+
+/* Are both ring indices inside the data area?
+ * FIXME: Mark connection as broken (close it?) when this happens. */
+static bool check_buffer(const struct ringbuf_head *h)
+{
+	if (h->write >= ringbuf_datasize)
+		return false;
+	return h->read < ringbuf_datasize;
+}
+
+/* Find the contiguous run of bytes that may be written next.
+ * Stores its length in *len and returns its address within buf.
+ * We can't fill last byte: would look like empty buffer. */
+static void *get_output_chunk(const struct ringbuf_head *h,
+			      void *buf, u32 *len)
+{
+	/* Position just behind the reader, wrapping at the start. */
+	u32 limit = (h->read == 0) ? ringbuf_datasize - 1 : h->read - 1;
+
+	if (limit >= h->write)
+		/* Unread data lies ahead of us: stop short of it. */
+		*len = limit - h->write;
+	else
+		/* Nothing in the way: here to the end of buffer. */
+		*len = ringbuf_datasize - h->write;
+	return buf + h->write;
+}
+
+/* Find the contiguous run of bytes that may be read next.
+ * Stores its length in *len and returns its address within buf. */
+static const void *get_input_chunk(const struct ringbuf_head *h,
+				   const void *buf, u32 *len)
+{
+	if (h->write >= h->read)
+		/* Writer is ahead of (or level with) us: read up to it. */
+		*len = h->write - h->read;
+	else
+		/* Writer has wrapped: read here to the end of buffer. */
+		*len = ringbuf_datasize - h->read;
+	return buf + h->read;
+}
+
+/* Advance the write index by len bytes, wrapping to 0 at the end of the
+ * data area. */
+static void update_output_chunk(struct ringbuf_head *h, u32 len)
+{
+	u32 next = h->write + len;
+
+	if (next == ringbuf_datasize)
+		next = 0;
+
+	/* Store once: callers pass a head inside the page mapped from the
+	 * other domain, so never write the unwrapped (out of range) index
+	 * into it, even briefly. */
+	h->write = next;
+}
+
+/* Advance the read index by len bytes, wrapping to 0 at the end of the
+ * data area. */
+static void update_input_chunk(struct ringbuf_head *h, u32 len)
+{
+	u32 next = h->read + len;
+
+	if (next == ringbuf_datasize)
+		next = 0;
+
+	/* Store once: callers pass a head inside the page mapped from the
+	 * other domain, so never write the unwrapped (out of range) index
+	 * into it, even briefly. */
+	h->read = next;
+}
+
+/* Is there at least one byte waiting to be read from this ring? */
+static bool buffer_has_input(const struct ringbuf_head *h)
+{
+	u32 pending;
+
+	/* Only the length matters here, so no data pointer is supplied. */
+	(void)get_input_chunk(h, NULL, &pending);
+	return pending > 0;
+}
+
+/* Can at least one byte be written into this ring right now? */
+static bool buffer_has_output_room(const struct ringbuf_head *h)
+{
+	u32 room;
+
+	/* Only the length matters here, so no data pointer is supplied. */
+	(void)get_output_chunk(h, NULL, &room);
+	return room > 0;
+}
+
+/* connwritefn_t for domain connections: copy as much of data as fits
+ * contiguously into the domain's output ring, then notify the domain.
+ * Returns the number of bytes copied (may be less than len, or 0), or
+ * -1 with errno = EIO if the ring indices are out of range. */
+static int writechn(struct connection *conn, const void *data, unsigned int len)
+{
+	struct ringbuf_head *ring = conn->domain->output;
+	struct ringbuf_head snap;
+	void *dest;
+	u32 room;
+
+	/* Must read head once, and before anything else, and verified. */
+	snap = *ring;
+	mb();
+	if (!check_buffer(&snap)) {
+		errno = EIO;
+		return -1;
+	}
+
+	dest = get_output_chunk(&snap, ring->buf, &room);
+	if (len > room)
+		len = room;
+
+	memcpy(dest, data, len);
+	mb();
+	update_output_chunk(ring, len);
+	/* FIXME: Probably not necessary. */
+	mb();
+	xc_evtchn_send(*xc_handle, conn->domain->port);
+	return len;
+}
+
+/* connreadfn_t for domain connections: copy up to len contiguous bytes
+ * out of the domain's input ring.
+ * Returns the number of bytes copied (may be less than len, or 0), or
+ * -1 with errno = EIO if the ring indices are out of range. */
+static int readchn(struct connection *conn, void *data, unsigned int len)
+{
+	struct ringbuf_head *ring = conn->domain->input;
+	struct ringbuf_head snap;
+	const void *src;
+	u32 ready;
+	bool notify;
+
+	/* Must read head once, and before anything else, and verified. */
+	snap = *ring;
+	mb();
+
+	if (!check_buffer(&snap)) {
+		errno = EIO;
+		return -1;
+	}
+
+	src = get_input_chunk(&snap, ring->buf, &ready);
+	if (len > ready)
+		len = ready;
+
+	/* The ring was full if its writer had no room left. */
+	notify = !buffer_has_output_room(&snap);
+	memcpy(data, src, len);
+	mb();
+	update_input_chunk(ring, len);
+	/* FIXME: Probably not necessary. */
+	mb();
+
+	/* If it was full, tell them we've taken some. */
+	if (notify)
+		xc_evtchn_send(*xc_handle, conn->domain->port);
+	return len;
+}
+
+/* talloc destructor for struct domain: drop it from the domains list,
+ * unbind its event channel port and unmap its shared page.
+ * Always returns 0. */
+static int destroy_domain(void *_domain)
+{
+	struct domain *dom = _domain;
+
+	list_del(&dom->list);
+
+	if (dom->port) {
+		if (ioctl(eventchn_fd, EVENTCHN_UNBIND, dom->port) != 0)
+			eprintf("> Unbinding port %i failed!\n", dom->port);
+	}
+
+	if (dom->page)
+		munmap(dom->page, getpagesize());
+
+	return 0;
+}
+
+/* Look up a domain by event channel port; NULL if none matches. */
+static struct domain *find_domain(u16 port)
+{
+	struct domain *d, *found = NULL;
+
+	list_for_each_entry(d, &domains, list) {
+		if (d->port != port)
+			continue;
+		found = d;
+		break;
+	}
+	return found;
+}
+
+/* An event channel fired: read the port from event_fd, service the
+ * matching domain's rings until nothing more can be done, then (outside
+ * TESTING builds) write the port back to event_fd. */
+void handle_event(int event_fd)
+{
+	struct domain *dom;
+	u16 port;
+
+	if (read(event_fd, &port, sizeof(port)) != sizeof(port))
+		barf_perror("Failed to read from event fd");
+
+	/* We have to handle *all* the data available before we ack:
+	 * careful that handle_input/handle_output can destroy conn,
+	 * so look the domain up afresh on every pass.
+	 */
+	for (;;) {
+		dom = find_domain(port);
+		if (!dom)
+			break;
+
+		if (!dom->conn->blocked && buffer_has_input(dom->input)) {
+			handle_input(dom->conn);
+			continue;
+		}
+		if (dom->conn->out && buffer_has_output_room(dom->output)) {
+			handle_output(dom->conn);
+			continue;
+		}
+		break;
+	}
+
+#ifndef TESTING
+	if (write(event_fd, &port, sizeof(port)) != sizeof(port))
+		barf_perror("Failed to write to event fd");
+#endif
+}
+
+/* XS_INTRODUCE: register a new domain with the daemon.
+ * "in" carries four nul-terminated strings: domid, mfn, evtchn, path.
+ * Maps the domain's shared page, binds its event channel port and
+ * creates a connection that talks over the two rings in that page.
+ * Replies with an ack on success, or with an error (EINVAL for missing
+ * or zero arguments, otherwise errno from the failing call). */
+bool do_introduce(struct connection *conn, struct buffered_data *in)
+{
+	struct domain *domain;
+	char *vec[4];
+	u16 port;
+
+	if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
+		return send_error(conn, EINVAL);
+
+	/* Hang domain off "in" until we're finished. */
+	domain = talloc(in, struct domain);
+
+	/* destroy_domain() unlinks the list entry, unbinds a non-zero port
+	 * and unmaps a non-NULL page, so give all three a harmless value
+	 * before the destructor is armed: every error return below ends
+	 * up running it. */
+	INIT_LIST_HEAD(&domain->list);
+	domain->port = 0;
+	domain->page = NULL;
+	domain->conn = NULL;
+
+	domain->domid = atoi(vec[0]);
+	domain->path = talloc_strdup(domain, vec[3]);
+	talloc_set_destructor(domain, destroy_domain);
+
+	port = atoi(vec[2]);
+	if (!port || !domain->domid)
+		return send_error(conn, EINVAL);
+	domain->page = xc_map_foreign_range(*xc_handle, domain->domid,
+					    getpagesize(),
+					    PROT_READ|PROT_WRITE,
+					    atol(vec[1]));
+	if (!domain->page)
+		return send_error(conn, errno);
+
+	/* One in each half of page. */
+	domain->input = domain->page;
+	domain->output = domain->page + getpagesize()/2;
+
+	/* Tell kernel we're interested in this event. */
+	if (ioctl(eventchn_fd, EVENTCHN_BIND, port) != 0)
+		return send_error(conn, errno);
+
+	/* Record the port only once it is really bound, so the destructor
+	 * never unbinds a port this domain did not bind. */
+	domain->port = port;
+
+	domain->conn = new_connection(writechn, readchn);
+	domain->conn->domain = domain;
+
+	/* From here on the domain lives and dies with its connection. */
+	talloc_steal(domain->conn, domain);
+	list_add(&domain->list, &domains);
+
+	return send_ack(conn, XS_INTRODUCE);
+}
+
+/* Look up a domain by its domain id; NULL if none matches. */
+static struct domain *find_domain_by_domid(domid_t domid)
+{
+	struct domain *d, *found = NULL;
+
+	list_for_each_entry(d, &domains, list) {
+		if (d->domid != domid)
+			continue;
+		found = d;
+		break;
+	}
+	return found;
+}
+
+/* XS_RELEASE: forget a domain. The single argument is the domid as a
+ * decimal string. Frees the domain's connection and replies with an
+ * ack; replies EINVAL for a missing or zero domid or a domain without
+ * a connection, ENOENT for an unknown domain. */
+bool do_release(struct connection *conn, const char *domid_str)
+{
+	struct domain *target;
+	domid_t domid;
+
+	/* A missing argument is rejected exactly like domid 0. */
+	domid = domid_str ? atoi(domid_str) : 0;
+	if (!domid)
+		return send_error(conn, EINVAL);
+
+	target = find_domain_by_domid(domid);
+	if (!target)
+		return send_error(conn, ENOENT);
+
+	if (!target->conn)
+		return send_error(conn, EINVAL);
+
+	talloc_free(target->conn);
+	return send_ack(conn, XS_RELEASE);
+}
+
+/* XS_GETDOMAINPATH: reply with the store path of a domain (including
+ * the terminating nul). The argument is a domid as a decimal string;
+ * 0 selects the domain attached to the asking connection. Replies
+ * EINVAL when the argument is missing, ENOENT when no domain is found. */
+bool do_get_domain_path(struct connection *conn, const char *domid_str)
+{
+	struct domain *target;
+	domid_t domid;
+
+	if (!domid_str)
+		return send_error(conn, EINVAL);
+
+	domid = atoi(domid_str);
+	target = (domid == 0) ? conn->domain : find_domain_by_domid(domid);
+	if (!target)
+		return send_error(conn, ENOENT);
+
+	return send_reply(conn, XS_GETDOMAINPATH, target->path,
+			  strlen(target->path) + 1);
+}
+
+/* talloc destructor for xc_handle: close the hypervisor interface. */
+static int close_xc_handle(void *_handle)
+{
+	int *handle = _handle;
+
+	xc_interface_close(*handle);
+	return 0;
+}
+
+/* One-time setup of domain communications: work out the ring data size,
+ * open the hypervisor interface (closed again by a talloc destructor)
+ * and open the event channel device. Any failure is fatal.
+ * Returns the event channel handle. */
+int domain_init(void)
+{
+	/* The size of the ringbuffer: half a page minus head structure. */
+	ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
+
+	xc_handle = talloc(talloc_autofree_context(), int);
+	if (!xc_handle)
+		barf_perror("Failed to allocate domain handle");
+	*xc_handle = xc_interface_open();
+	if (*xc_handle < 0)
+		barf_perror("Failed to open connection to hypervisor");
+	talloc_set_destructor(xc_handle, close_xc_handle);
+
+#ifdef TESTING
+	eventchn_fd = fake_open_eventchn();
+#else
+	eventchn_fd = open("/dev/xen/evtchn", O_RDWR);
+#endif
+	/* Distinct message, so this failure cannot be mistaken for the
+	 * xc_interface_open() failure above. */
+	if (eventchn_fd < 0)
+		barf_perror("Failed to open evtchn device");
+	return eventchn_fd;
+}
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
new file mode 100644
index 0000000000..20e85a54b5
--- /dev/null
+++ b/tools/xenstore/xenstored_domain.h
@@ -0,0 +1,38 @@
+/*
+ Domain communications for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_DOMAIN_H
+#define _XENSTORED_DOMAIN_H
+
+void handle_event(int event_fd);
+
+/* domid, mfn, eventchn, path */
+bool do_introduce(struct connection *conn, struct buffered_data *in);
+
+/* domid */
+bool do_release(struct connection *conn, const char *domid_str);
+
+/* domid */
+bool do_get_domain_path(struct connection *conn, const char *domid_str);
+
+/* Returns the event channel handle */
+int domain_init(void);
+
+void domain_set_conn(struct domain *domain, struct connection *conn);
+
+#endif /* _XENSTORED_DOMAIN_H */
diff --git a/tools/xenstore/xenstored_test.h b/tools/xenstore/xenstored_test.h
new file mode 100644
index 0000000000..f173a5ca91
--- /dev/null
+++ b/tools/xenstore/xenstored_test.h
@@ -0,0 +1,37 @@
+/*
+ Testing replacements for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_TEST_H
+#define _XENSTORED_TEST_H
+
+#ifdef TESTING
+bool test_write_all(int fd, void *contents, unsigned int len);
+#define write_all test_write_all
+
+int test_mkdir(const char *dir, int perms);
+#define mkdir test_mkdir
+
+int fake_open_eventchn(void);
+void fake_block_events(void);
+void fake_ack_event(void);
+
+#define ioctl(a,b,c) 0
+
+#endif
+
+#endif /* _XENSTORED_TEST_H */
diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
new file mode 100644
index 0000000000..ca37307f8c
--- /dev/null
+++ b/tools/xenstore/xenstored_transaction.c
@@ -0,0 +1,284 @@
+/*
+ Transaction code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include "talloc.h"
+#include "list.h"
+#include "xenstored_transaction.h"
+#include "xenstored_watch.h"
+#include "xs_lib.h"
+#include "utils.h"
+#include "xenstored_test.h"
+
+/* A node modified inside a transaction; watches are fired for each of
+ * these when the transaction is committed. */
+struct changed_node
+{
+ /* The list within this transaction. */
+ struct list_head list;
+
+ /* The name of the node. */
+ char *node;
+};
+
+/* One open transaction: a private copy of a subtree of the store which
+ * is renamed over the original on commit. */
+struct transaction
+{
+ /* Global list of transactions. */
+ struct list_head list;
+
+ /* My owner (conn->transaction == me). */
+ struct connection *conn;
+
+ /* Subtree this transaction covers */
+ char *node;
+
+ /* Base for this transaction. */
+ char *divert;
+
+ /* List of changed nodes. */
+ struct list_head changes;
+
+ /* Someone's waiting: time limit. Unset while nobody is blocked. */
+ struct timeval timeout;
+
+ /* We've timed out. */
+ bool destined_to_fail;
+};
+static LIST_HEAD(transactions);
+
+/* Is node covered by this transaction? With no transaction at all,
+ * every node counts as covered. */
+bool within_transaction(struct transaction *trans, const char *node)
+{
+	return !trans || is_child(node, trans->node);
+}
+
+/* You are on notice: this transaction is blocking someone.
+ * Arms the deadline unless it is already running. */
+static void start_transaction_timeout(struct transaction *trans)
+{
+	if (!timerisset(&trans->timeout)) {
+		/* One second timeout. */
+		gettimeofday(&trans->timeout, NULL);
+		trans->timeout.tv_sec += 1;
+	}
+}
+
+/* Return the first transaction, not already destined to fail, whose
+ * subtree contains node or is contained in node; NULL if there is none. */
+struct transaction *transaction_covering_node(const char *node)
+{
+	struct transaction *t;
+
+	list_for_each_entry(t, &transactions, list) {
+		if (!t->destined_to_fail
+		    && (is_child(t->node, node) || is_child(node, t->node)))
+			return t;
+	}
+	return NULL;
+}
+
+/* If someone else's transaction covers node, mark conn as blocked on
+ * it, start that transaction's timeout and return true. */
+bool transaction_block(struct connection *conn, const char *node)
+{
+	struct transaction *blocker;
+
+	/* Transactions don't overlap, so we can't be blocked by
+	 * others if we're in one. */
+	if (conn->transaction)
+		return false;
+
+	blocker = transaction_covering_node(node);
+	if (!blocker)
+		return false;
+
+	start_transaction_timeout(blocker);
+	conn->blocked = talloc_strdup(conn, node);
+	return true;
+}
+
+/* Remember that node was changed inside trans; a no-op without a
+ * transaction or when node is already recorded.
+ * Callers get a change node (which can fail) and only commit after they've
+ * finished. This way they don't have to unwind eg. a write. */
+void add_change_node(struct transaction *trans, const char *node)
+{
+	struct changed_node *entry;
+
+	if (!trans)
+		return;
+
+	list_for_each_entry(entry, &trans->changes, list) {
+		if (streq(entry->node, node))
+			return;
+	}
+
+	entry = talloc(trans, struct changed_node);
+	entry->node = talloc_strdup(entry, node);
+	INIT_LIST_HEAD(&entry->list);
+	list_add_tail(&entry->list, &trans->changes);
+}
+
+/* Return directory of node within the transaction: the transaction's
+ * divert base followed by the part of node beyond the transaction root.
+ * The result is allocated off node. */
+char *node_dir_inside_transaction(struct transaction *trans, const char *node)
+{
+	const char *suffix = node + strlen(trans->node);
+
+	return talloc_asprintf(node, "%s%s", trans->divert, suffix);
+}
+
+/* Lower *tv to the earliest armed transaction deadline; tv is left
+ * untouched if no transaction has one. */
+void shortest_transaction_timeout(struct timeval *tv)
+{
+	struct transaction *t;
+
+	list_for_each_entry(t, &transactions, list) {
+		if (timerisset(&t->timeout)
+		    && (!timerisset(tv) || timercmp(&t->timeout, tv, <)))
+			*tv = t->timeout;
+	}
+}
+
+/* Mark every transaction whose armed deadline has passed as destined
+ * to fail. */
+void check_transaction_timeout(void)
+{
+	struct transaction *t;
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	list_for_each_entry(t, &transactions, list) {
+		if (timerisset(&t->timeout) && timercmp(&t->timeout, &now, <))
+			t->destined_to_fail = true;
+	}
+}
+
+/* Run cmd through system(). Returns true only if it exited normally
+ * with status 0; a command that ran but failed sets errno to EIO.
+ * FIXME: Eliminate all uses of this */
+static bool do_command(const char *cmd)
+{
+	int status = system(cmd);
+
+	if (status == -1)
+		return false;
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
+		return true;
+
+	errno = EIO;
+	return false;
+}
+
+/* talloc destructor for struct transaction: unlink it from the global
+ * list and hand its divert path to destroy_path(). */
+static int destroy_transaction(void *_transaction)
+{
+	struct transaction *t = _transaction;
+
+	list_del(&t->list);
+	return destroy_path(t->divert);
+}
+
+/* XS_TRANSACTION_START: open a transaction on the subtree at node.
+ * Replies EBUSY if conn already has a transaction, or the errno left by
+ * check_node_perms() if conn may not read node. Returns true without
+ * replying when conn has been blocked behind another transaction. */
+bool do_transaction_start(struct connection *conn, const char *node)
+{
+ struct transaction *transaction;
+ char *dir, *cmd;
+
+ if (conn->transaction)
+ return send_error(conn, EBUSY);
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ dir = node_dir_outside_transaction(node);
+
+ /* Attach transaction to node for autofree until it's complete */
+ transaction = talloc(node, struct transaction);
+ transaction->node = talloc_strdup(transaction, node);
+ /* The divert directory is named after the transaction's address. */
+ transaction->divert = talloc_asprintf(transaction, "%s/%p/",
+ xs_daemon_transactions(),
+ transaction);
+ /* Copy the current subtree into the divert directory. */
+ cmd = talloc_asprintf(node, "cp -a %s %s", dir, transaction->divert);
+ if (!do_command(cmd))
+ corrupt(conn, "Creating transaction %s", transaction->divert);
+
+ /* The copy exists: from now on the transaction belongs to conn. */
+ talloc_steal(conn, transaction);
+ INIT_LIST_HEAD(&transaction->changes);
+ transaction->conn = conn;
+ timerclear(&transaction->timeout);
+ transaction->destined_to_fail = false;
+ list_add_tail(&transaction->list, &transactions);
+ conn->transaction = transaction;
+ /* Arm the destructor only after the transaction is on the list it
+ * will be removed from. */
+ talloc_set_destructor(transaction, destroy_transaction);
+ return send_ack(transaction->conn, XS_TRANSACTION_START);
+}
+
+/* Swap the transaction's private copy into place and fire watches for
+ * every node it changed. Returns false (with errno from rename) if the
+ * original could not be moved aside; a failure of the second rename is
+ * treated as store corruption. */
+static bool commit_transaction(struct transaction *trans)
+{
+	struct changed_node *chg;
+	char *live, *old;
+
+	/* Move: orig -> .old, repl -> orig. Cleanup deletes .old. */
+	live = node_dir_outside_transaction(trans->node);
+	old = talloc_asprintf(trans, "%s.old", live);
+
+	if (rename(live, old) != 0)
+		return false;
+	if (rename(trans->divert, live) != 0)
+		corrupt(trans->conn, "Failed rename %s to %s",
+			trans->divert, live);
+
+	/* The destructor removes whatever divert names: now the old copy. */
+	trans->divert = old;
+
+	/* Fire off the watches for everything that changed. */
+	list_for_each_entry(chg, &trans->changes, list) {
+		fire_watches(NULL, chg->node);
+	}
+	return true;
+}
+
+/* XS_TRANSACTION_END: arg is "T" to commit or "F" to abandon.
+ * Replies EINVAL for any other argument and ENOENT if conn has no
+ * transaction. A commit that has timed out (ETIMEDOUT) or fails gets an
+ * error reply and the function returns false. The transaction is freed
+ * in every case once the arguments have been accepted. */
+bool do_transaction_end(struct connection *conn, const char *arg)
+{
+	bool failed = false;
+
+	if (!arg || (!streq(arg, "T") && !streq(arg, "F")))
+		return send_error(conn, EINVAL);
+
+	if (!conn->transaction)
+		return send_error(conn, ENOENT);
+
+	if (streq(arg, "T")) {
+		if (conn->transaction->destined_to_fail) {
+			send_error(conn, ETIMEDOUT);
+			failed = true;
+		} else if (!commit_transaction(conn->transaction)) {
+			send_error(conn, errno);
+			failed = true;
+		}
+	}
+
+	talloc_free(conn->transaction);
+	conn->transaction = NULL;
+
+	if (failed)
+		return false;
+	return send_ack(conn, XS_TRANSACTION_END);
+}
+
diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
new file mode 100644
index 0000000000..a21bccad72
--- /dev/null
+++ b/tools/xenstore/xenstored_transaction.h
@@ -0,0 +1,50 @@
+/*
+ Transaction code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_TRANSACTION_H
+#define _XENSTORED_TRANSACTION_H
+#include "xenstored_core.h"
+
+struct transaction;
+
+bool do_transaction_start(struct connection *conn, const char *node);
+bool do_transaction_end(struct connection *conn, const char *arg);
+
+/* Is node covered by this transaction? */
+bool within_transaction(struct transaction *trans, const char *node);
+
+/* If a write op on this node is blocked by another connection's transaction,
+ * mark conn, set up the transaction timeout and return true.
+ */
+bool transaction_block(struct connection *conn, const char *node);
+
+/* Return transaction which covers this node. */
+struct transaction *transaction_covering_node(const char *node);
+
+/* Return directory of node within transaction t. */
+char *node_dir_inside_transaction(struct transaction *t, const char *node);
+
+/* This node was changed: can fail and longjmp. */
+void add_change_node(struct transaction *trans, const char *node);
+
+/* Get shortest timeout: leave tv unset if none. */
+void shortest_transaction_timeout(struct timeval *tv);
+
+/* Have any transactions timed out yet? */
+void check_transaction_timeout(void);
+#endif /* _XENSTORED_TRANSACTION_H */
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
new file mode 100644
index 0000000000..2df83e1a54
--- /dev/null
+++ b/tools/xenstore/xenstored_watch.c
@@ -0,0 +1,279 @@
+/*
+ Watch code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include "talloc.h"
+#include "list.h"
+#include "xenstored_watch.h"
+#include "xs_lib.h"
+#include "utils.h"
+#include "xenstored_test.h"
+
+/* We create this if anyone is interested in "node", then we pass it from
+ * watch to watch as each connection acks it.
+ */
+struct watch_event
+{
+ /* The watch we are firing for (watch->events) */
+ struct list_head list;
+
+ /* Watch we are currently attached to. */
+ struct watch *watch;
+
+ /* The XS_WATCH_EVENT message; its buffer holds the node name. */
+ struct buffered_data *data;
+};
+
+/* One connection's registered interest in a node and everything under it. */
+struct watch
+{
+ /* Entry in the global "watches" list (kept sorted by insert_watch()). */
+ struct list_head list;
+ unsigned int priority;
+
+ /* Current outstanding events applying to this watch. */
+ struct list_head events;
+
+ /* The watched node. */
+ char *node;
+ /* The connection that registered this watch. */
+ struct connection *conn;
+};
+static LIST_HEAD(watches);
+
+/* Rewind an event's message so it is sent again from the header. */
+static void reset_event(struct watch_event *event)
+{
+	struct buffered_data *msg = event->data;
+
+	msg->used = 0;
+	msg->inhdr = true;
+}
+
+/* We received a non-ACK response: re-queue any watch we just sent. */
+void reset_watch_event(struct connection *conn)
+{
+	if (!waiting_for_ack(conn))
+		return;
+
+	reset_event(conn->event);
+}
+
+/* We're waiting if we have an event and we sent it all: the header is
+ * done and the whole body has gone out. */
+bool waiting_for_ack(struct connection *conn)
+{
+	const struct buffered_data *msg;
+
+	if (!conn->event)
+		return false;
+
+	msg = conn->event->data;
+	return !msg->inhdr && msg->used == msg->hdr.msg.len;
+}
+
+/* Is "out" the message of the watch event conn is currently handling? */
+bool is_watch_event(struct connection *conn, struct buffered_data *out)
+{
+	if (!conn->event)
+		return false;
+	return conn->event->data == out;
+}
+
+/* Choose what conn sends next: a reply that was held back, otherwise
+ * the current event, otherwise the first pending event on any of this
+ * connection's watches. Queues nothing while an ack is outstanding. */
+void queue_next_event(struct connection *conn)
+{
+	struct watch *w;
+
+	/* We had a reply queued already? Send it. */
+	if (conn->waiting_reply) {
+		conn->out = conn->waiting_reply;
+		conn->waiting_reply = NULL;
+		return;
+	}
+
+	/* If we're waiting for ack, don't queue more. */
+	if (waiting_for_ack(conn))
+		return;
+
+	/* Find a good event to send. */
+	if (!conn->event) {
+		list_for_each_entry(w, &watches, list) {
+			if (w->conn == conn) {
+				conn->event = list_top(&w->events,
+						       struct watch_event,
+						       list);
+				if (conn->event)
+					break;
+			}
+		}
+	}
+
+	if (conn->event)
+		conn->out = conn->event->data;
+}
+
+/* Does this watch cover node?
+ * Watch on DIR applies to DIR, DIR/FILE, but not DIRLONG. */
+static bool watch_applies(const struct watch *watch, const char *node)
+{
+	const char *watched = watch->node;
+
+	return is_child(node, watched);
+}
+
+/* Return the first watch in the global list that covers node, or NULL. */
+static struct watch *find_watch(const char *node)
+{
+	struct watch *w, *hit = NULL;
+
+	list_for_each_entry(w, &watches, list) {
+		if (!watch_applies(w, node))
+			continue;
+		hit = w;
+		break;
+	}
+	return hit;
+}
+
+/* Return the first watch after "watch" in the global list that covers
+ * node, or NULL if there is none. */
+static struct watch *find_next_watch(struct watch *watch, const char *node)
+{
+	list_for_each_entry_continue(watch, &watches, list)
+		if (watch_applies(watch, node))
+			return watch;
+
+	return NULL;
+}
+
+/* node changed: build an XS_WATCH_EVENT carrying its name and hang it
+ * on the first watch that covers it; later watches receive it in turn
+ * as each one acks. Nothing is fired inside a transaction.
+ * FIXME: we fail to fire on out of memory. Should drop connections. */
+void fire_watches(struct transaction *trans, const char *node)
+{
+	struct watch *first;
+	struct watch_event *ev;
+	struct buffered_data *msg;
+
+	/* During transactions, don't fire watches. */
+	if (trans)
+		return;
+
+	first = find_watch(node);
+	if (!first)
+		return;
+
+	/* Create and fill in info about event. */
+	ev = talloc(talloc_autofree_context(), struct watch_event);
+	msg = new_buffer(ev);
+	ev->data = msg;
+	msg->hdr.msg.type = XS_WATCH_EVENT;
+	msg->hdr.msg.len = strlen(node) + 1;
+	msg->buffer = talloc_strdup(msg, node);
+
+	/* Tie event to this watch. */
+	ev->watch = first;
+	list_add(&ev->list, &first->events);
+
+	/* If connection not doing anything, queue this. */
+	if (!first->conn->out)
+		queue_next_event(first->conn);
+}
+
+/* We're done with this event: see if anyone else wants it.
+ * Rewinds the event and moves it to the next watch covering its node,
+ * or frees it when no further watch applies. */
+static void move_event_onwards(struct watch_event *event)
+{
+	struct watch *next;
+
+	list_del(&event->list);
+	reset_event(event);
+
+	/* Remove from this watch, and find next watch to put this on. */
+	next = find_next_watch(event->watch, event->data->buffer);
+	event->watch = next;
+	if (!next) {
+		talloc_free(event);
+		return;
+	}
+
+	list_add(&event->list, &next->events);
+
+	/* If connection not doing anything, queue this. */
+	if (!next->conn->out)
+		queue_next_event(next->conn);
+}
+
+/* talloc destructor for struct watch: detach the owning connection from
+ * any event of this watch, pass pending events on, then unlink the
+ * watch. Always returns 0. */
+static int destroy_watch(void *_watch)
+{
+ struct watch *watch = _watch;
+ struct watch_event *event;
+
+ /* Forget about sending out or waiting for acks for this watch. */
+ if (watch->conn->event && watch->conn->event->watch == watch)
+ watch->conn->event = NULL;
+
+ /* If we have pending events, pass them on to others. */
+ /* The watch is still on the global list here, which
+ * move_event_onwards() relies on to find the watches after it. */
+ while ((event = list_top(&watch->events, struct watch_event, list)))
+ move_event_onwards(event);
+
+ /* Remove from global list. */
+ list_del(&watch->list);
+ return 0;
+}
+
+/* We keep watches in priority order: the new watch goes in front of the
+ * first existing watch whose priority is not higher than its own, or at
+ * the end of the list if there is no such watch. */
+static void insert_watch(struct watch *watch)
+{
+	struct list_head *before = &watches;
+	struct watch *pos;
+
+	list_for_each_entry(pos, &watches, list) {
+		if (pos->priority <= watch->priority) {
+			before = &pos->list;
+			break;
+		}
+	}
+
+	/* list_add_tail() links the new entry just ahead of "before". */
+	list_add_tail(&watch->list, before);
+}
+
+/* XS_WATCH: register a watch for conn. "in" must hold exactly two
+ * strings, the node and a numeric priority (otherwise EINVAL); conn
+ * needs read permission on the node. Replies with an ack on success. */
+bool do_watch(struct connection *conn, struct buffered_data *in)
+{
+	struct watch *w;
+	char *vec[2];
+
+	if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
+		return send_error(conn, EINVAL);
+
+	if (!check_node_perms(conn, vec[0], XS_PERM_READ))
+		return send_error(conn, errno);
+
+	/* The watch is owned by, and freed with, the connection. */
+	w = talloc(conn, struct watch);
+	w->conn = conn;
+	w->priority = strtoul(vec[1], NULL, 0);
+	w->node = talloc_strdup(w, vec[0]);
+	INIT_LIST_HEAD(&w->events);
+
+	insert_watch(w);
+	talloc_set_destructor(w, destroy_watch);
+	return send_ack(conn, XS_WATCH);
+}
+
+/* XS_WATCH_ACK: conn acknowledges the event it was just sent, which is
+ * then passed on to the next interested watch. Replies ENOENT if no
+ * ack was expected. */
+bool do_watch_ack(struct connection *conn)
+{
+	struct watch_event *acked;
+
+	if (!waiting_for_ack(conn))
+		return send_error(conn, ENOENT);
+
+	/* Remove this watch event. */
+	acked = conn->event;
+	conn->event = NULL;
+	move_event_onwards(acked);
+
+	return send_ack(conn, XS_WATCH_ACK);
+}
+
+/* XS_UNWATCH: remove conn's watch on exactly this node.
+ * Replies EINVAL if no node was supplied, ENOENT if conn has no watch
+ * on it, otherwise frees the watch and acks. */
+bool do_unwatch(struct connection *conn, const char *node)
+{
+	struct watch *watch;
+
+	/* Reject a missing argument up front, as the other single-argument
+	 * handlers do, rather than hand NULL to streq(). */
+	if (!node)
+		return send_error(conn, EINVAL);
+
+	list_for_each_entry(watch, &watches, list) {
+		if (watch->conn == conn
+		    && streq(watch->node, node)) {
+			/* Return straight away: the list entry is gone. */
+			talloc_free(watch);
+			return send_ack(conn, XS_UNWATCH);
+		}
+	}
+	return send_error(conn, ENOENT);
+}
diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h
new file mode 100644
index 0000000000..656ce4c36b
--- /dev/null
+++ b/tools/xenstore/xenstored_watch.h
@@ -0,0 +1,42 @@
+/*
+ Watch code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_WATCH_H
+#define _XENSTORED_WATCH_H
+#include "xenstored_core.h"
+
+bool do_watch(struct connection *conn, struct buffered_data *in);
+bool do_watch_ack(struct connection *conn);
+bool do_unwatch(struct connection *conn, const char *node);
+
+/* Is this a watch event message for this connection? */
+bool is_watch_event(struct connection *conn, struct buffered_data *out);
+
+/* Look through our watches: if any of them have an event, queue it. */
+void queue_next_event(struct connection *conn);
+
+/* Is this connection waiting for a watch acknowledgement? */
+bool waiting_for_ack(struct connection *conn);
+
+/* Reset event if we were sending one */
+void reset_watch_event(struct connection *conn);
+
+/* Fire all watches. */
+void fire_watches(struct transaction *trans, const char *node);
+
+#endif /* _XENSTORED_WATCH_H */
diff --git a/tools/xenstore/xs.c b/tools/xenstore/xs.c
new file mode 100644
index 0000000000..d5058abfb3
--- /dev/null
+++ b/tools/xenstore/xs.c
@@ -0,0 +1,551 @@
+/*
+ Xen Store Daemon interface providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdint.h>
+#include <errno.h>
+#include "xs.h"
+#include "xenstored.h"
+#include "xs_lib.h"
+#include "utils.h"
+
+/* Client handle for one connection to the store daemon. */
+struct xs_handle
+{
+ /* Socket to the daemon; xs_talkv() closes it and stores -1 here after a failed exchange. */
+ int fd;
+};
+
+/* Get the socket from the store daemon handle.
+ * Returns h->fd as-is; h is dereferenced, so it must not be NULL.
+ */
+int xs_fileno(struct xs_handle *h)
+{
+ return h->fd;
+}
+
+/* Connect to the Unix-domain stream socket at connect_to.
+ * Returns a malloc'ed handle owning the socket, or NULL with errno set
+ * (ENAMETOOLONG when the path does not fit in sockaddr_un.sun_path). */
+static struct xs_handle *get_socket(const char *connect_to)
+{
+	struct sockaddr_un addr;
+	int sock, saved_errno;
+	struct xs_handle *h;
+
+	/* sun_path is a fixed-size array and the path may come from the
+	 * environment: refuse over-long paths instead of overflowing. */
+	if (strlen(connect_to) >= sizeof(addr.sun_path)) {
+		errno = ENAMETOOLONG;
+		return NULL;
+	}
+
+	sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (sock < 0)
+		return NULL;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, connect_to);	/* length checked above */
+
+	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
+		h = malloc(sizeof(*h));
+		if (h) {
+			h->fd = sock;
+			return h;
+		}
+	}
+
+	/* connect() or malloc() failed: drop the socket but report the
+	 * original error, not whatever close() leaves in errno. */
+	saved_errno = errno;
+	close(sock);
+	errno = saved_errno;
+	return NULL;
+}
+
+/* Connect to the daemon via the path returned by xs_daemon_socket().
+ * Returns a handle, or NULL if get_socket() fails. */
+struct xs_handle *xs_daemon_open(void)
+{
+ return get_socket(xs_daemon_socket());
+}
+
+/* Connect to the daemon via the path returned by xs_daemon_socket_ro().
+ * Returns a handle, or NULL if get_socket() fails. */
+struct xs_handle *xs_daemon_open_readonly(void)
+{
+ return get_socket(xs_daemon_socket_ro());
+}
+
+/* Close the connection and free the handle; h must not be used afterwards. */
+void xs_daemon_close(struct xs_handle *h)
+{
+ /* fd is -1 if xs_talkv() already closed it after a failure. */
+ if (h->fd >= 0)
+ close(h->fd);
+ free(h);
+}
+
+/* Read exactly len bytes from fd into data, retrying on EINTR.
+ * Returns false with errno set on error; end-of-file before len bytes
+ * have arrived is reported as EBADF. */
+static bool read_all(int fd, void *data, unsigned int len)
+{
+	char *cursor = data;
+
+	while (len != 0) {
+		int got = read(fd, cursor, len);
+
+		if (got > 0) {
+			cursor += got;
+			len -= got;
+			continue;
+		}
+		if (got == 0) {
+			/* It closed fd on us?  EBADF is appropriate. */
+			errno = EBADF;
+			return false;
+		}
+		/* got < 0: only an interrupted call is worth retrying. */
+		if (errno != EINTR)
+			return false;
+	}
+
+	return true;
+}
+
+#ifdef XSTEST
+#define read_all read_all_choice
+#define write_all write_all_choice
+#endif
+
+/* Map an error string from the daemon to its errno value by looking it
+ * up in xsd_errors[]; unknown strings map to EINVAL. */
+static int get_error(const char *errorstring)
+{
+	unsigned int idx = 0;
+
+	while (!streq(errorstring, xsd_errors[idx].errstring)) {
+		/* Ran off the end of the table without a match. */
+		if (idx == ARRAY_SIZE(xsd_errors) - 1)
+			return EINVAL;
+		idx++;
+	}
+	return xsd_errors[idx].errnum;
+}
+
+/* Read one message (header plus body) from fd.
+ * Returns the malloc'ed body, or NULL with errno set. On success *type
+ * gets the message type and, if len is non-NULL, *len gets the body
+ * length. The buffer is one byte longer than the body and always
+ * NUL-terminated, so string functions on it cannot run past the end;
+ * the terminator is not counted in *len. */
+static void *read_reply(int fd, enum xsd_sockmsg_type *type, unsigned int *len)
+{
+	struct xsd_sockmsg msg;
+	size_t body_len;
+	char *ret;
+	int saved_errno;
+
+	if (!read_all(fd, &msg, sizeof(msg)))
+		return NULL;
+
+	body_len = msg.len;
+	if (body_len + 1 == 0) {
+		/* Length plus terminator would wrap around. */
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	/* The extra byte also means we never call malloc(0), which may
+	 * legitimately return NULL for an empty reply. */
+	ret = malloc(body_len + 1);
+	if (!ret)
+		return NULL;
+
+	if (!read_all(fd, ret, msg.len)) {
+		saved_errno = errno;
+		free(ret);
+		errno = saved_errno;
+		return NULL;
+	}
+	ret[body_len] = '\0';
+
+	*type = msg.type;
+	if (len)
+		*len = msg.len;
+	return ret;
+}
+
+/* Send message to xs, get malloc'ed reply. NULL and set errno on error.
+ * The request is a struct xsd_sockmsg header (type plus the summed length
+ * of all iovecs) followed by each iovec in order. If len is non-NULL it
+ * receives the reply length. An XS_ERROR reply is turned into errno via
+ * get_error(). On an I/O failure the socket is closed and h->fd set to -1.
+ */
+static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type,
+ const struct iovec *iovec,
+ unsigned int num_vecs,
+ unsigned int *len)
+{
+ struct xsd_sockmsg msg;
+ void *ret = NULL;
+ int saved_errno;
+ unsigned int i;
+ struct sigaction ignorepipe, oldact;
+
+ msg.type = type;
+ msg.len = 0;
+ for (i = 0; i < num_vecs; i++)
+ msg.len += iovec[i].iov_len;
+
+ /* Ignore SIGPIPE for the duration of the exchange so that writing to a
+ * dead daemon comes back as a write error; the old action is restored
+ * on every exit path. */
+ ignorepipe.sa_handler = SIG_IGN;
+ sigemptyset(&ignorepipe.sa_mask);
+ ignorepipe.sa_flags = 0;
+ sigaction(SIGPIPE, &ignorepipe, &oldact);
+
+ if (!write_all(h->fd, &msg, sizeof(msg)))
+ goto fail;
+
+ for (i = 0; i < num_vecs; i++)
+ if (!write_all(h->fd, iovec[i].iov_base, iovec[i].iov_len))
+ goto fail;
+
+ /* Watches can have fired before reply comes: daemon detects
+ * and re-transmits, so we can ignore this. */
+ /* NOTE(review): &msg.type is passed as enum xsd_sockmsg_type *; confirm the field really has that type. */
+ do {
+ /* Discard the previous (watch event) body; free(NULL) on the first pass. */
+ free(ret);
+ ret = read_reply(h->fd, &msg.type, len);
+ if (!ret)
+ goto fail;
+ } while (msg.type == XS_WATCH_EVENT);
+
+ sigaction(SIGPIPE, &oldact, NULL);
+ if (msg.type == XS_ERROR) {
+ /* The body of an error reply is the error name. */
+ saved_errno = get_error(ret);
+ free(ret);
+ errno = saved_errno;
+ return NULL;
+ }
+
+ assert(msg.type == type);
+ return ret;
+
+fail:
+ /* We're in a bad state, so close fd. */
+ /* ret is always NULL here, so there is nothing to free. */
+ saved_errno = errno;
+ sigaction(SIGPIPE, &oldact, NULL);
+ close(h->fd);
+ h->fd = -1;
+ errno = saved_errno;
+ return NULL;
+}
+
+/* Release p exactly like free(), but leave errno as the caller had it,
+ * so error paths can clean up without losing the failure reason. */
+static void free_no_errno(void *p)
+{
+	const int keep = errno;
+
+	free(p);
+	errno = keep;
+}
+
+/* Convenience wrapper around xs_talkv() for requests whose whole payload
+ * is one NUL-terminated string (the terminator is sent too). */
+static void *xs_single(struct xs_handle *h, enum xsd_sockmsg_type type,
+		       const char *string, unsigned int *len)
+{
+	struct iovec payload = {
+		.iov_base = (void *)string,
+		.iov_len = strlen(string) + 1,
+	};
+
+	return xs_talkv(h, type, &payload, 1, len);
+}
+
+/* Collapse a reply into success/failure: a non-NULL reply means success
+ * and is freed here; NULL means failure. */
+static bool xs_bool(char *reply)
+{
+	bool ok = (reply != NULL);
+
+	free(reply);	/* no-op when reply is NULL */
+	return ok;
+}
+
+/* List the entries of a directory node.
+ * Returns one malloc'ed block holding *num string pointers followed by
+ * the strings themselves, so a single free() releases everything.
+ * Returns NULL on failure. */
+char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num)
+{
+	char *reply, *names, *entry, **ret;
+	unsigned int len, count;
+
+	reply = xs_single(h, XS_DIRECTORY, path, &len);
+	if (!reply)
+		return NULL;
+
+	/* The reply is a run of NUL-terminated names. */
+	count = count_strings(reply, len);
+	*num = count;
+
+	/* Pointer table first, string data right behind it. */
+	ret = malloc(count * sizeof(char *) + len);
+	if (!ret) {
+		free_no_errno(reply);
+		return NULL;
+	}
+	names = (char *)&ret[count];
+	memcpy(names, reply, len);
+	free_no_errno(reply);
+
+	/* Point each table slot at its copied string. */
+	count = 0;
+	for (entry = names; entry < names + len; entry += strlen(entry) + 1)
+		ret[count++] = entry;
+	*num = count;
+	return ret;
+}
+
+/* Get the value of a single file.
+ * Returns a malloced value: call free() on it after use.
+ * len indicates length in bytes.
+ * Returns NULL on failure (this is xs_single()'s result passed through).
+ */
+void *xs_read(struct xs_handle *h, const char *path, unsigned int *len)
+{
+ return xs_single(h, XS_READ, path, len);
+}
+
+/* Write the value of a single file.
+ * createflags must be 0, O_CREAT, or O_CREAT|O_EXCL; anything else fails
+ * with EINVAL before anything is sent.
+ * Returns false on failure.
+ */
+bool xs_write(struct xs_handle *h, const char *path,
+	      const void *data, unsigned int len, int createflags)
+{
+	const char *flags;
+	struct iovec iovec[3];
+
+	switch (createflags) {
+	case 0:
+		flags = XS_WRITE_NONE;
+		break;
+	case O_CREAT:
+		flags = XS_WRITE_CREATE;
+		break;
+	case O_CREAT|O_EXCL:
+		flags = XS_WRITE_CREATE_EXCL;
+		break;
+	default:
+		errno = EINVAL;
+		return false;
+	}
+
+	/* Wire format: path, flags (as string), then the raw data. */
+	iovec[0].iov_base = (void *)path;
+	iovec[0].iov_len = strlen(path) + 1;
+	iovec[1].iov_base = (void *)flags;
+	iovec[1].iov_len = strlen(flags) + 1;
+	iovec[2].iov_base = (void *)data;
+	iovec[2].iov_len = len;
+
+	return xs_bool(xs_talkv(h, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+}
+
+/* Create a new directory.
+ * Sends XS_MKDIR with the path as the only payload string.
+ * Returns false on failure.
+ */
+bool xs_mkdir(struct xs_handle *h, const char *path)
+{
+ return xs_bool(xs_single(h, XS_MKDIR, path, NULL));
+}
+
+/* Destroy a file or directory (directories must be empty).
+ * NOTE(review): the declaration in xs.h says "(and children)" instead;
+ * confirm against the daemon which of the two is the real behaviour.
+ * Returns false on failure.
+ */
+bool xs_rm(struct xs_handle *h, const char *path)
+{
+ return xs_bool(xs_single(h, XS_RM, path, NULL));
+}
+
+/* Get permissions of node (first element is owner).
+ * On success returns a malloced array and stores its length in *num:
+ * call free() after use. Returns NULL on failure, with errno describing
+ * the failure.
+ */
+struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+					  const char *path,
+					  unsigned int *num)
+{
+	char *strings;
+	unsigned int len;
+	struct xs_permissions *ret;
+
+	strings = xs_single(h, XS_GET_PERMS, path, &len);
+	if (!strings)
+		return NULL;
+
+	/* Count the strings: each one perms then domid. */
+	*num = count_strings(strings, len);
+
+	ret = malloc(*num * sizeof(struct xs_permissions));
+	if (ret && !strings_to_perms(ret, *num, strings)) {
+		free_no_errno(ret);
+		ret = NULL;
+	}
+
+	/* On the failure paths errno holds the reason, so release the
+	 * reply without letting free() overwrite it. */
+	free_no_errno(strings);
+	return ret;
+}
+
+/* Set permissions of node (must be owner).
+ * The request is the path followed by one string per permission, each
+ * produced by perm_to_string().
+ * Returns false on failure.
+ */
+bool xs_set_permissions(struct xs_handle *h, const char *path,
+ struct xs_permissions *perms,
+ unsigned int num_perms)
+{
+ unsigned int i;
+ /* Slot 0 is the path; slots 1..num_perms are the permission strings. */
+ struct iovec iov[1+num_perms];
+
+ iov[0].iov_base = (void *)path;
+ iov[0].iov_len = strlen(path) + 1;
+
+ for (i = 0; i < num_perms; i++) {
+ char buffer[MAX_STRLEN(domid_t)+1];
+
+ if (!perm_to_string(&perms[i], buffer))
+ goto unwind;
+
+ /* Each string gets its own heap copy because buffer is reused. */
+ iov[i+1].iov_base = strdup(buffer);
+ iov[i+1].iov_len = strlen(buffer) + 1;
+ if (!iov[i+1].iov_base)
+ goto unwind;
+ }
+
+ if (!xs_bool(xs_talkv(h, XS_SET_PERMS, iov, 1+num_perms, NULL)))
+ goto unwind;
+ for (i = 0; i < num_perms; i++)
+ free(iov[i+1].iov_base);
+ return true;
+
+unwind:
+ /* i is the number of strings successfully duplicated so far (all of
+ * them if the request itself failed): free exactly those. */
+ num_perms = i;
+ for (i = 0; i < num_perms; i++)
+ free_no_errno(iov[i+1].iov_base);
+ return false;
+}
+
+/* Watch a node for changes (poll on fd to detect, or call xs_read_watch()).
+ * When the node (or any child) changes, fd will become readable.
+ * Priority indicates order if multiple watchers: higher is first.
+ * The request carries the path and the priority as a decimal string.
+ * Returns false on failure.
+ */
+bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority)
+{
+	char prio[MAX_STRLEN(priority)];
+
+	sprintf(prio, "%u", priority);
+
+	struct iovec iov[2] = {
+		{ .iov_base = (void *)path, .iov_len = strlen(path) + 1 },
+		{ .iov_base = prio, .iov_len = strlen(prio) + 1 },
+	};
+
+	return xs_bool(xs_talkv(h, XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
+}
+
+/* Find out what node change was on (will block if nothing pending).
+ * Reads one message straight off the socket and asserts that it is an
+ * XS_WATCH_EVENT.
+ * Returns malloced path, or NULL with errno set: call free() after use.
+ * The returned buffer is always NUL-terminated.
+ */
+char *xs_read_watch(struct xs_handle *h)
+{
+	struct xsd_sockmsg msg;
+	size_t body_len;
+	char *path;
+
+	if (!read_all(h->fd, &msg, sizeof(msg)))
+		return NULL;
+
+	assert(msg.type == XS_WATCH_EVENT);
+
+	body_len = msg.len;
+	if (body_len + 1 == 0) {
+		/* Length plus terminator would wrap around. */
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	/* One spare byte for the terminator; this also avoids malloc(0),
+	 * which may return NULL for an empty body. */
+	path = malloc(body_len + 1);
+	if (!path)
+		return NULL;
+
+	if (!read_all(h->fd, path, msg.len)) {
+		free_no_errno(path);
+		return NULL;
+	}
+	path[body_len] = '\0';
+	return path;
+}
+
+/* Acknowledge watch on node. Watches must be acknowledged before
+ * any other watches can be read.
+ * Sends XS_WATCH_ACK with the fixed payload "OK".
+ * Returns false on failure.
+ */
+bool xs_acknowledge_watch(struct xs_handle *h)
+{
+ return xs_bool(xs_single(h, XS_WATCH_ACK, "OK", NULL));
+}
+
+/* Remove a watch on a node.
+ * Sends XS_UNWATCH with the path as the only payload string.
+ * Returns false on failure (no watch on that node).
+ */
+bool xs_unwatch(struct xs_handle *h, const char *path)
+{
+ return xs_bool(xs_single(h, XS_UNWATCH, path, NULL));
+}
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ * Transaction only applies to the given subtree.
+ * You can only have one transaction at any time.
+ * Sends XS_TRANSACTION_START with the subtree path as payload.
+ * Returns false on failure.
+ */
+bool xs_transaction_start(struct xs_handle *h, const char *subtree)
+{
+ return xs_bool(xs_single(h, XS_TRANSACTION_START, subtree, NULL));
+}
+
+/* End a transaction.
+ * If abort is true, transaction is discarded instead of committed.
+ * The payload is "F" when aborting and "T" when committing.
+ * Returns false on failure, which indicates an error: transactions will
+ * not fail spuriously.
+ */
+bool xs_transaction_end(struct xs_handle *h, bool abort)
+{
+	const char *commit_flag = abort ? "F" : "T";
+
+	return xs_bool(xs_single(h, XS_TRANSACTION_END, commit_flag, NULL));
+}
+
+/* Introduce a new domain.
+ * This tells the store daemon about a shared memory page and event channel
+ * associated with a domain: the domain uses these to communicate.
+ * The request carries four strings: domid, mfn and eventchn in decimal,
+ * then path. Returns false on failure.
+ */
+bool xs_introduce_domain(struct xs_handle *h,
+			 domid_t domid,
+			 unsigned long mfn,
+			 unsigned int eventchn,
+			 const char *path)
+{
+	char domid_str[MAX_STRLEN(domid)];
+	char mfn_str[MAX_STRLEN(mfn)];
+	char eventchn_str[MAX_STRLEN(eventchn)];
+
+	sprintf(domid_str, "%u", domid);
+	sprintf(mfn_str, "%lu", mfn);
+	sprintf(eventchn_str, "%u", eventchn);
+
+	struct iovec iov[4] = {
+		{ .iov_base = domid_str, .iov_len = strlen(domid_str) + 1 },
+		{ .iov_base = mfn_str, .iov_len = strlen(mfn_str) + 1 },
+		{ .iov_base = eventchn_str, .iov_len = strlen(eventchn_str) + 1 },
+		{ .iov_base = (char *)path, .iov_len = strlen(path) + 1 },
+	};
+
+	return xs_bool(xs_talkv(h, XS_INTRODUCE, iov, ARRAY_SIZE(iov), NULL));
+}
+
+/* Release a domain: sends XS_RELEASE with the domain id as a decimal
+ * string. Returns false on failure. */
+bool xs_release_domain(struct xs_handle *h,
+ domid_t domid)
+{
+ char domid_str[MAX_STRLEN(domid)];
+
+ sprintf(domid_str, "%u", domid);
+
+ return xs_bool(xs_single(h, XS_RELEASE, domid_str, NULL));
+}
+
+/* Ask the daemon to shut down (XS_SHUTDOWN with an empty payload).
+ * After a successful request, block in a one-byte read on the socket
+ * until the daemon goes away; the result of that read is ignored.
+ * Returns false if the request itself failed. */
+bool xs_shutdown(struct xs_handle *h)
+{
+	char c;
+
+	if (!xs_bool(xs_single(h, XS_SHUTDOWN, "", NULL)))
+		return false;
+
+	/* Wait for it to actually shutdown. */
+	read(h->fd, &c, 1);
+	return true;
+}
+
+/* Only useful for DEBUG versions.
+ * Sends XS_DEBUG with the command string followed by len bytes of data;
+ * returns xs_talkv()'s malloc'ed reply, or NULL on failure. */
+char *xs_debug_command(struct xs_handle *h, const char *cmd,
+		       void *data, unsigned int len)
+{
+	struct iovec iov[2] = {
+		{ .iov_base = (void *)cmd, .iov_len = strlen(cmd) + 1 },
+		{ .iov_base = data, .iov_len = len },
+	};
+
+	return xs_talkv(h, XS_DEBUG, iov, ARRAY_SIZE(iov), NULL);
+}
diff --git a/tools/xenstore/xs.h b/tools/xenstore/xs.h
new file mode 100644
index 0000000000..ff9481c3a6
--- /dev/null
+++ b/tools/xenstore/xs.h
@@ -0,0 +1,146 @@
+#ifndef _XS_H
+#define _XS_H
+/*
+ Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* On failure, these routines set errno. */
+#include "xs_lib.h"
+
+struct xs_handle;
+
+/* Connect to the xs daemon.
+ * Returns a handle or NULL.
+ */
+struct xs_handle *xs_daemon_open(void);
+
+/* Connect to the xs daemon (readonly for non-root clients).
+ * Returns a handle or NULL.
+ */
+struct xs_handle *xs_daemon_open_readonly(void);
+
+/* Close the connection to the xs daemon. */
+void xs_daemon_close(struct xs_handle *);
+
+/* Get contents of a directory.
+ * Returns a malloced array: call free() on it after use.
+ * Num indicates size.
+ */
+char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num);
+
+/* Get the value of a single file.
+ * Returns a malloced value: call free() on it after use.
+ * len indicates length in bytes.
+ */
+void *xs_read(struct xs_handle *h, const char *path, unsigned int *len);
+
+/* Write the value of a single file.
+ * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ */
+bool xs_write(struct xs_handle *h, const char *path, const void *data, unsigned int len,
+ int createflags);
+
+/* Create a new directory.
+ * Returns false on failure.
+ */
+bool xs_mkdir(struct xs_handle *h, const char *path);
+
+/* Destroy a file or directory (and children).
+ * Returns false on failure.
+ */
+bool xs_rm(struct xs_handle *h, const char *path);
+
+/* Get permissions of node (first element is owner, first perms is "other").
+ * Returns malloced array, or NULL: call free() after use.
+ */
+struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+ const char *path,
+ unsigned int *num);
+
+/* Set permissions of node (must be owner).
+ * Returns false on failure.
+ */
+bool xs_set_permissions(struct xs_handle *h,
+ const char *path,
+ struct xs_permissions *perms,
+ unsigned int num_perms);
+
+/* Watch a node for changes (poll on fd to detect, or call xs_read_watch()).
+ * When the node (or any child) changes, fd will become readable.
+ * Priority indicates order if multiple watchers: higher is first.
+ * Returns false on failure.
+ */
+bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority);
+
+/* Return the FD to poll on to see if a watch has fired. */
+int xs_fileno(struct xs_handle *h);
+
+/* Find out what node change was on (will block if nothing pending).
+ * Returns malloced path, or NULL: call free() after use.
+ */
+char *xs_read_watch(struct xs_handle *h);
+
+/* Acknowledge watch on node. Watches must be acknowledged before
+ * any other watches can be read.
+ * Returns false on failure.
+ */
+bool xs_acknowledge_watch(struct xs_handle *h);
+
+/* Remove a watch on a node.
+ * Returns false on failure (no watch on that node).
+ */
+bool xs_unwatch(struct xs_handle *h, const char *path);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ * Transaction only applies to the given subtree.
+ * You can only have one transaction at any time.
+ * Returns false on failure.
+ */
+bool xs_transaction_start(struct xs_handle *h, const char *subtree);
+
+/* End a transaction.
+ * If abort is true, transaction is discarded instead of committed.
+ * Returns false on failure, which indicates an error: transactions will
+ * not fail spuriously.
+ */
+bool xs_transaction_end(struct xs_handle *h, bool abort);
+
+/* Introduce a new domain.
+ * This tells the store daemon about a shared memory page, event channel
+ * and store path associated with a domain: the domain uses these to communicate.
+ */
+bool xs_introduce_domain(struct xs_handle *h,
+ domid_t domid,
+ unsigned long mfn,
+ unsigned int eventchn,
+ const char *path);
+
+/* Release a domain.
+ * Tells the store domain to release the memory page to the domain.
+ */
+bool xs_release_domain(struct xs_handle *h, domid_t domid);
+
+/* Only useful for DEBUG versions */
+char *xs_debug_command(struct xs_handle *h, const char *cmd,
+ void *data, unsigned int len);
+
+/* Shut down the daemon. */
+bool xs_shutdown(struct xs_handle *h);
+
+#endif /* _XS_H */
diff --git a/tools/xenstore/xs_lib.c b/tools/xenstore/xs_lib.c
new file mode 100644
index 0000000000..8630eaffce
--- /dev/null
+++ b/tools/xenstore/xs_lib.c
@@ -0,0 +1,141 @@
+#include "xs_lib.h"
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/* Common routines for the Xen store daemon and client library. */
+
+/* Directory holding the daemon's persistent data: $XENSTORED_ROOTDIR if
+ * set, otherwise "/var/lib/xenstored". */
+static const char *xs_daemon_rootdir(void)
+{
+	const char *override = getenv("XENSTORED_ROOTDIR");
+
+	if (override)
+		return override;
+	return "/var/lib/xenstored";
+}
+
+/* Directory holding the daemon's runtime files: $XENSTORED_RUNDIR if
+ * set, otherwise "/var/run/xenstored". */
+static const char *xs_daemon_rundir(void)
+{
+	const char *override = getenv("XENSTORED_RUNDIR");
+
+	if (override)
+		return override;
+	return "/var/run/xenstored";
+}
+
+/* Path of the daemon's socket: "<rundir>/socket".
+ * Returns a pointer to a static buffer that the next call overwrites. */
+const char *xs_daemon_socket(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The run directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/socket", xs_daemon_rundir());
+	return buf;
+}
+
+/* Path of the daemon's read-only socket: "<rundir>/socket_ro".
+ * Returns a pointer to a static buffer that the next call overwrites. */
+const char *xs_daemon_socket_ro(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The run directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/socket_ro", xs_daemon_rundir());
+	return buf;
+}
+
+/* Path of the store directory: "<rootdir>/store".
+ * Returns a pointer to a static buffer that the next call overwrites. */
+const char *xs_daemon_store(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The root directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/store", xs_daemon_rootdir());
+	return buf;
+}
+
+/* Path of the transactions directory: "<rootdir>/transactions".
+ * Returns a pointer to a static buffer that the next call overwrites. */
+const char *xs_daemon_transactions(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The root directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/transactions", xs_daemon_rootdir());
+	return buf;
+}
+
+/* Simple routines for writing to sockets, etc. */
+/* Write exactly len bytes from data to fd, retrying on EINTR.
+ * Returns false if write() fails for any other reason or returns 0. */
+bool write_all(int fd, const void *data, unsigned int len)
+{
+	const char *cursor = data;
+
+	while (len != 0) {
+		int sent = write(fd, cursor, len);
+
+		if (sent > 0) {
+			cursor += sent;
+			len -= sent;
+		} else if (sent == 0 || errno != EINTR) {
+			return false;
+		}
+		/* else: interrupted before anything was written, retry. */
+	}
+
+	return true;
+}
+
+/* Convert strings to permissions. False if a problem.
+ * strings holds num consecutive NUL-terminated entries, each one letter
+ * ('r', 'w', 'b' for both, 'n' for none) followed by a numeric id.
+ * On a malformed entry errno is set to EINVAL. */
+bool strings_to_perms(struct xs_permissions *perms, unsigned int num,
+		      const char *strings)
+{
+	const char *cur = strings;
+	unsigned int idx;
+
+	for (idx = 0; idx < num; idx++) {
+		char *end;
+		char letter = *cur++;
+
+		if (letter == 'r') {
+			perms[idx].perms = XS_PERM_READ;
+		} else if (letter == 'w') {
+			perms[idx].perms = XS_PERM_WRITE;
+		} else if (letter == 'b') {
+			perms[idx].perms = XS_PERM_READ|XS_PERM_WRITE;
+		} else if (letter == 'n') {
+			perms[idx].perms = XS_PERM_NONE;
+		} else {
+			errno = EINVAL;
+			return false;
+		}
+
+		/* The id must be non-empty and run to the terminator. */
+		perms[idx].id = strtol(cur, &end, 0);
+		if (*end || !*cur) {
+			errno = EINVAL;
+			return false;
+		}
+		/* Step over the terminator to the next entry. */
+		cur = end + 1;
+	}
+	return true;
+}
+
+/* Convert permissions to a string (up to len MAX_STRLEN(domid_t)+1).
+ * Writes one letter ('w', 'r', 'b' or 'n') followed by the id in decimal.
+ * Any other permission value fails with errno set to EINVAL. */
+bool perm_to_string(const struct xs_permissions *perm, char *buffer)
+{
+	char letter;
+
+	if (perm->perms == XS_PERM_WRITE) {
+		letter = 'w';
+	} else if (perm->perms == XS_PERM_READ) {
+		letter = 'r';
+	} else if (perm->perms == (XS_PERM_READ|XS_PERM_WRITE)) {
+		letter = 'b';
+	} else if (perm->perms == XS_PERM_NONE) {
+		letter = 'n';
+	} else {
+		errno = EINVAL;
+		return false;
+	}
+
+	buffer[0] = letter;
+	sprintf(buffer + 1, "%i", (int)perm->id);
+	return true;
+}
+
+/* Given a buffer and its length, count the NUL-terminated strings packed
+ * back to back in it. The last string must be terminated within (or at
+ * the end of) the buffer, since each one is measured with strlen(). */
+unsigned int count_strings(const char *strings, unsigned int len)
+{
+	const char *end = strings + len;
+	unsigned int total = 0;
+
+	while (strings < end) {
+		strings += strlen(strings) + 1;
+		total++;
+	}
+
+	return total;
+}
+
diff --git a/tools/xenstore/xs_lib.h b/tools/xenstore/xs_lib.h
new file mode 100644
index 0000000000..a946ab0b19
--- /dev/null
+++ b/tools/xenstore/xs_lib.h
@@ -0,0 +1,63 @@
+#ifndef _XR_LIB_H
+#define _XR_LIB_H
+/*
+ Common routines between Xen store user library and daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <stdbool.h>
+#include <limits.h>
+#include <xc.h>
+
+/* Bitmask of permissions.
+ * perm_to_string() and strings_to_perms() only encode NONE, READ, WRITE
+ * and READ|WRITE (as 'n', 'r', 'w', 'b'). */
+enum xs_perm_type {
+ XS_PERM_NONE = 0,
+ XS_PERM_READ = 1,
+ XS_PERM_WRITE = 2,
+ /* Internal use. */
+ XS_PERM_CREATE = 4,
+ XS_PERM_OWNER = 8,
+};
+
+/* One permission entry: an id and the access bits granted to it. */
+struct xs_permissions
+{
+ /* Numeric id this entry applies to (a domid_t). */
+ domid_t id;
+ /* Bitmask of enum xs_perm_type values. */
+ enum xs_perm_type perms;
+};
+
+/* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. */
+#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2)
+
+/* Path for various daemon things: env vars can override. */
+const char *xs_daemon_socket(void);
+const char *xs_daemon_socket_ro(void);
+const char *xs_daemon_store(void);
+const char *xs_daemon_transactions(void);
+
+/* Simple write function: loops for you. */
+bool write_all(int fd, const void *data, unsigned int len);
+
+/* Convert strings to permissions. False if a problem. */
+bool strings_to_perms(struct xs_permissions *perms, unsigned int num,
+ const char *strings);
+
+/* Convert permissions to a string (up to len MAX_STRLEN(domid_t)+1). */
+bool perm_to_string(const struct xs_permissions *perm, char *buffer);
+
+/* Given a string and a length, count how many strings (nul terms). */
+unsigned int count_strings(const char *strings, unsigned int len);
+
+#endif /* _XR_LIB_H */
diff --git a/tools/xenstore/xs_random.c b/tools/xenstore/xs_random.c
new file mode 100644
index 0000000000..ef5d44d0b0
--- /dev/null
+++ b/tools/xenstore/xs_random.c
@@ -0,0 +1,1646 @@
+/* Random tests.
+
+ We check that the results from a real filesystem are the same.
+*/
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include "xs.h"
+#include "talloc.h"
+#include "utils.h"
+
+/* Table of store operations behind an opaque handle. The signatures
+ * mirror the xs_* client calls with the handle passed as void *. */
+struct ops
+{
+ /* Name of this backend. */
+ char *name;
+
+ /* List directory entries; *num receives the count. */
+ char **(*dir)(void *h, const char *path, unsigned int *num);
+
+ /* Read a node's value; *len receives the length in bytes. */
+ void *(*read)(void *h, const char *path, unsigned int *len);
+
+ /* Write a node's value; createflags as for xs_write(). */
+ bool (*write)(void *h, const char *path, const void *data,
+ unsigned int len, int createflags);
+
+ bool (*mkdir)(void *h, const char *path);
+
+ bool (*rm)(void *h, const char *path);
+
+ struct xs_permissions *(*get_perms)(void *h,
+ const char *path,
+ unsigned int *num);
+
+ bool (*set_perms)(void *h,
+ const char *path,
+ struct xs_permissions *perms,
+ unsigned int num);
+
+ bool (*transaction_start)(void *h, const char *subtree);
+ bool (*transaction_end)(void *h, bool abort);
+
+ /* Create and destroy a new handle. */
+ void *(*handle)(const char *path);
+ void (*close)(void *);
+};
+
+/* State for the filesystem-backed implementation of the operations. */
+struct file_ops_info
+{
+ /* Directory prefix prepended to store paths when no transaction is active. */
+ const char *base;
+ /* If non-NULL, get_name() uses this prefix instead of base. */
+ char *transact_base;
+ /* If non-NULL, write_ok() only allows writes at or below this store path. */
+ char *transact;
+};
+
+/* Turn the regular file dirname into a directory of the same name, with
+ * the old contents moved to "<dirname>/.DATA" and, if present,
+ * "<dirname>.perms" moved to "<dirname>/.perms". */
+static void convert_to_dir(const char *dirname)
+{
+	char *tmpname = talloc_asprintf(dirname, "%s.tmp", dirname);
+	char *datafile, *oldperms, *newperms;
+
+	/* Move the file aside so the name is free for the directory. */
+	if (rename(dirname, tmpname) != 0)
+		barf_perror("Failed to rename %s to %s", dirname, tmpname);
+	if (mkdir(dirname, 0700) != 0)
+		barf_perror("Failed to mkdir %s", dirname);
+
+	datafile = talloc_asprintf(dirname, "%s/.DATA", dirname);
+	if (rename(tmpname, datafile) != 0)
+		barf_perror("Failed to rename into %s", dirname);
+
+	/* If perms exists, move it in.  Failure here is deliberately ignored. */
+	oldperms = talloc_asprintf(dirname, "%s.perms", dirname);
+	newperms = talloc_asprintf(dirname, "%s/.perms", dirname);
+	rename(oldperms, newperms);
+}
+
+/* Files can be used as dirs, too.  Convert them when they are.
+ * Looks at the parent of filename (everything before the last '/'): if
+ * it exists as a regular file it is converted to a directory. A filename
+ * without any '/' has no parent to convert and is left alone. */
+static void maybe_convert_to_directory(const char *filename)
+{
+	struct stat st;
+	const char *slash = strrchr(filename, '/');
+	char *dirname;
+
+	if (!slash)
+		return;
+
+	/* A "%.*s" precision must be an int, not a ptrdiff_t. */
+	dirname = talloc_asprintf(filename, "%.*s",
+				  (int)(slash - filename), filename);
+	if (lstat(dirname, &st) == 0 && S_ISREG(st.st_mode))
+		convert_to_dir(dirname);
+}
+
+/* Map a store path to a filesystem name by prepending the transaction
+ * base if one is set, otherwise the normal base. The result is a talloc
+ * allocation with path as its context. */
+static char *get_name(struct file_ops_info *info, const char *path)
+{
+	const char *prefix = info->transact_base ? info->transact_base
+						 : info->base;
+
+	return talloc_asprintf(path, "%s%s", prefix, path);
+}
+
+/* Like get_name(), but first makes sure the parent of the resulting
+ * filename is a directory (converting a regular file if necessary). */
+static char *path_to_name(struct file_ops_info *info, const char *path)
+{
+ char *filename = get_name(info, path);
+ maybe_convert_to_directory(filename);
+ return filename;
+}
+
+/* Is child a subnode of parent, or equal?
+ * True when parent is "/", or when child starts with parent and the
+ * match ends on a path-component boundary ('/' or end of string). */
+static bool is_child(const char *child, const char *parent)
+{
+	unsigned int plen;
+	char next;
+
+	/* / should really be "" for this algorithm to work, but that's a
+	 * usability nightmare. */
+	if (streq(parent, "/"))
+		return true;
+
+	plen = strlen(parent);
+	if (strncmp(child, parent, plen) != 0)
+		return false;
+
+	/* Reject "/foobar" as a child of "/foo". */
+	next = child[plen];
+	return next == '/' || next == '\0';
+}
+
+/* May path be modified right now?  With a transaction active, only paths
+ * at or below its subtree are writable; anything else fails with EROFS. */
+static bool write_ok(struct file_ops_info *info, const char *path)
+{
+	if (!info->transact || is_child(path, info->transact))
+		return true;
+
+	errno = EROFS;
+	return false;
+}
+
+/* List a directory from the filesystem model.
+ * Returns one malloc'ed block (pointer table followed by the names) and
+ * stores the entry count in *num; NULL if the directory cannot be opened
+ * or memory runs out. Names containing a '.' are skipped. */
+static char **file_directory(struct file_ops_info *info,
+			     const char *path, unsigned int *num)
+{
+	char **ret;
+	DIR *dir;
+	struct dirent *dirent;
+	char *p, *dirname = path_to_name(info, path);
+	unsigned int i, len = 0;
+	struct stat st;
+
+	/* If it exists, but isn't a directory, we convert it. */
+	if (lstat(dirname, &st) == 0 && !S_ISDIR(st.st_mode))
+		convert_to_dir(dirname);
+
+	*num = 0;
+	dir = opendir(dirname);
+	if (!dir)
+		return NULL;
+
+	/* Once to count them. */
+	while ((dirent = readdir(dir)) != NULL) {
+		if (strchr(dirent->d_name, '.'))
+			continue;
+		len += strlen(dirent->d_name) + 1;
+		(*num)++;
+	}
+	rewinddir(dir);
+
+	/* Now allocate and fill in. */
+	ret = malloc(sizeof(char *) * *num + len);
+	if (!ret) {
+		/* Don't leak the directory stream; keep malloc's errno. */
+		int saved_errno = errno;
+		closedir(dir);
+		errno = saved_errno;
+		return NULL;
+	}
+	p = (char *)&ret[*num];
+	i = 0;
+	while ((dirent = readdir(dir)) != NULL) {
+		if (strchr(dirent->d_name, '.'))
+			continue;
+		ret[i] = p;
+		strcpy(p, dirent->d_name);
+		p += strlen(p) + 1;
+		i++;
+	}
+	closedir(dir);
+
+	return ret;
+}
+
+/* Where does this node keep its value?  A directory stores it in
+ * "<filename>/.DATA"; anything else (including a missing file) is used
+ * as-is. */
+static char *filename_to_data(const char *filename)
+{
+	struct stat st;
+
+	if (lstat(filename, &st) != 0 || !S_ISDIR(st.st_mode))
+		return (char *)filename;
+
+	return talloc_asprintf(filename, "%s/.DATA", filename);
+}
+
+/* Read a node's value from the filesystem model.
+ * Returns grab_file()'s buffer and stores its size in *len, or NULL on
+ * failure (in which case *len is 0 unless grab_file() set the size). */
+static void *file_read(struct file_ops_info *info,
+		       const char *path, unsigned int *len)
+{
+	void *ret;
+	char *filename = filename_to_data(path_to_name(info, path));
+	/* Initialised so *len is well-defined even if grab_file() fails
+	 * without touching it. */
+	unsigned long size = 0;
+
+	ret = grab_file(filename, &size);
+	/* Directory exists, .DATA doesn't. */
+	if (!ret && errno == ENOENT && strends(filename, ".DATA"))
+		errno = EISDIR;
+	*len = size;
+	return ret;
+}
+
+/* Read a node's permissions from the filesystem model.
+ * Permissions live in "<dir>/.perms" for directories and
+ * "<file>.perms" otherwise. Returns an allocated array with its length
+ * in *num, or NULL if the node itself does not exist. */
+static struct xs_permissions *file_get_perms(struct file_ops_info *info,
+					     const char *path,
+					     unsigned int *num)
+{
+	void *perms;
+	struct xs_permissions *ret;
+	char *filename = path_to_name(info, path);
+	char *permfile;
+	unsigned long size;
+	struct stat st;
+
+	/* The node itself must exist. */
+	if (lstat(filename, &st) != 0)
+		return NULL;
+
+	if (S_ISDIR(st.st_mode))
+		permfile = talloc_asprintf(path, "%s/.perms", filename);
+	else
+		permfile = talloc_asprintf(path, "%s.perms", filename);
+
+	perms = grab_file(permfile, &size);
+	if (!perms) {
+		/* No permfile: we didn't bother, return defaults.  Nothing
+		 * was grabbed, so there is nothing to release. */
+		ret = new(struct xs_permissions);
+		ret[0].id = 0;
+		/* Default for root is readable. */
+		if (streq(path, "/"))
+			ret[0].perms = XS_PERM_READ;
+		else
+			ret[0].perms = XS_PERM_NONE;
+		*num = 1;
+		return ret;
+	}
+	*num = count_strings(perms, size);
+
+	ret = new_array(struct xs_permissions, *num);
+	if (!strings_to_perms(ret, *num, perms))
+		barf("Reading permissions from %s", permfile);
+	release_file(perms, size);
+	return ret;
+}
+
+/* Store a node's permissions in the filesystem model.
+ * Writes each permission as a NUL-terminated perm_to_string() string to
+ * "<dir>/.perms" or "<file>.perms". Returns false with errno set if num
+ * is 0 (EINVAL), the path is not writable, the node does not exist, the
+ * perm file cannot be opened, or a permission cannot be encoded. */
+static bool file_set_perms(struct file_ops_info *info,
+ const char *path,
+ struct xs_permissions *perms,
+ unsigned int num)
+{
+ unsigned int i;
+ char *filename = path_to_name(info, path);
+ char *permfile;
+ int fd;
+ struct stat st;
+
+ if (num < 1) {
+ errno = EINVAL;
+ return false;
+ }
+
+ if (!write_ok(info, path))
+ return false;
+
+ /* Check non-perm file exists/ */
+ if (lstat(filename, &st) != 0)
+ return false;
+
+ if (S_ISDIR(st.st_mode))
+ permfile = talloc_asprintf(path, "%s/.perms", filename);
+ else
+ permfile = talloc_asprintf(path, "%s.perms", filename);
+
+ fd = open(permfile, O_WRONLY|O_CREAT|O_TRUNC, 0600);
+ if (fd < 0)
+ return false;
+
+ for (i = 0; i < num; i++) {
+ char buffer[100];
+
+ if (!perm_to_string(&perms[i], buffer)) {
+ /* close() may change errno; report the encoding error. */
+ int saved_errno = errno;
+ close(fd);
+ errno = saved_errno;
+ return false;
+ }
+ /* The terminating NUL is written too, as the entry separator. */
+ if (write(fd, buffer, strlen(buffer) + 1)
+ != (int)strlen(buffer) + 1)
+ barf_perror("Failed to write perm");
+ }
+ close(fd);
+ return true;
+}
+
+/*
+ * Write len bytes of data to a node in the FILE backend.
+ *
+ * createflags may only contain O_CREAT and O_EXCL; anything else fails
+ * with EINVAL.  When the mapped file name ends in ".DATA" the node counts
+ * as already existing: O_EXCL fails with EEXIST and a plain write is
+ * allowed to create the data file.
+ *
+ * Returns true on success, false with errno set otherwise.
+ */
+static bool file_write(struct file_ops_info *info,
+		       const char *path, const void *data,
+		       unsigned int len, int createflags)
+{
+	char *filename = filename_to_data(path_to_name(info, path));
+	int fd;
+
+	/* Kernel isn't strict, but library is. */
+	if ((createflags & ~(O_CREAT|O_EXCL)) != 0) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (!write_ok(info, path))
+		return false;
+
+	/* We regard it as existing if dir exists. */
+	if (strends(filename, ".DATA")) {
+		if (createflags & O_EXCL) {
+			errno = EEXIST;
+			return false;
+		}
+		if (createflags == 0)
+			createflags = O_CREAT;
+	}
+
+	fd = open(filename, createflags|O_TRUNC|O_WRONLY, 0600);
+	if (fd < 0) {
+		/* FIXME: Another hack. */
+		if (errno == EISDIR && !(createflags & O_CREAT))
+			errno = EEXIST;
+		return false;
+	}
+
+	if (write(fd, data, len) != (int)len)
+		barf_perror("Bad write to %s", filename);
+
+	close(fd);
+	return true;
+}
+
+/*
+ * Create a directory node in the FILE backend.
+ *
+ * The directory is created first and the write permission checked
+ * afterwards, so that errno comes out the same way the daemon reports it.
+ * A directory created without permission is removed again.
+ */
+static bool file_mkdir(struct file_ops_info *info, const char *path)
+{
+	char *dirname = path_to_name(info, path);
+	int saved_errno;
+
+	/* Same effective order as daemon, so error returns are right. */
+	if (mkdir(dirname, 0700) != 0) {
+		/* Let write_ok() have its say on errno unless the parent is missing. */
+		if (!(errno == ENOENT || errno == ENOTDIR))
+			write_ok(info, path);
+		return false;
+	}
+
+	if (write_ok(info, path))
+		return true;
+
+	/* Not allowed after all: undo, but report write_ok()'s error. */
+	saved_errno = errno;
+	rmdir(dirname);
+	errno = saved_errno;
+	return false;
+}
+
+/*
+ * Run cmd through system() and report via barf_perror() unless it ran
+ * and exited with status 0.
+ */
+static void do_command(const char *cmd)
+{
+	int status = system(cmd);
+	bool ok = status != -1 && WIFEXITED(status) && WEXITSTATUS(status) == 0;
+
+	if (!ok)
+		barf_perror("Failed '%s': %i", cmd, status);
+}
+
+/*
+ * Remove a node, its permission file and everything below it from the
+ * FILE backend.
+ *
+ * Fails with EINVAL for the root of the running transaction and for "/",
+ * with lstat()'s errno when the node does not exist, and with write_ok()'s
+ * errno when writing is not permitted.  The checks run in exactly that
+ * order (transaction root, existence, permission, "/").
+ */
+static bool file_rm(struct file_ops_info *info, const char *path)
+{
+	char *filename = path_to_name(info, path);
+	struct stat st;
+	char *cmd;
+
+	/* The subtree a transaction was started on cannot be removed. */
+	if (info->transact != NULL && streq(info->transact, path)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (lstat(filename, &st) != 0)
+		return false;
+
+	if (!write_ok(info, path))
+		return false;
+
+	if (streq(path, "/")) {
+		errno = EINVAL;
+		return false;
+	}
+
+	/* NOTE(review): filename goes to the shell unquoted. */
+	cmd = talloc_asprintf(path, "rm -f %s.perms; rm -r %s",
+			      filename, filename);
+	do_command(cmd);
+	return true;
+}
+
+/*
+ * Begin a transaction on subtree in the FILE backend.
+ *
+ * The whole base directory is copied to "<base>.transact" and the handle
+ * remembers both that copy and the subtree.  Fails with EBUSY when a
+ * transaction is already running and with lstat()'s errno when the
+ * subtree does not exist.
+ */
+static bool file_transaction_start(struct file_ops_info *info,
+				   const char *subtree)
+{
+	char *filename = path_to_name(info, subtree);
+	char *copy_cmd;
+	struct stat st;
+
+	/* Only one transaction per handle. */
+	if (info->transact != NULL) {
+		errno = EBUSY;
+		return false;
+	}
+
+	if (lstat(filename, &st) != 0)
+		return false;
+
+	copy_cmd = talloc_asprintf(NULL, "cp -r %s %s.transact",
+				   info->base, info->base);
+	do_command(copy_cmd);
+	talloc_free(copy_cmd);
+
+	info->transact_base = talloc_asprintf(NULL, "%s.transact", info->base);
+	info->transact = talloc_strdup(NULL, subtree);
+	return true;
+}
+
+/*
+ * Finish the running transaction of the FILE backend.
+ *
+ * abort == true:  the transaction copy is deleted.
+ * abort == false: the base directory is deleted and the transaction copy
+ *                 is moved into its place.
+ *
+ * Fails with ENOENT when no transaction is running.
+ */
+static bool file_transaction_end(struct file_ops_info *info, bool abort)
+{
+	char *cmd;
+
+	if (info->transact == NULL) {
+		errno = ENOENT;
+		return false;
+	}
+
+	if (abort) {
+		cmd = talloc_asprintf(NULL, "rm -r %s", info->transact_base);
+		do_command(cmd);
+	} else {
+		char *drop_old = talloc_asprintf(NULL, "rm -rf %s", info->base);
+
+		do_command(drop_old);
+		talloc_free(drop_old);
+
+		cmd = talloc_asprintf(NULL, "mv %s %s",
+				      info->transact_base, info->base);
+		do_command(cmd);
+	}
+
+	/* Either way the handle goes back to the "no transaction" state. */
+	talloc_free(cmd);
+	talloc_free(info->transact);
+	talloc_free(info->transact_base);
+	info->transact = NULL;
+	info->transact_base = NULL;
+	return true;
+}
+
+/*
+ * Allocate a FILE backend handle rooted at dir, with no transaction
+ * running.  dir is stored by pointer, not copied.
+ */
+static struct file_ops_info *file_handle(const char *dir)
+{
+	struct file_ops_info *fresh = talloc(NULL, struct file_ops_info);
+
+	fresh->transact = NULL;
+	fresh->transact_base = NULL;
+	fresh->base = dir;
+	return fresh;
+}
+
+/* Release a FILE backend handle obtained from file_handle(). */
+static void file_close(struct file_ops_info *handle)
+{
+ talloc_free(handle);
+}
+
+/*
+ * Open a connection to the xenstore daemon; failure is reported through
+ * barf_perror().  dir is ignored: it only exists so the function fits the
+ * same slot as file_handle().
+ */
+static struct xs_handle *xs_handle(const char *dir __attribute__((unused)))
+{
+	struct xs_handle *conn = xs_daemon_open();
+
+	if (conn == NULL)
+		barf_perror("Connecting to xs daemon");
+	return conn;
+}
+
+/* Close a daemon connection obtained from xs_handle(). */
+static void xs_close(struct xs_handle *handle)
+{
+ xs_daemon_close(handle);
+}
+
+/*
+ * Operation table for the FILE backend (the plain-directory reference
+ * implementation).  Every entry is cast to void * because the file_*
+ * functions take a struct file_ops_info * handle, while callers in this
+ * file drive the table with an untyped void * handle.
+ */
+struct ops file_ops = {
+ .name = "FILE",
+ .dir = (void *)file_directory,
+ .read = (void *)file_read,
+ .write = (void *)file_write,
+ .mkdir = (void *)file_mkdir,
+ .rm = (void *)file_rm,
+ .get_perms = (void *)file_get_perms,
+ .set_perms = (void *)file_set_perms,
+ .transaction_start = (void *)file_transaction_start,
+ .transaction_end = (void *)file_transaction_end,
+ .handle = (void *)file_handle,
+ .close = (void *)file_close,
+};
+
+/*
+ * Operation table for the XS backend: the xs_* calls (presumably the
+ * client library declared in xs.h -- not visible here) plus the local
+ * xs_handle()/xs_close() wrappers.  Mirrors file_ops entry for entry so
+ * both backends can be driven by the same code.
+ */
+struct ops xs_ops = {
+ .name = "XS",
+ .dir = (void *)xs_directory,
+ .read = (void *)xs_read,
+ .write = (void *)xs_write,
+ .mkdir = (void *)xs_mkdir,
+ .rm = (void *)xs_rm,
+ .get_perms = (void *)xs_get_permissions,
+ .set_perms = (void *)xs_set_permissions,
+ .transaction_start = (void *)xs_transaction_start,
+ .transaction_end = (void *)xs_transaction_end,
+ .handle = (void *)xs_handle,
+ .close = (void *)xs_close,
+};
+
+/* qsort() comparator: a and b each point at a char * string. */
+static int strptrcmp(const void *a, const void *b)
+{
+	const char *lhs = *(char *const *)a;
+	const char *rhs = *(char *const *)b;
+
+	return strcmp(lhs, rhs);
+}
+
+/* Sort the num strings in dir into strcmp() order, in place. */
+static void sort_dir(char **dir, unsigned int num)
+{
+	qsort(dir, num, sizeof(*dir), strptrcmp);
+}
+
+/*
+ * Render the children of node (and, recursively, everything below them)
+ * as text: one "<name>: <perms...>" line per child, an optional
+ * "(<contents>)" line, then the child's own children one level deeper.
+ *
+ * dir/numdirs list the children of node; dir is sorted in place.
+ * depth is the number of spaces each line is indented by.
+ *
+ * Returns a string allocated with node as its talloc context, or NULL as
+ * soon as any backend call fails.  Nothing is released on that path.
+ */
+static char *dump_dir(struct ops *ops,
+ void *h,
+ const char *node,
+ char **dir,
+ unsigned int numdirs,
+ unsigned int depth)
+{
+ char *ret = talloc_strdup(node, "");
+ unsigned int i;
+ char spacing[depth+1];
+
+ /* One space of indentation per nesting level. */
+ memset(spacing, ' ', depth);
+ spacing[depth] = '\0';
+
+ /* Sorted so that two backends produce comparable text. */
+ sort_dir(dir, numdirs);
+
+ for (i = 0; i < numdirs; i++) {
+ struct xs_permissions *perms;
+ unsigned int j, numperms;
+ unsigned int len;
+ char *contents;
+ unsigned int subnum;
+ char **subdirs;
+ char *subret;
+ char *subnode = talloc_asprintf(node, "%s/%s", node, dir[i]);
+
+ perms = ops->get_perms(h, subnode, &numperms);
+ if (!perms)
+ return NULL;
+ ret = talloc_asprintf_append(ret, "%s%s: ", spacing, dir[i]);
+ for (j = 0; j < numperms; j++) {
+ char buffer[100];
+ if (!perm_to_string(&perms[j], buffer))
+ barf("perm to string");
+ ret = talloc_asprintf_append(ret, "%s ", buffer);
+ }
+ free(perms);
+ ret = talloc_asprintf_append(ret, "\n");
+
+ /* Even directories can have contents. */
+ contents = ops->read(h, subnode, &len);
+ if (!contents) {
+ /* EISDIR just means "no data here"; anything else is fatal. */
+ if (errno != EISDIR)
+ return NULL;
+ } else {
+ ret = talloc_asprintf_append(ret, " %s(%.*s)\n",
+ spacing, len, contents);
+ free(contents);
+ }
+
+ /* Every node is a directory. */
+ subdirs = ops->dir(h, subnode, &subnum);
+ if (!subdirs)
+ return NULL;
+ subret = dump_dir(ops, h, subnode, subdirs, subnum, depth+1);
+ if (!subret)
+ return NULL;
+ ret = talloc_asprintf_append(ret, "%s", subret);
+ free(subdirs);
+ }
+ return ret;
+}
+
+/*
+ * Render the whole tree reachable through handle h as text (see
+ * dump_dir()).  Returns a string in the NULL talloc context, or NULL when
+ * the tree could not be read.
+ */
+static char *dump(struct ops *ops, void *h)
+{
+	char *root = talloc_strdup(NULL, "/");
+	char *text = NULL;
+	unsigned int count;
+	char **entries = ops->dir(h, root, &count);
+
+	if (entries != NULL) {
+		/* Empty parent name, so children come out as "/<name>". */
+		char *parent = talloc_strdup(root, "");
+
+		text = dump_dir(ops, h, parent, entries, count, 0);
+		free(entries);
+		/* Detach the result before root and its children are freed. */
+		if (text != NULL)
+			talloc_steal(NULL, text);
+	}
+
+	talloc_free(root);
+	return text;
+}
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ * hash(), hash2(), hash3, and mix() are externally useful functions.
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose. It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault. -DaveM
+ */
+
+/*
+ * Mix the three 32-bit values a, b and c in place.
+ *
+ * NOTE: Arguments are modified, and each one is expanded many times, so
+ * pass plain variables only.  The expansion is a bare { } block, not a
+ * do { } while (0) statement.
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ratio: an arbitrary value */
+#define JHASH_GOLDEN_RATIO 0x9e3779b9
+
+/* The most generic version, hashes an arbitrary sequence
+ * of bytes. No alignment or length assumptions are made about
+ * the input key.
+ *
+ * key:     bytes to hash
+ * length:  number of bytes in key
+ * initval: value the hash state is seeded with
+ *
+ * Returns the 32-bit hash.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c, len;
+ const u8 *k = key;
+
+ len = length;
+ a = b = JHASH_GOLDEN_RATIO;
+ c = initval;
+
+ /* Consume the key 12 bytes at a time, assembled low byte first. */
+ while (len >= 12) {
+ a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
+ b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
+ c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
+
+ __jhash_mix(a,b,c);
+
+ k += 12;
+ len -= 12;
+ }
+
+ /* The low byte of c is taken by the total length, so the tail
+ * bytes 8..10 go in shifted up by one byte. */
+ c += length;
+ /* Remaining 0..11 bytes: every case falls through on purpose. */
+ switch (len) {
+ case 11: c += ((u32)k[10]<<24);
+ case 10: c += ((u32)k[9]<<16);
+ case 9 : c += ((u32)k[8]<<8);
+ case 8 : b += ((u32)k[7]<<24);
+ case 7 : b += ((u32)k[6]<<16);
+ case 6 : b += ((u32)k[5]<<8);
+ case 5 : b += k[4];
+ case 4 : a += ((u32)k[3]<<24);
+ case 3 : a += ((u32)k[2]<<16);
+ case 2 : a += ((u32)k[1]<<8);
+ case 1 : a += k[0];
+ };
+
+ __jhash_mix(a,b,c);
+
+ return c;
+}
+
+/* A special optimized version that handles 1 or more of u32s.
+ * The length parameter here is the number of u32s in the key.
+ *
+ * k:       words to hash
+ * initval: value the hash state is seeded with
+ *
+ * Returns the 32-bit hash.
+ */
+static inline u32 jhash2(u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c, len;
+
+ a = b = JHASH_GOLDEN_RATIO;
+ c = initval;
+ len = length;
+
+ /* Consume the key three words at a time. */
+ while (len >= 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ k += 3; len -= 3;
+ }
+
+ /* Length is folded in as a byte count. */
+ c += length * 4;
+
+ /* Remaining 0..2 words: case 2 falls through to case 1 on purpose. */
+ switch (len) {
+ case 2 : b += k[1];
+ case 1 : a += k[0];
+ };
+
+ __jhash_mix(a,b,c);
+
+ return c;
+}
+
+
+/* Special ultra-optimized versions that know they are hashing exactly
+ * 3, 2 or 1 word(s).
+ *
+ * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
+ * done at the end is not done here.
+ */
+/* Hash exactly three words a, b and c, seeded with initval. */
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += JHASH_GOLDEN_RATIO;
+ b += JHASH_GOLDEN_RATIO;
+ c += initval;
+
+ __jhash_mix(a, b, c);
+
+ return c;
+}
+
+/* Hash exactly two words: jhash_3words() with the third word set to 0. */
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return jhash_3words(a, b, 0, initval);
+}
+
+/* Hash exactly one word: jhash_3words() with the other two words set to 0. */
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+ return jhash_3words(a, 0, 0, initval);
+}
+
+/*
+ * Return the next value of the deterministic pseudo-random sequence and
+ * advance *state by one.
+ *
+ * The value depends only on *state as it was on entry.  The state is
+ * read once, before it is incremented: having "(*state)++" and "*state"
+ * as two arguments of one call leaves their order unspecified and is
+ * undefined behaviour.  The multiplication is done in unsigned
+ * arithmetic so that it wraps instead of overflowing a signed int.
+ */
+static unsigned int get_randomness(int *state)
+{
+	unsigned int cur = (unsigned int)*state;
+
+	(*state)++;
+	return jhash_1word(cur, cur * 1103515243u);
+}
+
+/*
+ * Build a pseudo-random node path.
+ *
+ * One draw in twenty yields the root "/".  Otherwise the path gets one
+ * "/<0..14>" component, and after each component another draw decides
+ * (odd = yes) whether one more is appended.  The result lives in the NULL
+ * talloc context.
+ */
+static char *random_path(int *state)
+{
+	char *path = NULL;
+
+	if (get_randomness(state) % 20 == 0)
+		return talloc_strdup(NULL, "/");
+
+	do {
+		path = talloc_asprintf_append(path, "/%i",
+					      get_randomness(state) % 15);
+	} while (get_randomness(state) % 2);
+
+	return path;
+}
+
+/*
+ * Turn the outcome of an operation into text: "OK" for success,
+ * "FAILED:<strerror(errno)>" otherwise.  ENOTDIR is reported as ENOENT
+ * (and errno is rewritten accordingly).
+ */
+static char *bool_to_errstring(bool result)
+{
+	if (!result) {
+		/* Real daemon can never return this. */
+		if (errno == ENOTDIR)
+			errno = ENOENT;
+		return talloc_asprintf(NULL, "FAILED:%s", strerror(errno));
+	}
+
+	return talloc_strdup(NULL, "OK");
+}
+
+/*
+ * Turn a directory listing into text: the entries in sorted order, one
+ * per line.  A NULL listing becomes the errno-based failure string and
+ * an empty listing becomes "".  dir is released with free().
+ */
+static char *linearize_dir(char **dir, unsigned int *num)
+{
+	char *text = NULL;
+	unsigned int idx;
+
+	if (dir == NULL)
+		return bool_to_errstring(false);
+
+	if (*num == 0) {
+		text = talloc_strdup(NULL, "");
+	} else {
+		sort_dir(dir, *num);
+		for (idx = 0; idx < *num; idx++)
+			text = talloc_asprintf_append(text, "%s\n", dir[idx]);
+	}
+
+	free(dir);
+	return text;
+}
+
+/*
+ * Turn the result of a read into text of the form "<size>:<data>".  A
+ * NULL result becomes the errno-based failure string.  read is released
+ * with free().
+ */
+static char *linearize_read(char *read, unsigned int *size)
+{
+	if (read != NULL) {
+		char *text = talloc_asprintf(NULL, "%i:%.*s",
+					     *size, *size, read);
+
+		free(read);
+		return text;
+	}
+
+	return bool_to_errstring(false);
+}
+
+/*
+ * Turn a permission array into text: "(<id> <perms>)" per entry, in
+ * order.  A NULL array becomes the errno-based failure string.  perms is
+ * released with free().
+ */
+static char *linearize_perms(struct xs_permissions *perms, unsigned int *size)
+{
+	char *text = NULL;
+	unsigned int idx = 0;
+
+	if (perms == NULL)
+		return bool_to_errstring(false);
+
+	while (idx < *size) {
+		text = talloc_asprintf_append(text, "(%u %u)",
+					      perms[idx].id, perms[idx].perms);
+		idx++;
+	}
+
+	free(perms);
+	return text;
+}
+
+/*
+ * Pick create flags for a write: with equal chance 0, O_CREAT,
+ * O_CREAT|O_EXCL, or a second raw random value (which will usually be
+ * invalid flags).
+ */
+static int random_flags(int *state)
+{
+	unsigned int pick = get_randomness(state) % 4;
+
+	if (pick == 0)
+		return 0;
+	if (pick == 1)
+		return O_CREAT;
+	if (pick == 2)
+		return O_CREAT|O_EXCL;
+
+	/* Anything goes: exercises the flag validation. */
+	return get_randomness(state);
+}
+
+/* Do the next operation, return the results.
+ *
+ * state seeds every random choice made here (path, operation, arguments),
+ * so calling this with the same state against two backends performs the
+ * same operation on both.  state is taken by value: the caller's copy is
+ * not advanced.
+ *
+ * Returns a talloc'ed string describing the outcome ("OK...",
+ * "FAILED:<reason>", or the linearized data).  The path that was used is
+ * made a talloc child of the result, so freeing the result frees both.
+ */
+static char *do_next_op(struct ops *ops, void *h, int state, bool verbose)
+{
+ char *name;
+ unsigned int num;
+ char *ret;
+
+ if (verbose)
+ printf("State %i: ", state);
+
+ /* Path first, then operation: the order of draws is part of the
+ * reproducible sequence. */
+ name = random_path(&state);
+ switch (get_randomness(&state) % 9) {
+ case 0:
+ if (verbose)
+ printf("DIR %s\n", name);
+ ret = linearize_dir(ops->dir(h, name, &num), &num);
+ break;
+ case 1:
+ if (verbose)
+ printf("READ %s\n", name);
+ ret = linearize_read(ops->read(h, name, &num), &num);
+ break;
+ case 2: {
+ int flags = random_flags(&state);
+ char *contents = talloc_asprintf(NULL, "%i",
+ get_randomness(&state));
+ /* Write a random-length prefix (possibly empty) of the digits. */
+ unsigned int len = get_randomness(&state)%(strlen(contents)+1);
+ if (verbose)
+ printf("WRITE %s %s %.*s\n", name,
+ flags == O_CREAT ? "O_CREAT"
+ : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
+ : flags == 0 ? "0" : "CRAPFLAGS",
+ len, contents);
+ ret = bool_to_errstring(ops->write(h, name, contents, len,
+ flags));
+ /* Freed together with the result. */
+ talloc_steal(ret, contents);
+ break;
+ }
+ case 3:
+ if (verbose)
+ printf("MKDIR %s\n", name);
+ ret = bool_to_errstring(ops->mkdir(h, name));
+ break;
+ case 4:
+ if (verbose)
+ printf("RM %s\n", name);
+ ret = bool_to_errstring(ops->rm(h, name));
+ break;
+ case 5:
+ if (verbose)
+ printf("GETPERMS %s\n", name);
+ ret = linearize_perms(ops->get_perms(h, name, &num),
+ &num);
+ break;
+ case 6: {
+ /* 0..7 entries; this num shadows the outer one.
+ * NOTE(review): num can be 0, making perms a zero-length VLA. */
+ unsigned int i, num = get_randomness(&state)%8;
+ struct xs_permissions perms[num];
+
+ if (verbose)
+ printf("SETPERMS %s: ", name);
+ for (i = 0; i < num; i++) {
+ perms[i].id = get_randomness(&state)%8;
+ perms[i].perms = get_randomness(&state)%4;
+ if (verbose)
+ printf("%i%c ", perms[i].id,
+ perms[i].perms == XS_PERM_WRITE ? 'W'
+ : perms[i].perms == XS_PERM_READ ? 'R'
+ : perms[i].perms ==
+ (XS_PERM_READ|XS_PERM_WRITE) ? 'B'
+ : 'N');
+ }
+ if (verbose)
+ printf("\n");
+ ret = bool_to_errstring(ops->set_perms(h, name, perms,
+ num));
+ break;
+ }
+ case 7: {
+ if (verbose)
+ printf("START %s\n", name);
+ ret = bool_to_errstring(ops->transaction_start(h, name));
+ /* Success is tagged so the test drivers can track transactions. */
+ if (streq(ret, "OK")) {
+ talloc_free(ret);
+ ret = talloc_asprintf(NULL, "OK:START-TRANSACT:%s",
+ name);
+ }
+
+ break;
+ }
+ case 8: {
+ bool abort = (get_randomness(&state) % 2);
+
+ if (verbose)
+ printf("STOP %s\n", abort ? "ABORT" : "COMMIT");
+ ret = bool_to_errstring(ops->transaction_end(h, abort));
+ /* Tagged likewise, see case 7. */
+ if (streq(ret, "OK")) {
+ talloc_free(ret);
+ ret = talloc_strdup(NULL, "OK:STOP-TRANSACT");
+ }
+ break;
+ }
+ default:
+ barf("Impossible randomness");
+ }
+
+ talloc_steal(ret, name);
+ return ret;
+}
+
+/* PID of the daemon forked by setup_xs_ops(); 0 while none is known. */
+static int daemon_pid;
+
+/*
+ * Stop the test daemon, if one was started, and wipe testsuite/tmp.
+ *
+ * The daemon is first asked to shut down over a fresh connection; if
+ * that does not work it gets SIGTERM.  Either way it is reaped and
+ * daemon_pid is reset to 0, so a stale PID is never signalled later.
+ */
+static void cleanup_xs_ops(void)
+{
+	char *cmd;
+
+	/* Only a real child may be signalled: 0 and -1 address whole groups. */
+	if (daemon_pid > 0) {
+		struct xs_handle *h;
+
+		h = xs_daemon_open();
+		if (h) {
+			if (xs_shutdown(h)) {
+				waitpid(daemon_pid, NULL, 0);
+				daemon_pid = 0;
+			}
+			xs_daemon_close(h);
+		}
+		/* Polite shutdown did not happen: terminate it instead. */
+		if (daemon_pid) {
+			kill(daemon_pid, SIGTERM);
+			waitpid(daemon_pid, NULL, 0);
+			daemon_pid = 0;
+		}
+	}
+
+	cmd = talloc_asprintf(NULL, "rm -rf testsuite/tmp/*");
+	do_command(cmd);
+	talloc_free(cmd);
+}
+
+/* Delete the FILE backend's directory dir and its "<dir>.transact" copy. */
+static void cleanup_file_ops(const char *dir)
+{
+	char *wipe = talloc_asprintf(NULL, "rm -rf %s %s.transact", dir, dir);
+
+	do_command(wipe);
+	talloc_free(wipe);
+}
+
+/* Tear down both backends: stop the daemon, then delete the FILE tree. */
+static void cleanup(const char *dir)
+{
+ cleanup_xs_ops();
+ cleanup_file_ops(dir);
+}
+
+/* Create the (empty) directory the FILE backend works in. */
+static void setup_file_ops(const char *dir)
+{
+	if (mkdir(dir, 0700) == 0)
+		return;
+
+	barf_perror("Creating directory %s", dir);
+}
+
+/*
+ * Start a private ./xenstored_test daemon and wait until it is ready.
+ *
+ * The daemon is run with --output-pid and its stdout connected to a
+ * pipe; the parent blocks until something arrives on that pipe.
+ * daemon_pid is set to the child's PID.  Failure to create the pipe, to
+ * fork, or to hear from the child is reported through barf()/barf_perror().
+ */
+static void setup_xs_ops(void)
+{
+	int fds[2];
+
+	/* Start daemon. */
+	if (pipe(fds) != 0)
+		barf_perror("Creating pipe for daemon");
+
+	daemon_pid = fork();
+	if (daemon_pid < 0) {
+		/* Never keep -1: kill(-1, ...) would signal every process. */
+		daemon_pid = 0;
+		barf_perror("Forking daemon");
+	}
+
+	if (daemon_pid) {
+		/* Child writes PID when its ready: we wait for that. */
+		char buffer[20];
+
+		close(fds[1]);
+		/* EOF means the child went away before becoming ready. */
+		if (read(fds[0], buffer, sizeof(buffer)) <= 0)
+			barf("Failed to summon daemon");
+		close(fds[0]);
+	} else {
+		dup2(fds[1], STDOUT_FILENO);
+		close(fds[0]);
+#if 0
+		execlp("valgrind", "valgrind", "xenstored_test", "--output-pid",
+		       "--no-fork", NULL);
+#else
+		execlp("./xenstored_test", "xenstored_test", "--output-pid",
+		       "--no-fork", NULL);
+#endif
+		/*
+		 * Only reached when exec failed.  _exit() so that stdio
+		 * output inherited from the parent is not flushed into the
+		 * pipe and mistaken for the daemon's ready message.
+		 */
+		_exit(1);
+	}
+}
+
+/* Bring up both backends: create the FILE tree, then start the daemon. */
+static void setup(const char *dir)
+{
+	setup_file_ops(dir);
+	setup_xs_ops();
+}
+
+/* Parameters handed to try_simple() through its void * argument. */
+struct simple_data
+{
+ /* Added to the iteration number to form each operation's state. */
+ unsigned int seed;
+ /* Print a row of dots while running. */
+ bool print_progress;
+ /* Skip the snapshot comparison done while a transaction is open. */
+ bool fast;
+ /* Backend under test. */
+ struct ops *ops;
+ /* Directory passed to the backend's handle() and to FILE setup/cleanup. */
+ const char *dir;
+};
+
+/* Just a random test.  Don't care about results, just that it doesn't
+ * go boom.
+ *
+ * Restarts the backend selected by data->ops, then runs operations
+ * 0..number-1 (skipping those with trymap[i] false; a NULL trymap runs
+ * all).  Unless data->fast is set, the tree as seen by a second handle is
+ * snapshotted when a transaction starts and must stay unchanged until
+ * the transaction ends.
+ *
+ * Returns number when everything ran, otherwise the index of the
+ * operation at which the run was stopped.
+ */
+static unsigned int try_simple(const bool *trymap,
+			       unsigned int number,
+			       bool verbose,
+			       void *_data)
+{
+	unsigned int i, print;
+	void *h;
+	char *snapshot = NULL;
+	struct simple_data *data = _data;
+
+	if (data->ops == &xs_ops) {
+		cleanup_xs_ops();
+		setup_xs_ops();
+	} else {
+		cleanup_file_ops(data->dir);
+		setup_file_ops(data->dir);
+	}
+	h = data->ops->handle(data->dir);
+
+	/* Aim for a progress line of about 76 dots. */
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		char *ret;
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+
+		if (trymap && !trymap[i])
+			continue;
+
+		ret = do_next_op(data->ops, h, i + data->seed, verbose);
+		if (verbose) {
+			/*
+			 * Show the first line only.  Most results contain no
+			 * newline at all, so strchr() may return NULL, and
+			 * the precision has to be an int.
+			 */
+			const char *eol = strchr(ret, '\n');
+			int shown = eol ? (int)(eol - ret) : (int)strlen(ret);
+
+			printf("-> %.*s\n", shown, ret);
+		}
+		if (streq(ret, "FAILED:Bad file descriptor")) {
+			talloc_free(ret);
+			goto out;
+		}
+		/* Signal 0 only probes whether the daemon is still there. */
+		if (kill(daemon_pid, 0) != 0) {
+			talloc_free(ret);
+			goto out;
+		}
+
+		if (!data->fast) {
+			if (strstarts(ret, "OK:START-TRANSACT:")) {
+				void *pre = data->ops->handle(data->dir);
+
+				snapshot = dump(data->ops, pre);
+				data->ops->close(pre);
+				if (!snapshot) {
+					talloc_free(ret);
+					goto out;
+				}
+			} else if (streq(ret, "OK:STOP-TRANSACT")) {
+				talloc_free(snapshot);
+				snapshot = NULL;
+			}
+		}
+
+		talloc_free(ret);
+
+		if (snapshot) {
+			/* Outside the transaction nothing may have changed. */
+			void *pre = data->ops->handle(data->dir);
+			char *contents;
+			bool same;
+
+			contents = dump(data->ops, pre);
+			data->ops->close(pre);
+			if (!contents)
+				goto out;
+
+			same = streq(contents, snapshot);
+			talloc_free(contents);
+			if (!same)
+				goto out;
+		}
+	}
+	if (data->print_progress)
+		printf("\n");
+
+out:
+	if (snapshot)
+		talloc_free(snapshot);
+	data->ops->close(h);
+	return i;
+}
+
+/* Binary elimination: try eliminating all of them, then reduce.
+ *
+ * map:       number flags, true = run that operation; updated in place
+ * try_start: first operation of the range to try to drop
+ * try_num:   length of that range
+ * try:       test driver, returns the index it stopped at
+ * data:      passed through to try
+ *
+ * If the run still stops at operation number-1 with the whole range
+ * dropped, the range is cleared in map; otherwise each half is tried
+ * separately.
+ */
+static void reduce(bool *map,
+ unsigned int number,
+ unsigned int try_start, unsigned int try_num,
+ unsigned int (*try)(const bool *map,
+ unsigned int number,
+ bool verbose,
+ void *),
+ void *data)
+{
+ /* Scratch copy, so a failed attempt leaves map untouched. */
+ bool newmap[number];
+
+ if (try_num == 0)
+ return;
+
+ /* Try skipping everything between start and end. */
+ memcpy(newmap, map, sizeof(newmap));
+ memset(newmap + try_start, 0, try_num * sizeof(bool));
+
+ /* We want the *same* failure: must fail at "number-1". */
+ if (try(newmap, number, false, data) == number - 1) {
+ memset(map + try_start, 0, try_num * sizeof(bool));
+ return;
+ }
+
+ /* A single operation that cannot be dropped: keep it. */
+ if (try_num == 1)
+ return;
+
+ /* Try each half... */
+ reduce(map, number, try_start, try_num/2, try, data);
+ reduce(map, number, try_start + try_num/2, try_num - try_num/2,
+ try, data);
+}
+
+/*
+ * Shrink a failing run of failed operations to a smaller one that still
+ * fails at the last operation, replay it verbosely, and exit: status 1
+ * normally, status 2 if the reduced run no longer fails at that point.
+ * Never returns.
+ */
+static void reduce_problem(unsigned int failed,
+ unsigned int (*try)(const bool *map,
+ unsigned int number,
+ bool verbose,
+ void *data),
+ void *data)
+{
+ bool map[failed];
+
+ /* Start with every operation enabled. */
+ memset(map, 1, sizeof(map));
+ /* The last operation is the failing one, so it is never dropped. */
+ reduce(map, failed, 0, failed-1, try, data);
+
+ printf("Cut down:\n");
+ if (try(map, failed, true, data) != failed - 1) {
+ printf("Except, that didn't actually fail. Bugger!");
+ exit(2);
+ }
+ exit(1);
+}
+
+/* Just a random test.  Don't care about results, just that it doesn't
+ * go boom.
+ *
+ * Runs iters random operations against the XS backend.  Exits with
+ * status 0 when all of them ran; otherwise reports the failing iteration
+ * and hands over to reduce_problem().
+ */
+static void simple_test(const char *dir,
+			unsigned int iters, unsigned int seed,
+			bool fast, bool verbose)
+{
+	struct simple_data data = {
+		.seed = seed,
+		.print_progress = !verbose,
+		.fast = fast,
+		.ops = &xs_ops,
+		.dir = dir,
+	};
+	unsigned int stopped_at = try_simple(NULL, iters, verbose, &data);
+
+	if (stopped_at != iters) {
+		printf("Failed on iteration %u\n", stopped_at + 1);
+		/* No dots while the failing sequence is being cut down. */
+		data.print_progress = false;
+		reduce_problem(stopped_at + 1, try_simple, &data);
+		return;
+	}
+
+	cleanup_xs_ops();
+	printf("Succeeded\n");
+	exit(0);
+}
+
+/*
+ * Compare the subtree at node as seen through two backends.
+ *
+ * Permissions, data and the sorted list of children must match, and the
+ * same must hold recursively for every child.
+ *
+ * Returns true when the subtrees are identical.  On false, *fail is set
+ * to the backend whose call failed; it is left untouched when both
+ * backends answered but the answers differ, so callers preset it to NULL.
+ */
+static bool ops_equal(struct ops *a, void *ah,
+ struct ops *b, void *bh,
+ const char *node,
+ struct ops **fail)
+{
+ char **dira = NULL, **dirb = NULL;
+ char *dataa = NULL, *datab = NULL;
+ unsigned int i, numa, numb, lena, lenb;
+ struct xs_permissions *permsa = NULL, *permsb = NULL;
+ unsigned int numpermsa, numpermsb;
+ char *nodename;
+ bool ret = false;
+
+ /* FILE backend expects talloc'ed pointer. */
+ nodename = talloc_strdup(NULL, node);
+ permsa = a->get_perms(ah, nodename, &numpermsa);
+ if (!permsa) {
+ *fail = a;
+ goto out;
+ }
+ permsb = b->get_perms(bh, nodename, &numpermsb);
+ if (!permsb) {
+ *fail = b;
+ goto out;
+ }
+ if (numpermsa != numpermsb)
+ goto out;
+ for (i = 0; i < numpermsa; i++) {
+ if (permsa[i].perms != permsb[i].perms)
+ goto out;
+ if (permsa[i].id != permsb[i].id)
+ goto out;
+ }
+
+ /* Non-pure-directory nodes contain data. */
+ /* A NULL read with EISDIR means "no data", not a backend failure. */
+ dataa = a->read(ah, nodename, &lena);
+ if (!dataa && errno != EISDIR) {
+ *fail = a;
+ goto out;
+ }
+ datab = b->read(bh, nodename, &lenb);
+ if (!datab && errno != EISDIR) {
+ *fail = b;
+ goto out;
+ }
+
+ /* Either both have data, and it is the same, or neither has. */
+ if (dataa) {
+ if (!datab)
+ goto out;
+ if (lena != lenb)
+ goto out;
+
+ if (memcmp(dataa, datab, lena) != 0)
+ goto out;
+ } else
+ if (datab)
+ goto out;
+
+ /* Everything is a directory. */
+ dira = a->dir(ah, nodename, &numa);
+ if (!dira) {
+ *fail = a;
+ goto out;
+ }
+ dirb = b->dir(bh, nodename, &numb);
+ if (!dirb) {
+ *fail = b;
+ goto out;
+ }
+ if (numa != numb)
+ goto out;
+ /* Sorted so that the lists can be compared entry by entry. */
+ sort_dir(dira, numa);
+ sort_dir(dirb, numb);
+ for (i = 0; i < numa; i++) {
+ /* Room for node, a '/' separator, the child name and the NUL. */
+ char subnode[strlen(node) + 1 + strlen(dira[i]) + 1];
+
+ if (!streq(dira[i], dirb[i]))
+ goto out;
+
+ /* "/" already ends in a slash: no separator needed. */
+ strcpy(subnode, node);
+ if (!streq(node, "/"))
+ strcat(subnode, "/");
+ strcat(subnode, dira[i]);
+ if (!ops_equal(a, ah, b, bh, subnode, fail))
+ goto out;
+ }
+
+ ret = true;
+out:
+ /* Everything still NULL at this point is simply skipped by free(). */
+ free(permsa);
+ free(permsb);
+ free(dataa);
+ free(datab);
+ free(dira);
+ free(dirb);
+ talloc_free(nodename);
+ return ret;
+}
+
+/* Parameters handed to try_diff() through its void * argument. */
+struct diff_data
+{
+ /* Added to the iteration number to form each operation's state. */
+ unsigned int seed;
+ /* Print a row of dots while running. */
+ bool print_progress;
+ /* Compare the two trees only once at the end, not after every step. */
+ bool fast;
+ /* Directory of the FILE backend. */
+ const char *dir;
+};
+
+/* Differential: try both file and xs backend, watch for differences.
+ *
+ * Restarts both backends, then applies operations 0..number-1 (skipping
+ * those with trymap[i] false; a NULL trymap runs all) to each of them.
+ * The textual results must match.  Unless data->fast is set, the two
+ * trees are compared after every step, both through the working handles
+ * and, while a transaction is open, through fresh handles.
+ *
+ * Returns number when everything matched, otherwise the index of the
+ * operation at which a difference showed up.
+ */
+static unsigned int try_diff(const bool *trymap,
+			     unsigned int number,
+			     bool verbose,
+			     void *_data)
+{
+	void *fileh, *xsh;
+	char *transact = NULL;
+	struct ops *fail;
+	struct diff_data *data = _data;
+	unsigned int i, print;
+
+	cleanup(data->dir);
+	setup(data->dir);
+
+	fileh = file_handle(data->dir);
+	xsh = xs_handle(data->dir);
+
+	/* Aim for a progress line of about 76 dots. */
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		char *file, *xs;
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+		if (trymap && !trymap[i])
+			continue;
+
+		if (verbose)
+			printf("FILE: ");
+
+		file = do_next_op(&file_ops, fileh, i+data->seed, verbose);
+		if (verbose) {
+			/*
+			 * Show the result up to the first '/'.  Most results
+			 * contain none, so strchr() may return NULL, and the
+			 * precision has to be an int.
+			 */
+			const char *cut = strchr(file, '/');
+			int shown = cut ? (int)(cut - file) : (int)strlen(file);
+
+			printf("-> %.*s\n", shown, file);
+		}
+
+		if (verbose)
+			printf("XS: ");
+		xs = do_next_op(&xs_ops, xsh, i+data->seed, verbose);
+		if (verbose) {
+			const char *cut = strchr(xs, '/');
+			int shown = cut ? (int)(cut - xs) : (int)strlen(xs);
+
+			printf("-> %.*s\n", shown, xs);
+		}
+
+		if (!streq(file, xs))
+			goto out;
+
+		/* Remember which subtree the open transaction covers. */
+		if (strstarts(file, "OK:START-TRANSACT:"))
+			transact = talloc_strdup(NULL,
+						 file +
+						 strlen("OK:START-TRANSACT:"));
+		else if (streq(file, "OK:STOP-TRANSACT")) {
+			talloc_free(transact);
+			transact = NULL;
+		}
+
+		talloc_free(file);
+		talloc_free(xs);
+
+		if (data->fast)
+			continue;
+
+		fail = NULL;
+		if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail)) {
+			if (fail)
+				barf("%s failed during test\n", fail->name);
+			if (verbose)
+				printf("Trees differ:\nXS:%s\nFILE%s\n",
+				       dump(&xs_ops, xsh),
+				       dump(&file_ops, fileh));
+			goto out;
+		}
+
+		if (transact) {
+			/* Fresh handles see the tree from outside the transaction. */
+			void *fileh_pre = file_handle(data->dir);
+			void *xsh_pre = xs_handle(data->dir);
+
+			fail = NULL;
+			if (!ops_equal(&xs_ops, xsh_pre, &file_ops, fileh_pre,
+				       transact, &fail)) {
+				if (fail)
+					barf("%s failed during transact\n",
+					     fail->name);
+
+				xs_daemon_close(xsh_pre);
+				talloc_free(fileh_pre);
+				goto out;
+			}
+			xs_daemon_close(xsh_pre);
+			talloc_free(fileh_pre);
+		}
+	}
+	if (data->print_progress)
+		printf("\n");
+
+	fail = NULL;
+	if (data->fast)
+		if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail))
+			barf("Final result not the same: try without --fast");
+out:
+	file_ops.close(fileh);
+	xs_ops.close(xsh);
+	return i;
+}
+
+/* Differential random test: compare results against file backend.
+ *
+ * Exits with status 0 when all iters operations matched; otherwise
+ * reports the failing iteration and hands over to reduce_problem().
+ */
+static void diff_test(const char *dir,
+		      unsigned int iters, unsigned int seed, bool fast,
+		      bool verbose)
+{
+	struct diff_data data = {
+		.seed = seed,
+		.print_progress = !verbose,
+		.fast = fast,
+		.dir = dir,
+	};
+	unsigned int stopped_at = try_diff(NULL, iters, verbose, &data);
+
+	if (stopped_at != iters) {
+		printf("Failed on iteration %u\n", stopped_at + 1);
+		/* No dots while the failing sequence is being cut down. */
+		data.print_progress = false;
+		reduce_problem(stopped_at + 1, try_diff, &data);
+		return;
+	}
+
+	cleanup_xs_ops();
+	printf("Succeeded\n");
+	exit(0);
+}
+
+/* Parameters handed to try_fail() through its void * argument. */
+struct fail_data
+{
+ /* Added to the iteration number to form each operation's state; also
+  * sent to the daemon with the "failtest" debug command. */
+ unsigned int seed;
+ /* Print a row of dots while running. */
+ bool print_progress;
+ /* Directory of the FILE backend. */
+ const char *dir;
+};
+
+/* Try xs with inserted failures: every op should either succeed or fail.
+ *
+ * The daemon is put into "failtest" mode, in which it fails on purpose.
+ * Each operation is sent to the daemon; failure injection is then
+ * switched off with SIGUSR1, and an operation that succeeded is replayed
+ * on the FILE backend.  Afterwards both trees must be equal.  When the
+ * connection was lost the operation may still have gone through, so in
+ * that case a mismatch is retried after replaying it on the FILE backend.
+ *
+ * Returns number when everything ran, otherwise the index of the
+ * operation at which the run was stopped.
+ */
+static unsigned int try_fail(const bool *trymap,
+			     unsigned int number,
+			     bool verbose,
+			     void *_data)
+{
+	unsigned int i, print, tried = 0, aborted = 0;
+	struct fail_data *data = _data;
+	struct xs_handle *tmpxsh;
+	struct file_ops_info *tmpfileh;
+	void *fileh, *xsh;
+	struct ops *fail;
+	char seed[20];
+
+	/* Make sure failures off to shut down. */
+	if (daemon_pid)
+		kill(daemon_pid, SIGUSR1);
+	cleanup(data->dir);
+	setup(data->dir);
+
+	fileh = file_handle(data->dir);
+	xsh = xs_handle(data->dir);
+
+	/* Switch failure injection on, seeded for reproducibility. */
+	sprintf(seed, "%i", data->seed);
+	free(xs_debug_command(xsh, "failtest", seed, strlen(seed)+1));
+
+	/* Aim for a progress line of about 76 dots. */
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		unsigned int limit, failed;
+		char *ret;
+
+		/* A few times we fail due to other end OOM. */
+		limit = 0;
+		while (!xsh) {
+			xsh = xs_handle(data->dir);
+			if (!xsh && errno == ECONNREFUSED) {
+				if (verbose)
+					printf("Daemon refused connection\n");
+				goto out;
+			}
+			if (!xsh && limit++ == 5) {
+				printf("Daemon failed conn 5 times\n");
+				goto out;
+			}
+		}
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+		if (trymap && !trymap[i])
+			continue;
+
+		if (verbose)
+			printf("(%i) ", i);
+		ret = do_next_op(&xs_ops, xsh, i + data->seed, verbose);
+		/* failed: 0 = succeeded, 1 = clean error, 2 = connection lost. */
+		if (streq(ret, "FAILED:Connection reset by peer")
+		    || streq(ret, "FAILED:Bad file descriptor")
+		    || streq(ret, "FAILED:Broken pipe")) {
+			xs_close(xsh);
+			xsh = NULL;
+			failed = 2;
+		} else if (strstarts(ret, "OK"))
+			failed = 0;
+		else
+			failed = 1;
+
+		/* An operation counts as aborted when it cost us the connection. */
+		tried++;
+		if (!xsh)
+			aborted++;
+
+		if (verbose) {
+			/*
+			 * Show the first line only.  Most results contain no
+			 * newline at all, so strchr() may return NULL, and
+			 * the precision has to be an int.
+			 */
+			const char *eol = strchr(ret, '\n');
+			int shown = eol ? (int)(eol - ret) : (int)strlen(ret);
+
+			printf("-> %.*s\n", shown, ret);
+		}
+
+		talloc_free(ret);
+
+		/* Turn off failures using signal. */
+		if (kill(daemon_pid, SIGUSR1) != 0) {
+			if (verbose)
+				printf("Failed to signal daemon\n");
+			goto out;
+		}
+
+		if (failed == 0) {
+			/* Succeeded?  Do same thing to file backend
+			 * to compare */
+		try_applying:
+			ret = do_next_op(&file_ops, fileh, i + data->seed,
+					 false);
+			if (!strstarts(ret, "OK")) {
+				if (!verbose)
+					printf("File op failed on %i\n",
+					       i + data->seed);
+				talloc_free(ret);
+				goto out;
+			}
+			talloc_free(ret);
+		}
+
+		tmpxsh = xs_handle(data->dir);
+		if (!tmpxsh) {
+			if (verbose)
+				printf("Failed to open signalled daemon");
+			goto out;
+		}
+		tmpfileh = file_handle(data->dir);
+
+		fail = NULL;
+		if (!ops_equal(&xs_ops, tmpxsh, &file_ops, tmpfileh, "/",
+			       &fail)) {
+			xs_close(tmpxsh);
+			file_close(tmpfileh);
+			if (fail) {
+				if (verbose)
+					printf("%s failed\n", fail->name);
+				goto out;
+			}
+			/* Maybe op succeeded: try comparing after local op? */
+			if (failed == 2) {
+				failed = 0;
+				if (verbose)
+					printf("(Looks like it succeeded)\n");
+				goto try_applying;
+			}
+			if (verbose)
+				printf("Two backends not equal\n");
+			goto out;
+		}
+
+		/* If we lost the xs handle, that ended the transaction */
+		if (!xsh)
+			file_transaction_end(fileh, true);
+
+		/* Turn failures back on. */
+		free(xs_debug_command(tmpxsh, "failtest", NULL, 0));
+		xs_close(tmpxsh);
+		file_close(tmpfileh);
+	}
+
+	printf("Total %u of %u not aborted\n", tried - aborted, tried);
+out:
+	if (xsh)
+		xs_close(xsh);
+	return i;
+}
+
+/*
+ * Failure-injection test: run iters operations through try_fail().
+ * Exits with status 0 when all of them ran; otherwise reports the
+ * failing iteration and hands over to reduce_problem().  fast is
+ * accepted for symmetry with the other tests and ignored.
+ */
+static void fail_test(const char *dir,
+		      unsigned int iters, unsigned int seed,
+		      bool fast __attribute__((unused)), bool verbose)
+{
+	struct fail_data data = {
+		.seed = seed,
+		.print_progress = !verbose,
+		.dir = dir,
+	};
+	unsigned int stopped_at = try_fail(NULL, iters, verbose, &data);
+
+	if (stopped_at != iters) {
+		printf("Failed on iteration %u\n", stopped_at + 1);
+		fflush(stdout);
+		/* No dots while the failing sequence is being cut down. */
+		data.print_progress = false;
+		reduce_problem(stopped_at + 1, try_fail, &data);
+		return;
+	}
+
+	cleanup_xs_ops();
+	printf("Succeeded\n");
+	exit(0);
+}
+
+/*
+ * xs_random [--fail|--simple] [--fast] [--verbose] <directory> <iterations> <seed>
+ *
+ * The options are only recognised in the order shown above.  Runs the
+ * failure-injection, simple or (by default) differential test; each of
+ * them ends the process itself.
+ */
+int main(int argc, char *argv[])
+{
+	bool verbose = false;
+	bool simple = false;
+	bool fast = false;
+	bool fail = false;
+	/* Checked once each, in this order, against the next argument. */
+	struct {
+		const char *name;
+		bool *flag;
+	} options[] = {
+		{ "--fail", &fail },
+		{ "--simple", &simple },
+		{ "--fast", &fast },
+		{ "--verbose", &verbose },
+	};
+	unsigned int k;
+	unsigned int iters, seed;
+
+	for (k = 0; k < sizeof(options) / sizeof(options[0]); k++) {
+		if (argv[1] && streq(argv[1], options[k].name)) {
+			*options[k].flag = true;
+			argv++;
+			argc--;
+		}
+	}
+
+	if (argc != 4)
+		barf("Usage: xs_random [--fail|--simple] [--fast] [--verbose] <directory> <iterations> <seed>");
+
+	talloc_enable_null_tracking();
+
+	iters = atoi(argv[2]);
+	seed = atoi(argv[3]);
+
+	if (fail)
+		fail_test(argv[1], iters, seed, fast, verbose);
+	else if (simple)
+		simple_test(argv[1], iters, seed, fast, verbose);
+	else
+		diff_test(argv[1], iters, seed, fast, verbose);
+	exit(2);
+}
diff --git a/tools/xenstore/xs_stress.c b/tools/xenstore/xs_stress.c
new file mode 100644
index 0000000000..9c480b1553
--- /dev/null
+++ b/tools/xenstore/xs_stress.c
@@ -0,0 +1,207 @@
+/* Stress test for Xen Store: multiple people hammering transactions */
+#include "xs.h"
+#include "utils.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define NUM_HANDLES 2
+#define DIR_FANOUT 3
+#define DIR_DEPTH 3
+
+/* How often to print progress */
+static int print;
+
+/*
+ * Body of one stress-test child.
+ *
+ * Layout looks like /<num>/<num>/<num>/count: DIR_DEPTH levels, each
+ * component chosen from 0..DIR_FANOUT-1.
+ *
+ * Opens NUM_HANDLES daemon connections, seeds random() with <childnum>,
+ * and then completes <cycles> committed transactions.  Each transaction
+ * is started on a randomly chosen ancestor of a random "count" node,
+ * reads the count, and writes it back incremented.  Roughly one in ten
+ * transactions is aborted instead of committed and does not count
+ * towards <cycles>.  Any store failure is fatal via barf_perror().
+ *
+ * childnum: index of this child; also selects the progress character
+ *           ('0'-'9', then 'A' onwards).
+ */
+static void work(unsigned int cycles, unsigned int childnum)
+{
+	unsigned int i;
+	struct xs_handle *handles[NUM_HANDLES];
+	char id;
+
+	if (childnum < 10)
+		id = '0' + childnum;
+	else
+		id = 'A' + childnum - 10;
+
+	for (i = 0; i < NUM_HANDLES; i++) {
+		handles[i] = xs_daemon_open();
+		if (!handles[i])
+			barf_perror("Opening handle %i", i);
+	}
+
+	srandom(childnum);
+	for (i = 0; i < cycles; i++) {
+		unsigned int lockdepth, j, len;
+		char file[100] = "", lockdir[100];
+		char *contents, tmp[100];
+		struct xs_handle *h = handles[random() % NUM_HANDLES];
+
+		/* Build the path; remember the prefix at lockdepth as the
+		 * transaction root. */
+		lockdepth = random() % DIR_DEPTH;
+		for (j = 0; j < DIR_DEPTH; j++) {
+			if (j == lockdepth)
+				strcpy(lockdir, file);
+			sprintf(file + strlen(file), "/%li",
+				random()%DIR_FANOUT);
+		}
+		/* Depth 0 means the empty prefix: lock the root. */
+		if (streq(lockdir, ""))
+			strcpy(lockdir, "/");
+
+		if (!xs_transaction_start(h, lockdir))
+			barf_perror("%i: starting transaction %i on %s",
+				    childnum, i, lockdir);
+
+		sprintf(file + strlen(file), "/count");
+		contents = xs_read(h, file, &len);
+		if (!contents)
+			barf_perror("%i: can't read %s iter %i",
+				    childnum, file, i);
+		sprintf(tmp, "%i", atoi(contents) + 1);
+		/* xs_read() hands back an allocated buffer; release it
+		 * each cycle instead of leaking it. */
+		free(contents);
+		if (!xs_write(h, file, tmp, strlen(tmp)+1, 0))
+			barf_perror("%i: can't write %s iter %i",
+				    childnum, file, i);
+
+		/* Abandon 1 in 10 */
+		if (random() % 10 == 0) {
+			if (!xs_transaction_end(h, true))
+				barf_perror("%i: can't abort transact %s",
+					    childnum, lockdir);
+			/* Aborted work does not count: redo this cycle. */
+			i--;
+		} else {
+			if (!xs_transaction_end(h, false))
+				barf_perror("%i: can't commit transact %s",
+					    childnum, lockdir);
+
+			/* Offset when we print . so kids don't all
+			 * print at once. */
+			if ((i + print/(childnum+1)) % print == 0)
+				write(STDOUT_FILENO, &id, 1);
+		}
+	}
+}
+
+/*
+ * Recursively populate the store below <base>.  While <togo> is non-zero
+ * each level gets DIR_FANOUT directories named 0..DIR_FANOUT-1 (created
+ * with xs_mkdir); when <togo> reaches zero a "count" node containing "0"
+ * is written with O_EXCL|O_CREAT.  Any failure is fatal.
+ */
+static void create_dirs(struct xs_handle *h, const char *base, int togo)
+{
+	char path[100];
+	unsigned int n;
+
+	if (togo != 0) {
+		for (n = 0; n != DIR_FANOUT; n++) {
+			sprintf(path, "%s/%i", base, n);
+			if (!xs_mkdir(h, path))
+				barf_perror("xs_mkdir %s", path);
+			create_dirs(h, path, togo - 1);
+		}
+		return;
+	}
+
+	/* Leaf level: create the counter itself. */
+	sprintf(path, "%s/count", base);
+	if (!xs_write(h, path, "0", 2, O_EXCL|O_CREAT))
+		barf_perror("Writing to %s", path);
+}
+
+/*
+ * Walk the same tree that create_dirs() builds and return the sum of
+ * every "count" node below <base>.  <togo> is the number of directory
+ * levels still to descend; a failed read is fatal.
+ */
+static unsigned int add_count(struct xs_handle *h, const char *base, int togo)
+{
+	char path[100];
+	unsigned int total = 0, n, len;
+	char *answer;
+
+	if (togo != 0) {
+		for (n = 0; n != DIR_FANOUT; n++) {
+			sprintf(path, "%s/%i", base, n);
+			total += add_count(h, path, togo - 1);
+		}
+		return total;
+	}
+
+	/* Leaf level: parse the stored decimal counter. */
+	sprintf(path, "%s/count", base);
+	answer = xs_read(h, path, &len);
+	if (!answer)
+		barf_perror("Reading %s", path);
+	total = atoi(answer);
+	free(answer);
+	return total;
+}
+
+/*
+ * Open a connection to the daemon, build the DIR_DEPTH-deep counter tree
+ * from the root with create_dirs(), then close the connection.
+ */
+static void setup(void)
+{
+	struct xs_handle *conn = xs_daemon_open();
+
+	if (conn == NULL)
+		barf_perror("Contacting daemon");
+
+	create_dirs(conn, "", DIR_DEPTH);
+	xs_daemon_close(conn);
+}
+
+/*
+ * Open a fresh connection to the daemon and return the sum of all
+ * counters in the tree, as computed by add_count() from the root.
+ */
+static unsigned int tally_counts(void)
+{
+	struct xs_handle *conn = xs_daemon_open();
+	unsigned int total;
+
+	if (conn == NULL)
+		barf_perror("Contacting daemon");
+
+	total = add_count(conn, "", DIR_DEPTH);
+	xs_daemon_close(conn);
+
+	return total;
+}
+
+/*
+ * Stress-test entry point: xs_stress <iterations>.
+ *
+ * Builds the counter tree, forks one worker per kids[] slot, each doing
+ * iterations / ARRAY_SIZE(kids) committed increments, waits for all of
+ * them, then checks that the counters add up to the number of increments
+ * actually requested.  Exits 0 on success, 1 if a child exited non-zero;
+ * other failures go through barf()/barf_perror().
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, per_child, expected, total;
+	int iterations;
+	bool failed = false;
+	int kids[10];
+
+	if (argc != 2)
+		barf("Usage: xs_stress <iterations>");
+
+	iterations = atoi(argv[1]);
+	per_child = iterations / ARRAY_SIZE(kids);
+	/* The division truncates, so the children together perform this
+	 * many increments -- not necessarily <iterations>. */
+	expected = per_child * ARRAY_SIZE(kids);
+
+	printf("Setting up directories...\n");
+	setup();
+
+	/* Aim for about one line (76 chars) of progress output. */
+	print = iterations / 76;
+	if (!print)
+		print = 1;
+
+	printf("Running %i children...\n", (int)ARRAY_SIZE(kids));
+	for (i = 0; i < ARRAY_SIZE(kids); i++) {
+		kids[i] = fork();
+		if (kids[i] == -1)
+			barf_perror("fork");
+		if (kids[i] == 0) {
+			work(per_child, i);
+			exit(0);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(kids); i++) {
+		int status;
+		if (waitpid(kids[i], &status, 0) == -1)
+			barf_perror("waitpid");
+		if (!WIFEXITED(status))
+			barf("Kid %i died via signal %i\n",
+			     i, WTERMSIG(status));
+		if (WEXITSTATUS(status) != 0) {
+			printf("Child %i exited %i\n", i, WEXITSTATUS(status));
+			failed = true;
+		}
+	}
+	if (failed)
+		exit(1);
+
+	printf("\nCounting results...\n");
+	total = tally_counts();
+	if (total != expected)
+		barf("Total counts %u not %u", total, expected);
+	printf("Success!\n");
+	exit(0);
+}
diff --git a/tools/xenstore/xs_test.c b/tools/xenstore/xs_test.c
new file mode 100644
index 0000000000..f1e66cbe28
--- /dev/null
+++ b/tools/xenstore/xs_test.c
@@ -0,0 +1,647 @@
+/*
+ Xen Store Daemon Test tool
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include "utils.h"
+#include "xs_lib.h"
+
+#define XSTEST
+
+static struct xs_handle *handles[10] = { NULL };
+
+/*
+ * Header found at the start of each half-page ring buffer.  The data
+ * bytes follow the header directly; their count is ringbuf_datasize.
+ * Packed so the header occupies exactly 9 bytes.
+ */
+struct ringbuf_head
+{
+	uint32_t write; /* Next place to write to */
+	uint32_t read; /* Next place to read from */
+	uint8_t flags;
+	char buf[]; /* C99 flexible array member: the ring data */
+} __attribute__((packed));
+
+static struct ringbuf_head *out, *in;
+static unsigned int ringbuf_datasize;
+static int daemon_pid;
+
+/*
+ * Sanity-check a ring header: both indices must lie inside the data area.
+ * FIXME: Mark connection as broken (close it?) when this happens.
+ */
+static bool check_buffer(const struct ringbuf_head *h)
+{
+	if (h->write >= ringbuf_datasize)
+		return false;
+	return h->read < ringbuf_datasize;
+}
+
+/*
+ * Locate the next contiguous writable region of the ring.
+ * We can't fill last byte: would look like empty buffer.
+ *
+ * Stores the region's size in *len and returns its address inside <buf>.
+ */
+static void *get_output_chunk(const struct ringbuf_head *h,
+			      void *buf, uint32_t *len)
+{
+	/* The slot just behind the reader is the last one we may not use. */
+	uint32_t limit = (h->read == 0) ? ringbuf_datasize - 1 : h->read - 1;
+
+	if (limit >= h->write)
+		/* Unread data (or the reserved byte) lies ahead of us. */
+		*len = limit - h->write;
+	else
+		/* Free all the way to the end of the buffer. */
+		*len = ringbuf_datasize - h->write;
+
+	return (char *)buf + h->write;
+}
+
+/*
+ * Locate the next contiguous readable region of the ring.  Stores its
+ * size in *len (zero when read == write) and returns its address inside
+ * <buf>.
+ */
+static const void *get_input_chunk(const struct ringbuf_head *h,
+				   const void *buf, uint32_t *len)
+{
+	if (h->write >= h->read)
+		/* Writer is ahead of us: data runs up to the write index. */
+		*len = h->write - h->read;
+	else
+		/* Writer has wrapped: data runs to the end of the buffer. */
+		*len = ringbuf_datasize - h->read;
+
+	return (const char *)buf + h->read;
+}
+
+/* Advance the write index by <len>, wrapping to 0 at the end of the data. */
+static void update_output_chunk(struct ringbuf_head *h, uint32_t len)
+{
+	h->write += len;
+	if (h->write != ringbuf_datasize)
+		return;
+	h->write = 0;
+}
+
+/* Advance the read index by <len>, wrapping to 0 at the end of the data. */
+static void update_input_chunk(struct ringbuf_head *h, uint32_t len)
+{
+	h->read += len;
+	if (h->read != ringbuf_datasize)
+		return;
+	h->read = 0;
+}
+
+/*
+ * Copy exactly <len> bytes from the shared-memory input ring into <data>,
+ * then signal the daemon with SIGUSR2.  <fd> is ignored.  Always returns
+ * true; a corrupt ring header is fatal.
+ * FIXME: We spin, and we're sloppy.
+ */
+static bool read_all_shmem(int fd __attribute__((unused)),
+			   void *data, unsigned int len)
+{
+	unsigned int chunk;
+
+	if (!check_buffer(in))
+		barf("Corrupt buffer");
+
+	while (len != 0) {
+		const void *src = get_input_chunk(in, in->buf, &chunk);
+
+		/* Never take more than the caller still wants. */
+		chunk = (chunk > len) ? len : chunk;
+		memcpy(data, src, chunk);
+		update_input_chunk(in, chunk);
+		data += chunk;
+		len -= chunk;
+	}
+
+	/* Tell other end we read something. */
+	kill(daemon_pid, SIGUSR2);
+	return true;
+}
+
+/*
+ * Copy exactly <len> bytes from <data> into the shared-memory output
+ * ring, spinning while no space is available, then signal the daemon
+ * with SIGUSR2.  <fd> is ignored.  Always returns true; a corrupt ring
+ * header is fatal.
+ */
+static bool write_all_shmem(int fd __attribute__((unused)),
+			    const void *data, unsigned int len)
+{
+	uint32_t chunk;
+
+	if (!check_buffer(out))
+		barf("Corrupt buffer");
+
+	while (len != 0) {
+		void *dst = get_output_chunk(out, out->buf, &chunk);
+
+		/* Never write more than the caller still has. */
+		chunk = (chunk > len) ? len : chunk;
+		memcpy(dst, data, chunk);
+		update_output_chunk(out, chunk);
+		data += chunk;
+		len -= chunk;
+	}
+
+	/* Tell other end we wrote something. */
+	kill(daemon_pid, SIGUSR2);
+	return true;
+}
+
+static bool read_all(int fd, void *data, unsigned int len);
+
+/*
+ * Read dispatcher: the pseudo descriptor -2 selects the shared-memory
+ * ring, anything else goes to read_all().
+ */
+static bool read_all_choice(int fd, void *data, unsigned int len)
+{
+	return (fd == -2) ? read_all_shmem(fd, data, len)
+			  : read_all(fd, data, len);
+}
+
+/*
+ * Write dispatcher: the pseudo descriptor -2 selects the shared-memory
+ * ring, anything else goes to write_all().
+ */
+static bool write_all_choice(int fd, const void *data, unsigned int len)
+{
+	return (fd == -2) ? write_all_shmem(fd, data, len)
+			  : write_all(fd, data, len);
+}
+
+/* We want access to internal functions. */
+#include "xs.c"
+
+/*
+ * Print the command-line synopsis and the list of commands accepted on
+ * stdin, then terminate via barf().  The list mirrors the dispatch chain
+ * in main(): "introduce" takes a trailing <path>, and "release" is a
+ * supported command.
+ */
+static void __attribute__((noreturn)) usage(void)
+{
+	barf("Usage:\n"
+	     " xs_test [--readonly] [--notimeout]\n"
+	     "Reads commands from stdin, one per line:\n"
+	     " dir <path>\n"
+	     " read <path>\n"
+	     " write <path> <flags> <value>...\n"
+	     " setid <id>\n"
+	     " mkdir <path>\n"
+	     " rm <path>\n"
+	     " getperm <path>\n"
+	     " setperm <path> <id> <flags> ...\n"
+	     " shutdown\n"
+	     " watch <path> <prio>\n"
+	     " waitwatch\n"
+	     " ackwatch\n"
+	     " unwatch <path>\n"
+	     " close\n"
+	     " start <node>\n"
+	     " abort\n"
+	     " introduce <domid> <mfn> <eventchn> <path>\n"
+	     " release <domid>\n"
+	     " commit\n"
+	     " sleep <seconds>\n"
+	     " dump\n");
+}
+
+/*
+ * Return a freshly allocated copy of the <num>th whitespace-separated
+ * word of <line> (0-based; separators are space, tab and newline).
+ *
+ * The copy is cached in a static per-index slot and replaces (frees) the
+ * previous result for the same <num>, so the returned pointer stays valid
+ * only until the next call with that index.  A missing word, an index
+ * beyond the cache, or allocation failure is fatal.
+ */
+static char *arg(char *line, unsigned int num)
+{
+	static char *args[10];
+	unsigned int i, len = 0;
+
+	if (num >= ARRAY_SIZE(args))
+		barf("Can't get arg %u", num);
+
+	/* Skip forward word by word until we stand on word <num>. */
+	for (i = 0; i <= num; i++) {
+		line += len;
+		line += strspn(line, " \t\n");
+		len = strcspn(line, " \t\n");
+		if (!len)
+			barf("Can't get arg %u", num);
+	}
+
+	free(args[num]);
+	args[num] = malloc(len + 1);
+	if (!args[num])
+		barf_perror("Allocating arg %u", num);
+	memcpy(args[num], line, len);
+	args[num][len] = '\0';
+	return args[num];
+}
+
+static char *command;
+/*
+ * Report that the current command failed and terminate via barf_perror().
+ * The message is prefixed with "<handle>: " for any handle other than 0.
+ */
+static void __attribute__((noreturn)) failed(int handle)
+{
+	if (handle == 0)
+		barf_perror("%s", command);
+	barf_perror("%i: %s", handle, command);
+}
+
+/*
+ * "dir" command: list <path> with xs_directory() and print one entry per
+ * line, prefixed with "<handle>:" for any handle other than 0.
+ */
+static void do_dir(unsigned int handle, char *path)
+{
+	unsigned int count, n;
+	char **list = xs_directory(handles[handle], path, &count);
+
+	if (!list)
+		failed(handle);
+
+	for (n = 0; n < count; n++) {
+		if (handle == 0)
+			printf("%s\n", list[n]);
+		else
+			printf("%i:%s\n", handle, list[n]);
+	}
+	free(list);
+}
+
+/*
+ * "read" command: fetch <path> with xs_read() and print exactly the
+ * returned bytes followed by a newline, prefixed with "<handle>:" for
+ * any handle other than 0.  The buffer returned by xs_read() is released
+ * before returning.
+ */
+static void do_read(unsigned int handle, char *path)
+{
+	char *value;
+	unsigned int len;
+
+	value = xs_read(handles[handle], path, &len);
+	if (!value)
+		failed(handle);
+
+	if (handle)
+		printf("%i:%.*s\n", handle, len, value);
+	else
+		printf("%.*s\n", len, value);
+	free(value);
+}
+
+/*
+ * "write" command: translate the symbolic <flags> word into open(2)-style
+ * flags and write <data> (including its terminating NUL) to <path>.
+ * "crap" maps to the deliberately bogus value 100.  An unrecognised
+ * flags word is fatal.
+ */
+static void do_write(unsigned int handle, char *path, char *flags, char *data)
+{
+	static const struct {
+		const char *name;
+		int value;
+	} known[] = {
+		{ "none", 0 },
+		{ "create", O_CREAT },
+		{ "excl", O_CREAT | O_EXCL },
+		{ "crap", 100 },
+	};
+	unsigned int k;
+
+	for (k = 0; k < ARRAY_SIZE(known); k++)
+		if (streq(flags, known[k].name))
+			break;
+	if (k == ARRAY_SIZE(known))
+		barf("write flags 'none', 'create' or 'excl' only");
+
+	if (!xs_write(handles[handle], path, data, strlen(data)+1,
+		      known[k].value))
+		failed(handle);
+}
+
+/*
+ * "setid" command: send the debug command "setid" with <id> (including
+ * its terminating NUL) as payload; a false reply is fatal.
+ */
+static void do_setid(unsigned int handle, char *id)
+{
+	unsigned int payload_len = strlen(id) + 1;
+
+	if (xs_bool(xs_debug_command(handles[handle], "setid", id,
+				     payload_len)))
+		return;
+	failed(handle);
+}
+
+/* "mkdir" command: call xs_mkdir() on <path>; failure is fatal. */
+static void do_mkdir(unsigned int handle, char *path)
+{
+	if (xs_mkdir(handles[handle], path))
+		return;
+	failed(handle);
+}
+
+/* "rm" command: call xs_rm() on <path>; failure is fatal. */
+static void do_rm(unsigned int handle, char *path)
+{
+	if (xs_rm(handles[handle], path))
+		return;
+	failed(handle);
+}
+
+/*
+ * "getperm" command: fetch the permission list of <path> and print one
+ * "<id> <NONE|WRITE|READ|READ/WRITE>" line per entry, prefixed with
+ * "<handle>:" for any handle other than 0.  An unknown permission value
+ * is fatal.
+ */
+static void do_getperm(unsigned int handle, char *path)
+{
+	struct xs_permissions *perms;
+	unsigned int count, n;
+
+	perms = xs_get_permissions(handles[handle], path, &count);
+	if (!perms)
+		failed(handle);
+
+	for (n = 0; n < count; n++) {
+		char *label;
+
+		if (perms[n].perms == XS_PERM_NONE)
+			label = "NONE";
+		else if (perms[n].perms == XS_PERM_WRITE)
+			label = "WRITE";
+		else if (perms[n].perms == XS_PERM_READ)
+			label = "READ";
+		else if (perms[n].perms == (XS_PERM_READ|XS_PERM_WRITE))
+			label = "READ/WRITE";
+		else
+			barf("bad perm value %i", perms[n].perms);
+
+		if (handle == 0)
+			printf("%i %s\n", perms[n].id, label);
+		else
+			printf("%i:%i %s\n", handle, perms[n].id, label);
+	}
+	free(perms);
+}
+
+/*
+ * "setperm" command: parse "<id> <flags>" pairs from <line> and apply
+ * them to <path> with xs_set_permissions().
+ *
+ * <line> is the whole command line and is tokenised in place with
+ * strtok(); the first two tokens (command word and path) are skipped.
+ * Parsing stops at the first missing token, so a trailing <id> without
+ * flags is ignored.  An unknown flags word, or more pairs than the local
+ * array can hold, is fatal.
+ */
+static void do_setperm(unsigned int handle, char *path, char *line)
+{
+	unsigned int i;
+	struct xs_permissions perms[100];
+
+	strtok(line, " \t\n");
+	strtok(NULL, " \t\n");
+	for (i = 0; ; i++) {
+		char *arg = strtok(NULL, " \t\n");
+		if (!arg)
+			break;
+		/* Refuse to run past the end of perms[]. */
+		if (i >= ARRAY_SIZE(perms))
+			barf("too many permissions (max %u)",
+			     (unsigned int)ARRAY_SIZE(perms));
+		perms[i].id = atoi(arg);
+		arg = strtok(NULL, " \t\n");
+		if (!arg)
+			break;
+		if (streq(arg, "WRITE"))
+			perms[i].perms = XS_PERM_WRITE;
+		else if (streq(arg, "READ"))
+			perms[i].perms = XS_PERM_READ;
+		else if (streq(arg, "READ/WRITE"))
+			perms[i].perms = XS_PERM_READ|XS_PERM_WRITE;
+		else if (streq(arg, "NONE"))
+			perms[i].perms = XS_PERM_NONE;
+		else
+			barf("bad flags %s\n", arg);
+	}
+
+	if (!xs_set_permissions(handles[handle], path, perms, i))
+		failed(handle);
+}
+
+/* "shutdown" command: call xs_shutdown(); failure is fatal. */
+static void do_shutdown(unsigned int handle)
+{
+	if (xs_shutdown(handles[handle]))
+		return;
+	failed(handle);
+}
+
+/*
+ * "watch" command: register a watch on <node> with the priority given as
+ * the decimal string <pri>; failure is fatal.
+ */
+static void do_watch(unsigned int handle, const char *node, const char *pri)
+{
+	int priority = atoi(pri);
+
+	if (xs_watch(handles[handle], node, priority))
+		return;
+	failed(handle);
+}
+
+/*
+ * "waitwatch" command: obtain the next watch event with xs_read_watch()
+ * and print the node it names, prefixed with "<handle>:" for any handle
+ * other than 0.
+ */
+static void do_waitwatch(unsigned int handle)
+{
+	char *fired = xs_read_watch(handles[handle]);
+
+	if (!fired)
+		failed(handle);
+
+	if (handle == 0)
+		printf("%s\n", fired);
+	else
+		printf("%i:%s\n", handle, fired);
+	free(fired);
+}
+
+/* "ackwatch" command: call xs_acknowledge_watch(); failure is fatal. */
+static void do_ackwatch(unsigned int handle)
+{
+	if (xs_acknowledge_watch(handles[handle]))
+		return;
+	failed(handle);
+}
+
+/* "unwatch" command: call xs_unwatch() on <node>; failure is fatal. */
+static void do_unwatch(unsigned int handle, const char *node)
+{
+	if (xs_unwatch(handles[handle], node))
+		return;
+	failed(handle);
+}
+
+/*
+ * "start" command: begin a transaction on <node> with
+ * xs_transaction_start(); failure is fatal.
+ */
+static void do_start(unsigned int handle, const char *node)
+{
+	if (xs_transaction_start(handles[handle], node))
+		return;
+	failed(handle);
+}
+
+/*
+ * "commit" / "abort" commands: finish the current transaction with
+ * xs_transaction_end(), passing <abort> through; failure is fatal.
+ */
+static void do_end(unsigned int handle, bool abort)
+{
+	if (xs_transaction_end(handles[handle], abort))
+		return;
+	failed(handle);
+}
+
+/*
+ * "introduce" command: set up a fake domain connection over the shared
+ * page in /tmp/xcmap and introduce it to the daemon.
+ *
+ * Picks the first unused slot in handles[], maps the page (the output
+ * ring is the first half, the input ring the second), publishes our PID
+ * and the event channel at fixed offsets in the page, installs a handle
+ * with the pseudo descriptor -2 in the chosen slot, and calls
+ * xs_introduce_domain() on the existing <handle>.  Afterwards the
+ * daemon's PID is read back from the page.
+ *
+ * Fatal if no slot is free, the file cannot be opened or mapped, or the
+ * introduce call fails.
+ */
+static void do_introduce(unsigned int handle,
+			 const char *domid,
+			 const char *mfn,
+			 const char *eventchn,
+			 const char *path)
+{
+	unsigned int i;
+	int fd;
+
+	/* We poll, so ignore signal */
+	signal(SIGUSR2, SIG_IGN);
+	for (i = 0; i < ARRAY_SIZE(handles); i++)
+		if (!handles[i])
+			break;
+	/* Without this, a full table would be written past its end. */
+	if (i == ARRAY_SIZE(handles))
+		barf("No free handle for introduced domain");
+
+	fd = open("/tmp/xcmap", O_RDWR);
+	if (fd < 0)
+		barf_perror("Failed to open /tmp/xcmap");
+	/* Set in and out pointers. */
+	out = mmap(NULL, getpagesize(), PROT_WRITE|PROT_READ, MAP_SHARED,fd,0);
+	if (out == MAP_FAILED)
+		barf_perror("Failed to map /tmp/xcmap page");
+	in = (void *)out + getpagesize() / 2;
+	close(fd);
+
+	/* Tell them the event channel and our PID. */
+	*(int *)((void *)out + 32) = getpid();
+	*(u16 *)((void *)out + 36) = atoi(eventchn);
+
+	/* Create new handle. */
+	handles[i] = new(struct xs_handle);
+	handles[i]->fd = -2;
+
+	if (!xs_introduce_domain(handles[handle], atoi(domid),
+				 atol(mfn), atoi(eventchn), path))
+		failed(handle);
+	printf("handle is %i\n", i);
+
+	/* Read in daemon pid. */
+	daemon_pid = *(int *)((void *)out + 32);
+}
+
+/*
+ * "release" command: call xs_release_domain() with the domain id given
+ * as the decimal string <domid>; failure is fatal.
+ */
+static void do_release(unsigned int handle, const char *domid)
+{
+	int id = atoi(domid);
+
+	if (xs_release_domain(handles[handle], id))
+		return;
+	failed(handle);
+}
+
+/*
+ * qsort() comparator for an array of C strings: <a> and <b> point at the
+ * array elements (char pointers), which are compared with strcmp().
+ */
+static int strptrcmp(const void *a, const void *b)
+{
+	const char *const *lhs = a;
+	const char *const *rhs = b;
+
+	return strcmp(*lhs, *rhs);
+}
+
+/* Sort the <num> strings of <dir> in place, in strcmp() order. */
+static void sort_dir(char **dir, unsigned int num)
+{
+	qsort(dir, num, sizeof(dir[0]), strptrcmp);
+}
+
+/*
+ * Recursively print the subtree whose children are listed in <dir>.
+ *
+ * <node> is the parent path ("" for the root), <dir>/<numdirs> its child
+ * names (sorted in place before printing) and <depth> the indentation
+ * level.  For every child this prints its name and permissions, then its
+ * contents in parentheses if it has any, then descends into it.  Any
+ * store error other than EISDIR on the read is fatal.
+ */
+static void dump_dir(unsigned int handle,
+		     const char *node,
+		     char **dir,
+		     unsigned int numdirs,
+		     unsigned int depth)
+{
+	char indent[depth+1];
+	unsigned int entry;
+
+	/* One space of indentation per level. */
+	memset(indent, ' ', depth);
+	indent[depth] = '\0';
+
+	sort_dir(dir, numdirs);
+
+	for (entry = 0; entry != numdirs; entry++) {
+		const char *name = dir[entry];
+		char child[strlen(node) + 1 + strlen(name) + 1];
+		struct xs_permissions *perms;
+		unsigned int p, numperms, len, numkids;
+		char *value;
+		char **kids;
+
+		sprintf(child, "%s/%s", node, name);
+
+		perms = xs_get_permissions(handles[handle], child, &numperms);
+		if (!perms)
+			failed(handle);
+
+		printf("%s%s: ", indent, name);
+		for (p = 0; p < numperms; p++) {
+			char text[100];
+
+			if (!perm_to_string(&perms[p], text))
+				barf("perm to string");
+			printf("%s ", text);
+		}
+		free(perms);
+		printf("\n");
+
+		/* Even directories can have contents. */
+		value = xs_read(handles[handle], child, &len);
+		if (value) {
+			printf(" %s(%.*s)\n", indent, len, value);
+			free(value);
+		} else if (errno != EISDIR) {
+			failed(handle);
+		}
+
+		/* Every node is a directory. */
+		kids = xs_directory(handles[handle], child, &numkids);
+		if (!kids)
+			failed(handle);
+		dump_dir(handle, child, kids, numkids, depth+1);
+		free(kids);
+	}
+}
+
+/*
+ * "dump" command: list the root directory and print the whole store tree
+ * through dump_dir(), starting at depth 0.
+ */
+static void dump(int handle)
+{
+	unsigned int count;
+	char **top = xs_directory(handles[handle], "/", &count);
+
+	if (!top)
+		failed(handle);
+
+	dump_dir(handle, "", top, count, 0);
+	free(top);
+}
+
+/*
+ * xs_test entry point: xs_test [--readonly] [--notimeout].
+ *
+ * Reads commands from stdin, one per line.  Blank lines and lines
+ * starting with '#' are skipped.  A line may begin with a decimal handle
+ * number followed by one separator character; otherwise handle 0 is
+ * used.  The handle's connection is opened on first use (read-only if
+ * --readonly was given).  Unless --notimeout was given, each command is
+ * guarded by a 5 second alarm whose handler prints "<command> timeout"
+ * and exits 1.
+ *
+ * The handle number is range-checked against handles[] before it is used
+ * as an index.
+ */
+int main(int argc, char *argv[])
+{
+	char line[1024];
+	bool readonly = false, timeout = true;
+	int handle;
+
+	/* SIGALRM handler: report which command timed out, then exit. */
+	static void alarmed(int sig __attribute__((unused)))
+	{
+		if (handle) {
+			char handlename[10];
+			sprintf(handlename, "%u:", handle);
+			write(STDOUT_FILENO, handlename, strlen(handlename));
+		}
+		write(STDOUT_FILENO, command, strlen(command));
+		write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n"));
+		exit(1);
+	}
+
+	if (argc > 1 && streq(argv[1], "--readonly")) {
+		readonly = true;
+		argc--;
+		argv++;
+	}
+
+	if (argc > 1 && streq(argv[1], "--notimeout")) {
+		timeout = false;
+		argc--;
+		argv++;
+	}
+
+	if (argc != 1)
+		usage();
+
+	/* The size of the ringbuffer: half a page minus head structure. */
+	ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
+
+	signal(SIGALRM, alarmed);
+	while (fgets(line, sizeof(line), stdin)) {
+		char *endp;
+		unsigned long parsed;
+
+		if (strspn(line, " \n") == strlen(line))
+			continue;
+		if (strstarts(line, "#"))
+			continue;
+
+		parsed = strtoul(line, &endp, 10);
+		if (endp != line) {
+			/* Reject anything that would index past handles[]. */
+			if (parsed >= ARRAY_SIZE(handles))
+				barf("Handle %lu out of range", parsed);
+			handle = parsed;
+			/* Drop the number and the separator after it. */
+			memmove(line, endp+1, strlen(endp));
+		} else
+			handle = 0;
+
+		if (!handles[handle]) {
+			if (readonly)
+				handles[handle] = xs_daemon_open_readonly();
+			else
+				handles[handle] = xs_daemon_open();
+			if (!handles[handle])
+				barf_perror("Opening connection to daemon");
+		}
+		command = arg(line, 0);
+
+		if (timeout)
+			alarm(5);
+		if (streq(command, "dir"))
+			do_dir(handle, arg(line, 1));
+		else if (streq(command, "read"))
+			do_read(handle, arg(line, 1));
+		else if (streq(command, "write"))
+			do_write(handle,
+				 arg(line, 1), arg(line, 2), arg(line, 3));
+		else if (streq(command, "setid"))
+			do_setid(handle, arg(line, 1));
+		else if (streq(command, "mkdir"))
+			do_mkdir(handle, arg(line, 1));
+		else if (streq(command, "rm"))
+			do_rm(handle, arg(line, 1));
+		else if (streq(command, "getperm"))
+			do_getperm(handle, arg(line, 1));
+		else if (streq(command, "setperm"))
+			do_setperm(handle, arg(line, 1), line);
+		else if (streq(command, "shutdown"))
+			do_shutdown(handle);
+		else if (streq(command, "watch"))
+			do_watch(handle, arg(line, 1), arg(line, 2));
+		else if (streq(command, "waitwatch"))
+			do_waitwatch(handle);
+		else if (streq(command, "ackwatch"))
+			do_ackwatch(handle);
+		else if (streq(command, "unwatch"))
+			do_unwatch(handle, arg(line, 1));
+		else if (streq(command, "close")) {
+			xs_daemon_close(handles[handle]);
+			handles[handle] = NULL;
+		} else if (streq(command, "start"))
+			do_start(handle, arg(line, 1));
+		else if (streq(command, "commit"))
+			do_end(handle, false);
+		else if (streq(command, "abort"))
+			do_end(handle, true);
+		else if (streq(command, "introduce"))
+			do_introduce(handle, arg(line, 1), arg(line, 2),
+				     arg(line, 3), arg(line, 4));
+		else if (streq(command, "release"))
+			do_release(handle, arg(line, 1));
+		else if (streq(command, "dump"))
+			dump(handle);
+		else if (streq(command, "sleep"))
+			sleep(atoi(arg(line, 1)));
+		else
+			barf("Unknown command %s", command);
+		fflush(stdout);
+		alarm(0);
+	}
+	return 0;
+}
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 19b9ae58bd..d46472c16c 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -432,27 +432,30 @@ typedef struct shared_info_st
#define MAX_GUEST_CMDLINE 1024
typedef struct {
- /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
- memory_t nr_pages; /* 0: Total pages allocated to this domain. */
+ /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
+ memory_t nr_pages; /* 0: Total pages allocated to this domain. */
_MEMORY_PADDING(A);
- memory_t shared_info; /* 8: MACHINE address of shared info struct.*/
+ memory_t shared_info; /* 8: MACHINE address of shared info struct. */
_MEMORY_PADDING(B);
- u32 flags; /* 16: SIF_xxx flags. */
+ u32 flags; /* 16: SIF_xxx flags. */
u16 domain_controller_evtchn; /* 20 */
u16 __pad;
- /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
- memory_t pt_base; /* 24: VIRTUAL address of page directory. */
+ /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
+ memory_t pt_base; /* 24: VIRTUAL address of page directory. */
_MEMORY_PADDING(C);
- memory_t nr_pt_frames; /* 32: Number of bootstrap p.t. frames. */
+ memory_t nr_pt_frames; /* 32: Number of bootstrap p.t. frames. */
_MEMORY_PADDING(D);
- memory_t mfn_list; /* 40: VIRTUAL address of page-frame list. */
+ memory_t mfn_list; /* 40: VIRTUAL address of page-frame list. */
_MEMORY_PADDING(E);
- memory_t mod_start; /* 48: VIRTUAL address of pre-loaded module. */
+ memory_t mod_start; /* 48: VIRTUAL address of pre-loaded module. */
_MEMORY_PADDING(F);
- memory_t mod_len; /* 56: Size (bytes) of pre-loaded module. */
+ memory_t mod_len; /* 56: Size (bytes) of pre-loaded module. */
_MEMORY_PADDING(G);
s8 cmd_line[MAX_GUEST_CMDLINE]; /* 64 */
-} PACKED start_info_t; /* 1088 bytes */
+ memory_t store_page; /* 1088: VIRTUAL address of store page. */
+ _MEMORY_PADDING(H);
+ u16 store_evtchn; /* 1096: Event channel for store communication. */
+} PACKED start_info_t; /* 1098 bytes */
/* These flags are passed in the 'flags' field of start_info_t. */
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */