aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorcl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>2005-06-07 12:43:58 +0000
committercl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>2005-06-07 12:43:58 +0000
commit29c9e570b1eddfd6df789e08da65cf4ddec5f6fe (patch)
treebf79ad3040d05ee9e05a60df3b8a364fcfa236dc
parent636a81e9701d001f4c9108f722014f48f59eabbd (diff)
downloadxen-29c9e570b1eddfd6df789e08da65cf4ddec5f6fe.tar.gz
xen-29c9e570b1eddfd6df789e08da65cf4ddec5f6fe.tar.bz2
xen-29c9e570b1eddfd6df789e08da65cf4ddec5f6fe.zip
bitkeeper revision 1.1662.1.15 (42a5968eiZE_DjdIFPjxvzLw6ACvCQ)
Add xenstore daemon and library. Makefile: Add xenstore subdirectory. Remove xs_stress on clean. Many files: new file ignore: Update ignore list for xenstore. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (authored) Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
-rw-r--r--.rootkeys37
-rw-r--r--BitKeeper/etc/ignore7
-rw-r--r--tools/Makefile1
-rw-r--r--tools/xenstore/.gdbinit4
-rw-r--r--tools/xenstore/Makefile97
-rw-r--r--tools/xenstore/TODO7
-rw-r--r--tools/xenstore/fake_libxc.c119
-rw-r--r--tools/xenstore/list.h508
-rw-r--r--tools/xenstore/talloc.c1143
-rw-r--r--tools/xenstore/talloc.h134
-rw-r--r--tools/xenstore/talloc_guide.txt569
-rw-r--r--tools/xenstore/testsuite/01simple.sh4
-rw-r--r--tools/xenstore/testsuite/02directory.sh31
-rw-r--r--tools/xenstore/testsuite/03write.sh17
-rw-r--r--tools/xenstore/testsuite/04rm.sh18
-rw-r--r--tools/xenstore/testsuite/05filepermissions.sh49
-rw-r--r--tools/xenstore/testsuite/06dirpermissions.sh61
-rw-r--r--tools/xenstore/testsuite/07watch.sh32
-rw-r--r--tools/xenstore/testsuite/08transaction.sh54
-rw-r--r--tools/xenstore/testsuite/09domain.sh15
-rwxr-xr-xtools/xenstore/testsuite/test.sh44
-rw-r--r--tools/xenstore/utils.c143
-rw-r--r--tools/xenstore/utils.h61
-rw-r--r--tools/xenstore/xenstored.h81
-rw-r--r--tools/xenstore/xenstored_core.c1354
-rw-r--r--tools/xenstore/xenstored_core.h123
-rw-r--r--tools/xenstore/xenstored_domain.c387
-rw-r--r--tools/xenstore/xenstored_domain.h38
-rw-r--r--tools/xenstore/xenstored_test.h37
-rw-r--r--tools/xenstore/xenstored_transaction.c284
-rw-r--r--tools/xenstore/xenstored_transaction.h50
-rw-r--r--tools/xenstore/xenstored_watch.c279
-rw-r--r--tools/xenstore/xenstored_watch.h42
-rw-r--r--tools/xenstore/xs.c551
-rw-r--r--tools/xenstore/xs.h146
-rw-r--r--tools/xenstore/xs_lib.c141
-rw-r--r--tools/xenstore/xs_lib.h63
-rw-r--r--tools/xenstore/xs_random.c1646
-rw-r--r--tools/xenstore/xs_stress.c207
-rw-r--r--tools/xenstore/xs_test.c647
40 files changed, 9230 insertions, 1 deletions
diff --git a/.rootkeys b/.rootkeys
index 77b06993c5..a14deeaee8 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -996,6 +996,43 @@
4292540couq-V0TPwyQ6bspNEWNcvw tools/xcutils/Makefile
42925407VysDb9O06OK_RUzTZxfLoA tools/xcutils/xc_restore.c
42936745WTLYamYsmXm_JGJ72JX-_Q tools/xcutils/xc_save.c
+42a57d97mxMTlPnxBKep6R4ViI5rjg tools/xenstore/.gdbinit
+42a57d97ZEoHuhMAFTuBMlLzA9v_ng tools/xenstore/Makefile
+42a57d97ccA4uY-RxONvIH0P8U0gqg tools/xenstore/TODO
+42a57d972RzmyLgsoH9b8qqk-UjcCA tools/xenstore/fake_libxc.c
+42a57d97IjoPvbIVc4BUzwoKyM0VSw tools/xenstore/list.h
+42a57d97fKgtf0HQLiQkAkVsOvuSyA tools/xenstore/talloc.c
+42a57d98U3p0XP6xzCybTuaVQscUdw tools/xenstore/talloc.h
+42a57d98LFN6Mug-uR4xgAxCE7lwUg tools/xenstore/talloc_guide.txt
+42a57d98S69vKJYwO_WUjoFQZ6KzQg tools/xenstore/testsuite/01simple.sh
+42a57d98BHcFpZz_fXHweylUEUU97Q tools/xenstore/testsuite/02directory.sh
+42a57d98ua4Xeb6pmtbFNTAI833dyw tools/xenstore/testsuite/03write.sh
+42a57d98nbuCUsVT0RJj1zA1JyMDsw tools/xenstore/testsuite/04rm.sh
+42a57d98_ULKHP3_uX1PK2nPMTzWSQ tools/xenstore/testsuite/05filepermissions.sh
+42a57d98YGCLyTDSGmoyFqRqQUlagQ tools/xenstore/testsuite/06dirpermissions.sh
+42a57d98fdO519YyATk4_Zwr1STNfQ tools/xenstore/testsuite/07watch.sh
+42a57d98zZUtvirUMjmHxFphJjmO7Q tools/xenstore/testsuite/08transaction.sh
+42a57d98sn9RbpBgHRv1D99Kt7LwYA tools/xenstore/testsuite/09domain.sh
+42a57d98tSuoFCHnnM2GgENXJrRQmw tools/xenstore/testsuite/test.sh
+42a57d98zxDP2Ti7dTznGROi66rUGw tools/xenstore/utils.c
+42a57d98SDvOYCEjmCjwHSk6390GLA tools/xenstore/utils.h
+42a57d98hFKbOY9D0mCE4H4NDoKr1w tools/xenstore/xenstored.h
+42a57d981KFHLmJ0CjKkn1_gZhYvdw tools/xenstore/xenstored_core.c
+42a57d98bcgE13vYaFxGTusmWbrFDA tools/xenstore/xenstored_core.h
+42a57d98cD9wOFyRYfaEP0QgtqL1Xw tools/xenstore/xenstored_domain.c
+42a57d98noLWvXU8ePbcqvvmu4p2Gw tools/xenstore/xenstored_domain.h
+42a57d98kxHaQ1ApS7RpqmFoEnDmbg tools/xenstore/xenstored_test.h
+42a57d981c9P3aFkWtxWEIRUapt_FQ tools/xenstore/xenstored_transaction.c
+42a57d99pVo__10bbckp_b_rm6i59A tools/xenstore/xenstored_transaction.h
+42a57d99izTIjWfG-IjQAPqYlDWJNg tools/xenstore/xenstored_watch.c
+42a57d99-zLxBjzC7rfj_perV-orUg tools/xenstore/xenstored_watch.h
+42a57d99BnkhISKgCCRcUqhteyuxCw tools/xenstore/xs.c
+42a57d99FyiYSz9AkKKROrRydnA-gQ tools/xenstore/xs.h
+42a57d99SrtsJCDUlKyRPf3EX86A1Q tools/xenstore/xs_lib.c
+42a57d99L2pYeMFyjQ_4Rnb17xTSMg tools/xenstore/xs_lib.h
+42a57d99Kl6Ba8oCHv2fggl7QN9QZA tools/xenstore/xs_random.c
+42a57d99SHYR1lQOD0shuErPDg9NKQ tools/xenstore/xs_stress.c
+42a57d996aBawpkQNOWkNWXD6LrhPg tools/xenstore/xs_test.c
403a3edbrr8RE34gkbR40zep98SXbg tools/xentrace/Makefile
40a107afN60pFdURgBv9KwEzgRl5mQ tools/xentrace/formats
420d52d2_znVbT4JAPIU36vQOme83g tools/xentrace/xenctx.c
diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore
index d996d45a72..b591ce7458 100644
--- a/BitKeeper/etc/ignore
+++ b/BitKeeper/etc/ignore
@@ -128,8 +128,13 @@ tools/xcs/xcs
tools/xcs/xcsdump
tools/xcutils/xc_restore
tools/xcutils/xc_save
+tools/xenstore/testsuite/tmp/*
+tools/xenstore/xen
+tools/xenstore/xenstored_test
+tools/xenstore/xs_random
+tools/xenstore/xs_stress
+tools/xenstore/xs_test
tools/xentrace/xentrace
-tools/xfrd/xfrd
xen/BLOG
xen/TAGS
xen/arch/x86/asm-offsets.s
diff --git a/tools/Makefile b/tools/Makefile
index 95e8989d4c..5e4a2bd586 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -9,6 +9,7 @@ SUBDIRS += xentrace
SUBDIRS += python
SUBDIRS += xcs
SUBDIRS += xcutils
+SUBDIRS += xenstore
SUBDIRS += pygrub
.PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean
diff --git a/tools/xenstore/.gdbinit b/tools/xenstore/.gdbinit
new file mode 100644
index 0000000000..9a71b20ac4
--- /dev/null
+++ b/tools/xenstore/.gdbinit
@@ -0,0 +1,4 @@
+set environment XENSTORED_RUNDIR=testsuite/tmp
+set environment XENSTORED_ROOTDIR=testsuite/tmp
+handle SIGUSR1 noprint nostop
+handle SIGPIPE noprint nostop
diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile
new file mode 100644
index 0000000000..cd4a7b3079
--- /dev/null
+++ b/tools/xenstore/Makefile
@@ -0,0 +1,97 @@
+XEN_ROOT=../..
+# This does something wrong to TARGET_ARCH.
+#include $(XEN_ROOT)/tools/Rules.mk
+LIBDIR = lib
+XEN_LIBXC = $(XEN_ROOT)/tools/libxc
+
+INSTALL = install
+INSTALL_DATA = $(INSTALL) -m0644
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+PROFILE=#-pg
+BASECFLAGS=-Wall -W -g
+# Make gcc generate dependencies.
+BASECFLAGS += -Wp,-MD,.$(@F).d
+PROG_DEP = .*.d
+#BASECFLAGS+= -O3 $(PROFILE)
+#BASECFLAGS+= -I$(XEN_ROOT)/tools
+BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc
+BASECFLAGS+= -I$(XEN_ROOT)/xen/include/public
+BASECFLAGS+= -I.
+
+CFLAGS+=$(BASECFLAGS)
+LDFLAGS=$(PROFILE) -L$(XEN_LIBXC)
+TESTDIR=`pwd`/testsuite/tmp
+TESTFLAGS=-DTESTING
+TESTENV=XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)
+
+# These targets name actions, not files: declare them phony so that a stray
+# file called "clean", "check", "install", ... cannot stop them from running.
+.PHONY: all testcode clean check testsuite-run testsuite-clean randomcheck stresstest tarball install
+
+all: xen xenstored libxenstore.a
+
+testcode: xen xs_test xenstored_test xs_random
+
+xen:
+ ln -sf $(XEN_ROOT)/xen/include/public $@
+
+xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@
+
+xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+xs_test: xs_test.o xs_lib.o utils.o
+xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o
+xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
+
+xs_test.o xs_stress.o xenstored_core_test.o xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS)
+
+xenstored_%_test.o: xenstored_%.c
+ $(COMPILE.c) -o $@ $<
+
+xs_test_lib.o: xs.c
+ $(COMPILE.c) -o $@ $<
+
+talloc_test.o: talloc.c
+ $(COMPILE.c) -o $@ $<
+
+libxenstore.a: libxenstore.a(xs.o) libxenstore.a(xs_lib.o)
+
+clean: testsuite-clean
+ rm -f *.o *.a xs_test xenstored xenstored_test xs_random xs_stress xen
+ -$(RM) $(PROG_DEP)
+
+check: testsuite-run randomcheck stresstest
+
+testsuite-run: xen xenstored_test xs_test
+ $(TESTENV) testsuite/test.sh
+
+testsuite-clean:
+ rm -rf $(TESTDIR)
+
+# Make the seed visible (make echoes it with each command) so that a
+# failing run can be repeated, without --fast, using the same seed.
+RANDSEED=$(shell date +%s)
+randomcheck: xs_random xenstored_test
+ $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED)
+ $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED)
+ $(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED)
+
+stresstest: xs_stress xenstored_test
+ rm -rf $(TESTDIR)/store
+ export $(TESTENV); PID=`./xenstored_test --output-pid`; ./xs_stress 10000; ret=$$?; kill $$PID; exit $$ret
+
+TAGS:
+ etags `find . -name '*.[ch]'`
+
+tarball: clean
+ cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/
+
+install: xenstored libxenstore.a
+ $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
+ $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
+ $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxenstore.a $(DESTDIR)/usr/$(LIBDIR)
+
+-include $(PROG_DEP)
diff --git a/tools/xenstore/TODO b/tools/xenstore/TODO
new file mode 100644
index 0000000000..9e22afe536
--- /dev/null
+++ b/tools/xenstore/TODO
@@ -0,0 +1,7 @@
+TODO in no particular order. Some of these will never be done. There
+are omissions of important but necessary things. It is up to the
+reader to fill in the blanks.
+
+- Remove calls to system() from daemon
+- Timeout failed watch responses
+- Timeout blocking transactions
diff --git a/tools/xenstore/fake_libxc.c b/tools/xenstore/fake_libxc.c
new file mode 100644
index 0000000000..decfb4001d
--- /dev/null
+++ b/tools/xenstore/fake_libxc.c
@@ -0,0 +1,119 @@
+/*
+ Fake libxc which doesn't require hypervisor but talks to xs_test.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <assert.h>
+#include <signal.h>
+#include "utils.h"
+#include "xenstored_core.h"
+#include "xenstored_domain.h"
+#include "xenstored_test.h"
+
+/* Write end of the pipe made by fake_open_eventchn(); send_to_fd() writes the port to it. */
+static int sigfd;
+/* Pid read from the shared page in xc_map_foreign_range(); target of SIGUSR2 in xc_evtchn_send(). */
+static int xs_test_pid;
+/* Fake event-channel port, also read from the shared page in xc_map_foreign_range(). */
+static u16 port;
+
+/* The event channel maps to a signal, shared page to an mmapped file. */
+
+/*
+ * Fake event-channel notification: deliver SIGUSR2 to the xs_test process.
+ * local_port must be the port recorded by xc_map_foreign_range() (asserted).
+ * A failing kill() is reported via barf_perror(); the return value is always 0.
+ */
+int xc_evtchn_send(int xc_handle __attribute__((unused)), int local_port)
+{
+	int rc;
+
+	assert(local_port == port);
+
+	rc = kill(xs_test_pid, SIGUSR2);
+	if (rc != 0)
+		barf_perror("fake event channel failed");
+
+	return 0;
+}
+
+/*
+ * Fake foreign-page mapping: mmap() the file behind xc_handle instead of
+ * asking a hypervisor.  dom and mfn are ignored.
+ *
+ * The peer leaves its pid at byte offset 32 and the event port at byte
+ * offset 36 of the page; both are recorded, and our own pid is written
+ * back at offset 32 as the reply.
+ *
+ * Returns the mapping, or NULL if mmap() fails.
+ */
+void *xc_map_foreign_range(int xc_handle, u32 dom __attribute__((unused)),
+			   int size, int prot,
+			   unsigned long mfn __attribute__((unused)))
+{
+	/* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
+	char *page;
+
+	page = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
+	if (page == MAP_FAILED)
+		return NULL;
+
+	/* xs_test tells us pid and port by putting it in buffer, we reply. */
+	xs_test_pid = *(int *)(page + 32);
+	port = *(int *)(page + 36);
+	*(int *)(page + 32) = getpid();
+	return page;
+}
+
+/*
+ * Fake xc_interface_open(): the "handle" is a descriptor for /tmp/xcmap,
+ * created (or truncated) and then filled with one page of zeroes.
+ * Returns the descriptor, or the negative result of open() on failure.
+ */
+int xc_interface_open(void)
+{
+	int handle;
+	char zeroes[getpagesize()];
+
+	handle = open("/tmp/xcmap", O_RDWR|O_CREAT|O_TRUNC, 0600);
+	if (handle < 0)
+		return handle;
+
+	memset(zeroes, 0, sizeof(zeroes));
+	if (!write_all(handle, zeroes, sizeof(zeroes)))
+		barf_perror("Failed to write /tmp/xcmap page");
+
+	return handle;
+}
+
+/* Fake xc_interface_close(): close the descriptor and always report success. */
+int xc_interface_close(int xc_handle)
+{
+	(void)close(xc_handle);
+
+	return 0;
+}
+
+/*
+ * SIGUSR2 handler: turn the signal into a readable event by writing the
+ * port number to sigfd.  errno is restored so the interrupted code never
+ * sees a value changed by the handler.
+ */
+static void send_to_fd(int signo __attribute__((unused)))
+{
+	const int errno_on_entry = errno;
+
+	write(sigfd, &port, sizeof(port));
+
+	errno = errno_on_entry;
+}
+
+/* Stop delivery of fake events by ignoring SIGUSR2. */
+void fake_block_events(void)
+{
+ signal(SIGUSR2, SIG_IGN);
+}
+
+/* Resume delivery of fake events: (re)install send_to_fd() as the SIGUSR2 handler. */
+void fake_ack_event(void)
+{
+ signal(SIGUSR2, send_to_fd);
+}
+
+/*
+ * Fake event-channel open: create a pipe and install send_to_fd() as the
+ * SIGUSR2 handler.  Returns the read end of the pipe; the write end is kept
+ * in sigfd.  Returns -1 if pipe() or signal() fails; in the latter case both
+ * pipe ends are closed and signal()'s errno is preserved.
+ */
+int fake_open_eventchn(void)
+{
+	int pipefd[2];
+	int saved_errno;
+
+	if (pipe(pipefd) != 0)
+		return -1;
+
+	if (signal(SIGUSR2, send_to_fd) != SIG_ERR) {
+		sigfd = pipefd[1];
+		return pipefd[0];
+	}
+
+	/* Handler could not be installed: release the pipe again. */
+	saved_errno = errno;
+	close(pipefd[0]);
+	close(pipefd[1]);
+	errno = saved_errno;
+	return -1;
+}
diff --git a/tools/xenstore/list.h b/tools/xenstore/list.h
new file mode 100644
index 0000000000..eb35293d7f
--- /dev/null
+++ b/tools/xenstore/list.h
@@ -0,0 +1,508 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+/* Taken from Linux kernel code, but de-kernelized for userspace. */
+#include <stddef.h>
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *) 0x00100100)
+#define LIST_POISON2 ((void *) 0x00200200)
+
+/*
+ * container_of - recover the enclosing structure from a pointer to a member
+ * @ptr: pointer to the member
+ * @type: type of the enclosing structure
+ * @member: name of the member within @type
+ *
+ * The temporary __mptr makes the compiler check @ptr against the member's
+ * declared type before the member offset is subtracted.  Relies on the GNU
+ * statement-expression and typeof extensions.
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+/* Static initialiser for an empty list: next and prev both point at the head itself. */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* Define a struct list_head variable called name, initialised as an empty list. */
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/* Run-time equivalent: make the head at ptr an empty list (ptr is evaluated several times). */
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * list_top - first entry of a list, or NULL when the list is empty
+ * @head: the list head
+ * @type: type of the structure the list_head is embedded in
+ * @member: name of the list_head within that structure
+ *
+ * @head is evaluated only once (it is copied into a local first).
+ */
+#define list_top(head, type, member) \
+({ \
+ struct list_head *_head = (head); \
+ list_empty(_head) ? NULL : list_entry(_head->next, type, member); \
+})
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Insert a new entry between two known consecutive entries (RCU flavour).
+ *
+ * Unlike __list_add(), the new entry's own next/prev links are filled in
+ * before either neighbour is made to point at it.
+ *
+ * NOTE(review): there are no memory barriers here, presumably because this
+ * copy was de-kernelized for userspace -- confirm before relying on it for
+ * lock-free readers.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add_rcu(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ new->next = next;
+ new->prev = prev;
+ next->prev = new;
+ prev->next = new;
+}
+
+/**
+ * list_add_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add_rcu(struct list_head *new, struct list_head *head)
+{
+ __list_add_rcu(new, head, head->next);
+}
+
+/**
+ * list_add_tail_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail_rcu(struct list_head *new, struct list_head *head)
+{
+ __list_add_rcu(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_rcu - deletes entry from list without re-initialization
+ * @entry: the element to delete from the list.
+ *
+ * Note: list_empty on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the list.
+ */
+static inline void list_del_rcu(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+ struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ *
+ * Returns non-zero when @head links back to itself, i.e. holds no entries.
+ * The list is only read, so a pointer to a const list is accepted as well.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/*
+ * Splice the entries of @list in right after @head.
+ *
+ * Internal helper: @list->next and @list->prev are used without an
+ * emptiness check, and @list itself is not modified; the public
+ * list_splice*() wrappers test list_empty() first.
+ */
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ /* Hook the first spliced entry onto head ... */
+ first->prev = head;
+ head->next = first;
+
+ /* ... and the last one onto whatever used to follow head. */
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * Does nothing when @list is empty.  @list's own head is left as it was.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * Does nothing when @list is empty; otherwise @list is reset to an empty
+ * list once its entries have been moved.
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head);
+	INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+
+/**
+ * list_for_each_entry_continue - iterate over list of given type
+ * continuing after existing point
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+/* Static initialiser for an empty hlist head (first == NULL). */
+#define HLIST_HEAD_INIT { .first = NULL }
+/* Define a struct hlist_head variable called name, initialised empty. */
+#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
+/* Run-time equivalent: make the head at ptr empty. */
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+/* Mark the node at ptr as being on no list: NULL next and NULL pprev. */
+#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL)
+
+/*
+ * hlist_unhashed - is this node on no list?
+ * @h: the node to test
+ *
+ * Returns non-zero when @h->pprev is NULL, the state left by
+ * INIT_HLIST_NODE().  The node is only read, so const pointers are accepted.
+ */
+static __inline__ int hlist_unhashed(const struct hlist_node *h)
+{
+	return !h->pprev;
+}
+
+/*
+ * hlist_empty - tests whether a hash list is empty
+ * @h: the list head to test
+ *
+ * Returns non-zero when the head has no first node.  The head is only
+ * read, so const pointers are accepted.
+ */
+static __inline__ int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+
+/*
+ * Unlink @n from its hash list.  @n's own next/pprev fields are left
+ * untouched; the callers decide whether to poison or re-initialise them.
+ */
+static __inline__ void __hlist_del(struct hlist_node *n)
+{
+ struct hlist_node *next = n->next;
+ struct hlist_node **pprev = n->pprev;
+ /* Whatever pointed at n (head->first or the previous node's next) now skips it. */
+ *pprev = next;
+ if (next)
+ next->pprev = pprev;
+}
+
+/*
+ * hlist_del - delete a node from its hash list
+ * @n: the node to delete
+ *
+ * Afterwards @n's links are set to the LIST_POISON values, so
+ * hlist_unhashed() does not return true for it.
+ */
+static __inline__ void hlist_del(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_del_rcu - deletes entry from hash list without re-initialization
+ * @entry: the element to delete from the hash list.
+ *
+ * Note: hlist_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ */
+static inline void hlist_del_rcu(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->pprev = LIST_POISON2;
+}
+
+/*
+ * hlist_del_init - delete a node from its hash list and reinitialise it
+ * @n: the node to delete
+ *
+ * A node that is on no list (NULL pprev) is left alone.  Afterwards
+ * hlist_unhashed() returns true for @n.
+ */
+static __inline__ void hlist_del_init(struct hlist_node *n)
+{
+	if (!n->pprev)
+		return;
+
+	__hlist_del(n);
+	INIT_HLIST_NODE(n);
+}
+
+#define hlist_del_rcu_init hlist_del_init
+
+/*
+ * hlist_add_head - insert a node at the front of a hash list
+ * @n: the node to insert
+ * @h: the list head
+ */
+static __inline__ void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ /* The old first node is now reached through n->next. */
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+ n->pprev = &h->first;
+}
+
+/*
+ * hlist_add_head_rcu - insert a node at the front of a hash list (RCU flavour)
+ * @n: the node to insert
+ * @h: the list head
+ *
+ * Same result as hlist_add_head(), but both of @n's links are filled in
+ * before the head is made to point at it.
+ */
+static __inline__ void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ n->pprev = &h->first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+}
+
+/*
+ * hlist_add_before - insert @n immediately before @next
+ * @n: the node to insert
+ * @next: a node already on a list
+ *
+ * next must be != NULL
+ */
+static __inline__ void hlist_add_before(struct hlist_node *n, struct hlist_node *next)
+{
+ /* Take over next's back-link before it is overwritten below. */
+ n->pprev = next->pprev;
+ n->next = next;
+ next->pprev = &n->next;
+ /* Whatever used to point at next (head or previous node) now points at n. */
+ *(n->pprev) = n;
+}
+
+/*
+ * hlist_add_after - insert @next immediately after @n
+ * @n: a node already on a hash list
+ * @next: the node to insert (despite its name, this is the new entry)
+ *
+ * @next need not be initialised beforehand: both of its links are written
+ * here, and the node that used to follow @n (if any) is re-pointed at @next.
+ */
+static __inline__ void hlist_add_after(struct hlist_node *n,
+				       struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	/* The old successor's back-link must now refer to the new node. */
+	if (next->next)
+		next->next->pprev = &next->next;
+}
+
+/* hlist_entry - get the structure of type 'type' that embeds the hlist_node at ptr as 'member'. */
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+/* Cannot easily do prefetch unfortunately */
+/* hlist_for_each - walk the nodes of a hash list; pos is a struct hlist_node * cursor. */
+#define hlist_for_each(pos, head) \
+ for (pos = (head)->first; pos; pos = pos->next)
+
+/*
+ * hlist_for_each_safe - as hlist_for_each, but the successor is saved in n
+ * before the loop body runs, so the body may remove pos from the list.
+ */
+#define hlist_for_each_safe(pos, n, head) \
+ for (pos = (head)->first; n = pos ? pos->next : 0, pos; \
+ pos = n)
+
+/**
+ * hlist_for_each_entry - iterate over list of given type
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member) \
+ for (pos = (pos)->next; \
+ pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from existing point
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member) \
+ for (; pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @n: another &struct hlist_node to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ n = pos->next; 1; }) && \
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = n)
+
+#endif
diff --git a/tools/xenstore/talloc.c b/tools/xenstore/talloc.c
new file mode 100644
index 0000000000..8e93c28fe3
--- /dev/null
+++ b/tools/xenstore/talloc.c
@@ -0,0 +1,1143 @@
+/*
+ Samba Unix SMB/CIFS implementation.
+
+ Samba trivial allocation library - new interface
+
+ NOTE: Please read talloc_guide.txt for full documentation
+
+ Copyright (C) Andrew Tridgell 2004
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/*
+ inspired by http://swapped.cc/halloc/
+*/
+
+
+#ifdef _SAMBA_BUILD_
+#include "includes.h"
+#if ((SAMBA_VERSION_MAJOR==3)&&(SAMBA_VERSION_MINOR<9))
+/* This is to circumvent SAMBA3's paranoid malloc checker. Here in this file
+ * we trust ourselves... */
+#ifdef malloc
+#undef malloc
+#endif
+#ifdef realloc
+#undef realloc
+#endif
+#endif
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include "talloc.h"
+/* assume a modern system */
+#define HAVE_VA_COPY
+#endif
+
+/* use this to force every realloc to change the pointer, to stress test
+ code that might not cope */
+#ifdef TESTING
+#define ALWAYS_REALLOC 1
+void *test_malloc(size_t size);
+#define malloc test_malloc
+#endif
+
+#define MAX_TALLOC_SIZE 0x10000000
+#define TALLOC_MAGIC 0xe814ec4f
+#define TALLOC_MAGIC_FREE 0x7faebef3
+#define TALLOC_MAGIC_REFERENCE ((const char *)1)
+
+/* by default we abort when given a bad pointer (such as when talloc_free()
+ is called on a pointer that came from malloc()) */
+#ifndef TALLOC_ABORT
+#define TALLOC_ABORT(reason) abort()
+#endif
+
+#ifndef discard_const_p
+#if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T)
+# define discard_const_p(type, ptr) ((type *)((intptr_t)(ptr)))
+#else
+# define discard_const_p(type, ptr) ((type *)(ptr))
+#endif
+#endif
+
+/* this null_context is only used if talloc_enable_null_tracking(),
+ talloc_enable_leak_report() or talloc_enable_leak_report_full() is
+ called, otherwise it remains NULL
+*/
+static const void *null_context;
+static void *cleanup_context;
+static int (*malloc_fail_handler)(void *);
+static void *malloc_fail_data;
+
+struct talloc_reference_handle { /* payload of a chunk created by talloc_reference() */
+ struct talloc_reference_handle *next, *prev; /* links in the referenced chunk's refs list */
+ void *ptr; /* the talloc pointer this handle refers to */
+};
+
+typedef int (*talloc_destructor_t)(void *);
+
+struct talloc_chunk { /* header stored immediately in front of every talloc pointer */
+ struct talloc_chunk *next, *prev; /* sibling list links */
+ struct talloc_chunk *parent, *child; /* parent is kept only on the first sibling; child is the head of the child list */
+ struct talloc_reference_handle *refs; /* reference handles that point at this chunk */
+ size_t size; /* number of user bytes following the header */
+ unsigned magic; /* TALLOC_MAGIC while live, TALLOC_MAGIC_FREE once released */
+ talloc_destructor_t destructor; /* run by talloc_free(); (talloc_destructor_t)-1 marks a destructor in progress */
+ const char *name; /* NULL, a string, or TALLOC_MAGIC_REFERENCE for reference handles */
+};
+
+/*
+  map a talloc pointer back to the chunk header stored in front of it,
+  invoking TALLOC_ABORT() when the header does not carry TALLOC_MAGIC
+*/
+static struct talloc_chunk *talloc_chunk_from_ptr(const void *ptr)
+{
+	struct talloc_chunk *hdr = discard_const_p(struct talloc_chunk, ptr) - 1;
+
+	if (hdr->magic == TALLOC_MAGIC) {
+		return hdr;
+	}
+
+	/* distinguish an already freed chunk from a pointer we never handed out */
+	if (hdr->magic == TALLOC_MAGIC_FREE) {
+		TALLOC_ABORT("Bad talloc magic value - double free");
+	} else {
+		TALLOC_ABORT("Bad talloc magic value - unknown value");
+	}
+
+	return hdr;
+}
+
+/* hook into the front of the list: p becomes the new head of list, its
+ next points at the old head (or NULL for an empty list) and its prev is
+ cleared. Both arguments are evaluated several times, so they must be
+ free of side effects. */
+#define _TLIST_ADD(list, p) \
+do { \
+ if (!(list)) { \
+ (list) = (p); \
+ (p)->next = (p)->prev = NULL; \
+ } else { \
+ (list)->prev = (p); \
+ (p)->next = (list); \
+ (p)->prev = NULL; \
+ (list) = (p); \
+ }\
+} while (0)
+
+/* remove an element from a list - element doesn't have to be in list.
+ If p is the head, list advances to p->next; otherwise p's neighbours are
+ linked around it. Afterwards p's own next/prev are reset to NULL unless
+ p is still equal to list. Both arguments are evaluated several times. */
+#define _TLIST_REMOVE(list, p) \
+do { \
+ if ((p) == (list)) { \
+ (list) = (p)->next; \
+ if (list) (list)->prev = NULL; \
+ } else { \
+ if ((p)->prev) (p)->prev->next = (p)->next; \
+ if ((p)->next) (p)->next->prev = (p)->prev; \
+ } \
+ if ((p) && ((p) != (list))) (p)->next = (p)->prev = NULL; \
+} while (0)
+
+
+/*
+  find the chunk that owns ptr: rewind to the first sibling, which is the
+  one carrying the parent link, and return that link (NULL when there is
+  no parent)
+*/
+static struct talloc_chunk *talloc_parent_chunk(const void *ptr)
+{
+	struct talloc_chunk *first;
+
+	for (first = talloc_chunk_from_ptr(ptr); first->prev != NULL; first = first->prev) {
+		/* rewinding only */
+	}
+	return first->parent;
+}
+
+/*
+  return the talloc pointer of the parent of ptr, or NULL when ptr has
+  no parent chunk
+*/
+void *talloc_parent(const void *ptr)
+{
+	struct talloc_chunk *tc = talloc_parent_chunk(ptr);
+
+	/* without a parent there is no header to step over; computing tc+1
+	   on NULL would hand the caller a bogus non-NULL pointer */
+	return tc ? (void *)(tc+1) : NULL;
+}
+
+/*
+ Allocate a bit of memory as a child of an existing pointer.
+
+ A NULL context is replaced by null_context (which may itself be NULL,
+ giving a chunk without a parent). Returns a pointer to size bytes of
+ uninitialised memory, or NULL if size >= MAX_TALLOC_SIZE or malloc()
+ fails and the registered fail handler cannot recover.
+*/
+void *_talloc(const void *context, size_t size)
+{
+ struct talloc_chunk *tc;
+
+ if (context == NULL) {
+ context = null_context;
+ }
+
+ if (size >= MAX_TALLOC_SIZE) {
+ return NULL;
+ }
+
+ tc = malloc(sizeof(*tc)+size); /* header and user data live in one block */
+ if (tc == NULL) {
+ if (malloc_fail_handler)
+ if (malloc_fail_handler(malloc_fail_data)) /* non-zero from the handler means: try once more */
+ tc = malloc(sizeof(*tc)+size);
+ if (!tc)
+ return NULL;
+ }
+
+ tc->size = size;
+ tc->magic = TALLOC_MAGIC;
+ tc->destructor = NULL;
+ tc->child = NULL;
+ tc->name = NULL;
+ tc->refs = NULL;
+
+ if (context) {
+ struct talloc_chunk *parent = talloc_chunk_from_ptr(context);
+
+ tc->parent = parent;
+
+ if (parent->child) {
+ parent->child->parent = NULL; /* only the head of the child list keeps the parent link */
+ }
+
+ _TLIST_ADD(parent->child, tc); /* the new chunk becomes the head of the child list */
+ } else {
+ tc->next = tc->prev = tc->parent = NULL;
+ }
+
+ return (void *)(tc+1); /* user memory starts right after the header */
+}
+
+
+/*
+  install destructor as the callback talloc_free() runs before releasing
+  ptr. A destructor returns 0 to let the free proceed or -1 to veto it,
+  in which case ptr stays allocated and usable. Passing NULL removes any
+  destructor.
+*/
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *))
+{
+	talloc_chunk_from_ptr(ptr)->destructor = destructor;
+}
+
+/*
+ increase the reference count on a piece of memory by taking a reference
+ to ptr that hangs off null_context. The result of talloc_reference() is
+ not checked, so a failed allocation goes unreported.
+*/
+void talloc_increase_ref_count(const void *ptr)
+{
+ talloc_reference(null_context, ptr);
+}
+
+/*
+ helper for talloc_reference(): destructor installed on every reference
+ handle. ptr is the handle itself. It unhooks the handle from the refs
+ list of the chunk it refers to and frees the handle. Always returns 0.
+*/
+static int talloc_reference_destructor(void *ptr)
+{
+ struct talloc_reference_handle *handle = ptr;
+ struct talloc_chunk *tc1 = talloc_chunk_from_ptr(ptr); /* chunk of the handle */
+ struct talloc_chunk *tc2 = talloc_chunk_from_ptr(handle->ptr); /* chunk being referenced */
+ if (tc1->destructor != (talloc_destructor_t)-1) {
+ tc1->destructor = NULL; /* called directly: stop talloc_free() below from running us again */
+ }
+ _TLIST_REMOVE(tc2->refs, handle);
+ talloc_free(handle); /* returns -1 early if talloc_free(handle) is already in progress (destructor == -1) */
+ return 0;
+}
+
+/*
+ make a secondary reference to a pointer, hanging off the given context.
+ the pointer remains valid until both the original caller and this given
+ context are freed.
+
+ the major use for this is when two different structures need to reference the
+ same underlying data, and you want to be able to free the two instances separately,
+ and in either order
+
+ returns ptr on success, or NULL if ptr is NULL or the reference handle
+ could not be allocated
+*/
+void *talloc_reference(const void *context, const void *ptr)
+{
+ struct talloc_chunk *tc;
+ struct talloc_reference_handle *handle;
+ if (ptr == NULL) return NULL;
+
+ tc = talloc_chunk_from_ptr(ptr);
+ handle = talloc_named_const(context, sizeof(*handle), TALLOC_MAGIC_REFERENCE); /* the special name marks the chunk as a reference handle */
+
+ if (handle == NULL) return NULL;
+
+ /* note that we hang the destructor off the handle, not the
+ main context as that allows the caller to still setup their
+ own destructor on the context if they want to */
+ talloc_set_destructor(handle, talloc_reference_destructor);
+ handle->ptr = discard_const_p(void, ptr);
+ _TLIST_ADD(tc->refs, handle); /* newest reference goes to the front of ptr's refs list */
+ return handle->ptr;
+}
+
+/*
+ remove a secondary reference to a pointer. This undoes what
+ talloc_reference() has done. The context and pointer arguments
+ must match those given to a talloc_reference()
+
+ returns 0 if a reference held by context was found and released, -1 if
+ context holds no reference to ptr
+*/
+static int talloc_unreference(const void *context, const void *ptr)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ struct talloc_reference_handle *h;
+
+ if (context == NULL) {
+ context = null_context;
+ }
+
+ for (h=tc->refs;h;h=h->next) {
+ struct talloc_chunk *p = talloc_parent_chunk(h); /* owner of this reference handle */
+ if ((p==NULL && context==NULL) || p+1 == context) break;
+ }
+ if (h == NULL) {
+ return -1;
+ }
+
+ talloc_set_destructor(h, NULL); /* the handle is unhooked by hand below, so skip its destructor */
+ _TLIST_REMOVE(tc->refs, h);
+ talloc_free(h);
+ return 0;
+}
+
+/*
+ remove a specific parent context from a pointer. This is a more
+ controlled variant of talloc_free()
+
+ If context holds a reference to ptr, that reference is dropped.
+ Otherwise context must be the actual parent of ptr: ptr is then freed
+ when no references remain, or handed over to the owner of its first
+ remaining reference. Returns 0 on success and -1 if ptr is NULL or
+ context is neither a referrer nor the parent; when ptr is freed the
+ result of talloc_free() is returned.
+*/
+int talloc_unlink(const void *context, void *ptr)
+{
+ struct talloc_chunk *tc_p, *new_p;
+ void *new_parent;
+
+ if (ptr == NULL) {
+ return -1;
+ }
+
+ if (context == NULL) {
+ context = null_context;
+ }
+
+ if (talloc_unreference(context, ptr) == 0) { /* context was only a referrer */
+ return 0;
+ }
+
+ if (context == NULL) {
+ if (talloc_parent_chunk(ptr) != NULL) {
+ return -1;
+ }
+ } else {
+ if (talloc_chunk_from_ptr(context) != talloc_parent_chunk(ptr)) {
+ return -1;
+ }
+ }
+
+ tc_p = talloc_chunk_from_ptr(ptr);
+
+ if (tc_p->refs == NULL) {
+ return talloc_free(ptr);
+ }
+
+ new_p = talloc_parent_chunk(tc_p->refs); /* owner of the first remaining reference */
+ if (new_p) {
+ new_parent = new_p+1;
+ } else {
+ new_parent = NULL;
+ }
+
+ if (talloc_unreference(new_parent, ptr) != 0) { /* that reference is converted into real ownership */
+ return -1;
+ }
+
+ talloc_steal(new_parent, ptr);
+
+ return 0;
+}
+
+/*
+ add a name to an existing pointer - va_list version
+*/
+static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+
+static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap)
+{
+	struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+	/* the formatted name is allocated as a child of ptr */
+	char *formatted = talloc_vasprintf(ptr, fmt, ap);
+
+	tc->name = formatted;
+	if (formatted != NULL) {
+		/* label the string chunk itself so it is recognisable in reports */
+		talloc_set_name_const(formatted, ".name");
+	}
+}
+
+/*
+ add a name to an existing pointer. The name is built printf-style from
+ fmt and the following arguments by talloc_set_name_v().
+*/
+void talloc_set_name(const void *ptr, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+}
+
+/*
+  cheap way to name a pointer: the name pointer is stored as is and never
+  copied, so it must stay valid for as long as the chunk uses it
+  (typically a string constant)
+*/
+void talloc_set_name_const(const void *ptr, const char *name)
+{
+	talloc_chunk_from_ptr(ptr)->name = name;
+}
+
+/*
+ create a named talloc pointer. Any talloc pointer can be named, and
+ talloc_named() operates just like talloc() except that it allows you
+ to name the pointer.
+
+ The name is formatted printf-style from fmt. Returns NULL if the
+ allocation itself fails; the result of naming the chunk is not checked.
+*/
+void *talloc_named(const void *context, size_t size, const char *fmt, ...)
+{
+ va_list ap;
+ void *ptr;
+
+ ptr = _talloc(context, size);
+ if (ptr == NULL) return NULL;
+
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+
+ return ptr;
+}
+
+/*
+  create a named talloc pointer from a constant name. Works like
+  talloc_named() but stores the name pointer directly instead of
+  formatting a copy. Returns NULL if the allocation fails.
+*/
+void *talloc_named_const(const void *context, size_t size, const char *name)
+{
+	void *mem = _talloc(context, size);
+
+	if (mem != NULL) {
+		talloc_set_name_const(mem, name);
+	}
+	return mem;
+}
+
+/*
+  return the name of a talloc ptr: "UNNAMED" when none was set and
+  ".reference" for reference handles
+*/
+const char *talloc_get_name(const void *ptr)
+{
+	const char *name = talloc_chunk_from_ptr(ptr)->name;
+
+	if (name == NULL) {
+		return "UNNAMED";
+	}
+	/* reference handles carry a marker value instead of a real string */
+	return (name == TALLOC_MAGIC_REFERENCE) ? ".reference" : name;
+}
+
+
+/*
+  return ptr if its talloc name equals name (same pointer or same
+  string contents), otherwise NULL. A NULL ptr yields NULL.
+*/
+void *talloc_check_name(const void *ptr, const char *name)
+{
+	const char *actual;
+
+	if (ptr == NULL) {
+		return NULL;
+	}
+
+	actual = talloc_get_name(ptr);
+	/* pointer equality is a shortcut that avoids the string compare */
+	if (actual != name && strcmp(actual, name) != 0) {
+		return NULL;
+	}
+	return discard_const_p(void, ptr);
+}
+
+
+/*
+ this is for compatibility with older versions of talloc: allocate a
+ zero-sized chunk with a NULL context and name it printf-style from fmt.
+ Returns NULL if the allocation fails.
+*/
+void *talloc_init(const char *fmt, ...)
+{
+ va_list ap;
+ void *ptr;
+
+ ptr = _talloc(NULL, 0);
+ if (ptr == NULL) return NULL;
+
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+
+ return ptr;
+}
+
+/*
+ this is a replacement for the Samba3 talloc_destroy_pool functionality. It
+ should probably not be used in new code. It's in here to keep the talloc
+ code consistent across Samba 3 and 4.
+
+ Frees every child of ptr. A child that talloc_free() refuses to free is
+ moved to another parent so that the loop always makes progress. A NULL
+ ptr is ignored.
+*/
+void talloc_free_children(void *ptr)
+{
+ struct talloc_chunk *tc;
+
+ if (ptr == NULL) {
+ return;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ while (tc->child) {
+ /* we need to work out who will own an abandoned child
+ if it cannot be freed. In priority order, the first
+ choice is owner of any remaining reference to this
+ pointer, the second choice is our parent, and the
+ final choice is the null context. */
+ void *child = tc->child+1;
+ const void *new_parent = null_context;
+ if (tc->child->refs) {
+ struct talloc_chunk *p = talloc_parent_chunk(tc->child->refs);
+ if (p) new_parent = p+1;
+ }
+ if (talloc_free(child) == -1) {
+ if (new_parent == null_context) { /* no referrer found above: fall back to our own parent */
+ struct talloc_chunk *p = talloc_parent_chunk(ptr);
+ if (p) new_parent = p+1;
+ }
+ talloc_steal(new_parent, child); /* takes the child off tc->child */
+ }
+ }
+}
+
+/*
+ free a talloc pointer. This also frees all child pointers of this
+ pointer recursively
+
+ return 0 if the memory is actually freed, otherwise -1. The memory
+ will not be freed if ptr is NULL, if it still has references (one
+ reference is dropped instead), if its destructor is already running,
+ or if the destructor returns -1
+*/
+int talloc_free(void *ptr)
+{
+ struct talloc_chunk *tc;
+
+ if (ptr == NULL) {
+ return -1;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ if (tc->refs) {
+ talloc_reference_destructor(tc->refs); /* release the first reference only */
+ return -1;
+ }
+
+ if (tc->destructor) {
+ talloc_destructor_t d = tc->destructor;
+ if (d == (talloc_destructor_t)-1) { /* destructor already in progress: refuse the nested free */
+ return -1;
+ }
+ tc->destructor = (talloc_destructor_t)-1; /* mark as in progress while d runs */
+ if (d(ptr) == -1) {
+ tc->destructor = d; /* vetoed: restore the destructor and keep the memory */
+ return -1;
+ }
+ tc->destructor = NULL;
+ }
+
+ talloc_free_children(ptr);
+
+ if (tc->parent) { /* tc is the head of its parent's child list */
+ _TLIST_REMOVE(tc->parent->child, tc);
+ if (tc->parent->child) {
+ tc->parent->child->parent = tc->parent; /* hand the parent link to the new head */
+ }
+ } else {
+ if (tc->prev) tc->prev->next = tc->next;
+ if (tc->next) tc->next->prev = tc->prev;
+ }
+
+ tc->magic = TALLOC_MAGIC_FREE; /* lets talloc_chunk_from_ptr() recognise a double free */
+
+ free(tc);
+ return 0;
+}
+
+
+
+/*
+  A talloc version of realloc. The context argument is only used if
+  ptr is NULL
+
+  size == 0 calls talloc_free(ptr) and returns NULL. NULL is also
+  returned, with ptr left untouched, when size >= MAX_TALLOC_SIZE, when
+  ptr has references, or when no memory can be obtained even after the
+  fail handler ran. On success the chunk is renamed to name and the new
+  pointer is returned; the old pointer must no longer be used.
+*/
+void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name)
+{
+	struct talloc_chunk *tc;
+	void *new_ptr;
+
+	/* size zero is equivalent to free() */
+	if (size == 0) {
+		talloc_free(ptr);
+		return NULL;
+	}
+
+	if (size >= MAX_TALLOC_SIZE) {
+		return NULL;
+	}
+
+	/* realloc(NULL) is equivalent to malloc() */
+	if (ptr == NULL) {
+		return talloc_named_const(context, size, name);
+	}
+
+	tc = talloc_chunk_from_ptr(ptr);
+
+	/* don't allow realloc on referenced pointers */
+	if (tc->refs) {
+		return NULL;
+	}
+
+	/* by resetting magic we catch users of the old memory */
+	tc->magic = TALLOC_MAGIC_FREE;
+
+#if ALWAYS_REALLOC
+	new_ptr = malloc(size + sizeof(*tc));
+	if (!new_ptr) {
+		tc->magic = TALLOC_MAGIC;
+		if (malloc_fail_handler)
+			if (malloc_fail_handler(malloc_fail_data))
+				new_ptr = malloc(size + sizeof(*tc));
+	}
+	if (new_ptr) {
+		/* when shrinking, copy only what fits into the new chunk;
+		   copying the old size would overrun the new buffer */
+		size_t keep = tc->size < size ? tc->size : size;
+		memcpy(new_ptr, tc, keep + sizeof(*tc));
+		free(tc);
+	}
+#else
+	new_ptr = realloc(tc, size + sizeof(*tc));
+	if (!new_ptr) {
+		tc->magic = TALLOC_MAGIC;
+		if (malloc_fail_handler)
+			if (malloc_fail_handler(malloc_fail_data))
+				new_ptr = realloc(tc, size + sizeof(*tc));
+	}
+#endif
+	if (!new_ptr) {
+		tc->magic = TALLOC_MAGIC;
+		return NULL;
+	}
+
+	/* the chunk may have moved: repoint every neighbour at the new address */
+	tc = new_ptr;
+	tc->magic = TALLOC_MAGIC;
+	if (tc->parent) {
+		/* only the head of a child list carries a parent link */
+		tc->parent->child = new_ptr;
+	}
+	if (tc->child) {
+		tc->child->parent = new_ptr;
+	}
+
+	if (tc->prev) {
+		tc->prev->next = tc;
+	}
+	if (tc->next) {
+		tc->next->prev = tc;
+	}
+
+	tc->size = size;
+	talloc_set_name_const(tc+1, name);
+
+	return (void *)(tc+1);
+}
+
+/*
+ move a lump of memory from one talloc context to another return the
+ ptr on success, or NULL if it could not be transferred.
+ passing NULL as ptr will always return NULL with no side effects.
+
+ A NULL new_ctx stands for null_context; if that is NULL too, ptr is
+ simply detached from its current parent.
+*/
+void *talloc_steal(const void *new_ctx, const void *ptr)
+{
+ struct talloc_chunk *tc, *new_tc;
+
+ if (!ptr) {
+ return NULL;
+ }
+
+ if (new_ctx == NULL) {
+ new_ctx = null_context;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ if (new_ctx == NULL) { /* no new owner: just unhook ptr */
+ if (tc->parent) {
+ _TLIST_REMOVE(tc->parent->child, tc);
+ if (tc->parent->child) {
+ tc->parent->child->parent = tc->parent; /* new list head inherits the parent link */
+ }
+ } else {
+ if (tc->prev) tc->prev->next = tc->next;
+ if (tc->next) tc->next->prev = tc->prev;
+ }
+
+ tc->parent = tc->next = tc->prev = NULL;
+ return discard_const_p(void, ptr);
+ }
+
+ new_tc = talloc_chunk_from_ptr(new_ctx);
+
+ if (tc == new_tc) { /* a chunk cannot become its own parent; nothing to do */
+ return discard_const_p(void, ptr);
+ }
+
+ if (tc->parent) {
+ _TLIST_REMOVE(tc->parent->child, tc);
+ if (tc->parent->child) {
+ tc->parent->child->parent = tc->parent;
+ }
+ } else {
+ if (tc->prev) tc->prev->next = tc->next;
+ if (tc->next) tc->next->prev = tc->prev;
+ }
+
+ tc->parent = new_tc;
+ if (new_tc->child) new_tc->child->parent = NULL; /* old head gives up the parent link */
+ _TLIST_ADD(new_tc->child, tc);
+
+ return discard_const_p(void, ptr);
+}
+
+/*
+  return the number of user bytes held by ptr and all of its
+  descendants. A NULL ptr stands for the null context; 0 is returned
+  when that is NULL as well.
+*/
+off_t talloc_total_size(const void *ptr)
+{
+	const void *root = ptr ? ptr : null_context;
+	struct talloc_chunk *tc, *kid;
+	off_t sum;
+
+	if (root == NULL) {
+		return 0;
+	}
+
+	tc = talloc_chunk_from_ptr(root);
+	sum = tc->size;
+
+	/* add up each child's subtree recursively */
+	kid = tc->child;
+	while (kid != NULL) {
+		sum += talloc_total_size(kid+1);
+		kid = kid->next;
+	}
+	return sum;
+}
+
+/*
+  return the number of chunks in the subtree rooted at ptr, counting ptr
+  itself. A NULL ptr stands for the null context; 0 is returned when
+  that is NULL as well.
+*/
+off_t talloc_total_blocks(const void *ptr)
+{
+	const void *root = ptr ? ptr : null_context;
+	struct talloc_chunk *kid;
+	off_t count = 1;	/* the root chunk itself */
+
+	if (root == NULL) {
+		return 0;
+	}
+
+	kid = talloc_chunk_from_ptr(root)->child;
+	while (kid != NULL) {
+		count += talloc_total_blocks(kid+1);
+		kid = kid->next;
+	}
+	return count;
+}
+
+/*
+  count the reference handles currently attached to ptr
+*/
+static int talloc_reference_count(const void *ptr)
+{
+	struct talloc_reference_handle *h = talloc_chunk_from_ptr(ptr)->refs;
+	int count = 0;
+
+	while (h != NULL) {
+		count++;
+		h = h->next;
+	}
+	return count;
+}
+
+/*
+  print one line per descendant of ptr to f, indented four columns per
+  nesting level starting at depth. Reference handles are shown as
+  "reference to: <name>" and are not descended into.
+*/
+void talloc_report_depth(const void *ptr, FILE *f, int depth)
+{
+	const int indent = depth*4;
+	struct talloc_chunk *kid;
+
+	for (kid = talloc_chunk_from_ptr(ptr)->child; kid != NULL; kid = kid->next) {
+		void *kid_ptr = kid+1;
+
+		if (kid->name == TALLOC_MAGIC_REFERENCE) {
+			struct talloc_reference_handle *handle = kid_ptr;
+
+			fprintf(f, "%*sreference to: %s\n", indent, "",
+				talloc_get_name(handle->ptr));
+		} else {
+			const char *label = talloc_get_name(kid_ptr);
+			unsigned long bytes = (unsigned long)talloc_total_size(kid_ptr);
+			unsigned long blocks = (unsigned long)talloc_total_blocks(kid_ptr);
+			int refs = talloc_reference_count(kid_ptr);
+
+			fprintf(f, "%*s%-30s contains %6lu bytes in %3lu blocks (ref %d)\n",
+				indent, "", label, bytes, blocks, refs);
+			talloc_report_depth(kid_ptr, f, depth+1);
+		}
+	}
+}
+
+/*
+  write a summary line for ptr followed by the complete tree of its
+  descendants to f. A NULL ptr stands for the null context; nothing is
+  printed when that is NULL as well.
+*/
+void talloc_report_full(const void *ptr, FILE *f)
+{
+	const void *root = ptr ? ptr : null_context;
+	unsigned long bytes, blocks;
+
+	if (root == NULL) {
+		return;
+	}
+
+	bytes = (unsigned long)talloc_total_size(root);
+	blocks = (unsigned long)talloc_total_blocks(root);
+	fprintf(f,"full talloc report on '%s' (total %lu bytes in %lu blocks)\n",
+		talloc_get_name(root), bytes, blocks);
+
+	talloc_report_depth(root, f, 1);
+	fflush(f);
+}
+
+/*
+  write a summary line for ptr and one line per direct child to f. A
+  NULL ptr stands for the null context; nothing is printed when that is
+  NULL as well.
+*/
+void talloc_report(const void *ptr, FILE *f)
+{
+	const void *root = ptr ? ptr : null_context;
+	struct talloc_chunk *kid;
+
+	if (root == NULL) {
+		return;
+	}
+
+	fprintf(f,"talloc report on '%s' (total %lu bytes in %lu blocks)\n",
+		talloc_get_name(root),
+		(unsigned long)talloc_total_size(root),
+		(unsigned long)talloc_total_blocks(root));
+
+	kid = talloc_chunk_from_ptr(root)->child;
+	while (kid != NULL) {
+		void *kid_ptr = kid+1;
+
+		fprintf(f, "\t%-30s contains %6lu bytes in %3lu blocks\n",
+			talloc_get_name(kid_ptr),
+			(unsigned long)talloc_total_size(kid_ptr),
+			(unsigned long)talloc_total_blocks(kid_ptr));
+		kid = kid->next;
+	}
+	fflush(f);
+}
+
+/*
+  atexit() hook: print a short report on the null context to stderr,
+  but only if it still holds any bytes
+*/
+static void talloc_report_null(void)
+{
+	if (talloc_total_size(null_context) == 0) {
+		return;
+	}
+	talloc_report(null_context, stderr);
+}
+
+/*
+  atexit() hook: print a full tree report on the null context to
+  stderr, but only if it still holds any bytes
+*/
+static void talloc_report_null_full(void)
+{
+	if (talloc_total_size(null_context) == 0) {
+		return;
+	}
+	talloc_report_full(null_context, stderr);
+}
+
+/*
+  enable tracking of the NULL context: create the "null_context" chunk
+  once; later calls leave the existing one in place
+*/
+void talloc_enable_null_tracking(void)
+{
+	if (null_context != NULL) {
+		return;
+	}
+	null_context = talloc_named_const(NULL, 0, "null_context");
+}
+
+/*
+ enable leak reporting on exit: turns on null context tracking and
+ registers talloc_report_null() with atexit(). Each call registers the
+ handler again.
+*/
+void talloc_enable_leak_report(void)
+{
+ talloc_enable_null_tracking();
+ atexit(talloc_report_null);
+}
+
+/*
+ enable full leak reporting on exit: turns on null context tracking and
+ registers talloc_report_null_full() with atexit(). Each call registers
+ the handler again.
+*/
+void talloc_enable_leak_report_full(void)
+{
+ talloc_enable_null_tracking();
+ atexit(talloc_report_null_full);
+}
+
+/*
+  allocate size bytes named name under ctx and clear them to zero.
+  Returns NULL if the allocation fails.
+*/
+void *_talloc_zero(const void *ctx, size_t size, const char *name)
+{
+	void *mem = talloc_named_const(ctx, size, name);
+
+	if (mem == NULL) {
+		return NULL;
+	}
+	memset(mem, '\0', size);
+	return mem;
+}
+
+
+/*
+  allocate size bytes named name under t and fill them with a copy of
+  the size bytes at p. Returns NULL if the allocation fails.
+*/
+void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name)
+{
+	void *copy = talloc_named_const(t, size, name);
+
+	if (copy == NULL) {
+		return NULL;
+	}
+	memcpy(copy, p, size);
+	return copy;
+}
+
+/*
+  duplicate the string p, terminator included, as a child of t. The
+  copy is named after its own contents. Returns NULL if p is NULL or
+  the allocation fails.
+*/
+char *talloc_strdup(const void *t, const char *p)
+{
+	char *copy;
+
+	if (p == NULL) {
+		return NULL;
+	}
+
+	copy = talloc_memdup(t, p, strlen(p) + 1);
+	if (copy == NULL) {
+		return NULL;
+	}
+	talloc_set_name_const(copy, copy);
+	return copy;
+}
+
+/*
+  strndup with a talloc: copy at most n bytes of p into a new,
+  NUL-terminated child of t. The copy is named after its own contents.
+  Returns NULL if p is NULL or the allocation fails.
+*/
+char *talloc_strndup(const void *t, const char *p, size_t n)
+{
+	size_t len;
+	char *ret;
+
+	/* same contract as talloc_strdup() */
+	if (!p) {
+		return NULL;
+	}
+
+	/* test the bound before the byte so that p[n] is never read; p
+	   need not be terminated within its first n bytes */
+	for (len=0; len<n && p[len]; len++) ;
+
+	ret = _talloc(t, len + 1);
+	if (!ret) { return NULL; }
+	memcpy(ret, p, len);
+	ret[len] = 0;
+	talloc_set_name_const(ret, ret);
+	return ret;
+}
+
+#ifndef VA_COPY /* choose how to duplicate a va_list, unless the build already defined it */
+#ifdef HAVE_VA_COPY
+#define VA_COPY(dest, src) va_copy(dest, src)
+#elif defined(HAVE___VA_COPY)
+#define VA_COPY(dest, src) __va_copy(dest, src)
+#else
+#define VA_COPY(dest, src) (dest) = (src) /* last resort: plain assignment */
+#endif
+#endif
+
+/*
+  vasprintf with a talloc: format fmt/ap into a new string that is a
+  child of t and named after its own contents. ap is not consumed; the
+  function works on copies. Returns NULL if formatting or the
+  allocation fails.
+*/
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap)
+{
+	int len;
+	char *ret;
+	va_list ap2;
+
+	/* first pass only measures the output */
+	VA_COPY(ap2, ap);
+	len = vsnprintf(NULL, 0, fmt, ap2);
+	va_end(ap2);
+	if (len < 0) {
+		/* a negative length must not be turned into an allocation size */
+		return NULL;
+	}
+
+	ret = _talloc(t, len+1);
+	if (ret) {
+		VA_COPY(ap2, ap);
+		vsnprintf(ret, len+1, fmt, ap2);
+		va_end(ap2);
+		talloc_set_name_const(ret, ret);
+	}
+
+	return ret;
+}
+
+
+/*
+ Perform string formatting, and return a pointer to newly allocated
+ memory holding the result, inside a memory pool.
+
+ t is the talloc context that becomes the parent of the string; the
+ result is whatever talloc_vasprintf() returns (NULL on allocation
+ failure).
+ */
+char *talloc_asprintf(const void *t, const char *fmt, ...)
+{
+ va_list ap;
+ char *ret;
+
+ va_start(ap, fmt);
+ ret = talloc_vasprintf(t, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+
+
+/**
+ * Realloc @p s to append the formatted result of @p fmt and @p ap,
+ * and return @p s, which may have moved. Good for gradually
+ * accumulating output into a string buffer.
+ **/
+
+static char *talloc_vasprintf_append(char *s, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+
+static char *talloc_vasprintf_append(char *s, const char *fmt, va_list ap)
+{
+	struct talloc_chunk *tc;
+	int len, s_len;
+	va_list ap2;
+
+	/* nothing to append to: behave like talloc_vasprintf() */
+	if (s == NULL) {
+		return talloc_vasprintf(NULL, fmt, ap);
+	}
+
+	tc = talloc_chunk_from_ptr(s);
+
+	/* the chunk size includes the terminator; a zero-sized chunk holds
+	   no string at all, and size-1 would put the write position in
+	   front of the buffer */
+	s_len = tc->size ? tc->size - 1 : 0;
+
+	/* first pass only measures the text to append */
+	VA_COPY(ap2, ap);
+	len = vsnprintf(NULL, 0, fmt, ap2);
+	va_end(ap2);
+	if (len < 0) {
+		/* formatting failed: hand back the string unchanged rather
+		   than resizing it with a bogus length */
+		return s;
+	}
+
+	s = talloc_realloc(NULL, s, char, s_len + len+1);
+	if (!s) return NULL;
+
+	VA_COPY(ap2, ap);
+	vsnprintf(s+s_len, len+1, fmt, ap2);
+	va_end(ap2);
+
+	/* realloc reset the name; keep the "named after contents" convention */
+	talloc_set_name_const(s, s);
+
+	return s;
+}
+
+/*
+ Realloc @p s to append the formatted result of @p fmt and return @p
+ s, which may have moved. Good for gradually accumulating output
+ into a string buffer.
+
+ The caller must continue with the returned pointer; the work is done
+ by talloc_vasprintf_append().
+ */
+char *talloc_asprintf_append(char *s, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ s = talloc_vasprintf_append(s, fmt, ap);
+ va_end(ap);
+ return s;
+}
+
+/*
+  alloc an array of count elements of el_size bytes each, checking for
+  integer overflow in the array size. Returns NULL if the total would
+  reach MAX_TALLOC_SIZE or the allocation fails.
+*/
+void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name)
+{
+	/* a zero element size cannot overflow and must not be used as a divisor */
+	if (el_size != 0 && count >= MAX_TALLOC_SIZE/el_size) {
+		return NULL;
+	}
+	return talloc_named_const(ctx, el_size * count, name);
+}
+
+/*
+  alloc a zeroed array of count elements of el_size bytes each, checking
+  for integer overflow in the array size. Returns NULL if the total
+  would reach MAX_TALLOC_SIZE or the allocation fails.
+*/
+void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name)
+{
+	/* a zero element size cannot overflow and must not be used as a divisor */
+	if (el_size != 0 && count >= MAX_TALLOC_SIZE/el_size) {
+		return NULL;
+	}
+	return _talloc_zero(ctx, el_size * count, name);
+}
+
+
+/*
+  realloc an array to count elements of el_size bytes each, checking for
+  integer overflow in the array size. Returns NULL, leaving ptr
+  untouched, if the total would reach MAX_TALLOC_SIZE; otherwise behaves
+  like _talloc_realloc() with the computed size.
+*/
+void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name)
+{
+	/* a zero element size cannot overflow and must not be used as a divisor */
+	if (el_size != 0 && count >= MAX_TALLOC_SIZE/el_size) {
+		return NULL;
+	}
+	return _talloc_realloc(ctx, ptr, el_size * count, name);
+}
+
+/*
+ a function version of talloc_realloc(), so it can be passed as a function pointer
+ to libraries that want a realloc function (a realloc function encapsulates
+ all the basic capabilities of an allocation library, which is why this is useful)
+
+ the chunk name is passed as NULL, so memory obtained or resized through
+ this function is left unnamed
+*/
+void *talloc_realloc_fn(const void *context, void *ptr, size_t size)
+{
+ return _talloc_realloc(context, ptr, size, NULL);
+}
+
+
+static void talloc_autofree(void) /* atexit() hook registered by talloc_autofree_context() */
+{
+ talloc_free(cleanup_context); /* releases the autofree context and everything below it */
+ cleanup_context = NULL;
+}
+
+/*
+  return the shared context that is freed automatically at exit, which
+  keeps long-lived allocations out of leak reports. The context is
+  created, and its atexit() hook registered, on first use.
+*/
+void *talloc_autofree_context(void)
+{
+	if (cleanup_context != NULL) {
+		return cleanup_context;
+	}
+
+	cleanup_context = talloc_named_const(NULL, 0, "autofree_context");
+	atexit(talloc_autofree);
+	return cleanup_context;
+}
+
+size_t talloc_get_size(const void *context)
+{
+	/* size of the user data of a talloc pointer; NULL counts as empty */
+	return (context != NULL) ? talloc_chunk_from_ptr(context)->size : 0;
+}
+
+talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *handler,
+					      void *data)
+{
+	/* install the out-of-memory callback and its argument, handing the
+	   previously installed callback back to the caller */
+	talloc_fail_handler *previous = malloc_fail_handler;
+
+	malloc_fail_data = data;
+	malloc_fail_handler = handler;
+	return previous;
+}
diff --git a/tools/xenstore/talloc.h b/tools/xenstore/talloc.h
new file mode 100644
index 0000000000..39bcb53fb7
--- /dev/null
+++ b/tools/xenstore/talloc.h
@@ -0,0 +1,134 @@
+#ifndef _TALLOC_H_
+#define _TALLOC_H_
+/*
+ Unix SMB/CIFS implementation.
+ Samba temporary memory allocation functions
+
+ Copyright (C) Andrew Tridgell 2004-2005
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/* this is only needed for compatibility with the old talloc */
+typedef void TALLOC_CTX;
+
+/*
+ this uses a little trick to allow __LINE__ to be stringified
+*/
+#define _STRING_LINE_(s) #s
+#define _STRING_LINE2_(s) _STRING_LINE_(s)
+#define __LINESTR__ _STRING_LINE2_(__LINE__)
+#define __location__ __FILE__ ":" __LINESTR__
+
+#ifndef TALLOC_DEPRECATED
+#define TALLOC_DEPRECATED 0
+#endif
+
+/* useful macros for creating type checked pointers.
+ The variants taking a type cast the result to (type *) and name the
+ chunk after the stringified type; the *_size variants take a byte count
+ and name the chunk after the calling source location (__location__). */
+#define talloc(ctx, type) (type *)talloc_named_const(ctx, sizeof(type), #type)
+#define talloc_size(ctx, size) talloc_named_const(ctx, size, __location__)
+
+#define talloc_new(ctx) talloc_named_const(ctx, 0, "talloc_new: " __location__)
+
+#define talloc_zero(ctx, type) (type *)_talloc_zero(ctx, sizeof(type), #type)
+#define talloc_zero_size(ctx, size) _talloc_zero(ctx, size, __location__)
+
+#define talloc_zero_array(ctx, type, count) (type *)_talloc_zero_array(ctx, sizeof(type), count, #type)
+#define talloc_array(ctx, type, count) (type *)_talloc_array(ctx, sizeof(type), count, #type)
+#define talloc_array_size(ctx, size, count) _talloc_array(ctx, size, count, __location__)
+
+#define talloc_realloc(ctx, p, type, count) (type *)_talloc_realloc_array(ctx, p, sizeof(type), count, #type)
+#define talloc_realloc_size(ctx, ptr, size) _talloc_realloc(ctx, ptr, size, __location__)
+
+#define talloc_memdup(t, p, size) _talloc_memdup(t, p, size, __location__)
+
+#define malloc_p(type) (type *)malloc(sizeof(type))
+#define malloc_array_p(type, count) (type *)realloc_array(NULL, sizeof(type), count)
+#define realloc_p(p, type, count) (type *)realloc_array(p, sizeof(type), count)
+
+#define data_blob(ptr, size) data_blob_named(ptr, size, "DATA_BLOB: "__location__)
+#define data_blob_talloc(ctx, ptr, size) data_blob_talloc_named(ctx, ptr, size, "DATA_BLOB: "__location__)
+#define data_blob_dup_talloc(ctx, blob) data_blob_talloc_named(ctx, (blob)->data, (blob)->length, "DATA_BLOB: "__location__)
+
+#define talloc_set_type(ptr, type) talloc_set_name_const(ptr, #type)
+#define talloc_get_type(ptr, type) (type *)talloc_check_name(ptr, #type)
+
+
+#if TALLOC_DEPRECATED
+#define talloc_zero_p(ctx, type) talloc_zero(ctx, type)
+#define talloc_p(ctx, type) talloc(ctx, type)
+#define talloc_array_p(ctx, type, count) talloc_array(ctx, type, count)
+#define talloc_realloc_p(ctx, p, type, count) talloc_realloc(ctx, p, type, count)
+#define talloc_destroy(ctx) talloc_free(ctx)
+#endif
+
+#ifndef PRINTF_ATTRIBUTE
+#if (__GNUC__ >= 3)
+/** Use gcc attribute to check printf fns. a1 is the 1-based index of
+ * the parameter containing the format, and a2 the index of the first
+ * argument. Note that some gcc 2.x versions don't handle this
+ * properly **/
+#define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (__printf__, a1, a2)))
+#else
+#define PRINTF_ATTRIBUTE(a1, a2)
+#endif
+#endif
+
+
+/* The following definitions come from talloc.c */
+void *_talloc(const void *context, size_t size);
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *));
+void talloc_increase_ref_count(const void *ptr);
+void *talloc_reference(const void *context, const void *ptr);
+int talloc_unlink(const void *context, void *ptr);
+void talloc_set_name(const void *ptr, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+void talloc_set_name_const(const void *ptr, const char *name);
+void *talloc_named(const void *context, size_t size,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
+void *talloc_named_const(const void *context, size_t size, const char *name);
+const char *talloc_get_name(const void *ptr);
+void *talloc_check_name(const void *ptr, const char *name);
+void talloc_report_depth(const void *ptr, FILE *f, int depth);
+void *talloc_parent(const void *ptr);
+void *talloc_init(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2);
+int talloc_free(void *ptr);
+void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name);
+void *talloc_steal(const void *new_ctx, const void *ptr);
+off_t talloc_total_size(const void *ptr);
+off_t talloc_total_blocks(const void *ptr);
+void talloc_report_full(const void *ptr, FILE *f);
+void talloc_report(const void *ptr, FILE *f);
+void talloc_enable_null_tracking(void);
+void talloc_enable_leak_report(void);
+void talloc_enable_leak_report_full(void);
+void *_talloc_zero(const void *ctx, size_t size, const char *name);
+void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name);
+char *talloc_strdup(const void *t, const char *p);
+char *talloc_strndup(const void *t, const char *p, size_t n);
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+char *talloc_asprintf(const void *t, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+char *talloc_asprintf_append(char *s,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name);
+void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name);
+void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name);
+void *talloc_realloc_fn(const void *context, void *ptr, size_t size);
+void *talloc_autofree_context(void);
+size_t talloc_get_size(const void *ctx);
+
+typedef int talloc_fail_handler(void *);
+talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *, void *);
+#endif
+
diff --git a/tools/xenstore/talloc_guide.txt b/tools/xenstore/talloc_guide.txt
new file mode 100644
index 0000000000..c23ac77cad
--- /dev/null
+++ b/tools/xenstore/talloc_guide.txt
@@ -0,0 +1,569 @@
+Using talloc in Samba4
+----------------------
+
+Andrew Tridgell
+September 2004
+
+The most current version of this document is available at
+ http://samba.org/ftp/unpacked/samba4/source/lib/talloc/talloc_guide.txt
+
+If you are used to talloc from Samba3 then please read this carefully,
+as talloc has changed a lot.
+
+The new talloc is a hierarchical, reference counted memory pool system
+with destructors. Quite a mouthful really, but not too bad once you
+get used to it.
+
+Perhaps the biggest change from Samba3 is that there is no distinction
+between a "talloc context" and a "talloc pointer". Any pointer
+returned from talloc() is itself a valid talloc context. This means
+you can do this:
+
+ struct foo *X = talloc(mem_ctx, struct foo);
+ X->name = talloc_strdup(X, "foo");
+
+and the pointer X->name would be a "child" of the talloc context "X"
+which is itself a child of mem_ctx. So if you do talloc_free(mem_ctx)
+then it is all destroyed, whereas if you do talloc_free(X) then just X
+and X->name are destroyed, and if you do talloc_free(X->name) then
+just the name element of X is destroyed.
+
+If you think about this, then what this effectively gives you is an
+n-ary tree, where you can free any part of the tree with
+talloc_free().
+
+If you find this confusing, then I suggest you run the testsuite to
+watch talloc in action. You may also like to add your own tests to
+testsuite.c to clarify how some particular situation is handled.
+
+
+Performance
+-----------
+
+All the additional features of talloc() over malloc() do come at a
+price. We have a simple performance test in Samba4 that measures
+talloc() versus malloc() performance, and it seems that talloc() is
+about 10% slower than malloc() on my x86 Debian Linux box. For Samba,
+the great reduction in code complexity that we get by using talloc
+makes this worthwhile, especially as the total overhead of
+talloc/malloc in Samba is already quite small.
+
+
+talloc API
+----------
+
+The following is a complete guide to the talloc API. Read it all at
+least twice.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc(const void *context, type);
+
+The talloc() macro is the core of the talloc library. It takes a
+memory context and a type, and returns a pointer to a new area of
+memory of the given type.
+
+The returned pointer is itself a talloc context, so you can use it as
+the context argument to more calls to talloc if you wish.
+
+The returned pointer is a "child" of the supplied context. This means
+that if you talloc_free() the context then the new child disappears as
+well. Alternatively you can free just the child.
+
+The context argument to talloc() can be NULL, in which case a new top
+level context is created.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_size(const void *context, size_t size);
+
+The function talloc_size() should be used when you don't have a
+convenient type to pass to talloc(). Unlike talloc(), it is not type
+safe (as it returns a void *), so you are on your own for type checking.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_free(void *ptr);
+
+The talloc_free() function frees a piece of talloc memory, and all its
+children. You can call talloc_free() on any pointer returned by
+talloc().
+
+The return value of talloc_free() indicates success or failure, with 0
+returned for success and -1 for failure. The only possible failure
+condition is if the pointer had a destructor attached to it and the
+destructor returned -1. See talloc_set_destructor() for details on
+destructors.
+
+If this pointer has an additional parent when talloc_free() is called
+then the memory is not actually released, but instead the most
+recently established parent is destroyed. See talloc_reference() for
+details on establishing additional parents.
+
+For more control on which parent is removed, see talloc_unlink().
+
+talloc_free() operates recursively on its children.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_free_children(void *ptr);
+
+The talloc_free_children() function walks along the list of all children of a
+talloc context and talloc_free()s only the children, not the context
+itself.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_reference(const void *context, const void *ptr);
+
+The talloc_reference() function makes "context" an additional parent
+of "ptr".
+
+The return value of talloc_reference() is always the original pointer
+"ptr", unless talloc ran out of memory in creating the reference in
+which case it will return NULL (each additional reference consumes
+around 48 bytes of memory on intel x86 platforms).
+
+If "ptr" is NULL, then the function is a no-op, and simply returns NULL.
+
+After creating a reference you can free it in one of the following
+ways:
+
+ - you can talloc_free() any parent of the original pointer. That
+ will reduce the number of parents of this pointer by 1, and will
+ cause this pointer to be freed if it runs out of parents.
+
+ - you can talloc_free() the pointer itself. That will destroy the
+ most recently established parent to the pointer and leave the
+ pointer as a child of its current parent.
+
+For more control on which parent to remove, see talloc_unlink().
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_unlink(const void *context, const void *ptr);
+
+The talloc_unlink() function removes a specific parent from ptr. The
+context passed must either be a context used in talloc_reference()
+with this pointer, or must be a direct parent of ptr.
+
+Note that if the parent has already been removed using talloc_free()
+then this function will fail and will return -1. Likewise, if "ptr"
+is NULL, then the function will make no modifications and return -1.
+
+Usually you can just use talloc_free() instead of talloc_unlink(), but
+sometimes it is useful to have the additional control on which parent
+is removed.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *));
+
+The function talloc_set_destructor() sets the "destructor" for the
+pointer "ptr". A destructor is a function that is called when the
+memory used by a pointer is about to be released. The destructor
+receives the pointer as an argument, and should return 0 for success
+and -1 for failure.
+
+The destructor can do anything it wants to, including freeing other
+pieces of memory. A common use for destructors is to clean up
+operating system resources (such as open file descriptors) contained
+in the structure the destructor is placed on.
+
+You can only place one destructor on a pointer. If you need more than
+one destructor then you can create a zero-length child of the pointer
+and place an additional destructor on that.
+
+To remove a destructor call talloc_set_destructor() with NULL for the
+destructor.
+
+If your destructor attempts to talloc_free() the pointer that it is
+the destructor for then talloc_free() will return -1 and the free will
+be ignored. This would be a pointless operation anyway, as the
+destructor is only called when the memory is just about to go away.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_increase_ref_count(const void *ptr);
+
+The talloc_increase_ref_count(ptr) function is exactly equivalent to:
+
+ talloc_reference(NULL, ptr);
+
+You can use either syntax, depending on which you think is clearer in
+your code.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_name(const void *ptr, const char *fmt, ...);
+
+Each talloc pointer has a "name". The name is used principally for
+debugging purposes, although it is also possible to set and get the
+name on a pointer as a way of "marking" pointers in your code.
+
+The main use for names on pointers is for "talloc reports". See
+talloc_report() and talloc_report_full() for details. Also see
+talloc_enable_leak_report() and talloc_enable_leak_report_full().
+
+The talloc_set_name() function allocates memory as a child of the
+pointer. It is logically equivalent to:
+ talloc_set_name_const(ptr, talloc_asprintf(ptr, fmt, ...));
+
+Note that multiple calls to talloc_set_name() will allocate more
+memory without releasing the name. All of the memory is released when
+the ptr is freed using talloc_free().
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_name_const(const void *ptr, const char *name);
+
+The function talloc_set_name_const() is just like talloc_set_name(),
+but it takes a string constant, and is much faster. It is extensively
+used by the "auto naming" macros, such as talloc_p().
+
+This function does not allocate any memory. It just copies the
+supplied pointer into the internal representation of the talloc
+ptr. This means you must not pass a name pointer to memory that will
+disappear before the ptr is freed with talloc_free().
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_named(const void *context, size_t size, const char *fmt, ...);
+
+The talloc_named() function creates a named talloc pointer. It is
+equivalent to:
+
+ ptr = talloc_size(context, size);
+ talloc_set_name(ptr, fmt, ....);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_named_const(const void *context, size_t size, const char *name);
+
+This is equivalent to:
+
+ ptr = talloc_size(context, size);
+ talloc_set_name_const(ptr, name);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+const char *talloc_get_name(const void *ptr);
+
+This returns the current name for the given talloc pointer. See
+talloc_set_name() for details.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_init(const char *fmt, ...);
+
+This function creates a zero length named talloc context as a top
+level context. It is equivalent to:
+
+ talloc_named(NULL, 0, fmt, ...);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_new(void *ctx);
+
+This is a utility macro that creates a new memory context hanging
+off an existing context, automatically naming it "talloc_new: __location__"
+where __location__ is the source line it is called from. It is
+particularly useful for creating a new temporary working context.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_realloc(const void *context, void *ptr, type, count);
+
+The talloc_realloc() macro changes the size of a talloc
+pointer. The "count" argument is the number of elements of type "type"
+that you want the resulting pointer to hold.
+
+talloc_realloc() has the following equivalences:
+
+ talloc_realloc(context, NULL, type, 1) ==> talloc(context, type);
+ talloc_realloc(context, NULL, type, N) ==> talloc_array(context, type, N);
+ talloc_realloc(context, ptr, type, 0) ==> talloc_free(ptr);
+
+The "context" argument is only used if "ptr" is NULL, otherwise it
+is ignored.
+
+talloc_realloc() returns the new pointer, or NULL on failure. The call
+will fail either due to a lack of memory, or because the pointer has
+more than one parent (see talloc_reference()).
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_realloc_size(const void *context, void *ptr, size_t size);
+
+The talloc_realloc_size() function is useful when the type is not
+known so the typesafe talloc_realloc() cannot be used.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_steal(const void *new_ctx, const void *ptr);
+
+The talloc_steal() function changes the parent context of a talloc
+pointer. It is typically used when the context that the pointer is
+currently a child of is going to be freed and you wish to keep the
+memory for a longer time.
+
+The talloc_steal() function returns the pointer that you pass it. It
+does not have any failure modes.
+
+NOTE: It is possible to produce loops in the parent/child relationship
+if you are not careful with talloc_steal(). No guarantees are provided
+as to your sanity or the safety of your data if you do this.
+
+talloc_steal (new_ctx, NULL) will return NULL with no side effects.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+off_t talloc_total_size(const void *ptr);
+
+The talloc_total_size() function returns the total size in bytes used
+by this pointer and all child pointers. Mostly useful for debugging.
+
+Passing NULL is allowed, but it will only give a meaningful result if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+off_t talloc_total_blocks(const void *ptr);
+
+The talloc_total_blocks() function returns the total memory block
+count used by this pointer and all child pointers. Mostly useful for
+debugging.
+
+Passing NULL is allowed, but it will only give a meaningful result if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_report(const void *ptr, FILE *f);
+
+The talloc_report() function prints a summary report of all memory
+used by ptr. One line of report is printed for each immediate child of
+ptr, showing the total memory and number of blocks used by that child.
+
+You can pass NULL for the pointer, in which case a report is printed
+for the top level memory context, but only if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_report_full(const void *ptr, FILE *f);
+
+This provides a more detailed report than talloc_report(). It will
+recursively print the entire tree of memory referenced by the
+pointer. References in the tree are shown by giving the name of the
+pointer that is referenced.
+
+You can pass NULL for the pointer, in which case a report is printed
+for the top level memory context, but only if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_leak_report(void);
+
+This enables calling of talloc_report(NULL, stderr) when the program
+exits. In Samba4 this is enabled by using the --leak-report command
+line option.
+
+For it to be useful, this function must be called before any other
+talloc function as it establishes a "null context" that acts as the
+top of the tree. If you don't call this function first then passing
+NULL to talloc_report() or talloc_report_full() won't give you the
+full tree printout.
+
+Here is a typical talloc report:
+
+talloc report on 'null_context' (total 267 bytes in 15 blocks)
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ iconv(UTF8,CP850) contains 42 bytes in 2 blocks
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ iconv(CP850,UTF8) contains 42 bytes in 2 blocks
+ iconv(UTF8,UTF-16LE) contains 45 bytes in 2 blocks
+ iconv(UTF-16LE,UTF8) contains 45 bytes in 2 blocks
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_leak_report_full(void);
+
+This enables calling of talloc_report_full(NULL, stderr) when the
+program exits. In Samba4 this is enabled by using the
+--leak-report-full command line option.
+
+For it to be useful, this function must be called before any other
+talloc function as it establishes a "null context" that acts as the
+top of the tree. If you don't call this function first then passing
+NULL to talloc_report() or talloc_report_full() won't give you the
+full tree printout.
+
+Here is a typical full report:
+
+full talloc report on 'root' (total 18 bytes in 8 blocks)
+ p1 contains 18 bytes in 7 blocks (ref 0)
+ r1 contains 13 bytes in 2 blocks (ref 0)
+ reference to: p2
+ p2 contains 1 bytes in 1 blocks (ref 1)
+ x3 contains 1 bytes in 1 blocks (ref 0)
+ x2 contains 1 bytes in 1 blocks (ref 0)
+ x1 contains 1 bytes in 1 blocks (ref 0)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_null_tracking(void);
+
+This enables tracking of the NULL memory context without enabling leak
+reporting on exit. Useful for when you want to do your own leak
+reporting call via talloc_report_null_full();
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_zero(const void *ctx, type);
+
+The talloc_zero() macro is equivalent to:
+
+ ptr = talloc(ctx, type);
+ if (ptr) memset(ptr, 0, sizeof(type));
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_zero_size(const void *ctx, size_t size)
+
+The talloc_zero_size() function is useful when you don't have a known type.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_memdup(const void *ctx, const void *p, size_t size);
+
+The talloc_memdup() function is equivalent to:
+
+ ptr = talloc_size(ctx, size);
+ if (ptr) memcpy(ptr, p, size);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_strdup(const void *ctx, const char *p);
+
+The talloc_strdup() function is equivalent to:
+
+ ptr = talloc_size(ctx, strlen(p)+1);
+ if (ptr) memcpy(ptr, p, strlen(p)+1);
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_strndup(const void *t, const char *p, size_t n);
+
+The talloc_strndup() function is the talloc equivalent of the C
+library function strndup()
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap);
+
+The talloc_vasprintf() function is the talloc equivalent of the C
+library function vasprintf()
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_asprintf(const void *t, const char *fmt, ...);
+
+The talloc_asprintf() function is the talloc equivalent of the C
+library function asprintf()
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_asprintf_append(char *s, const char *fmt, ...);
+
+The talloc_asprintf_append() function appends the given formatted
+string to the given string.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_array(const void *ctx, type, uint_t count);
+
+The talloc_array() macro is equivalent to:
+
+ (type *)talloc_size(ctx, sizeof(type) * count);
+
+except that it provides integer overflow protection for the multiply,
+returning NULL if the multiply overflows.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_array_size(const void *ctx, size_t size, uint_t count);
+
+The talloc_array_size() function is useful when the type is not
+known. It operates in the same way as talloc_array(), but takes a size
+instead of a type.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_realloc_fn(const void *ctx, void *ptr, size_t size);
+
+This is a non-macro version of talloc_realloc(), which is useful
+as libraries sometimes want a realloc function pointer. A realloc()
+implementation encapsulates the functionality of malloc(), free() and
+realloc() in one call, which is why it is useful to be able to pass
+around a single function pointer.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_autofree_context(void);
+
+This is a handy utility function that returns a talloc context
+which will be automatically freed on program exit. This can be used
+to reduce the noise in memory leak reports.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_check_name(const void *ptr, const char *name);
+
+This function checks if a pointer has the specified name. If it does
+then the pointer is returned. If it doesn't then NULL is returned.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_get_type(const void *ptr, type);
+
+This macro allows you to do type checking on talloc pointers. It is
+particularly useful for void* private pointers. It is equivalent to
+this:
+
+ (type *)talloc_check_name(ptr, #type)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+talloc_set_type(const void *ptr, type);
+
+This macro allows you to force the name of a pointer to be a
+particular type. This can be used in conjunction with
+talloc_get_type() to do type checking on void* pointers.
+
+It is equivalent to this:
+ talloc_set_name_const(ptr, #type)
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+talloc_get_size(const void *ctx);
+
+This function lets you know the amount of memory allocated so far by
+this context. It does NOT account for subcontext memory.
+This can be used to calculate the size of an array.
+
diff --git a/tools/xenstore/testsuite/01simple.sh b/tools/xenstore/testsuite/01simple.sh
new file mode 100644
index 0000000000..9b1eb8f5c3
--- /dev/null
+++ b/tools/xenstore/testsuite/01simple.sh
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# Create an entry, read it.
+[ "`echo -e 'write /test create contents\nread /test' | ./xs_test 2>&1`" = "contents" ]
diff --git a/tools/xenstore/testsuite/02directory.sh b/tools/xenstore/testsuite/02directory.sh
new file mode 100644
index 0000000000..f63ef1ff3d
--- /dev/null
+++ b/tools/xenstore/testsuite/02directory.sh
@@ -0,0 +1,31 @@
+#! /bin/sh
+
+# Root directory has nothing in it.
+[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "" ]
+
+# Create a file.
+[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
+
+# Directory shows it.
+[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "test" ]
+
+# Make a new directory.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+
+# Check it's there.
+DIR="`echo -e 'dir /' | ./xs_test 2>&1`"
+[ "$DIR" = "test
+dir" ] || [ "$DIR" = "dir
+test" ]
+
+# Check it's empty.
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "" ]
+
+# Create a file, check it exists.
+[ "`echo -e 'write /dir/test2 create contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "test2" ]
+[ "`echo -e 'read /dir/test2' | ./xs_test 2>&1`" = "contents2" ]
+
+# Creating dir over the top should fail.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ]
+[ "`echo -e 'mkdir /dir/test2' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ]
diff --git a/tools/xenstore/testsuite/03write.sh b/tools/xenstore/testsuite/03write.sh
new file mode 100644
index 0000000000..cf5f897c54
--- /dev/null
+++ b/tools/xenstore/testsuite/03write.sh
@@ -0,0 +1,17 @@
+#! /bin/sh
+
+# Write without create fails.
+[ "`echo -e 'write /test none contents' | ./xs_test 2>&1`" = "FATAL: write: No such file or directory" ]
+
+# Exclusive write succeeds
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents" ]
+
+# Exclusive write fails to overwrite.
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "FATAL: write: File exists" ]
+
+# Non-exclusive overwrite succeeds.
+[ "`echo -e 'write /test none contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ]
+[ "`echo -e 'write /test create contents3' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents3" ]
diff --git a/tools/xenstore/testsuite/04rm.sh b/tools/xenstore/testsuite/04rm.sh
new file mode 100644
index 0000000000..abadd6110a
--- /dev/null
+++ b/tools/xenstore/testsuite/04rm.sh
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+# Remove non-existent fails.
+[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ]
+[ "`echo -e 'rm /dir/test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ]
+
+# Create file and remove it
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "" ]
+
+# Create directory and remove it.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ]
+
+# Create directory, create file, remove all.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'write /dir/test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ]
diff --git a/tools/xenstore/testsuite/05filepermissions.sh b/tools/xenstore/testsuite/05filepermissions.sh
new file mode 100644
index 0000000000..9d9043f191
--- /dev/null
+++ b/tools/xenstore/testsuite/05filepermissions.sh
@@ -0,0 +1,49 @@
+#! /bin/sh
+
+# Fail to get perms on non-existent file.
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ]
+[ "`echo -e 'getperm /dir/test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ]
+
+# Create file: we own it, no one has access.
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "0 NONE" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone read access to file.
+[ "`echo -e 'setperm /test 0 READ' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone write access to file.
+[ "`echo -e 'setperm /test 0 WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ]
+
+# Grant everyone both read and write access.
+[ "`echo -e 'setperm /test 0 READ/WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ/WRITE" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents2" ]
+[ "`echo -e 'setid 1\nwrite /test none contents3' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ]
+
+# Change so that user 1 owns it, no one else can do anything.
+[ "`echo -e 'setperm /test 1 NONE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ]
+[ "`echo -e 'setid 1\nwrite /test none contents4' | ./xs_test 2>&1`" = "" ]
+
+# User 2 can do nothing.
+[ "`echo -e 'setid 2\nsetperm /test 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ]
+[ "`echo -e 'setid 2\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 2\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /test none contents4' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Tools can always access things.
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents4" ]
+[ "`echo -e 'write /test none contents5' | ./xs_test 2>&1`" = "" ]
diff --git a/tools/xenstore/testsuite/06dirpermissions.sh b/tools/xenstore/testsuite/06dirpermissions.sh
new file mode 100644
index 0000000000..922a794f04
--- /dev/null
+++ b/tools/xenstore/testsuite/06dirpermissions.sh
@@ -0,0 +1,61 @@
+#! /bin/sh
+
+# Root directory: owned by tool, everyone has read access.
+[ "`echo -e 'getperm /' | ./xs_test 2>&1`" = "0 READ" ]
+
+# Create directory: we own it, no one has access.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "0 NONE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 1\nread /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone read access to directory.
+[ "`echo -e 'setperm /dir 0 READ' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone write access to directory.
+[ "`echo -e 'setperm /dir 0 WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /dir/test' | ./xs_test 2>&1`" = "contents" ]
+
+# Grant everyone both read and write access.
+[ "`echo -e 'setperm /dir 0 READ/WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ/WRITE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "test" ]
+[ "`echo -e 'setid 1\nwrite /dir/test2 create contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nread /dir/test2' | ./xs_test 2>&1`" = "contents" ]
+
+# Change so that user 1 owns it, no one else can do anything.
+[ "`echo -e 'setperm /dir 1 NONE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1 | sort`" = "test
+test2" ]
+[ "`echo -e 'setid 1\nwrite /dir/test3 create contents' | ./xs_test 2>&1`" = "" ]
+
+# User 2 can do nothing. Can't even tell if file exists.
+[ "`echo -e 'setid 2\nsetperm /dir 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ]
+[ "`echo -e 'setid 2\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 2\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test3' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test4' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Tools can always access things.
+[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1 | sort`" = "test
+test2
+test3" ]
+[ "`echo -e 'write /dir/test4 create contents' | ./xs_test 2>&1`" = "" ]
+
diff --git a/tools/xenstore/testsuite/07watch.sh b/tools/xenstore/testsuite/07watch.sh
new file mode 100644
index 0000000000..bedce6ad5b
--- /dev/null
+++ b/tools/xenstore/testsuite/07watch.sh
@@ -0,0 +1,32 @@
+#! /bin/sh
+
+# Watch something, write to it, check watch has fired.
+[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
+
+[ "`echo -e '1 watch /test 100\n2 write /test create contents2\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/test" ]
+
+# Check that reads don't set it off.
+[ "`echo -e '1 watch /test 100\n2 read /test\n1 waitwatch' | ./xs_test 2>&1`" = "2:contents2
+1:waitwatch timeout" ]
+
+# mkdir, setperm and rm should fire the watch (this also tests watching dirs).
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e '1 watch /dir 100\n2 mkdir /dir/newdir\n1 waitwatch\n1 ackwatch\n2 setperm /dir/newdir 0 READ\n1 waitwatch\n1 ackwatch\n2 rm /dir/newdir\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/newdir
+1:/dir/newdir
+1:/dir/newdir" ]
+
+# ignore watches while doing commands, should work.
+[ "`echo -e 'watch /dir 100\nwrite /dir/test create contents\nread /dir/test\nwaitwatch\nackwatch' | ./xs_test 2>&1`" = "contents
+/dir/test" ]
+
+# Watch priority test.
+[ "`echo -e '1 watch /dir 1\n3 watch /dir 3\n2 watch /dir 2\nwrite /dir/test create contents\n3 waitwatch\n3 ackwatch\n2 waitwatch\n2 ackwatch\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "3:/dir/test
+2:/dir/test
+1:/dir/test" ]
+
+# If one dies (without acking), the other should still get ack.
+[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 waitwatch\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "2:/dir/test
+1:/dir/test" ]
+
+# If one dies (without reading at all), the other should still get ack.
+[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/test" ]
diff --git a/tools/xenstore/testsuite/08transaction.sh b/tools/xenstore/testsuite/08transaction.sh
new file mode 100644
index 0000000000..2c23ed2496
--- /dev/null
+++ b/tools/xenstore/testsuite/08transaction.sh
@@ -0,0 +1,54 @@
+#! /bin/sh
+# Test transactions.
+
+# Simple transaction: create a file inside transaction.
+[ "`echo -e '1 start /
+1 write /entry1 create contents
+2 dir /
+1 dir /
+1 commit
+2 read /entry1' | ./xs_test`" = "1:entry1
+2:contents" ]
+echo rm /entry1 | ./xs_test
+
+# Create a file and abort transaction.
+[ "`echo -e '1 start /
+1 write /entry1 create contents
+2 dir /
+1 dir /
+1 abort
+2 dir /' | ./xs_test`" = "1:entry1" ]
+
+echo write /entry1 create contents | ./xs_test
+# Delete in transaction, commit
+[ "`echo -e '1 start /
+1 rm /entry1
+2 dir /
+1 dir /
+1 commit
+2 dir /' | ./xs_test`" = "2:entry1" ]
+
+# Delete in transaction, abort.
+echo write /entry1 create contents | ./xs_test
+[ "`echo -e '1 start /
+1 rm /entry1
+2 dir /
+1 dir /
+1 abort
+2 dir /' | ./xs_test`" = "2:entry1
+2:entry1" ]
+
+# Transactions can take as long as they want...
+[ "`echo -e 'start /
+sleep 1
+rm /entry1
+commit
+dir /' | ./xs_test`" = "" ]
+
+# ... as long as no one is waiting.
+[ "`echo -e '1 start /
+2 mkdir /dir
+1 mkdir /dir
+1 dir /
+1 commit' | ./xs_test 2>&1`" = "1:dir
+FATAL: 1: commit: Connection timed out" ]
diff --git a/tools/xenstore/testsuite/09domain.sh b/tools/xenstore/testsuite/09domain.sh
new file mode 100644
index 0000000000..9208dda0ec
--- /dev/null
+++ b/tools/xenstore/testsuite/09domain.sh
@@ -0,0 +1,15 @@
+#! /bin/sh
+# Test domain communication.
+
+# Create a domain, write an entry.
+[ "`echo -e 'introduce 1 100 7 /my/home
+1 write /entry1 create contents
+dir /' | ./xs_test 2>&1`" = "handle is 1
+entry1" ]
+
+# Release that domain.
+[ "`echo -e 'release 1' | ./xs_test`" = "" ]
+
+# Introduce and release by same connection.
+[ "`echo -e 'introduce 1 100 7 /my/home
+release 1' | ./xs_test 2>&1`" = "handle is 1" ]
diff --git a/tools/xenstore/testsuite/test.sh b/tools/xenstore/testsuite/test.sh
new file mode 100755
index 0000000000..5718e84a15
--- /dev/null
+++ b/tools/xenstore/testsuite/test.sh
@@ -0,0 +1,44 @@
+#! /bin/sh
+
+set -e
+set -m
+
+# Run one test script ($1) against a freshly started xenstored_test.
+# $2 is an optional extra flag handed to sh (the caller passes -x for a
+# verbose re-run).  Returns 0 if the script passed and valgrind stayed
+# silent, 1 otherwise.
+run_test()
+{
+	# Abort rather than run rm/mkdir with no operand when the root
+	# directory is unset or empty.
+	rm -rf "${XENSTORED_ROOTDIR:?XENSTORED_ROOTDIR must be set}"
+	mkdir "$XENSTORED_ROOTDIR"
+# Weird failures with this.
+	if type valgrind >/dev/null 2>&1; then
+		valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --no-fork 3>testsuite/tmp/vgout > /tmp/pid &
+		# Wait for the daemon to report its pid.
+		while [ ! -s /tmp/pid ]; do sleep 0; done
+		PID=`cat /tmp/pid`
+		rm /tmp/pid
+	else
+		PID=`./xenstored_test --output-pid`
+	fi
+	# $2 is deliberately unquoted: when empty it must disappear.
+	if sh -e $2 "$1"; then
+		if [ -s testsuite/tmp/vgout ]; then
+			kill "$PID"
+			echo VALGRIND errors:
+			cat testsuite/tmp/vgout
+			return 1
+		fi
+		echo shutdown | ./xs_test
+		return 0
+	else
+		# In case daemon is wedged.
+		kill "$PID"
+		sleep 1
+		return 1
+	fi
+}
+
+for f in testsuite/[0-9]*.sh; do
+ if run_test $f; then
+ echo Test $f passed...
+ else
+ echo Test $f failed, running verbosely...
+ run_test $f -x
+ exit 1
+ fi
+done
diff --git a/tools/xenstore/utils.c b/tools/xenstore/utils.c
new file mode 100644
index 0000000000..2345021f70
--- /dev/null
+++ b/tools/xenstore/utils.c
@@ -0,0 +1,143 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "utils.h"
+
+/* printf-style output to /dev/console, or to stderr when the console
+ * cannot be opened.  The stream is chosen on the first call and reused;
+ * every message is flushed immediately. */
+void xprintf(const char *fmt, ...)
+{
+	static FILE *out = NULL;
+	va_list ap;
+
+	if (out == NULL) {
+		out = fopen("/dev/console", "w");
+		if (out == NULL)
+			out = stderr;
+	}
+
+	va_start(ap, fmt);
+	vfprintf(out, fmt, ap);
+	va_end(ap);
+	fflush(out);
+}
+
+/* Report a fatal error and terminate.
+ * Prints "FATAL: " followed by the printf-style message and a newline
+ * through xprintf(), then calls exit(1).  Never returns. */
+void barf(const char *fmt, ...)
+{
+	char *str;
+	int len;
+	va_list arglist;
+
+	xprintf("FATAL: ");
+
+	va_start(arglist, fmt);
+	len = vasprintf(&str, fmt, arglist);
+	va_end(arglist);
+
+	if (len >= 0) {
+		xprintf("%s\n", str);
+		free(str);
+	} else {
+		/* vasprintf() failed, so str is indeterminate: neither print
+		 * nor free it.  Show the raw format so the message isn't lost. */
+		xprintf("%s\n", fmt);
+	}
+	exit(1);
+}
+
+/* Report a fatal error together with the current errno and terminate.
+ * errno is captured on entry, before anything can overwrite it.  Prints
+ * "FATAL: <message>: <strerror(errno)>\n" through xprintf(), then calls
+ * exit(1).  Never returns. */
+void barf_perror(const char *fmt, ...)
+{
+	char *str;
+	int len;
+	int err = errno;
+	va_list arglist;
+
+	xprintf("FATAL: ");
+
+	va_start(arglist, fmt);
+	len = vasprintf(&str, fmt, arglist);
+	va_end(arglist);
+
+	if (len >= 0) {
+		xprintf("%s: %s\n", str, strerror(err));
+		free(str);
+	} else {
+		/* vasprintf() failed, so str is indeterminate: neither print
+		 * nor free it.  Show the raw format so the message isn't lost. */
+		xprintf("%s: %s\n", fmt, strerror(err));
+	}
+	exit(1);
+}
+
+/* Resize ptr to hold num elements of size bytes each.
+ * Returns NULL, leaving ptr untouched, when the total byte count could
+ * overflow size_t; otherwise returns the result of realloc_nofail(). */
+void *_realloc_array(void *ptr, size_t size, size_t num)
+{
+	/* A zero element size cannot overflow and must not be a divisor. */
+	if (size != 0 && num >= SIZE_MAX / size)
+		return NULL;
+	return realloc_nofail(ptr, size * num);
+}
+
+/* realloc() that never hands back NULL: if realloc() fails, the requested
+ * size is reported through barf(), which does not return. */
+void *realloc_nofail(void *ptr, size_t size)
+{
+	void *resized = realloc(ptr, size);
+
+	if (!resized)
+		barf("realloc of %zu failed", size);
+	return resized;
+}
+
+/* malloc() that never hands back NULL: if malloc() fails, the requested
+ * size is reported through barf(), which does not return. */
+void *malloc_nofail(size_t size)
+{
+	void *mem = malloc(size);
+
+	if (!mem)
+		barf("malloc of %zu failed", size);
+	return mem;
+}
+
+/* Stevens. */
+/* Detach the calling process so it can run as a daemon: fork and let the
+ * parent exit(0), close stdin/stdout/stderr in the child, start a new
+ * session, change directory to "/" and clear the umask.  A failed fork()
+ * is fatal via barf_perror().
+ * NOTE(review): the results of setsid() and chdir() are not checked, and
+ * the three standard descriptors are closed rather than reopened, so later
+ * open()/socket() calls may be handed fds 0-2 -- confirm this is intended. */
+void daemonize(void)
+{
+ pid_t pid;
+
+ /* Separate from our parent via fork, so init inherits us. */
+ if ((pid = fork()) < 0)
+ barf_perror("Failed to fork daemon");
+ /* Parent: nothing more to do. */
+ if (pid != 0)
+ exit(0);
+
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+ /* Session leader so ^C doesn't whack us. */
+ setsid();
+ /* Move off any mount points we might be in. */
+ chdir("/");
+ /* Discard our parent's old-fashioned umask prejudices. */
+ umask(0);
+}
+
+
+/* This version adds one byte (for nul term).
+ * Read all of filename ("-" means standard input) into a malloc()ed
+ * buffer.  On success returns the buffer, stores the number of bytes read
+ * in *size and places a nul byte just after the data.  Returns NULL, with
+ * errno from the failing call, if the file cannot be opened, a read fails
+ * or memory runs out.  Free the result with release_file(). */
+void *grab_file(const char *filename, unsigned long *size)
+{
+	unsigned int max = 16384;
+	int ret, fd, saved_errno;
+	char *buffer;
+
+	if (streq(filename, "-"))
+		fd = dup(STDIN_FILENO);
+	else
+		fd = open(filename, O_RDONLY, 0);
+
+	if (fd < 0)
+		return NULL;
+
+	*size = 0;
+	/* The buffer always has one byte beyond max for the terminator. */
+	buffer = malloc(max + 1);
+	if (!buffer)
+		goto fail;
+
+	while ((ret = read(fd, buffer + *size, max - *size)) > 0) {
+		*size += ret;
+		if (*size == max) {
+			char *bigger;
+
+			/* Double the capacity and keep the spare byte.  The
+			 * old "max *= 2 + 1" parsed as "max *= 3" and
+			 * allocated no room for the nul. */
+			max *= 2;
+			bigger = realloc(buffer, max + 1);
+			if (!bigger)
+				goto fail;
+			buffer = bigger;
+		}
+	}
+	if (ret < 0)
+		goto fail;
+
+	buffer[*size] = '\0';
+	close(fd);
+	return buffer;
+
+fail:
+	/* Don't let cleanup overwrite the error the caller will look at. */
+	saved_errno = errno;
+	free(buffer);
+	close(fd);
+	errno = saved_errno;
+	return NULL;
+}
+
+void release_file(void *data, unsigned long size __attribute__((unused)))
+{
+ free(data);
+}
diff --git a/tools/xenstore/utils.h b/tools/xenstore/utils.h
new file mode 100644
index 0000000000..a84f19a22a
--- /dev/null
+++ b/tools/xenstore/utils.h
@@ -0,0 +1,61 @@
+#ifndef _UTILS_H
+#define _UTILS_H
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+
+/* Is A == B ? */
+#define streq(a,b) (strcmp((a),(b)) == 0)
+
+/* Does A start with B ? */
+#define strstarts(a,b) (strncmp((a),(b),strlen(b)) == 0)
+
+/* Does A end in B ?  (Every string ends in the empty string.) */
+static inline bool strends(const char *a, const char *b)
+{
+	size_t alen = strlen(a);
+	size_t blen = strlen(b);
+
+	/* A suffix can never be longer than the string itself. */
+	if (alen < blen)
+		return false;
+
+	return strcmp(a + (alen - blen), b) == 0;
+}
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define ___stringify(x) #x
+#define __stringify(x) ___stringify(x)
+
+/* Convenient wrappers for malloc and realloc. Use them. */
+#define new(type) ((type *)malloc_nofail(sizeof(type)))
+#define new_array(type, num) realloc_array((type *)0, (num))
+#define realloc_array(ptr, num) ((__typeof__(ptr))_realloc_array((ptr), sizeof((*ptr)), (num)))
+
+void *malloc_nofail(size_t size);
+void *realloc_nofail(void *ptr, size_t size);
+void *_realloc_array(void *ptr, size_t size, size_t num);
+
+void barf(const char *fmt, ...) __attribute__((noreturn));
+void barf_perror(const char *fmt, ...) __attribute__((noreturn));
+
+/* This version adds one byte (for nul term) */
+void *grab_file(const char *filename, unsigned long *size);
+void release_file(void *data, unsigned long size);
+
+/* For writing daemons, based on Stevens. */
+void daemonize(void);
+
+/* Signal handling: returns fd to listen on. */
+int signal_to_fd(int signal);
+void close_signal(int fd);
+
+void xprintf(const char *fmt, ...);
+
+#define eprintf(_fmt, _args...) xprintf("[ERR] %s" _fmt, __FUNCTION__, ##_args)
+#define iprintf(_fmt, _args...) xprintf("[INF] %s" _fmt, __FUNCTION__, ##_args)
+
+#ifdef DEBUG
+#define dprintf(_fmt, _args...) xprintf("[DBG] %s" _fmt, __FUNCTION__, ##_args)
+#else
+#define dprintf(_fmt, _args...) ((void)0)
+#endif
+
+#endif /* _UTILS_H */
diff --git a/tools/xenstore/xenstored.h b/tools/xenstore/xenstored.h
new file mode 100644
index 0000000000..784ec987a8
--- /dev/null
+++ b/tools/xenstore/xenstored.h
@@ -0,0 +1,81 @@
+/*
+ Simple prototype Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_H
+#define _XENSTORED_H
+
+enum xsd_sockmsg_type
+{
+ XS_DEBUG,
+ XS_SHUTDOWN,
+ XS_DIRECTORY,
+ XS_READ,
+ XS_GET_PERMS,
+ XS_WATCH,
+ XS_WATCH_ACK,
+ XS_UNWATCH,
+ XS_TRANSACTION_START,
+ XS_TRANSACTION_END,
+ XS_OP_READ_ONLY = XS_TRANSACTION_END,
+ XS_INTRODUCE,
+ XS_RELEASE,
+ XS_GETDOMAINPATH,
+ XS_WRITE,
+ XS_MKDIR,
+ XS_RM,
+ XS_SET_PERMS,
+ XS_WATCH_EVENT,
+ XS_ERROR,
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors
+{
+ int errnum;
+ const char *errstring;
+};
+#define XSD_ERROR(x) { x, #x }
+static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
+ XSD_ERROR(EINVAL),
+ XSD_ERROR(EACCES),
+ XSD_ERROR(EEXIST),
+ XSD_ERROR(EISDIR),
+ XSD_ERROR(ENOENT),
+ XSD_ERROR(ENOMEM),
+ XSD_ERROR(ENOSPC),
+ XSD_ERROR(EIO),
+ XSD_ERROR(ENOTEMPTY),
+ XSD_ERROR(ENOSYS),
+ XSD_ERROR(EROFS),
+ XSD_ERROR(EBUSY),
+ XSD_ERROR(ETIMEDOUT),
+ XSD_ERROR(EISCONN),
+};
+struct xsd_sockmsg
+{
+ u32 type;
+ u32 len; /* Length of data following this. */
+
+ /* Generally followed by nul-terminated string(s). */
+};
+
+#endif /* _XENSTORED_H */
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
new file mode 100644
index 0000000000..9d15848463
--- /dev/null
+++ b/tools/xenstore/xenstored_core.c
@@ -0,0 +1,1354 @@
+/*
+ Simple prototype Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <errno.h>
+#include <dirent.h>
+#include <getopt.h>
+#include <signal.h>
+#include <assert.h>
+#include <setjmp.h>
+
+//#define DEBUG
+#include "utils.h"
+#include "list.h"
+#include "talloc.h"
+#include "xs_lib.h"
+#include "xenstored.h"
+#include "xenstored_core.h"
+#include "xenstored_watch.h"
+#include "xenstored_transaction.h"
+#include "xenstored_domain.h"
+
+static bool verbose;
+static LIST_HEAD(connections);
+
+#ifdef TESTING
+static bool failtest = false;
+
+/* We override talloc's malloc. */
+/* Test-only allocator: behaves like malloc(), except that while failtest
+ * is set it returns NULL for about one call in 32 to simulate running out
+ * of memory. */
+void *test_malloc(size_t size)
+{
+ /* Fails 1 call in 32.  (This comment used to say that 1 in 20 means
+ * only about 50% of connections establish.) */
+ if (failtest && (random() % 32) == 0)
+ return NULL;
+ return malloc(size);
+}
+
+static void stop_failtest(int signum __attribute__((unused)))
+{
+ failtest = false;
+}
+
+/* Need these before we #define away write_all/mkdir in testing.h */
+bool test_write_all(int fd, void *contents, unsigned int len);
+bool test_write_all(int fd, void *contents, unsigned int len)
+{
+ if (failtest && (random() % 8) == 0) {
+ if (len)
+ len = random() % len;
+ write(fd, contents, len);
+ errno = ENOSPC;
+ return false;
+ }
+ return write_all(fd, contents, len);
+}
+
+int test_mkdir(const char *dir, int perms);
+int test_mkdir(const char *dir, int perms)
+{
+ if (failtest && (random() % 8) == 0) {
+ errno = ENOSPC;
+ return -1;
+ }
+ return mkdir(dir, perms);
+}
+#endif /* TESTING */
+
+#include "xenstored_test.h"
+
+/* FIXME: Ideally, this should never be called. Some can be eliminated. */
+/* Something is horribly wrong: shutdown immediately. */
+void __attribute__((noreturn)) corrupt(struct connection *conn,
+ const char *fmt, ...)
+{
+ va_list arglist;
+ char *str;
+ int saved_errno = errno;
+
+ va_start(arglist, fmt);
+ str = talloc_vasprintf(NULL, fmt, arglist);
+ va_end(arglist);
+
+ eprintf("xenstored corruption: connection id %i: err %s: %s",
+ conn ? (int)conn->id : -1, strerror(saved_errno), str);
+#ifdef TESTING
+ /* Allow them to attach debugger. */
+ sleep(30);
+#endif
+ syslog(LOG_DAEMON,
+ "xenstored corruption: connection id %i: err %s: %s",
+ conn ? (int)conn->id : -1, strerror(saved_errno), str);
+ _exit(2);
+}
+
+static bool write_message(struct connection *conn)
+{
+ int ret;
+ struct buffered_data *out = conn->out;
+
+ if (out->inhdr) {
+ if (verbose)
+ xprintf("Writing msg %i out to %p\n",
+ out->hdr.msg.type, conn);
+ ret = conn->write(conn, out->hdr.raw + out->used,
+ sizeof(out->hdr) - out->used);
+ if (ret < 0)
+ return false;
+
+ out->used += ret;
+ if (out->used < sizeof(out->hdr))
+ return true;
+
+ out->inhdr = false;
+ out->used = 0;
+
+ /* Second write might block if non-zero. */
+ if (out->hdr.msg.len)
+ return true;
+ }
+
+ if (verbose)
+ xprintf("Writing data len %i out to %p\n",
+ out->hdr.msg.len, conn);
+ ret = conn->write(conn, out->buffer + out->used,
+ out->hdr.msg.len - out->used);
+
+ if (ret < 0)
+ return false;
+
+ out->used += ret;
+ if (out->used != out->hdr.msg.len)
+ return true;
+
+ conn->out = NULL;
+
+ /* If this was an event, we wait for ack, otherwise we're done. */
+ if (!is_watch_event(conn, out))
+ talloc_free(out);
+
+ queue_next_event(conn);
+ return true;
+}
+
+static int destroy_conn(void *_conn)
+{
+ struct connection *conn = _conn;
+
+ /* Flush outgoing if possible, but don't block. */
+ if (!conn->domain) {
+ fd_set set;
+ struct timeval none;
+
+ FD_ZERO(&set);
+ FD_SET(conn->fd, &set);
+ none.tv_sec = none.tv_usec = 0;
+
+ while (conn->out
+ && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
+ if (!write_message(conn))
+ break;
+ close(conn->fd);
+ }
+ list_del(&conn->list);
+ return 0;
+}
+
+static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
+ int event_fd)
+{
+ struct connection *i;
+ int max;
+
+ FD_ZERO(inset);
+ FD_ZERO(outset);
+ FD_SET(sock, inset);
+ max = sock;
+ FD_SET(ro_sock, inset);
+ if (ro_sock > max)
+ max = ro_sock;
+ FD_SET(event_fd, inset);
+ if (event_fd > max)
+ max = event_fd;
+ list_for_each_entry(i, &connections, list) {
+ if (i->domain)
+ continue;
+ if (!i->blocked)
+ FD_SET(i->fd, inset);
+ if (i->out)
+ FD_SET(i->fd, outset);
+ if (i->fd > max)
+ max = i->fd;
+ }
+ return max;
+}
+
+/* Read everything from a talloc_open'ed fd.
+ * The buffer is allocated with fd as its talloc context and the number of
+ * bytes read is stored in *size.  Returns NULL with errno set if a read
+ * fails (errno from read()) or memory runs out (ENOMEM). */
+static void *read_all(int *fd, unsigned int *size)
+{
+	unsigned int max = 4;
+	int ret;
+	char *buffer = talloc_size(fd, max);
+
+	*size = 0;
+	if (!buffer) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	while ((ret = read(*fd, buffer + *size, max - *size)) > 0) {
+		*size += ret;
+		if (*size == max) {
+			/* Full: double the buffer before reading more. */
+			max *= 2;
+			buffer = talloc_realloc_size(fd, buffer, max);
+			if (!buffer) {
+				/* Report ENOMEM rather than letting the next
+				 * read() fault on a NULL buffer. */
+				errno = ENOMEM;
+				return NULL;
+			}
+		}
+	}
+	if (ret < 0)
+		return NULL;
+	return buffer;
+}
+
+static int destroy_fd(void *_fd)
+{
+ int *fd = _fd;
+ close(*fd);
+ return 0;
+}
+
+/* Return a pointer to an fd, self-closing and attached to this pathname.
+ * The int is allocated with pathname as its talloc context and gets
+ * destroy_fd() as destructor, which close()s the descriptor.
+ * Returns NULL with errno set if the allocation fails (ENOMEM) or open()
+ * fails (errno from open()). */
+static int *talloc_open(const char *pathname, int flags, int mode)
+{
+	int *fd;
+
+	fd = talloc(pathname, int);
+	if (!fd) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	*fd = open(pathname, flags, mode);
+	if (*fd < 0) {
+		/* talloc_free() must not disturb open()'s error code. */
+		int saved_errno = errno;
+		talloc_free(fd);
+		errno = saved_errno;
+		return NULL;
+	}
+	talloc_set_destructor(fd, destroy_fd);
+	return fd;
+}
+
+/* Is child a subnode of parent, or equal? */
+bool is_child(const char *child, const char *parent)
+{
+	unsigned int plen;
+
+	/* / should really be "" for this algorithm to work, but that's a
+	 * usability nightmare.  Everything lives under the root. */
+	if (strcmp(parent, "/") == 0)
+		return true;
+
+	plen = strlen(parent);
+	if (strncmp(child, parent, plen) != 0)
+		return false;
+
+	/* The shared prefix must stop on a path-component boundary. */
+	switch (child[plen]) {
+	case '/':
+	case '\0':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Answer never ends in /. */
+char *node_dir_outside_transaction(const char *node)
+{
+ if (streq(node, "/"))
+ return talloc_strdup(node, xs_daemon_store());
+ return talloc_asprintf(node, "%s%s", xs_daemon_store(), node);
+}
+
+static char *node_dir(struct transaction *trans, const char *node)
+{
+ if (!trans || !within_transaction(trans, node))
+ return node_dir_outside_transaction(node);
+ return node_dir_inside_transaction(trans, node);
+}
+
+static char *node_datafile(struct transaction *trans, const char *node)
+{
+ return talloc_asprintf(node, "%s/.data", node_dir(trans, node));
+}
+
+static char *node_permfile(struct transaction *trans, const char *node)
+{
+ return talloc_asprintf(node, "%s/.perms", node_dir(trans, node));
+}
+
+struct buffered_data *new_buffer(void *ctx)
+{
+ struct buffered_data *data;
+
+ data = talloc(ctx, struct buffered_data);
+ data->inhdr = true;
+ data->used = 0;
+ data->buffer = NULL;
+
+ return data;
+}
+
+/* Return length of string (including nul) at this offset. */
+unsigned int get_string(const struct buffered_data *data, unsigned int offset)
+{
+ const char *nul;
+
+ if (offset >= data->used)
+ return 0;
+
+ nul = memchr(data->buffer + offset, 0, data->used - offset);
+ if (!nul)
+ return 0;
+
+ return nul - (data->buffer + offset) + 1;
+}
+
+/* Break input into vectors, return the number, fill in up to num of them. */
+unsigned int get_strings(struct buffered_data *data,
+ char *vec[], unsigned int num)
+{
+ unsigned int off, i, len;
+
+ off = i = 0;
+ while ((len = get_string(data, off)) != 0) {
+ if (i < num)
+ vec[i] = data->buffer + off;
+ i++;
+ off += len;
+ }
+ return i;
+}
+
+/* Returns "false", meaning "connection is not blocked". */
+bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len)
+{
+ struct buffered_data *bdata;
+
+ /* When data gets freed, we want list entry is destroyed (so
+ * list entry is a child). */
+ bdata = new_buffer(conn);
+ bdata->buffer = talloc_array(bdata, char, len);
+
+ bdata->hdr.msg.type = type;
+ bdata->hdr.msg.len = len;
+ memcpy(bdata->buffer, data, len);
+
+ /* There might be an event going out now. Queue behind it. */
+ if (conn->out) {
+ assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
+ assert(!conn->waiting_reply);
+ conn->waiting_reply = bdata;
+ } else
+ conn->out = bdata;
+ return false;
+}
+
+/* Some routines (write, mkdir, etc) just need a non-error return */
+bool send_ack(struct connection *conn, enum xsd_sockmsg_type type)
+{
+ return send_reply(conn, type, "OK", sizeof("OK"));
+}
+
+/* Reply with XS_ERROR carrying the symbolic name (e.g. "EINVAL") of
+ * error, looked up in xsd_errors[].  An errno value missing from that
+ * table is reported through corrupt(), which does not return. */
+bool send_error(struct connection *conn, int error)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(xsd_errors); i++)
+		if (xsd_errors[i].errnum == error)
+			break;
+
+	if (i == ARRAY_SIZE(xsd_errors))
+		corrupt(conn, "Unknown error %i (%s)", error,
+			strerror(error));
+
+	return send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
+			  strlen(xsd_errors[i].errstring) + 1);
+}
+
+/* True if node consists solely of ASCII letters, digits and "-/_@". */
+static bool valid_chars(const char *node)
+{
+	/* Nodes can have lots of crap. */
+	static const char allowed[] =
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		"abcdefghijklmnopqrstuvwxyz"
+		"0123456789-/_@";
+
+	/* The run of allowed characters must reach the terminator. */
+	return node[strspn(node, allowed)] == '\0';
+}
+
+/* A valid node name is an absolute path: it starts with '/', has no
+ * trailing '/' (except the root itself), contains no empty component and
+ * uses only the characters accepted by valid_chars(). */
+static bool is_valid_nodename(const char *node)
+{
+	size_t len = strlen(node);
+
+	/* Must start in /. */
+	if (node[0] != '/')
+		return false;
+
+	/* Cannot end in / (unless it's just "/"). */
+	if (len > 1 && node[len - 1] == '/')
+		return false;
+
+	/* No double //. */
+	if (strstr(node, "//") != NULL)
+		return false;
+
+	return valid_chars(node);
+}
+
+/* We expect one arg in the input: return NULL otherwise. */
+static const char *onearg(struct buffered_data *in)
+{
+ if (get_string(in, 0) != in->used)
+ return NULL;
+ return in->buffer;
+}
+
+/* If it fails, returns NULL and sets errno. */
+static struct xs_permissions *get_perms(struct transaction *transaction,
+ const char *node, unsigned int *num)
+{
+ unsigned int size;
+ char *strings;
+ struct xs_permissions *ret;
+ int *fd;
+
+ fd = talloc_open(node_permfile(transaction, node), O_RDONLY, 0);
+ if (!fd)
+ return NULL;
+ strings = read_all(fd, &size);
+ if (!strings)
+ return NULL;
+
+ *num = count_strings(strings, size);
+ ret = talloc_array(node, struct xs_permissions, *num);
+ if (!strings_to_perms(ret, *num, strings))
+ corrupt(NULL, "Permissions corrupt for %s", node);
+
+ return ret;
+}
+
+static char *perms_to_strings(const char *node,
+ struct xs_permissions *perms, unsigned int num,
+ unsigned int *len)
+{
+ unsigned int i;
+ char *strings = NULL;
+ char buffer[MAX_STRLEN(domid_t) + 1];
+
+ for (*len = 0, i = 0; i < num; i++) {
+ if (!perm_to_string(&perms[i], buffer))
+ return NULL;
+
+ strings = talloc_realloc(node, strings, char,
+ *len + strlen(buffer) + 1);
+ strcpy(strings + *len, buffer);
+ *len += strlen(buffer) + 1;
+ }
+ return strings;
+}
+
+/* Destroy this, and its children, and its children's children. */
+int destroy_path(void *path)
+{
+ DIR *dir;
+ struct dirent *dirent;
+
+ dir = opendir(path);
+ if (!dir) {
+ if (unlink(path) == 0 || errno == ENOENT)
+ return 0;
+ corrupt(NULL, "Destroying path %s", path);
+ }
+
+ while ((dirent = readdir(dir)) != NULL) {
+ char fullpath[strlen(path) + 1 + strlen(dirent->d_name) + 1];
+ sprintf(fullpath, "%s/%s", (char *)path, dirent->d_name);
+ if (!streq(dirent->d_name,".") && !streq(dirent->d_name,".."))
+ destroy_path(fullpath);
+ }
+ closedir(dir);
+ if (rmdir(path) != 0)
+ corrupt(NULL, "Destroying directory %s", path);
+ return 0;
+}
+
+/* Create a self-destructing temporary file */
+static char *tempfile(const char *path, void *contents, unsigned int len)
+{
+ int *fd;
+ char *tmppath = talloc_asprintf(path, "%s.tmp", path);
+
+ fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd)
+ return NULL;
+ talloc_set_destructor(tmppath, destroy_path);
+ if (!write_all(*fd, contents, len))
+ return NULL;
+
+ return tmppath;
+}
+
+/* We assume rename() doesn't fail on moves in same dir. */
+static void commit_tempfile(const char *path)
+{
+ char realname[strlen(path) + 1];
+ unsigned int len = strrchr(path, '.') - path;
+
+ memcpy(realname, path, len);
+ realname[len] = '\0';
+ if (rename(path, realname) != 0)
+ corrupt(NULL, "Committing %s", realname);
+ talloc_set_destructor(path, NULL);
+}
+
+static bool set_perms(struct transaction *transaction,
+ const char *node,
+ struct xs_permissions *perms, unsigned int num)
+{
+ unsigned int len;
+ char *permpath, *strings;
+
+ strings = perms_to_strings(node, perms, num, &len);
+ if (!strings)
+ return false;
+
+ /* Create then move. */
+ permpath = tempfile(node_permfile(transaction, node), strings, len);
+ if (!permpath)
+ return false;
+
+ commit_tempfile(permpath);
+ return true;
+}
+
+/* Return the parent of node as a new string allocated with node as its
+ * talloc context: everything before the last '/', or "/" when node has no
+ * '/' after its first character. */
+static char *get_parent(const char *node)
+{
+	/* Skip the leading '/' so a top-level node maps to the root. */
+	char *slash = strrchr(node + 1, '/');
+	if (!slash)
+		return talloc_strdup(node, "/");
+	/* The '*' precision must be an int; a pointer difference is a
+	 * ptrdiff_t, which is wider on LP64 targets. */
+	return talloc_asprintf(node, "%.*s", (int)(slash - node), node);
+}
+
+static enum xs_perm_type perm_for_id(domid_t id,
+ struct xs_permissions *perms,
+ unsigned int num)
+{
+ unsigned int i;
+
+ /* Owners and tools get it all... */
+ if (!id || perms[0].id == id)
+ return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_CREATE|XS_PERM_OWNER;
+
+ for (i = 1; i < num; i++)
+ if (perms[i].id == id)
+ return perms[i].perms;
+
+ return perms[0].perms;
+}
+
+/* We have a weird permissions system. You can allow someone into a
+ * specific node without allowing it in the parents. If it's going to
+ * fail, however, we don't want the errno to indicate any information
+ * about the node. */
+static int check_with_parents(struct connection *conn, const char *node,
+ int errnum)
+{
+ struct xs_permissions *perms;
+ unsigned int num;
+
+ /* We always tell them about memory failures. */
+ if (errnum == ENOMEM)
+ return errnum;
+
+ do {
+ node = get_parent(node);
+ perms = get_perms(conn->transaction, node, &num);
+ if (perms)
+ break;
+ } while (!streq(node, "/"));
+
+ /* No permission at root? We're in trouble. */
+ if (!perms)
+ corrupt(conn, "No permissions file at root");
+
+ if (!(perm_for_id(conn->id, perms, num) & XS_PERM_READ))
+ return EACCES;
+
+ return errnum;
+}
+
+/* Check whether conn may access node with the rights requested in perm.
+ * Returns true if access is allowed.  Otherwise returns false with errno
+ * set: EINVAL for a NULL or malformed node name, EROFS for a write
+ * request on a connection whose write member is unset, and in all other
+ * cases the value chosen by check_with_parents(). */
+bool check_node_perms(struct connection *conn, const char *node,
+		      enum xs_perm_type perm)
+{
+	struct xs_permissions *perms;
+	unsigned int num;
+
+	/* A single test covers both a missing and a malformed name. */
+	if (!node || !is_valid_nodename(node)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (!conn->write && (perm & XS_PERM_WRITE)) {
+		errno = EROFS;
+		return false;
+	}
+
+	perms = get_perms(conn->transaction, node, &num);
+	/* No permissions.  If we want to create it and
+	 * it doesn't exist, check parent directory. */
+	if (!perms && errno == ENOENT && (perm & XS_PERM_CREATE)) {
+		char *parent = get_parent(node);
+		if (!parent)
+			return false;
+
+		perms = get_perms(conn->transaction, parent, &num);
+	}
+	if (!perms) {
+		errno = check_with_parents(conn, node, errno);
+		return false;
+	}
+
+	if (perm_for_id(conn->id, perms, num) & perm)
+		return true;
+
+	errno = check_with_parents(conn, node, EACCES);
+	return false;
+}
+
+/* Reply to conn with the names of the entries in node's directory, packed
+ * as consecutive nul-terminated strings (XS_DIRECTORY).  Entries whose
+ * names fail valid_chars() are left out.  A permission or opendir()
+ * failure is answered with send_error() instead. */
+static bool send_directory(struct connection *conn, const char *node)
+{
+	char *path, *reply;
+	unsigned int reply_len = 0;
+	DIR *dir;
+	struct dirent *dirent;
+
+	if (!check_node_perms(conn, node, XS_PERM_READ))
+		return send_error(conn, errno);
+
+	path = node_dir(conn->transaction, node);
+	dir = opendir(path);
+	if (!dir)
+		return send_error(conn, errno);
+
+	/* Allocate only after check_node_perms() has rejected a NULL node:
+	 * a NULL talloc context would leave the string with no parent to
+	 * free it. */
+	reply = talloc_strdup(node, "");
+
+	while ((dirent = readdir(dir)) != NULL) {
+		int len = strlen(dirent->d_name) + 1;
+
+		if (!valid_chars(dirent->d_name))
+			continue;
+
+		/* Append the name together with its terminator. */
+		reply = talloc_realloc(path, reply, char, reply_len + len);
+		strcpy(reply + reply_len, dirent->d_name);
+		reply_len += len;
+	}
+	closedir(dir);
+
+	return send_reply(conn, XS_DIRECTORY, reply, reply_len);
+}
+
+static bool do_read(struct connection *conn, const char *node)
+{
+ char *value;
+ unsigned int size;
+ int *fd;
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
+ if (!fd) {
+ /* Data file doesn't exist? We call that a directory */
+ if (errno == ENOENT)
+ errno = EISDIR;
+ return send_error(conn, errno);
+ }
+
+ value = read_all(fd, &size);
+ if (!value)
+ return send_error(conn, errno);
+
+ return send_reply(conn, XS_READ, value, size);
+}
+
+/* Create a new directory. Optionally put data in it (if data != NULL).
+ * Makes the directory for node (mode 0750), writes its permissions file
+ * with a single entry owned by conn->id with XS_PERM_NONE, and, when data
+ * is non-NULL, writes datalen bytes to its data file.
+ * Returns true on success and false if any step fails; on failure the
+ * destroy_path destructor stays installed on dir (presumably so the
+ * half-made directory is removed when dir is freed -- confirm). */
+static bool new_directory(struct connection *conn,
+ const char *node, void *data, unsigned int datalen)
+{
+ struct xs_permissions perms;
+ char *permstr;
+ unsigned int len;
+ int *fd;
+ char *dir = node_dir(conn->transaction, node);
+
+ if (mkdir(dir, 0750) != 0)
+ return false;
+
+ /* Set destructor so we clean up if necessary. */
+ talloc_set_destructor(dir, destroy_path);
+
+ /* Default permissions: we own it, no one else has permission. */
+ perms.id = conn->id;
+ perms.perms = XS_PERM_NONE;
+
+ permstr = perms_to_strings(dir, &perms, 1, &len);
+ fd = talloc_open(node_permfile(conn->transaction, node),
+ O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd || !write_all(*fd, permstr, len))
+ return false;
+
+ if (data) {
+ char *datapath = node_datafile(conn->transaction, node);
+
+ fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd || !write_all(*fd, data, datalen))
+ return false;
+ }
+
+ /* Finished! */
+ /* Everything is in place: cancel the cleanup destructor. */
+ talloc_set_destructor(dir, NULL);
+ return true;
+}
+
+/* path, flags, data... */
+static bool do_write(struct connection *conn, struct buffered_data *in)
+{
+ unsigned int offset, datalen;
+ char *vec[2];
+ char *node, *tmppath;
+ enum xs_perm_type mode;
+ struct stat st;
+
+ /* Extra "strings" can be created by binary data. */
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
+ return send_error(conn, EINVAL);
+
+ node = vec[0];
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ offset = strlen(vec[0]) + strlen(vec[1]) + 2;
+ datalen = in->used - offset;
+
+ if (streq(vec[1], XS_WRITE_NONE))
+ mode = XS_PERM_WRITE;
+ else if (streq(vec[1], XS_WRITE_CREATE))
+ mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
+ mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ else
+ return send_error(conn, EINVAL);
+
+ if (!check_node_perms(conn, node, mode))
+ return send_error(conn, errno);
+
+ if (lstat(node_dir(conn->transaction, node), &st) != 0) {
+ /* Does not exist... */
+ if (errno != ENOENT)
+ return send_error(conn, errno);
+
+ /* Not going to create it? */
+ if (!(mode & XS_PERM_CREATE))
+ return send_error(conn, ENOENT);
+
+ if (!new_directory(conn, node, in->buffer + offset, datalen))
+ return send_error(conn, errno);
+ } else {
+ /* Exists... */
+ if (streq(vec[1], XS_WRITE_CREATE_EXCL))
+ return send_error(conn, EEXIST);
+
+ tmppath = tempfile(node_datafile(conn->transaction, node),
+ in->buffer + offset, datalen);
+ if (!tmppath)
+ return send_error(conn, errno);
+
+ commit_tempfile(tmppath);
+ }
+
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_WRITE);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+/* Handle XS_MKDIR: create "node" with no contents.
+ * Returns true only when the request is blocked by a transaction. */
+static bool do_mkdir(struct connection *conn, const char *node)
+{
+	/* Each check short-circuits with its own error reply. */
+	if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) {
+		return send_error(conn, errno);
+	} else if (!within_transaction(conn->transaction, node)) {
+		return send_error(conn, EROFS);
+	} else if (transaction_block(conn, node)) {
+		return true;
+	} else if (!new_directory(conn, node, NULL, 0)) {
+		return send_error(conn, errno);
+	}
+
+	/* Success: record the change, ack, then notify watchers. */
+	add_change_node(conn->transaction, node);
+	send_ack(conn, XS_MKDIR);
+	fire_watches(conn->transaction, node);
+	return false;
+}
+
+/* Handle XS_RM: remove "node".  The root node "/" cannot be removed.
+ * Returns true only when the request is blocked by a transaction. */
+static bool do_rm(struct connection *conn, const char *node)
+{
+	char *dir, *doomed;
+
+	if (!check_node_perms(conn, node, XS_PERM_WRITE))
+		return send_error(conn, errno);
+	if (!within_transaction(conn->transaction, node))
+		return send_error(conn, EROFS);
+	if (transaction_block(conn, node))
+		return true;
+	if (streq(node, "/"))
+		return send_error(conn, EINVAL);
+
+	/* Rename the directory out of the way; the talloc destructor on
+	 * the temporary name does the actual cleanup when it is freed. */
+	dir = node_dir(conn->transaction, node);
+	doomed = talloc_asprintf(node, "%s.tmp", dir);
+	talloc_set_destructor(doomed, destroy_path);
+
+	if (rename(dir, doomed) != 0)
+		return send_error(conn, errno);
+
+	add_change_node(conn->transaction, node);
+	send_ack(conn, XS_RM);
+	fire_watches(conn->transaction, node);
+	return false;
+}
+
+/* Handle XS_GET_PERMS: reply with the permissions of "node" in string
+ * form.  Requires read permission on the node. */
+static bool do_get_perms(struct connection *conn, const char *node)
+{
+	struct xs_permissions *list;
+	unsigned int count, reply_len;
+	char *reply;
+
+	if (!check_node_perms(conn, node, XS_PERM_READ))
+		return send_error(conn, errno);
+
+	list = get_perms(conn->transaction, node, &count);
+	if (list == NULL)
+		return send_error(conn, errno);
+
+	reply = perms_to_strings(node, list, count, &reply_len);
+	if (reply == NULL)
+		return send_error(conn, errno);
+
+	return send_reply(conn, XS_GET_PERMS, reply, reply_len);
+}
+
+/* Handle XS_SET_PERMS.  The request is "<node>\0<perm>\0[<perm>\0...]".
+ * Returns true only when the request is blocked by a transaction.
+ *
+ * "in" must not be modified here: when transaction_block() returns true
+ * the very same buffer is queued and parsed again later, so the node
+ * name has to stay at the start of in->buffer.
+ */
+static bool do_set_perms(struct connection *conn, struct buffered_data *in)
+{
+	unsigned int num;
+	char *node, *permstr;
+	struct xs_permissions *perms;
+
+	num = count_strings(in->buffer, in->used);
+	if (num < 2)
+		return send_error(conn, EINVAL);
+
+	/* First arg is node name; the permission strings follow it. */
+	node = in->buffer;
+	permstr = in->buffer + strlen(in->buffer) + 1;
+	num--;
+
+	if (!within_transaction(conn->transaction, node))
+		return send_error(conn, EROFS);
+
+	if (transaction_block(conn, node))
+		return true;
+
+	/* We must own node to do this (tools can do this too). */
+	if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER))
+		return send_error(conn, errno);
+
+	perms = talloc_array(node, struct xs_permissions, num);
+	if (!strings_to_perms(perms, num, permstr))
+		return send_error(conn, errno);
+
+	if (!set_perms(conn->transaction, node, perms, num))
+		return send_error(conn, errno);
+	add_change_node(conn->transaction, node);
+	send_ack(conn, XS_SET_PERMS);
+	fire_watches(conn->transaction, node);
+	return false;
+}
+
+/* Process "in" for conn: "in" will vanish after this conversation, so
+ * we can talloc off it for temporary variables. May free "conn".
+ * Returns true if can't complete due to block.
+ *
+ * Dispatches on the message type in the header.  Handlers taking a single
+ * string receive onearg(in); the others parse "in" themselves.
+ */
+static bool process_message(struct connection *conn, struct buffered_data *in)
+{
+ switch (in->hdr.msg.type) {
+ case XS_DIRECTORY:
+ return send_directory(conn, onearg(in));
+
+ case XS_READ:
+ return do_read(conn, onearg(in));
+
+ case XS_WRITE:
+ return do_write(conn, in);
+
+ case XS_MKDIR:
+ return do_mkdir(conn, onearg(in));
+
+ case XS_RM:
+ return do_rm(conn, onearg(in));
+
+ case XS_GET_PERMS:
+ return do_get_perms(conn, onearg(in));
+
+ case XS_SET_PERMS:
+ return do_set_perms(conn, in);
+
+ case XS_SHUTDOWN:
+ send_ack(conn, XS_SHUTDOWN);
+ /* Everything hangs off auto-free context, freed at exit. */
+ exit(0);
+
+#ifdef TESTING
+ case XS_DEBUG: {
+ /* For testing, we allow them to set id. */
+ /* NOTE(review): unlike "failtest" below, "setid" does not check
+ * that an argument follows the command string -- confirm. */
+ if (streq(in->buffer, "setid")) {
+ conn->id = atoi(in->buffer + get_string(in, 0));
+ send_ack(conn, XS_DEBUG);
+ } else if (streq(in->buffer, "failtest")) {
+ /* Optional argument seeds the random number generator. */
+ if (get_string(in, 0) < in->used)
+ srandom(atoi(in->buffer + get_string(in, 0)));
+ send_ack(conn, XS_DEBUG);
+ failtest = true;
+ }
+ /* Any other debug command is dropped without a reply. */
+ return false;
+ }
+#endif /* TESTING */
+
+ case XS_WATCH:
+ return do_watch(conn, in);
+
+ case XS_WATCH_ACK:
+ return do_watch_ack(conn);
+
+ case XS_UNWATCH:
+ return do_unwatch(conn, onearg(in));
+
+ case XS_TRANSACTION_START:
+ return do_transaction_start(conn, onearg(in));
+
+ case XS_TRANSACTION_END:
+ return do_transaction_end(conn, onearg(in));
+
+ case XS_INTRODUCE:
+ return do_introduce(conn, in);
+
+ case XS_RELEASE:
+ return do_release(conn, onearg(in));
+
+ case XS_GETDOMAINPATH:
+ return do_get_domain_path(conn, onearg(in));
+
+ /* XS_WATCH_EVENT is not accepted from clients: it is rejected
+ * together with unknown types. */
+ case XS_WATCH_EVENT:
+ default:
+ eprintf("Client unknown operation %i", in->hdr.msg.type);
+ send_error(conn, ENOSYS);
+ return false;
+ }
+}
+
+/* talloc failure handler: "data" is the jmp_buf registered alongside it
+ * via talloc_set_fail_handler(); jump back to the matching setjmp().
+ * Never returns. */
+static int out_of_mem(void *data)
+{
+ longjmp(*(jmp_buf *)data, 1);
+}
+
+/* Process the complete message sitting in conn->in.
+ *
+ * An out-of-memory condition anywhere below longjmps back here and the
+ * connection is dropped.  If the message is blocked by a transaction it
+ * is put back on conn->in so it can be retried later.  May free "conn".
+ */
+static void consider_message(struct connection *conn)
+{
+	/* Modified after setjmp() and read after longjmp(): it must be
+	 * volatile or its value is indeterminate on the OOM path. */
+	struct buffered_data *volatile in = NULL;
+	enum xsd_sockmsg_type type = conn->in->hdr.msg.type;
+	jmp_buf talloc_fail;
+
+	/* For simplicity, we kill the connection on OOM. */
+	talloc_set_fail_handler(out_of_mem, &talloc_fail);
+	if (setjmp(talloc_fail)) {
+		talloc_free(conn);
+		goto end;
+	}
+
+	if (verbose)
+		xprintf("Got message %i len %i from %p\n",
+			type, conn->in->hdr.msg.len, conn);
+
+	/* We might get a command while waiting for an ack: this means
+	 * the other end discarded it: we will re-transmit. */
+	if (type != XS_WATCH_ACK)
+		reset_watch_event(conn);
+
+	/* Careful: process_message may free connection.  We detach
+	 * "in" beforehand and allocate the new buffer to avoid
+	 * touching conn after process_message.
+	 */
+	in = talloc_steal(talloc_autofree_context(), conn->in);
+	conn->in = new_buffer(conn);
+	if (process_message(conn, in)) {
+		/* Blocked by transaction: queue for re-xmit. */
+		talloc_free(conn->in);
+		conn->in = in;
+		in = NULL;
+	}
+
+end:
+	talloc_free(in);
+	talloc_set_fail_handler(NULL, NULL);
+	/* Leak check: report if blocks exist outside the autofree context. */
+	if (talloc_total_blocks(NULL)
+	    != talloc_total_blocks(talloc_autofree_context()) + 1)
+		talloc_report_full(NULL, stderr);
+}
+
+/* Read whatever is available for conn: first the fixed-size header, then
+ * (on later calls) the payload.  Once the payload is complete the message
+ * is processed.
+ *
+ * Errors in reading or allocating here mean we get out of sync, so we
+ * drop the whole client connection.  May free "conn". */
+void handle_input(struct connection *conn)
+{
+	int bytes;
+	struct buffered_data *in;
+
+	assert(!conn->blocked);
+	in = conn->in;
+
+	/* Not finished header yet? */
+	if (in->inhdr) {
+		bytes = conn->read(conn, in->hdr.raw + in->used,
+				   sizeof(in->hdr) - in->used);
+		if (bytes <= 0)
+			goto bad_client;
+		in->used += bytes;
+		if (in->used != sizeof(in->hdr))
+			return;
+
+		if (in->hdr.msg.len > PATH_MAX) {
+			/* The priority argument is a level; LOG_DAEMON is a
+			 * facility and would log this at LOG_EMERG. */
+			syslog(LOG_ERR, "Client tried to feed us %i",
+			       in->hdr.msg.len);
+			goto bad_client;
+		}
+
+		in->buffer = talloc_array(in, char, in->hdr.msg.len);
+		if (!in->buffer)
+			goto bad_client;
+		in->used = 0;
+		in->inhdr = false;
+		return;
+	}
+
+	bytes = conn->read(conn, in->buffer + in->used,
+			   in->hdr.msg.len - in->used);
+	if (bytes < 0)
+		goto bad_client;
+
+	/* Zero bytes is only fine for a zero-length remainder; otherwise
+	 * the peer went away mid-message and would be polled forever. */
+	if (bytes == 0 && in->used != in->hdr.msg.len)
+		goto bad_client;
+
+	in->used += bytes;
+	if (in->used != in->hdr.msg.len)
+		return;
+
+	consider_message(conn);
+	return;
+
+bad_client:
+	/* Kill it. */
+	talloc_free(conn);
+}
+
+/* Push pending output for conn; a failed write drops the connection. */
+void handle_output(struct connection *conn)
+{
+	if (write_message(conn))
+		return;
+
+	talloc_free(conn);
+}
+
+/* If a transaction has ended, see if we can unblock any connections. */
+static void unblock_connections(void)
+{
+ struct connection *i, *tmp;
+
+ /* _safe variant: consider_message() may free the current entry. */
+ list_for_each_entry_safe(i, tmp, &connections, list) {
+ if (!i->blocked)
+ continue;
+
+ /* i->blocked holds the node we were waiting on; once no
+ * transaction covers it, retry the queued message. */
+ if (!transaction_covering_node(i->blocked)) {
+ talloc_free(i->blocked);
+ i->blocked = NULL;
+ consider_message(i);
+ }
+ }
+
+ /* To balance bias, move first entry to end. */
+ if (!list_empty(&connections)) {
+ i = list_top(&connections, struct connection, list);
+ list_del(&i->list);
+ list_add_tail(&i->list, &connections);
+ }
+}
+
+/* Allocate a connection using the given I/O methods ("write" may be NULL)
+ * and add it to the global connection list.  The caller fills in fd or
+ * domain afterwards.  Returns NULL if memory runs out.
+ *
+ * NOTE(review): this installs and then clears the talloc fail handler, so
+ * a handler set by the caller is no longer active on return -- confirm
+ * that callers do not rely on theirs staying installed.
+ */
+struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
+{
+	struct connection *new;
+	jmp_buf talloc_fail;
+
+	new = talloc(talloc_autofree_context(), struct connection);
+	if (!new)
+		return NULL;
+
+	/* "blocked" is a node name pointer, not a flag. */
+	new->blocked = NULL;
+	new->out = new->waiting_reply = NULL;
+	new->event = NULL;
+	new->fd = -1;
+	new->id = 0;
+	new->domain = NULL;
+	new->transaction = NULL;
+	new->write = write;
+	new->read = read;
+
+	talloc_set_fail_handler(out_of_mem, &talloc_fail);
+	if (setjmp(talloc_fail)) {
+		/* Uninstall the handler first: talloc_fail lives in this
+		 * frame and must not be jumped to after we return. */
+		talloc_set_fail_handler(NULL, NULL);
+		talloc_free(new);
+		return NULL;
+	}
+	new->in = new_buffer(new);
+	talloc_set_fail_handler(NULL, NULL);
+
+	list_add_tail(&new->list, &connections);
+	talloc_set_destructor(new, destroy_conn);
+	return new;
+}
+
+/* connwritefn_t for socket connections: write(2) on conn->fd. */
+static int writefd(struct connection *conn, const void *data, unsigned int len)
+{
+ return write(conn->fd, data, len);
+}
+
+/* connreadfn_t for socket connections: read(2) on conn->fd. */
+static int readfd(struct connection *conn, void *data, unsigned int len)
+{
+ return read(conn->fd, data, len);
+}
+
+/* Accept one pending client on listening socket "sock".  Connections that
+ * may not write get no write method.  The new fd is closed again if no
+ * connection object can be created. */
+static void accept_connection(int sock, bool canwrite)
+{
+	struct connection *client;
+	int newfd = accept(sock, NULL, NULL);
+
+	if (newfd < 0)
+		return;
+
+	client = new_connection(canwrite ? writefd : NULL, readfd);
+	if (!client) {
+		close(newfd);
+		return;
+	}
+	client->fd = newfd;
+}
+
+/* Turn the absolute time in *tv into a span measured from now.  A time
+ * that has already passed becomes zero. */
+static void time_relative_to_now(struct timeval *tv)
+{
+	struct timeval current;
+
+	gettimeofday(&current, NULL);
+	if (!timercmp(&current, tv, >)) {
+		/* Still in the future (or exactly now): tv -= current. */
+		timersub(tv, &current, tv);
+		return;
+	}
+	timerclear(tv);
+}
+
+/* Long options for getopt_long(); each returns the given letter, which
+ * main() switches on. */
+static struct option options[] = { { "no-fork", 0, NULL, 'N' },
+ { "verbose", 0, NULL, 'V' },
+ { "output-pid", 0, NULL, 'P' },
+ { NULL, 0, NULL, 0 } };
+
+/* Daemon entry point: parse options, create the read-write and read-only
+ * listening sockets, make sure the store directory exists, connect to the
+ * hypervisor event channel, optionally daemonize, then serve forever. */
+int main(int argc, char *argv[])
+{
+	int opt, *sock, *ro_sock, event_fd, max, tmpout;
+	struct sockaddr_un addr;
+	fd_set inset, outset;
+	bool dofork = true;
+	bool outputpid = false;
+
+	/* NOTE(review): the short-option string accepts -D, which is not
+	 * handled below, and has no short form for N or P -- confirm. */
+	while ((opt = getopt_long(argc, argv, "DV", options, NULL)) != -1) {
+		switch (opt) {
+		case 'N':
+			dofork = false;
+			break;
+		case 'V':
+			verbose = true;
+			break;
+		case 'P':
+			outputpid = true;
+			break;
+		}
+	}
+	if (optind != argc)
+		barf("%s: No arguments desired", argv[0]);
+
+	talloc_enable_leak_report_full();
+
+	/* Create sockets for them to listen to. */
+	sock = talloc(talloc_autofree_context(), int);
+	*sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (*sock < 0)
+		barf_perror("Could not create socket");
+	ro_sock = talloc(talloc_autofree_context(), int);
+	*ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (*ro_sock < 0)
+		barf_perror("Could not create socket");
+	talloc_set_destructor(sock, destroy_fd);
+	talloc_set_destructor(ro_sock, destroy_fd);
+
+	/* Don't kill us with SIGPIPE. */
+	signal(SIGPIPE, SIG_IGN);
+
+	/* FIXME: Be more sophisticated, don't mug running daemon. */
+	unlink(xs_daemon_socket());
+	unlink(xs_daemon_socket_ro());
+
+	/* sun_path is a small fixed array: refuse paths that would
+	 * overflow it rather than letting strcpy() scribble past it. */
+	if (strlen(xs_daemon_socket()) >= sizeof(addr.sun_path)
+	    || strlen(xs_daemon_socket_ro()) >= sizeof(addr.sun_path))
+		barf("%s: Socket path too long", argv[0]);
+
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, xs_daemon_socket());
+	if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
+		barf_perror("Could not bind socket to %s", xs_daemon_socket());
+	strcpy(addr.sun_path, xs_daemon_socket_ro());
+	if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
+		barf_perror("Could not bind socket to %s",
+			    xs_daemon_socket_ro());
+	if (chmod(xs_daemon_socket(), 0600) != 0
+	    || chmod(xs_daemon_socket_ro(), 0660) != 0)
+		barf_perror("Could not chmod sockets");
+
+	if (listen(*sock, 1) != 0
+	    || listen(*ro_sock, 1) != 0)
+		barf_perror("Could not listen on sockets");
+
+	/* If we're the first, create .perms file for root. */
+	if (mkdir(xs_daemon_store(), 0750) == 0) {
+		struct xs_permissions perms;
+		char *root = talloc_strdup(talloc_autofree_context(), "/");
+
+		perms.id = 0;
+		perms.perms = XS_PERM_READ;
+		if (!set_perms(NULL, root, &perms, 1))
+			barf_perror("Could not create permissions in root");
+		talloc_free(root);
+		mkdir(xs_daemon_transactions(), 0750);
+	} else if (errno != EEXIST)
+		barf_perror("Could not create root %s", xs_daemon_store());
+
+	/* Listen to hypervisor. */
+	event_fd = domain_init();
+
+	/* Debugging: daemonize() closes standard fds, so dup here. */
+	tmpout = dup(STDOUT_FILENO);
+	if (dofork) {
+		openlog("xenstored", 0, LOG_DAEMON);
+		daemonize();
+	}
+
+	/* Report the pid on the original stdout (after any fork). */
+	if (outputpid) {
+		char buffer[20];
+		sprintf(buffer, "%i\n", getpid());
+		write(tmpout, buffer, strlen(buffer));
+	}
+	close(tmpout);
+
+#ifdef TESTING
+	signal(SIGUSR1, stop_failtest);
+#endif
+
+	/* Get ready to listen to the tools. */
+	max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
+
+	/* Main loop. */
+	for (;;) {
+		struct connection *i;
+		struct timeval *tvp = NULL, tv;
+
+		/* Only pass a timeout to select() if a transaction has
+		 * one pending. */
+		timerclear(&tv);
+		shortest_transaction_timeout(&tv);
+		if (timerisset(&tv)) {
+			time_relative_to_now(&tv);
+			tvp = &tv;
+		}
+
+		if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
+			if (errno == EINTR)
+				continue;
+			barf_perror("Select failed");
+		}
+
+		if (FD_ISSET(*sock, &inset))
+			accept_connection(*sock, true);
+
+		if (FD_ISSET(*ro_sock, &inset))
+			accept_connection(*ro_sock, false);
+
+		if (FD_ISSET(event_fd, &inset))
+			handle_event(event_fd);
+
+		list_for_each_entry(i, &connections, list) {
+			/* Domain connections are serviced by handle_event. */
+			if (i->domain)
+				continue;
+
+			/* Operations can delete themselves or others
+			 * (xs_release): list is not safe after input,
+			 * so break. */
+			if (FD_ISSET(i->fd, &inset)) {
+				handle_input(i);
+				break;
+			}
+			if (FD_ISSET(i->fd, &outset)) {
+				handle_output(i);
+				break;
+			}
+		}
+
+		if (tvp)
+			check_transaction_timeout();
+
+		/* If transactions ended, we might be able to do more work. */
+		unblock_connections();
+
+		max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd);
+	}
+}
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
new file mode 100644
index 0000000000..fe6eec8f72
--- /dev/null
+++ b/tools/xenstore/xenstored_core.h
@@ -0,0 +1,123 @@
+/*
+ Internal interfaces for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_INTERNAL_H
+#define _XENSTORED_INTERNAL_H
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include "xs_lib.h"
+#include "xenstored.h"
+#include "list.h"
+
+/* One message in flight: a fixed-size header followed by a separately
+ * allocated payload. */
+struct buffered_data
+{
+ /* Are we still doing the header? */
+ bool inhdr;
+ /* How far are we? */
+ /* (Counts header bytes while inhdr is set, payload bytes after.) */
+ unsigned int used;
+ /* The header, also addressable as raw bytes for partial reads. */
+ union {
+ struct xsd_sockmsg msg;
+ char raw[sizeof(struct xsd_sockmsg)];
+ } hdr;
+ /* The actual data. */
+ char *buffer;
+};
+
+struct connection;
+/* Transport methods: both take the connection, a buffer and its length,
+ * and return an int (negative is treated as an error by the callers). */
+typedef int connwritefn_t(struct connection *, const void *, unsigned int);
+typedef int connreadfn_t(struct connection *, void *, unsigned int);
+
+/* One client of the daemon: either a socket connection or a domain. */
+struct connection
+{
+ /* Entry in the global list of connections. */
+ struct list_head list;
+
+ /* The file descriptor we came in on. */
+ int fd;
+
+ /* Who am I? 0 for socket connections. */
+ domid_t id;
+
+ /* Are we blocked waiting for a transaction to end? Contains node. */
+ char *blocked;
+
+ /* Our current event. If all used, we're waiting for ack. */
+ struct watch_event *event;
+
+ /* Buffered incoming data. */
+ struct buffered_data *in;
+
+ /* Buffered output data */
+ struct buffered_data *out;
+
+ /* If we had a watch fire outgoing when we needed to reply... */
+ struct buffered_data *waiting_reply;
+
+ /* My transaction, if any. */
+ struct transaction *transaction;
+
+ /* The domain I'm associated with, if any. */
+ struct domain *domain;
+
+ /* Methods for communicating over this connection: write can be NULL */
+ connwritefn_t *write;
+ connreadfn_t *read;
+};
+
+/* Return length of string (including nul) at this offset. */
+unsigned int get_string(const struct buffered_data *data,
+ unsigned int offset);
+
+/* Break input into vectors, return the number, fill in up to num of them. */
+unsigned int get_strings(struct buffered_data *data,
+ char *vec[], unsigned int num);
+
+/* Is child node a child or equal to parent node? */
+bool is_child(const char *child, const char *parent);
+
+/* Create a new buffer with lifetime of context. */
+struct buffered_data *new_buffer(void *ctx);
+
+/* Queue a reply of the given type carrying len bytes of data. */
+bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len);
+
+/* Some routines (write, mkdir, etc) just need a non-error return */
+bool send_ack(struct connection *conn, enum xsd_sockmsg_type type);
+
+/* Send an error: error is usually "errno". */
+bool send_error(struct connection *conn, int error);
+
+/* Check permissions on this node. */
+bool check_node_perms(struct connection *conn, const char *node,
+ enum xs_perm_type perm);
+
+/* Path to this node outside transaction. */
+char *node_dir_outside_transaction(const char *node);
+
+/* Fail due to excessive corruption, capitalist pigdogs! */
+void __attribute__((noreturn)) corrupt(struct connection *conn,
+ const char *fmt, ...);
+
+/* Create a connection using these I/O methods; NULL on failure. */
+struct connection *new_connection(connwritefn_t *write, connreadfn_t *read);
+
+/* Service readable/writable connections.  Either may free conn. */
+void handle_input(struct connection *conn);
+void handle_output(struct connection *conn);
+
+/* Convenient talloc-style destructor for paths. */
+int destroy_path(void *path);
+#endif /* _XENSTORED_INTERNAL_H */
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
new file mode 100644
index 0000000000..bcc0a64967
--- /dev/null
+++ b/tools/xenstore/xenstored_domain.c
@@ -0,0 +1,387 @@
+/*
+ Domain communications for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <linux/ioctl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+//#define DEBUG
+#include "utils.h"
+#include "talloc.h"
+#include "xenstored_core.h"
+#include "xenstored_domain.h"
+#include "xenstored_test.h"
+
+/* Hypervisor control handle, allocated and opened in domain_init(). */
+static int *xc_handle;
+/* Event channel device fd, opened in domain_init(). */
+static int eventchn_fd;
+/* Usable bytes in each ring: half a page minus the ring header. */
+static unsigned int ringbuf_datasize;
+
+/* Per-domain state for a domain introduced via do_introduce(). */
+struct domain
+{
+ /* Entry in the global "domains" list. */
+ struct list_head list;
+
+ /* The id of this domain */
+ domid_t domid;
+
+ /* Event channel port */
+ u16 port;
+
+ /* Domain path in store. */
+ char *path;
+
+ /* Shared page. */
+ void *page;
+
+ /* Input and output ringbuffer heads. */
+ /* (input is at the start of the page, output at the halfway point.) */
+ struct ringbuf_head *input, *output;
+
+ /* The connection associated with this. */
+ struct connection *conn;
+
+};
+
+/* All introduced domains. */
+static LIST_HEAD(domains);
+
+/* Record conn as the connection associated with domain. */
+void domain_set_conn(struct domain *domain, struct connection *conn)
+{
+ domain->conn = conn;
+}
+
+/* Header at the start of each ring in the shared page; the ring's data
+ * bytes follow immediately in buf.  Indices are offsets into buf and are
+ * valid only while below ringbuf_datasize (see check_buffer). */
+struct ringbuf_head
+{
+ u32 write; /* Next place to write to */
+ u32 read; /* Next place to read from */
+ u8 flags;
+ char buf[0];
+} __attribute__((packed));
+
+/* ioctls on the event channel fd; the argument is the port number. */
+#define EVENTCHN_BIND _IO('E', 2)
+#define EVENTCHN_UNBIND _IO('E', 3)
+
+/* Sanity-check a ring header: both indices must lie inside the data area.
+ * FIXME: Mark connection as broken (close it?) when this happens. */
+static bool check_buffer(const struct ringbuf_head *h)
+{
+	if (h->write >= ringbuf_datasize)
+		return false;
+
+	return h->read < ringbuf_datasize;
+}
+
+/* Find the next contiguous region of the ring that can be written.
+ * Returns a pointer to it (buf offset by the write index) and stores its
+ * length in *len; *len is 0 when there is no room.
+ *
+ * We can't fill last byte: would look like empty buffer. */
+static void *get_output_chunk(const struct ringbuf_head *h,
+ void *buf, u32 *len)
+{
+ u32 read_mark;
+
+ /* read_mark is the slot just before the read index, wrapping. */
+ if (h->read == 0)
+ read_mark = ringbuf_datasize - 1;
+ else
+ read_mark = h->read - 1;
+
+ /* Here to the end of buffer, unless they haven't read some out. */
+ *len = ringbuf_datasize - h->write;
+ if (read_mark >= h->write)
+ *len = read_mark - h->write;
+ return buf + h->write;
+}
+
+/* Find the next contiguous region of the ring that holds unread data.
+ * Returns a pointer to it (buf offset by the read index) and stores its
+ * length in *len; *len is 0 when the ring is empty (write == read). */
+static const void *get_input_chunk(const struct ringbuf_head *h,
+ const void *buf, u32 *len)
+{
+ /* Here to the end of buffer, unless they haven't written some. */
+ *len = ringbuf_datasize - h->read;
+ if (h->write >= h->read)
+ *len = h->write - h->read;
+ return buf + h->read;
+}
+
+/* Advance the write index by len, wrapping to 0 at the end of the ring.
+ * NOTE(review): callers pass the header in the shared page, so the index
+ * may briefly equal ringbuf_datasize before it is reset -- confirm that
+ * the other end tolerates this. */
+static void update_output_chunk(struct ringbuf_head *h, u32 len)
+{
+ h->write += len;
+ if (h->write == ringbuf_datasize)
+ h->write = 0;
+}
+
+/* Advance the read index by len, wrapping to 0 at the end of the ring. */
+static void update_input_chunk(struct ringbuf_head *h, u32 len)
+{
+ h->read += len;
+ if (h->read == ringbuf_datasize)
+ h->read = 0;
+}
+
+/* True if the ring described by h holds any unread bytes. */
+static bool buffer_has_input(const struct ringbuf_head *h)
+{
+	u32 pending;
+
+	/* Only the length matters here, so no data pointer is supplied. */
+	(void)get_input_chunk(h, NULL, &pending);
+	return pending > 0;
+}
+
+/* True if at least one more byte can be written to the ring described
+ * by h. */
+static bool buffer_has_output_room(const struct ringbuf_head *h)
+{
+	u32 room;
+
+	/* Only the length matters here, so no data pointer is supplied. */
+	(void)get_output_chunk(h, NULL, &room);
+	return room > 0;
+}
+
+/* connwritefn_t for domain connections: copy up to len bytes into the
+ * domain's output ring and signal its event channel.  Returns the number
+ * of bytes actually copied (possibly fewer than len), or -1 with errno
+ * set to EIO if the ring header is corrupt. */
+static int writechn(struct connection *conn, const void *data, unsigned int len)
+{
+ u32 avail;
+ void *dest;
+ struct ringbuf_head h;
+
+ /* Must read head once, and before anything else, and verified. */
+ h = *conn->domain->output;
+ mb();
+ if (!check_buffer(&h)) {
+ errno = EIO;
+ return -1;
+ }
+
+ /* Work from the verified private copy; only buf is the shared area. */
+ dest = get_output_chunk(&h, conn->domain->output->buf, &avail);
+ if (avail < len)
+ len = avail;
+
+ /* Data must be in place before the write index is advanced. */
+ memcpy(dest, data, len);
+ mb();
+ update_output_chunk(conn->domain->output, len);
+ /* FIXME: Probably not necessary. */
+ mb();
+ xc_evtchn_send(*xc_handle, conn->domain->port);
+ return len;
+}
+
+/* connreadfn_t for domain connections: copy up to len bytes out of the
+ * domain's input ring.  Returns the number of bytes actually copied
+ * (possibly fewer than len), or -1 with errno set to EIO if the ring
+ * header is corrupt. */
+static int readchn(struct connection *conn, void *data, unsigned int len)
+{
+ u32 avail;
+ const void *src;
+ struct ringbuf_head h;
+ bool was_full;
+
+ /* Must read head once, and before anything else, and verified. */
+ h = *conn->domain->input;
+ mb();
+
+ if (!check_buffer(&h)) {
+ errno = EIO;
+ return -1;
+ }
+
+ /* Work from the verified private copy; only buf is the shared area. */
+ src = get_input_chunk(&h, conn->domain->input->buf, &avail);
+ if (avail < len)
+ len = avail;
+
+ /* Remember whether the ring had no room before we drained it. */
+ was_full = !buffer_has_output_room(&h);
+ memcpy(data, src, len);
+ mb();
+ update_input_chunk(conn->domain->input, len);
+ /* FIXME: Probably not necessary. */
+ mb();
+
+ /* If it was full, tell them we've taken some. */
+ if (was_full)
+ xc_evtchn_send(*xc_handle, conn->domain->port);
+ return len;
+}
+
+/* talloc destructor for struct domain: take it off the domain list,
+ * unbind its event channel port and unmap the shared page.
+ * Always returns 0. */
+static int destroy_domain(void *_domain)
+{
+	struct domain *dom = _domain;
+
+	list_del(&dom->list);
+
+	if (dom->port) {
+		if (ioctl(eventchn_fd, EVENTCHN_UNBIND, dom->port) != 0)
+			eprintf("> Unbinding port %i failed!\n", dom->port);
+	}
+
+	if (dom->page)
+		munmap(dom->page, getpagesize());
+
+	return 0;
+}
+
+/* Look up a domain by event channel port; NULL if none matches. */
+static struct domain *find_domain(u16 port)
+{
+	struct domain *dom;
+
+	list_for_each_entry(dom, &domains, list) {
+		if (dom->port != port)
+			continue;
+		return dom;
+	}
+
+	return NULL;
+}
+
+/* Service an event channel notification: read the port number from
+ * event_fd, drain all pending input/output for the domain on that port,
+ * then (outside TESTING builds) write the port back to event_fd. */
+void handle_event(int event_fd)
+{
+ u16 port;
+ struct domain *domain;
+
+ if (read(event_fd, &port, sizeof(port)) != sizeof(port))
+ barf_perror("Failed to read from event fd");
+
+ /* We have to handle *all* the data available before we ack:
+ * careful that handle_input/handle_output can destroy conn.
+ */
+ /* Hence the domain is looked up afresh on every iteration. */
+ while ((domain = find_domain(port)) != NULL) {
+ if (!domain->conn->blocked && buffer_has_input(domain->input))
+ handle_input(domain->conn);
+ else if (domain->conn->out
+ && buffer_has_output_room(domain->output))
+ handle_output(domain->conn);
+ else
+ break;
+ }
+
+#ifndef TESTING
+ if (write(event_fd, &port, sizeof(port)) != sizeof(port))
+ barf_perror("Failed to write to event fd");
+#endif
+}
+
+/* Handle XS_INTRODUCE.  Arguments: domid, mfn, evtchn, path.
+ *
+ * Maps the domain's shared page, binds its event channel port and creates
+ * a connection for it.  The destructor is installed only once the domain
+ * is fully set up and on the list; earlier failures undo their own work,
+ * so the destructor never sees a half-initialized domain.
+ */
+bool do_introduce(struct connection *conn, struct buffered_data *in)
+{
+	struct domain *domain;
+	char *vec[4];
+	int saved_errno;
+
+	if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
+		return send_error(conn, EINVAL);
+
+	/* Hang domain off "in" until we're finished. */
+	domain = talloc(in, struct domain);
+	domain->domid = atoi(vec[0]);
+	domain->port = atoi(vec[2]);
+	if (!domain->port || !domain->domid)
+		return send_error(conn, EINVAL);
+	domain->path = talloc_strdup(domain, vec[3]);
+	domain->conn = NULL;
+	domain->page = xc_map_foreign_range(*xc_handle, domain->domid,
+					    getpagesize(),
+					    PROT_READ|PROT_WRITE,
+					    atol(vec[1]));
+	if (!domain->page)
+		return send_error(conn, errno);
+
+	/* One in each half of page. */
+	domain->input = domain->page;
+	domain->output = domain->page + getpagesize()/2;
+
+	/* Tell kernel we're interested in this event. */
+	if (ioctl(eventchn_fd, EVENTCHN_BIND, domain->port) != 0) {
+		saved_errno = errno;
+		munmap(domain->page, getpagesize());
+		return send_error(conn, saved_errno);
+	}
+
+	domain->conn = new_connection(writechn, readchn);
+	if (!domain->conn) {
+		/* new_connection() only fails for lack of memory. */
+		if (ioctl(eventchn_fd, EVENTCHN_UNBIND, domain->port) != 0)
+			eprintf("> Unbinding port %i failed!\n", domain->port);
+		munmap(domain->page, getpagesize());
+		return send_error(conn, ENOMEM);
+	}
+	domain->conn->domain = domain;
+
+	/* From here on the domain lives and dies with its connection. */
+	talloc_steal(domain->conn, domain);
+	list_add(&domain->list, &domains);
+	talloc_set_destructor(domain, destroy_domain);
+
+	return send_ack(conn, XS_INTRODUCE);
+}
+
+/* Look up a domain by its domain id; NULL if none matches. */
+static struct domain *find_domain_by_domid(domid_t domid)
+{
+	struct domain *dom;
+
+	list_for_each_entry(dom, &domains, list) {
+		if (dom->domid != domid)
+			continue;
+		return dom;
+	}
+
+	return NULL;
+}
+
+/* Handle XS_RELEASE.  Argument: domid.
+ *
+ * Frees the connection of the named domain (which frees the domain with
+ * it).  Replies EINVAL for a missing, zero or connectionless domid and
+ * ENOENT for an unknown one.  May free "conn".
+ */
+bool do_release(struct connection *conn, const char *domid_str)
+{
+	struct domain *domain;
+	domid_t domid;
+
+	if (!domid_str)
+		return send_error(conn, EINVAL);
+
+	domid = atoi(domid_str);
+	if (!domid)
+		return send_error(conn, EINVAL);
+
+	domain = find_domain_by_domid(domid);
+	if (!domain)
+		return send_error(conn, ENOENT);
+
+	if (!domain->conn)
+		return send_error(conn, EINVAL);
+
+	if (domain->conn == conn) {
+		/* Releasing ourselves: once freed there is no connection
+		 * left to carry an ack, and conn must not be touched. */
+		talloc_free(conn);
+		return false;
+	}
+
+	talloc_free(domain->conn);
+	return send_ack(conn, XS_RELEASE);
+}
+
+/* Handle XS_GETDOMAINPATH: reply with the store path of the given domid.
+ * A domid of 0 means the domain of the asking connection. */
+bool do_get_domain_path(struct connection *conn, const char *domid_str)
+{
+	struct domain *dom;
+	domid_t id;
+
+	if (domid_str == NULL)
+		return send_error(conn, EINVAL);
+
+	id = atoi(domid_str);
+	dom = (id == 0) ? conn->domain : find_domain_by_domid(id);
+	if (dom == NULL)
+		return send_error(conn, ENOENT);
+
+	/* The reply includes the terminating nul. */
+	return send_reply(conn, XS_GETDOMAINPATH, dom->path,
+			  strlen(dom->path) + 1);
+}
+
+/* talloc destructor for xc_handle: close the hypervisor interface whose
+ * handle is stored in the allocation.  Always returns 0. */
+static int close_xc_handle(void *_handle)
+{
+ xc_interface_close(*(int *)_handle);
+ return 0;
+}
+
+/* Set up domain communications: compute the ring size, open the
+ * hypervisor interface and the event channel device.  Any failure is
+ * reported through barf_perror().
+ * Returns the event channel handle. */
+int domain_init(void)
+{
+	int evfd;
+
+	/* The size of the ringbuffer: half a page minus head structure. */
+	ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
+
+	xc_handle = talloc(talloc_autofree_context(), int);
+	if (xc_handle == NULL)
+		barf_perror("Failed to allocate domain handle");
+
+	*xc_handle = xc_interface_open();
+	if (*xc_handle < 0)
+		barf_perror("Failed to open connection to hypervisor");
+	talloc_set_destructor(xc_handle, close_xc_handle);
+
+	/* Test builds use a fake event channel instead of the device. */
+#ifdef TESTING
+	evfd = fake_open_eventchn();
+#else
+	evfd = open("/dev/xen/evtchn", O_RDWR);
+#endif
+	eventchn_fd = evfd;
+	if (evfd < 0)
+		barf_perror("Failed to open connection to hypervisor");
+
+	return evfd;
+}
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
new file mode 100644
index 0000000000..20e85a54b5
--- /dev/null
+++ b/tools/xenstore/xenstored_domain.h
@@ -0,0 +1,38 @@
+/*
+ Domain communications for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_DOMAIN_H
+#define _XENSTORED_DOMAIN_H
+
+/* Service a notification on the event channel fd. */
+void handle_event(int event_fd);
+
+/* domid, mfn, eventchn, path */
+bool do_introduce(struct connection *conn, struct buffered_data *in);
+
+/* domid */
+bool do_release(struct connection *conn, const char *domid_str);
+
+/* domid */
+bool do_get_domain_path(struct connection *conn, const char *domid_str);
+
+/* Returns the event channel handle */
+int domain_init(void);
+
+/* Set the connection associated with a domain. */
+void domain_set_conn(struct domain *domain, struct connection *conn);
+
+#endif /* _XENSTORED_DOMAIN_H */
diff --git a/tools/xenstore/xenstored_test.h b/tools/xenstore/xenstored_test.h
new file mode 100644
index 0000000000..f173a5ca91
--- /dev/null
+++ b/tools/xenstore/xenstored_test.h
@@ -0,0 +1,37 @@
+/*
+ Testing replacements for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_TEST_H
+#define _XENSTORED_TEST_H
+
+/* In TESTING builds, several calls are redirected to test versions. */
+#ifdef TESTING
+/* Replaces write_all(). */
+bool test_write_all(int fd, void *contents, unsigned int len);
+#define write_all test_write_all
+
+/* Replaces mkdir(). */
+int test_mkdir(const char *dir, int perms);
+#define mkdir test_mkdir
+
+/* Stand-ins for the event channel device. */
+int fake_open_eventchn(void);
+void fake_block_events(void);
+void fake_ack_event(void);
+
+/* ioctl() is compiled out and always reports success (0). */
+#define ioctl(a,b,c) 0
+
+#endif
+
+#endif /* _XENSTORED_TEST_H */
diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
new file mode 100644
index 0000000000..ca37307f8c
--- /dev/null
+++ b/tools/xenstore/xenstored_transaction.c
@@ -0,0 +1,284 @@
+/*
+ Transaction code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include "talloc.h"
+#include "list.h"
+#include "xenstored_transaction.h"
+#include "xenstored_watch.h"
+#include "xs_lib.h"
+#include "utils.h"
+#include "xenstored_test.h"
+
+/* One node modified inside a transaction.  Entries are created by
+ * add_change_node() and walked by commit_transaction() to fire watches. */
+struct changed_node
+{
+ /* Linkage on the owning transaction's "changes" list. */
+ struct list_head list;
+
+ /* The name of the node (talloc'ed copy, child of this struct). */
+ char *node;
+};
+
+/* State of one in-progress transaction; at most one per connection. */
+struct transaction
+{
+ /* Linkage on the global "transactions" list below. */
+ struct list_head list;
+
+ /* My owner (conn->transaction == me). */
+ struct connection *conn;
+
+ /* Subtree this transaction covers */
+ char *node;
+
+ /* Directory holding this transaction's private copy of the subtree
+  * (created in do_transaction_start, removed by destroy_transaction). */
+ char *divert;
+
+ /* List of changed nodes (struct changed_node). */
+ struct list_head changes;
+
+ /* Someone's waiting: absolute time limit.  Unset (timerclear) until
+  * another connection blocks on us. */
+ struct timeval timeout;
+
+ /* We've timed out: a commit will be refused with ETIMEDOUT. */
+ bool destined_to_fail;
+};
+/* Every live transaction, in start order. */
+static LIST_HEAD(transactions);
+
+/* A NULL transaction covers every node; otherwise the node must lie
+ * inside the subtree the transaction was started on. */
+bool within_transaction(struct transaction *trans, const char *node)
+{
+	return !trans || is_child(node, trans->node);
+}
+
+/* This transaction is now blocking someone: arm its one-second deadline,
+ * unless a deadline is already running. */
+static void start_transaction_timeout(struct transaction *trans)
+{
+	if (!timerisset(&trans->timeout)) {
+		/* Deadline is "now + 1s". */
+		gettimeofday(&trans->timeout, NULL);
+		trans->timeout.tv_sec += 1;
+	}
+}
+
+/* Find the first live (not timed-out) transaction whose subtree overlaps
+ * node: either contains it, or is contained by it.  NULL if none. */
+struct transaction *transaction_covering_node(const char *node)
+{
+	struct transaction *trans;
+
+	list_for_each_entry(trans, &transactions, list) {
+		if (!trans->destined_to_fail
+		    && (is_child(trans->node, node)
+			|| is_child(node, trans->node)))
+			return trans;
+	}
+	return NULL;
+}
+
+/* If another connection's transaction overlaps node, record the node in
+ * conn->blocked, start that transaction's timeout and return true. */
+bool transaction_block(struct connection *conn, const char *node)
+{
+	struct transaction *blocker;
+
+	/* Transactions don't overlap, so a connection that is inside one
+	 * cannot be blocked by anybody else's. */
+	if (conn->transaction)
+		return false;
+
+	blocker = transaction_covering_node(node);
+	if (!blocker)
+		return false;
+
+	start_transaction_timeout(blocker);
+	conn->blocked = talloc_strdup(conn, node);
+	return true;
+}
+
+/* Record that node was changed inside trans (no-op outside a transaction
+ * or if the node is already recorded).  Callers do this before finishing
+ * the operation, so they never have to unwind e.g. a write. */
+void add_change_node(struct transaction *trans, const char *node)
+{
+	struct changed_node *change;
+
+	if (!trans)
+		return;
+
+	/* Each node appears on the list at most once. */
+	list_for_each_entry(change, &trans->changes, list) {
+		if (streq(change->node, node))
+			return;
+	}
+
+	change = talloc(trans, struct changed_node);
+	change->node = talloc_strdup(change, node);
+	INIT_LIST_HEAD(&change->list);
+	list_add_tail(&change->list, &trans->changes);
+}
+
+/* Map node to its location under the transaction's private directory:
+ * the transaction's subtree prefix is replaced by trans->divert.  The
+ * result is talloc'ed with node as its parent context. */
+char *node_dir_inside_transaction(struct transaction *trans, const char *node)
+{
+	const char *relative = node + strlen(trans->node);
+
+	return talloc_asprintf(node, "%s%s", trans->divert, relative);
+}
+
+/* Lower *tv to the earliest armed transaction deadline.  An unset *tv is
+ * treated as "no deadline yet"; it is left untouched if no transaction
+ * has a deadline armed. */
+void shortest_transaction_timeout(struct timeval *tv)
+{
+	struct transaction *trans;
+
+	list_for_each_entry(trans, &transactions, list) {
+		if (timerisset(&trans->timeout)
+		    && (!timerisset(tv) || timercmp(&trans->timeout, tv, <)))
+			*tv = trans->timeout;
+	}
+}
+
+/* Mark every transaction whose armed deadline lies in the past as
+ * destined to fail. */
+void check_transaction_timeout(void)
+{
+	struct timeval now;
+	struct transaction *trans;
+
+	gettimeofday(&now, NULL);
+
+	list_for_each_entry(trans, &transactions, list) {
+		if (timerisset(&trans->timeout)
+		    && timercmp(&trans->timeout, &now, <))
+			trans->destined_to_fail = true;
+	}
+}
+
+/* FIXME: Eliminate all uses of this */
+/* Run cmd via system().  True only if it exited normally with status 0;
+ * any other exit is reported as EIO, and a system() failure keeps the
+ * errno it set. */
+static bool do_command(const char *cmd)
+{
+	int status = system(cmd);
+
+	if (status == -1)
+		return false;
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
+		return true;
+
+	errno = EIO;
+	return false;
+}
+
+/* talloc destructor for a transaction (installed in do_transaction_start):
+ * unlink it from the global list and remove its private directory.
+ * Returns whatever destroy_path() returns. */
+static int destroy_transaction(void *_transaction)
+{
+ struct transaction *trans = _transaction;
+
+ list_del(&trans->list);
+ /* After a successful commit, divert points at the ".old" copy instead. */
+ return destroy_path(trans->divert);
+}
+
+/* Handle XS_TRANSACTION_START on the subtree "node".
+ * Fails with EBUSY if the connection already has a transaction, or with
+ * the errno from check_node_perms() if the node is not readable.  If an
+ * overlapping transaction exists the connection is marked blocked and
+ * true is returned without starting anything.  Otherwise the subtree is
+ * copied into a private directory and the transaction is attached to conn.
+ */
+bool do_transaction_start(struct connection *conn, const char *node)
+{
+ struct transaction *transaction;
+ char *dir, *cmd;
+
+ if (conn->transaction)
+ return send_error(conn, EBUSY);
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ dir = node_dir_outside_transaction(node);
+
+ /* Attach transaction to node for autofree until it's complete */
+ transaction = talloc(node, struct transaction);
+ transaction->node = talloc_strdup(transaction, node);
+ /* The private directory is named after the transaction's address. */
+ transaction->divert = talloc_asprintf(transaction, "%s/%p/",
+ xs_daemon_transactions(),
+ transaction);
+ /* NOTE(review): the command is built by string concatenation and run
+  * through system(); presumably node names are validated elsewhere so
+  * they cannot contain shell metacharacters -- confirm. */
+ cmd = talloc_asprintf(node, "cp -a %s %s", dir, transaction->divert);
+ if (!do_command(cmd))
+ corrupt(conn, "Creating transaction %s", transaction->divert);
+
+ /* Copy succeeded: hand ownership to the connection and publish it. */
+ talloc_steal(conn, transaction);
+ INIT_LIST_HEAD(&transaction->changes);
+ transaction->conn = conn;
+ timerclear(&transaction->timeout);
+ transaction->destined_to_fail = false;
+ list_add_tail(&transaction->list, &transactions);
+ conn->transaction = transaction;
+ /* Destructor is installed only now, once the list linkage is valid. */
+ talloc_set_destructor(transaction, destroy_transaction);
+ return send_ack(transaction->conn, XS_TRANSACTION_START);
+}
+
+/* Swap the transaction's private copy into place and fire watches for
+ * every node it changed.  Returns false (errno from rename) if the
+ * original directory could not be moved aside; in that case nothing has
+ * been changed. */
+static bool commit_transaction(struct transaction *trans)
+{
+ char *tmp, *dir;
+ struct changed_node *i;
+
+ /* Move: orig -> .old, repl -> orig. Cleanup deletes .old. */
+ dir = node_dir_outside_transaction(trans->node);
+ tmp = talloc_asprintf(trans, "%s.old", dir);
+
+ if (rename(dir, tmp) != 0)
+ return false;
+ /* The original is already moved aside, so failing here leaves the
+  * store without this subtree: reported via corrupt(). */
+ if (rename(trans->divert, dir) != 0)
+ corrupt(trans->conn, "Failed rename %s to %s",
+ trans->divert, dir);
+
+ /* destroy_transaction() removes trans->divert: point it at .old. */
+ trans->divert = tmp;
+
+ /* Fire off the watches for everything that changed. */
+ list_for_each_entry(i, &trans->changes, list)
+ fire_watches(NULL, i->node);
+ return true;
+}
+
+/* Handle XS_TRANSACTION_END.  arg is "T" to commit or "F" to abandon;
+ * anything else is EINVAL, and ENOENT is sent if no transaction is open.
+ * A commit is refused with ETIMEDOUT if the transaction timed out.  The
+ * transaction is freed in every case once it has been looked at; false
+ * is returned when the commit failed (the error was already sent). */
+bool do_transaction_end(struct connection *conn, const char *arg)
+{
+	bool commit;
+
+	if (!arg)
+		return send_error(conn, EINVAL);
+	commit = streq(arg, "T");
+	if (!commit && !streq(arg, "F"))
+		return send_error(conn, EINVAL);
+
+	if (!conn->transaction)
+		return send_error(conn, ENOENT);
+
+	if (commit) {
+		bool timed_out = conn->transaction->destined_to_fail;
+
+		if (timed_out || !commit_transaction(conn->transaction)) {
+			send_error(conn, timed_out ? ETIMEDOUT : errno);
+			talloc_free(conn->transaction);
+			conn->transaction = NULL;
+			return false;
+		}
+	}
+
+	talloc_free(conn->transaction);
+	conn->transaction = NULL;
+	return send_ack(conn, XS_TRANSACTION_END);
+}
+
diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
new file mode 100644
index 0000000000..a21bccad72
--- /dev/null
+++ b/tools/xenstore/xenstored_transaction.h
@@ -0,0 +1,50 @@
+/*
+ Transaction code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_TRANSACTION_H
+#define _XENSTORED_TRANSACTION_H
+#include "xenstored_core.h"
+
+struct transaction;
+
+bool do_transaction_start(struct connection *conn, const char *node);
+bool do_transaction_end(struct connection *conn, const char *arg);
+
+/* Is node covered by this transaction? */
+bool within_transaction(struct transaction *trans, const char *node);
+
+/* If a write op on this node is blocked by another connection's transaction,
+ * mark conn, setup transaction timeout and return true.
+ */
+bool transaction_block(struct connection *conn, const char *node);
+
+/* Return transaction which covers this node. */
+struct transaction *transaction_covering_node(const char *node);
+
+/* Return directory of node within transaction t. */
+char *node_dir_inside_transaction(struct transaction *t, const char *node);
+
+/* This node was changed: can fail and longjmp. */
+void add_change_node(struct transaction *trans, const char *node);
+
+/* Get shortest timeout: leave tv unset if none. */
+void shortest_transaction_timeout(struct timeval *tv);
+
+/* Have any transactions timed out yet? */
+void check_transaction_timeout(void);
+#endif /* _XENSTORED_TRANSACTION_H */
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
new file mode 100644
index 0000000000..2df83e1a54
--- /dev/null
+++ b/tools/xenstore/xenstored_watch.c
@@ -0,0 +1,279 @@
+/*
+ Watch code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include "talloc.h"
+#include "list.h"
+#include "xenstored_watch.h"
+#include "xs_lib.h"
+#include "utils.h"
+#include "xenstored_test.h"
+
+/* We create this if anyone is interested in "node", then we pass it from
+ * watch to watch as each connection acks it.
+ */
+struct watch_event
+{
+ /* Linkage on the events list of the watch we are firing for
+  * (watch->events). */
+ struct list_head list;
+
+ /* Watch we are currently attached to. */
+ struct watch *watch;
+
+ /* Outgoing XS_WATCH_EVENT message; its buffer holds the node name. */
+ struct buffered_data *data;
+};
+
+/* One registered watch: a connection's interest in a node and its children. */
+struct watch
+{
+ /* Linkage on the global "watches" list below. */
+ struct list_head list;
+ /* Position in the global list is decided by this (see insert_watch). */
+ unsigned int priority;
+
+ /* Current outstanding events applying to this watch. */
+ struct list_head events;
+
+ /* Watched node (talloc'ed copy). */
+ char *node;
+ /* Connection that registered the watch and receives its events. */
+ struct connection *conn;
+};
+/* All watches of all connections, kept in priority order. */
+static LIST_HEAD(watches);
+
+/* Rewind the event's output buffer so the message is sent again from
+ * its header. */
+static void reset_event(struct watch_event *event)
+{
+	struct buffered_data *data = event->data;
+
+	data->used = 0;
+	data->inhdr = true;
+}
+
+/* The peer answered with something other than an ACK: if a fully-sent
+ * event is awaiting acknowledgement, rewind it so it goes out again. */
+void reset_watch_event(struct connection *conn)
+{
+	if (!waiting_for_ack(conn))
+		return;
+
+	reset_event(conn->event);
+}
+
+/* True when the connection has a current event whose header and whole
+ * payload have already been written out. */
+bool waiting_for_ack(struct connection *conn)
+{
+	const struct buffered_data *data;
+
+	if (!conn->event)
+		return false;
+
+	data = conn->event->data;
+	return !data->inhdr && data->used == data->hdr.msg.len;
+}
+
+/* True if "out" is the buffer of this connection's current watch event
+ * (as opposed to an ordinary reply). */
+bool is_watch_event(struct connection *conn, struct buffered_data *out)
+{
+ return (conn->event && out == conn->event->data);
+}
+
+/* Look through our watches: if any of them have an event, queue it.
+ * Sets conn->out to the next thing to send, in this order of preference:
+ * a deferred reply, then the current event (unless it is fully sent and
+ * awaiting an ack), then the first pending event of one of conn's watches.
+ * Leaves conn->out untouched if there is nothing to send. */
+void queue_next_event(struct connection *conn)
+{
+ struct watch *watch;
+
+ /* We had a reply queued already? Send it. */
+ if (conn->waiting_reply) {
+ conn->out = conn->waiting_reply;
+ conn->waiting_reply = NULL;
+ return;
+ }
+
+ /* If we're waiting for ack, don't queue more. */
+ if (waiting_for_ack(conn))
+ return;
+
+ /* Find a good event to send. */
+ if (!conn->event) {
+ /* Global list is in priority order, so the first of our watches
+  * with something pending wins. */
+ list_for_each_entry(watch, &watches, list) {
+ if (watch->conn != conn)
+ continue;
+
+ conn->event = list_top(&watch->events,
+ struct watch_event, list);
+ if (conn->event)
+ break;
+ }
+ if (!conn->event)
+ return;
+ }
+
+ /* Either a freshly picked event or one that was reset for resending. */
+ conn->out = conn->event->data;
+}
+
+/* Watch on DIR applies to DIR, DIR/FILE, but not DIRLONG.
+ * The decision is delegated to is_child(node, watch->node). */
+static bool watch_applies(const struct watch *watch, const char *node)
+{
+ return is_child(node, watch->node);
+}
+
+/* First watch on the global list (any connection) that applies to node,
+ * or NULL if nobody is watching it. */
+static struct watch *find_watch(const char *node)
+{
+	struct watch *candidate;
+
+	list_for_each_entry(candidate, &watches, list)
+		if (watch_applies(candidate, node))
+			return candidate;
+
+	return NULL;
+}
+
+/* Like find_watch(), but resume the search with the entry following
+ * "watch" on the global list.  Returns NULL when no later watch applies. */
+static struct watch *find_next_watch(struct watch *watch, const char *node)
+{
+ list_for_each_entry_continue(watch, &watches, list) {
+ if (watch_applies(watch, node))
+ return watch;
+ }
+ return NULL;
+}
+
+/* FIXME: we fail to fire on out of memory. Should drop connections. */
+/* Announce a change of "node".  One XS_WATCH_EVENT is created and attached
+ * to the first applicable watch only; it reaches later watches one at a
+ * time through move_event_onwards().  Nothing is fired while inside a
+ * transaction (trans != NULL). */
+void fire_watches(struct transaction *trans, const char *node)
+{
+ struct watch *watch;
+ struct watch_event *event;
+
+ /* During transactions, don't fire watches. */
+ if (trans)
+ return;
+
+ watch = find_watch(node);
+ if (!watch)
+ return;
+
+ /* Create and fill in info about event. */
+ event = talloc(talloc_autofree_context(), struct watch_event);
+ event->data = new_buffer(event);
+ event->data->hdr.msg.type = XS_WATCH_EVENT;
+ /* Payload is the node name including its terminating NUL. */
+ event->data->hdr.msg.len = strlen(node) + 1;
+ event->data->buffer = talloc_strdup(event->data, node);
+
+ /* Tie event to this watch. */
+ event->watch = watch;
+ /* NOTE(review): list_add() inserts at the head while queue_next_event()
+  * takes the top entry, so several events queued while the connection is
+  * busy look like they are delivered newest-first -- confirm intended. */
+ list_add(&event->list, &watch->events);
+
+ /* If connection not doing anything, queue this. */
+ if (!watch->conn->out)
+ queue_next_event(watch->conn);
+}
+
+/* We're done with this event: see if anyone else wants it.
+ * The event is detached from its current watch, rewound, and attached to
+ * the next applicable watch on the global list; it is freed when no later
+ * watch applies. */
+static void move_event_onwards(struct watch_event *event)
+{
+ list_del(&event->list);
+ reset_event(event);
+
+ /* Remove from this watch, and find next watch to put this on. */
+ /* The event's buffer holds the node name it was fired for. */
+ event->watch = find_next_watch(event->watch, event->data->buffer);
+ if (!event->watch) {
+ talloc_free(event);
+ return;
+ }
+
+ list_add(&event->list, &event->watch->events);
+
+ /* If connection not doing anything, queue this. */
+ if (!event->watch->conn->out)
+ queue_next_event(event->watch->conn);
+}
+
+/* talloc destructor for a watch (installed in do_watch).  Always returns 0. */
+static int destroy_watch(void *_watch)
+{
+ struct watch *watch = _watch;
+ struct watch_event *event;
+
+ /* Forget about sending out or waiting for acks for this watch. */
+ if (watch->conn->event && watch->conn->event->watch == watch)
+ watch->conn->event = NULL;
+
+ /* If we have pending events, pass them on to others. */
+ /* Each call unlinks the event from this watch, so the loop terminates. */
+ while ((event = list_top(&watch->events, struct watch_event, list)))
+ move_event_onwards(event);
+
+ /* Remove from global list. */
+ /* Done last: move_event_onwards() searches onwards from this entry. */
+ list_del(&watch->list);
+ return 0;
+}
+
+/* We keep watches in priority order.
+ * The new watch goes in front of the first existing entry whose priority
+ * is less than or equal to its own, or at the very end if there is none.
+ * NOTE(review): this assumes Linux-style list.h semantics, where
+ * list_add_tail(new, pos) links new immediately before pos, giving a
+ * highest-priority-first list -- confirm against list.h. */
+static void insert_watch(struct watch *watch)
+{
+ struct watch *i;
+
+ list_for_each_entry(i, &watches, list) {
+ if (i->priority <= watch->priority) {
+ list_add_tail(&watch->list, &i->list);
+ return;
+ }
+ }
+
+ list_add_tail(&watch->list, &watches);
+}
+
+/* Handle XS_WATCH.  The request carries exactly two strings: the node to
+ * watch and the priority.  Sends EINVAL on a malformed request, or the
+ * errno from check_node_perms() if the node is not readable. */
+bool do_watch(struct connection *conn, struct buffered_data *in)
+{
+ struct watch *watch;
+ char *vec[2];
+
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
+ return send_error(conn, EINVAL);
+
+ if (!check_node_perms(conn, vec[0], XS_PERM_READ))
+ return send_error(conn, errno);
+
+ /* The watch is a talloc child of conn. */
+ watch = talloc(conn, struct watch);
+ watch->node = talloc_strdup(watch, vec[0]);
+ watch->conn = conn;
+ /* Base 0 accepts decimal, octal and hex; the text is not validated,
+  * so a non-numeric priority becomes 0. */
+ watch->priority = strtoul(vec[1], NULL, 0);
+ INIT_LIST_HEAD(&watch->events);
+
+ insert_watch(watch);
+ /* Destructor is installed only once the watch is on the global list. */
+ talloc_set_destructor(watch, destroy_watch);
+ return send_ack(conn, XS_WATCH);
+}
+
+/* Handle XS_WATCH_ACK: the connection has seen its current event, so hand
+ * the event to the next interested watch.  Sends ENOENT if no fully-sent
+ * event is outstanding. */
+bool do_watch_ack(struct connection *conn)
+{
+	struct watch_event *acked;
+
+	if (!waiting_for_ack(conn))
+		return send_error(conn, ENOENT);
+
+	/* Detach from the connection before passing the event on. */
+	acked = conn->event;
+	conn->event = NULL;
+	move_event_onwards(acked);
+
+	return send_ack(conn, XS_WATCH_ACK);
+}
+
+/* Handle XS_UNWATCH: free the first watch this connection holds on
+ * exactly "node".  Sends ENOENT if the connection has no such watch. */
+bool do_unwatch(struct connection *conn, const char *node)
+{
+	struct watch *watch;
+
+	list_for_each_entry(watch, &watches, list) {
+		if (watch->conn != conn || !streq(watch->node, node))
+			continue;
+
+		/* Freeing unlinks the watch; we leave the loop at once. */
+		talloc_free(watch);
+		return send_ack(conn, XS_UNWATCH);
+	}
+	return send_error(conn, ENOENT);
+}
diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h
new file mode 100644
index 0000000000..656ce4c36b
--- /dev/null
+++ b/tools/xenstore/xenstored_watch.h
@@ -0,0 +1,42 @@
+/*
+ Watch code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_WATCH_H
+#define _XENSTORED_WATCH_H
+#include "xenstored_core.h"
+
+bool do_watch(struct connection *conn, struct buffered_data *in);
+bool do_watch_ack(struct connection *conn);
+bool do_unwatch(struct connection *conn, const char *node);
+
+/* Is this a watch event message for this connection? */
+bool is_watch_event(struct connection *conn, struct buffered_data *out);
+
+/* Look through our watches: if any of them have an event, queue it. */
+void queue_next_event(struct connection *conn);
+
+/* Is this connection waiting for a watch acknowledgement? */
+bool waiting_for_ack(struct connection *conn);
+
+/* Reset event if we were sending one */
+void reset_watch_event(struct connection *conn);
+
+/* Fire all watches. */
+void fire_watches(struct transaction *trans, const char *node);
+
+#endif /* _XENSTORED_WATCH_H */
diff --git a/tools/xenstore/xs.c b/tools/xenstore/xs.c
new file mode 100644
index 0000000000..d5058abfb3
--- /dev/null
+++ b/tools/xenstore/xs.c
@@ -0,0 +1,551 @@
+/*
+ Xen Store Daemon interface providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdint.h>
+#include <errno.h>
+#include "xs.h"
+#include "xenstored.h"
+#include "xs_lib.h"
+#include "utils.h"
+
+/* Client-side connection to the store daemon. */
+struct xs_handle
+{
+ /* Connected socket; set to -1 by xs_talkv() after a failed exchange. */
+ int fd;
+};
+
+/* Get the socket from the store daemon handle.
+ * This is -1 once a failed exchange has closed the connection.
+ */
+int xs_fileno(struct xs_handle *h)
+{
+ return h->fd;
+}
+
+/* Connect to the Unix-domain stream socket at path connect_to.
+ * Returns a malloced handle, or NULL with errno set (ENAMETOOLONG if the
+ * path does not fit in sockaddr_un.sun_path).
+ */
+static struct xs_handle *get_socket(const char *connect_to)
+{
+	struct sockaddr_un addr;
+	int sock, saved_errno;
+	struct xs_handle *h;
+
+	/* sun_path is a fixed-size array: refuse paths that would overflow. */
+	if (strlen(connect_to) >= sizeof(addr.sun_path)) {
+		errno = ENAMETOOLONG;
+		return NULL;
+	}
+
+	sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (sock < 0)
+		return NULL;
+
+	/* Zero first so no uninitialised bytes are handed to connect(). */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, connect_to);	/* length checked above */
+
+	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
+		h = malloc(sizeof(*h));
+		if (h) {
+			h->fd = sock;
+			return h;
+		}
+	}
+
+	/* connect() or malloc() failed: close without disturbing errno. */
+	saved_errno = errno;
+	close(sock);
+	errno = saved_errno;
+	return NULL;
+}
+
+/* Connect to the daemon socket named by xs_daemon_socket().
+ * Returns a handle, or NULL with errno set. */
+struct xs_handle *xs_daemon_open(void)
+{
+ return get_socket(xs_daemon_socket());
+}
+
+/* Connect to the read-only daemon socket named by xs_daemon_socket_ro().
+ * Returns a handle, or NULL with errno set. */
+struct xs_handle *xs_daemon_open_readonly(void)
+{
+ return get_socket(xs_daemon_socket_ro());
+}
+
+/* Close the connection and free the handle; h must not be used afterwards. */
+void xs_daemon_close(struct xs_handle *h)
+{
+ /* fd is -1 if an earlier failed exchange already closed the socket. */
+ if (h->fd >= 0)
+ close(h->fd);
+ free(h);
+}
+
+/* Read exactly len bytes from fd into data, retrying on EINTR and on
+ * short reads.  Returns false with errno set on error; a premature end
+ * of file is reported as EBADF.
+ */
+static bool read_all(int fd, void *data, unsigned int len)
+{
+	/* Byte pointer: arithmetic on void * is a GNU extension. */
+	char *p = data;
+
+	while (len) {
+		ssize_t done = read(fd, p, len);
+
+		if (done < 0) {
+			if (errno == EINTR)
+				continue;
+			return false;
+		}
+		if (done == 0) {
+			/* It closed fd on us?  EBADF is appropriate. */
+			errno = EBADF;
+			return false;
+		}
+		p += done;
+		len -= done;
+	}
+
+	return true;
+}
+
+#ifdef XSTEST
+#define read_all read_all_choice
+#define write_all write_all_choice
+#endif
+
+static int get_error(const char *errorstring)
+{
+ unsigned int i;
+
+ for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++)
+ if (i == ARRAY_SIZE(xsd_errors) - 1)
+ return EINVAL;
+ return xsd_errors[i].errnum;
+}
+
+/* Read one message (header plus payload) from fd.
+ * Returns the malloced payload and stores the message type in *type and,
+ * if len is non-NULL, the payload length in *len.  The buffer is always
+ * one byte longer than the payload and NUL-terminated, so string replies
+ * (e.g. error names) can be used directly and an empty payload never
+ * turns into a failed malloc(0).  Returns NULL with errno set on failure.
+ */
+static void *read_reply(int fd, enum xsd_sockmsg_type *type, unsigned int *len)
+{
+	struct xsd_sockmsg msg;
+	char *ret;
+	size_t size;
+	int saved_errno;
+
+	if (!read_all(fd, &msg, sizeof(msg)))
+		return NULL;
+
+	/* Room for the terminator; guard against wrap-around. */
+	size = (size_t)msg.len + 1;
+	if (size < msg.len) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	ret = malloc(size);
+	if (!ret)
+		return NULL;
+
+	if (!read_all(fd, ret, msg.len)) {
+		saved_errno = errno;
+		free(ret);
+		errno = saved_errno;
+		return NULL;
+	}
+	ret[msg.len] = '\0';
+
+	*type = msg.type;
+	if (len)
+		*len = msg.len;
+	return ret;
+}
+
+/* Send message to xs, get malloc'ed reply. NULL and set errno on error.
+ * The request is a header of the given type followed by the num_vecs
+ * buffers in iovec.  If len is non-NULL it receives the reply length.
+ * An XS_ERROR reply is turned into NULL with the matching errno.  On an
+ * I/O failure the socket is closed and h->fd is set to -1. */
+static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type,
+ const struct iovec *iovec,
+ unsigned int num_vecs,
+ unsigned int *len)
+{
+ struct xsd_sockmsg msg;
+ void *ret = NULL;
+ int saved_errno;
+ unsigned int i;
+ struct sigaction ignorepipe, oldact;
+
+ msg.type = type;
+ msg.len = 0;
+ for (i = 0; i < num_vecs; i++)
+ msg.len += iovec[i].iov_len;
+
+ /* Ignore SIGPIPE for the duration of the exchange, so writing to a
+  * dead daemon fails with an error instead of killing the caller. */
+ ignorepipe.sa_handler = SIG_IGN;
+ sigemptyset(&ignorepipe.sa_mask);
+ ignorepipe.sa_flags = 0;
+ sigaction(SIGPIPE, &ignorepipe, &oldact);
+
+ if (!write_all(h->fd, &msg, sizeof(msg)))
+ goto fail;
+
+ for (i = 0; i < num_vecs; i++)
+ if (!write_all(h->fd, iovec[i].iov_base, iovec[i].iov_len))
+ goto fail;
+
+ /* Watches can have fired before reply comes: daemon detects
+ * and re-transmits, so we can ignore this. */
+ do {
+ /* Drops the previous (watch event) payload; NULL the first time. */
+ free(ret);
+ ret = read_reply(h->fd, &msg.type, len);
+ if (!ret)
+ goto fail;
+ } while (msg.type == XS_WATCH_EVENT);
+
+ sigaction(SIGPIPE, &oldact, NULL);
+ if (msg.type == XS_ERROR) {
+ /* Payload is the error name; map it to an errno value. */
+ saved_errno = get_error(ret);
+ free(ret);
+ errno = saved_errno;
+ return NULL;
+ }
+
+ assert(msg.type == type);
+ return ret;
+
+fail:
+ /* We're in a bad state, so close fd. */
+ saved_errno = errno;
+ sigaction(SIGPIPE, &oldact, NULL);
+ close(h->fd);
+ h->fd = -1;
+ errno = saved_errno;
+ return NULL;
+}
+
+/* free(), but don't change errno.
+ * For error paths where the caller still has to report the errno of the
+ * call that failed. */
+static void free_no_errno(void *p)
+{
+ int saved_errno = errno;
+ free(p);
+ errno = saved_errno;
+}
+
+/* Convenience form of xs_talkv() for requests whose whole payload is one
+ * NUL-terminated string (the terminator is sent too). */
+static void *xs_single(struct xs_handle *h, enum xsd_sockmsg_type type,
+		       const char *string, unsigned int *len)
+{
+	struct iovec request = {
+		.iov_base = (void *)string,
+		.iov_len = strlen(string) + 1,
+	};
+
+	return xs_talkv(h, type, &request, 1, len);
+}
+
+/* Reduce a reply to success/failure: a non-NULL reply means success and
+ * is freed here; NULL means the request failed. */
+static bool xs_bool(char *reply)
+{
+	bool ok = (reply != NULL);
+
+	free(reply);	/* no-op for NULL */
+	return ok;
+}
+
+/* Get contents of a directory.
+ * Returns a malloced array of *num entry names, or NULL with errno set.
+ * The pointer array and the strings live in one allocation, so a single
+ * free() on the result releases everything.
+ */
+char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num)
+{
+ char *strings, *p, **ret;
+ unsigned int len;
+
+ strings = xs_single(h, XS_DIRECTORY, path, &len);
+ if (!strings)
+ return NULL;
+
+ /* Count the strings. */
+ *num = count_strings(strings, len);
+
+ /* Transfer to one big alloc for easy freeing. */
+ /* Layout: *num pointers, followed by the len bytes of string data. */
+ ret = malloc(*num * sizeof(char *) + len);
+ if (!ret) {
+ free_no_errno(strings);
+ return NULL;
+ }
+ memcpy(&ret[*num], strings, len);
+ free_no_errno(strings);
+
+ /* Point each slot at the start of its string inside the copy;
+  * *num is recounted while walking. */
+ strings = (char *)&ret[*num];
+ for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
+ ret[(*num)++] = p;
+ return ret;
+}
+
+/* Get the value of a single file.
+ * Returns a malloced value: call free() on it after use.
+ * len indicates length in bytes (may be NULL if the caller does not
+ * need it).  Returns NULL with errno set on failure.
+ */
+void *xs_read(struct xs_handle *h, const char *path, unsigned int *len)
+{
+ return xs_single(h, XS_READ, path, len);
+}
+
+/* Write len bytes of data as the value of the file at path.
+ * createflags selects the creation mode: 0, O_CREAT or O_CREAT|O_EXCL;
+ * any other value fails with EINVAL without contacting the daemon.
+ * Returns false on failure.
+ */
+bool xs_write(struct xs_handle *h, const char *path,
+	      const void *data, unsigned int len, int createflags)
+{
+	const char *flags;
+
+	switch (createflags) {
+	case 0:
+		flags = XS_WRITE_NONE;
+		break;
+	case O_CREAT:
+		flags = XS_WRITE_CREATE;
+		break;
+	case O_CREAT|O_EXCL:
+		flags = XS_WRITE_CREATE_EXCL;
+		break;
+	default:
+		errno = EINVAL;
+		return false;
+	}
+
+	/* Wire format: path, flags (as string), then the raw data. */
+	struct iovec iovec[3] = {
+		{ .iov_base = (void *)path,  .iov_len = strlen(path) + 1 },
+		{ .iov_base = (void *)flags, .iov_len = strlen(flags) + 1 },
+		{ .iov_base = (void *)data,  .iov_len = len },
+	};
+
+	return xs_bool(xs_talkv(h, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+}
+
+/* Create a new directory.
+ * Sends XS_MKDIR with the path.
+ * Returns false on failure, with errno set.
+ */
+bool xs_mkdir(struct xs_handle *h, const char *path)
+{
+ return xs_bool(xs_single(h, XS_MKDIR, path, NULL));
+}
+
+/* Destroy a file or directory (directories must be empty).
+ * NOTE(review): the empty-directory rule is enforced, if at all, by the
+ * daemon; this function only sends XS_RM with the path -- confirm.
+ * Returns false on failure, with errno set.
+ */
+bool xs_rm(struct xs_handle *h, const char *path)
+{
+ return xs_bool(xs_single(h, XS_RM, path, NULL));
+}
+
+/* Get permissions of node (first element is owner).
+ * Returns malloced array of *num entries, or NULL with errno set:
+ * call free() after use.
+ */
+struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+					  const char *path,
+					  unsigned int *num)
+{
+	char *strings;
+	unsigned int len;
+	struct xs_permissions *ret;
+
+	strings = xs_single(h, XS_GET_PERMS, path, &len);
+	if (!strings)
+		return NULL;
+
+	/* Count the strings: each one perms then domid. */
+	*num = count_strings(strings, len);
+
+	ret = malloc(*num * sizeof(struct xs_permissions));
+	if (ret && !strings_to_perms(ret, *num, strings)) {
+		free_no_errno(ret);
+		ret = NULL;
+	}
+
+	/* We may be on a failure path here: releasing the reply must not
+	 * clobber the errno the caller is about to inspect. */
+	free_no_errno(strings);
+	return ret;
+}
+
+/* Set permissions of node (must be owner).
+ * The request carries the path followed by one string per permission.
+ * Returns false on failure, with errno set.
+ */
+bool xs_set_permissions(struct xs_handle *h, const char *path,
+ struct xs_permissions *perms,
+ unsigned int num_perms)
+{
+ unsigned int i;
+ /* Slot 0 is the path; slots 1..num_perms are the permission strings. */
+ struct iovec iov[1+num_perms];
+
+ iov[0].iov_base = (void *)path;
+ iov[0].iov_len = strlen(path) + 1;
+
+ for (i = 0; i < num_perms; i++) {
+ /* NOTE(review): presumably MAX_STRLEN(domid_t)+1 leaves room for the
+  * permission letter written by perm_to_string() -- confirm. */
+ char buffer[MAX_STRLEN(domid_t)+1];
+
+ if (!perm_to_string(&perms[i], buffer))
+ goto unwind;
+
+ /* Each string needs its own heap copy: buffer is reused per round. */
+ iov[i+1].iov_base = strdup(buffer);
+ iov[i+1].iov_len = strlen(buffer) + 1;
+ if (!iov[i+1].iov_base)
+ goto unwind;
+ }
+
+ if (!xs_bool(xs_talkv(h, XS_SET_PERMS, iov, 1+num_perms, NULL)))
+ goto unwind;
+ for (i = 0; i < num_perms; i++)
+ free(iov[i+1].iov_base);
+ return true;
+
+unwind:
+ /* i is the number of strings successfully duplicated so far. */
+ num_perms = i;
+ for (i = 0; i < num_perms; i++)
+ free_no_errno(iov[i+1].iov_base);
+ return false;
+}
+
+/* Watch a node for changes (poll on fd to detect, or call xs_read_watch()).
+ * When the node (or any child) changes, fd will become readable.
+ * Priority indicates order if multiple watchers: higher is first.
+ * Returns false on failure, with errno set.
+ */
+bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority)
+{
+	char prio[MAX_STRLEN(priority)];
+	struct iovec iov[2];
+
+	/* Bounded formatting: never write past the end of prio. */
+	snprintf(prio, sizeof(prio), "%u", priority);
+	iov[0].iov_base = (void *)path;
+	iov[0].iov_len = strlen(path) + 1;
+	iov[1].iov_base = prio;
+	iov[1].iov_len = strlen(prio) + 1;
+
+	return xs_bool(xs_talkv(h, XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
+}
+
+/* Find out what node change was on (will block if nothing pending).
+ * Returns malloced, NUL-terminated path, or NULL with errno set:
+ * call free() after use.
+ */
+char *xs_read_watch(struct xs_handle *h)
+{
+	struct xsd_sockmsg msg;
+	char *path;
+	size_t size;
+
+	if (!read_all(h->fd, &msg, sizeof(msg)))
+		return NULL;
+
+	assert(msg.type == XS_WATCH_EVENT);
+
+	/* One spare byte so the result is a valid C string even if the
+	 * payload is not terminated; guard against wrap-around. */
+	size = (size_t)msg.len + 1;
+	if (size < msg.len) {
+		errno = ENOMEM;
+		return NULL;
+	}
+	path = malloc(size);
+	if (!path)
+		return NULL;
+
+	if (!read_all(h->fd, path, msg.len)) {
+		free_no_errno(path);
+		return NULL;
+	}
+	path[msg.len] = '\0';
+	return path;
+}
+
+/* Acknowledge watch on node. Watches must be acknowledged before
+ * any other watches can be read.
+ * Sends XS_WATCH_ACK with the fixed payload "OK".
+ * Returns false on failure, with errno set.
+ */
+bool xs_acknowledge_watch(struct xs_handle *h)
+{
+ return xs_bool(xs_single(h, XS_WATCH_ACK, "OK", NULL));
+}
+
+/* Remove a watch on a node.
+ * Sends XS_UNWATCH with the path.
+ * Returns false on failure (no watch on that node), with errno set.
+ */
+bool xs_unwatch(struct xs_handle *h, const char *path)
+{
+ return xs_bool(xs_single(h, XS_UNWATCH, path, NULL));
+}
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ * Transaction only applies to the given subtree.
+ * You can only have one transaction at any time.
+ * Sends XS_TRANSACTION_START with the subtree path.
+ * Returns false on failure, with errno set.
+ */
+bool xs_transaction_start(struct xs_handle *h, const char *subtree)
+{
+ return xs_bool(xs_single(h, XS_TRANSACTION_START, subtree, NULL));
+}
+
+/* End the current transaction.
+ * If abort is true the transaction is discarded ("F" is sent); otherwise
+ * it is committed ("T" is sent).
+ * Returns false on failure, which indicates an error: transactions will
+ * not fail spuriously.
+ */
+bool xs_transaction_end(struct xs_handle *h, bool abort)
+{
+	const char *verdict = abort ? "F" : "T";
+
+	return xs_bool(xs_single(h, XS_TRANSACTION_END, verdict, NULL));
+}
+
+/* Introduce a new domain.
+ * This tells the store daemon about a shared memory page and event channel
+ * associated with a domain: the domain uses these to communicate.
+ * The request carries four strings: domid, mfn, event channel and path.
+ * Returns false on failure, with errno set.
+ */
+bool xs_introduce_domain(struct xs_handle *h,
+			 domid_t domid,
+			 unsigned long mfn,
+			 unsigned int eventchn,
+			 const char *path)
+{
+	char domid_str[MAX_STRLEN(domid)];
+	char mfn_str[MAX_STRLEN(mfn)];
+	char eventchn_str[MAX_STRLEN(eventchn)];
+	struct iovec iov[4];
+
+	/* Bounded formatting: never write past the end of a buffer. */
+	snprintf(domid_str, sizeof(domid_str), "%u", domid);
+	snprintf(mfn_str, sizeof(mfn_str), "%lu", mfn);
+	snprintf(eventchn_str, sizeof(eventchn_str), "%u", eventchn);
+
+	iov[0].iov_base = domid_str;
+	iov[0].iov_len = strlen(domid_str) + 1;
+	iov[1].iov_base = mfn_str;
+	iov[1].iov_len = strlen(mfn_str) + 1;
+	iov[2].iov_base = eventchn_str;
+	iov[2].iov_len = strlen(eventchn_str) + 1;
+	iov[3].iov_base = (char *)path;
+	iov[3].iov_len = strlen(path) + 1;
+
+	return xs_bool(xs_talkv(h, XS_INTRODUCE, iov, ARRAY_SIZE(iov), NULL));
+}
+
+/* Ask the daemon to release the given domain (XS_RELEASE with the domid
+ * as a decimal string).  Returns false on failure, with errno set.
+ */
+bool xs_release_domain(struct xs_handle *h,
+		       domid_t domid)
+{
+	char domid_str[MAX_STRLEN(domid)];
+
+	/* Bounded formatting: never write past the end of domid_str. */
+	snprintf(domid_str, sizeof(domid_str), "%u", domid);
+
+	return xs_bool(xs_single(h, XS_RELEASE, domid_str, NULL));
+}
+
+/* Ask the daemon to shut down (XS_SHUTDOWN with an empty string).
+ * Returns false, with errno set, if the request failed. */
+bool xs_shutdown(struct xs_handle *h)
+{
+ bool ret = xs_bool(xs_single(h, XS_SHUTDOWN, "", NULL));
+ if (ret) {
+ char c;
+ /* Wait for it to actually shutdown. */
+ /* Result deliberately ignored: we only wait for the read to return,
+  * presumably once the daemon closes its end. */
+ read(h->fd, &c, 1);
+ }
+ return ret;
+}
+
+/* Only useful for DEBUG versions */
+/* Send an XS_DEBUG request made of the command name (as a string)
+ * followed by len bytes of data.  Returns the malloced reply, or NULL
+ * with errno set. */
+char *xs_debug_command(struct xs_handle *h, const char *cmd,
+		       void *data, unsigned int len)
+{
+	struct iovec iov[2] = {
+		{ .iov_base = (void *)cmd, .iov_len = strlen(cmd) + 1 },
+		{ .iov_base = data,        .iov_len = len },
+	};
+
+	return xs_talkv(h, XS_DEBUG, iov, ARRAY_SIZE(iov), NULL);
+}
diff --git a/tools/xenstore/xs.h b/tools/xenstore/xs.h
new file mode 100644
index 0000000000..ff9481c3a6
--- /dev/null
+++ b/tools/xenstore/xs.h
@@ -0,0 +1,146 @@
+#ifndef _XS_H
+#define _XS_H
+/*
+ Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* On failure, these routines set errno. */
+#include "xs_lib.h"
+
+struct xs_handle;
+
+/* Connect to the xs daemon.
+ * Returns a handle or NULL.
+ */
+struct xs_handle *xs_daemon_open(void);
+
+/* Connect to the xs daemon (readonly for non-root clients).
+ * Returns a handle or NULL.
+ */
+struct xs_handle *xs_daemon_open_readonly(void);
+
+/* Close the connection to the xs daemon. */
+void xs_daemon_close(struct xs_handle *);
+
+/* Get contents of a directory.
+ * Returns a malloced array: call free() on it after use.
+ * Num indicates size.
+ */
+char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num);
+
+/* Get the value of a single file.
+ * Returns a malloced value: call free() on it after use.
+ * len indicates length in bytes.
+ */
+void *xs_read(struct xs_handle *h, const char *path, unsigned int *len);
+
+/* Write the value of a single file.
+ * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ */
+bool xs_write(struct xs_handle *h, const char *path, const void *data, unsigned int len,
+ int createflags);
+
+/* Create a new directory.
+ * Returns false on failure.
+ */
+bool xs_mkdir(struct xs_handle *h, const char *path);
+
+/* Destroy a file or directory (and children).
+ * Returns false on failure.
+ */
+bool xs_rm(struct xs_handle *h, const char *path);
+
+/* Get permissions of node (first element is owner, first perms is "other").
+ * Returns malloced array, or NULL: call free() after use.
+ */
+struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+ const char *path,
+ unsigned int *num);
+
+/* Set permissions of node (must be owner).
+ * Returns false on failure.
+ */
+bool xs_set_permissions(struct xs_handle *h,
+ const char *path,
+ struct xs_permissions *perms,
+ unsigned int num_perms);
+
+/* Watch a node for changes (poll on fd to detect, or call xs_read_watch()).
+ * When the node (or any child) changes, fd will become readable.
+ * Priority indicates order if multiple watchers: higher is first.
+ * Returns false on failure.
+ */
+bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority);
+
+/* Return the FD to poll on to see if a watch has fired. */
+int xs_fileno(struct xs_handle *h);
+
+/* Find out what node change was on (will block if nothing pending).
+ * Returns malloced path, or NULL: call free() after use.
+ */
+char *xs_read_watch(struct xs_handle *h);
+
+/* Acknowledge watch on node. Watches must be acknowledged before
+ * any other watches can be read.
+ * Returns false on failure.
+ */
+bool xs_acknowledge_watch(struct xs_handle *h);
+
+/* Remove a watch on a node.
+ * Returns false on failure (no watch on that node).
+ */
+bool xs_unwatch(struct xs_handle *h, const char *path);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ * Transaction only applies to the given subtree.
+ * You can only have one transaction at any time.
+ * Returns false on failure.
+ */
+bool xs_transaction_start(struct xs_handle *h, const char *subtree);
+
+/* End a transaction.
+ * If abort is true, transaction is discarded instead of committed.
+ * Returns false on failure, which indicates an error: transactions will
+ * not fail spuriously.
+ */
+bool xs_transaction_end(struct xs_handle *h, bool abort);
+
+/* Introduce a new domain.
+ * This tells the store daemon about a shared memory page, event channel
+ * and store path associated with a domain: the domain uses these to communicate.
+ */
+bool xs_introduce_domain(struct xs_handle *h,
+ domid_t domid,
+ unsigned long mfn,
+ unsigned int eventchn,
+ const char *path);
+
+/* Release a domain.
+ * Tells the store daemon to release the memory page to the domain.
+ */
+bool xs_release_domain(struct xs_handle *h, domid_t domid);
+
+/* Only useful for DEBUG versions */
+char *xs_debug_command(struct xs_handle *h, const char *cmd,
+ void *data, unsigned int len);
+
+/* Shut down the daemon. */
+bool xs_shutdown(struct xs_handle *h);
+
+#endif /* _XS_H */
diff --git a/tools/xenstore/xs_lib.c b/tools/xenstore/xs_lib.c
new file mode 100644
index 0000000000..8630eaffce
--- /dev/null
+++ b/tools/xenstore/xs_lib.c
@@ -0,0 +1,141 @@
+#include "xs_lib.h"
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/* Common routines for the Xen store daemon and client library. */
+
+/* Directory holding the store: $XENSTORED_ROOTDIR if set, else the
+ * built-in default. */
+static const char *xs_daemon_rootdir(void)
+{
+	const char *override = getenv("XENSTORED_ROOTDIR");
+
+	if (override)
+		return override;
+	return "/var/lib/xenstored";
+}
+
+/* Directory holding the daemon's sockets: $XENSTORED_RUNDIR if set, else
+ * the built-in default. */
+static const char *xs_daemon_rundir(void)
+{
+	const char *override = getenv("XENSTORED_RUNDIR");
+
+	if (override)
+		return override;
+	return "/var/run/xenstored";
+}
+
+/* Path of the daemon's read/write socket.
+ * Returns a pointer to a static buffer, overwritten by the next call.
+ */
+const char *xs_daemon_socket(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The run directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/socket", xs_daemon_rundir());
+	return buf;
+}
+
+/* Path of the daemon's read-only socket.
+ * Returns a pointer to a static buffer, overwritten by the next call.
+ */
+const char *xs_daemon_socket_ro(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The run directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/socket_ro", xs_daemon_rundir());
+	return buf;
+}
+
+/* Path of the store directory under the daemon's root directory.
+ * Returns a pointer to a static buffer, overwritten by the next call.
+ */
+const char *xs_daemon_store(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The root directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/store", xs_daemon_rootdir());
+	return buf;
+}
+
+/* Path of the transactions directory under the daemon's root directory.
+ * Returns a pointer to a static buffer, overwritten by the next call.
+ */
+const char *xs_daemon_transactions(void)
+{
+	static char buf[PATH_MAX];
+
+	/* The root directory can come from the environment, so bound the
+	 * write: an over-long value is truncated instead of overflowing. */
+	snprintf(buf, sizeof(buf), "%s/transactions", xs_daemon_rootdir());
+	return buf;
+}
+
+/* Simple routines for writing to sockets, etc. */
+/* Write exactly len bytes of data to fd, retrying after short writes and
+ * after EINTR.
+ * Returns true once everything is written; false if write() fails for any
+ * other reason (errno is left as write() set it) or reports zero progress.
+ */
+bool write_all(int fd, const void *data, unsigned int len)
+{
+	/* Walk a char pointer: arithmetic on void * is a GNU extension. */
+	const char *p = data;
+
+	while (len) {
+		ssize_t done = write(fd, p, len);
+
+		if (done < 0 && errno == EINTR)
+			continue;
+		if (done <= 0)
+			return false;
+		p += done;
+		len -= done;
+	}
+
+	return true;
+}
+
+/* Convert strings to permissions. False if a problem. */
+/* Parse num consecutive nul-terminated permission strings into perms[].
+ * Each string is one letter ('r', 'w', 'b' for both, 'n' for none)
+ * followed by a numeric id.  Returns false with errno = EINVAL on a
+ * malformed entry.
+ */
+bool strings_to_perms(struct xs_permissions *perms, unsigned int num,
+		      const char *strings)
+{
+	const char *cursor = strings;
+	unsigned int n;
+
+	for (n = 0; n < num; n++) {
+		char *stop;
+
+		if (*cursor == 'r') {
+			perms[n].perms = XS_PERM_READ;
+		} else if (*cursor == 'w') {
+			perms[n].perms = XS_PERM_WRITE;
+		} else if (*cursor == 'b') {
+			perms[n].perms = XS_PERM_READ|XS_PERM_WRITE;
+		} else if (*cursor == 'n') {
+			perms[n].perms = XS_PERM_NONE;
+		} else {
+			errno = EINVAL;
+			return false;
+		}
+
+		/* The id must be non-empty and run up to the terminator. */
+		cursor++;
+		perms[n].id = strtol(cursor, &stop, 0);
+		if (*stop != '\0' || *cursor == '\0') {
+			errno = EINVAL;
+			return false;
+		}
+
+		/* Step over the nul to the next string. */
+		cursor = stop + 1;
+	}
+	return true;
+}
+
+/* Convert permissions to a string (up to len MAX_STRLEN(domid_t)+1). */
+bool perm_to_string(const struct xs_permissions *perm, char *buffer)
+{
+	char letter;
+
+	/* Only the four read/write combinations have a letter; anything
+	 * else is rejected without touching buffer. */
+	if (perm->perms == XS_PERM_WRITE) {
+		letter = 'w';
+	} else if (perm->perms == XS_PERM_READ) {
+		letter = 'r';
+	} else if (perm->perms == (XS_PERM_READ|XS_PERM_WRITE)) {
+		letter = 'b';
+	} else if (perm->perms == XS_PERM_NONE) {
+		letter = 'n';
+	} else {
+		errno = EINVAL;
+		return false;
+	}
+
+	/* Letter first, then the id in decimal (nul-terminated). */
+	buffer[0] = letter;
+	sprintf(buffer + 1, "%i", (int)perm->id);
+	return true;
+}
+
+/* Given a string and a length, count how many strings (nul terms). */
+/* Count the nul-terminated strings packed into the first len bytes of
+ * strings.  The scan never reads beyond len bytes: a final fragment with
+ * no terminator inside the buffer is counted as one string.
+ */
+unsigned int count_strings(const char *strings, unsigned int len)
+{
+	const char *p = strings;
+	const char *end = strings + len;
+	unsigned int num = 0;
+
+	while (p < end) {
+		/* Bounded search, unlike strlen() which could run off the end. */
+		const char *nul = memchr(p, '\0', end - p);
+
+		num++;
+		if (!nul)
+			break;
+		p = nul + 1;
+	}
+
+	return num;
+}
+
diff --git a/tools/xenstore/xs_lib.h b/tools/xenstore/xs_lib.h
new file mode 100644
index 0000000000..a946ab0b19
--- /dev/null
+++ b/tools/xenstore/xs_lib.h
@@ -0,0 +1,63 @@
+#ifndef _XR_LIB_H
+#define _XR_LIB_H
+/*
+ Common routines between Xen store user library and daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <stdbool.h>
+#include <limits.h>
+#include <xc.h>
+
+/* Bitmask of permissions.
+ * READ and WRITE may be OR-ed together; the string conversions in this
+ * library map NONE/READ/WRITE/READ|WRITE to the letters n/r/w/b.
+ */
+enum xs_perm_type {
+ XS_PERM_NONE = 0,
+ XS_PERM_READ = 1,
+ XS_PERM_WRITE = 2,
+ /* Internal use. */
+ XS_PERM_CREATE = 4,
+ XS_PERM_OWNER = 8,
+};
+
+/* One permission entry: an id paired with the access bits granted to it. */
+struct xs_permissions
+{
+ /* Id the entry applies to (presumably a domain id, given the type). */
+ domid_t id;
+ /* Bitmask of enum xs_perm_type values. */
+ enum xs_perm_type perms;
+};
+
+/* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. */
+#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2)
+
+/* Path for various daemon things: env vars can override. */
+const char *xs_daemon_socket(void);
+const char *xs_daemon_socket_ro(void);
+const char *xs_daemon_store(void);
+const char *xs_daemon_transactions(void);
+
+/* Simple write function: loops for you. */
+bool write_all(int fd, const void *data, unsigned int len);
+
+/* Convert strings to permissions. False if a problem. */
+bool strings_to_perms(struct xs_permissions *perms, unsigned int num,
+ const char *strings);
+
+/* Convert permissions to a string (up to len MAX_STRLEN(domid_t)+1). */
+bool perm_to_string(const struct xs_permissions *perm, char *buffer);
+
+/* Given a string and a length, count how many strings (nul terms). */
+unsigned int count_strings(const char *strings, unsigned int len);
+
+#endif /* _XR_LIB_H */
diff --git a/tools/xenstore/xs_random.c b/tools/xenstore/xs_random.c
new file mode 100644
index 0000000000..ef5d44d0b0
--- /dev/null
+++ b/tools/xenstore/xs_random.c
@@ -0,0 +1,1646 @@
+/* Random tests.
+
+ We check that the results from a real filesystem are the same.
+*/
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include "xs.h"
+#include "talloc.h"
+#include "utils.h"
+
+/* Table of store operations behind a common, handle-agnostic signature.
+ * Two instances are defined in this file: one backed by plain files and
+ * one backed by the xs client library, so the same operation sequence
+ * can be run against both.  The handle is passed as void *.
+ */
+struct ops
+{
+ /* Name of the backend. */
+ char *name;
+
+ /* List the children of path; *num receives the entry count. */
+ char **(*dir)(void *h, const char *path, unsigned int *num);
+
+ /* Read the value at path; *len receives its length in bytes. */
+ void *(*read)(void *h, const char *path, unsigned int *len);
+
+ /* Write len bytes of data to path, subject to createflags. */
+ bool (*write)(void *h, const char *path, const void *data,
+ unsigned int len, int createflags);
+
+ bool (*mkdir)(void *h, const char *path);
+
+ bool (*rm)(void *h, const char *path);
+
+ /* Fetch the permission list of path; *num receives its length. */
+ struct xs_permissions *(*get_perms)(void *h,
+ const char *path,
+ unsigned int *num);
+
+ /* Replace the permission list of path with perms[0..num). */
+ bool (*set_perms)(void *h,
+ const char *path,
+ struct xs_permissions *perms,
+ unsigned int num);
+
+ bool (*transaction_start)(void *h, const char *subtree);
+ bool (*transaction_end)(void *h, bool abort);
+
+ /* Create and destroy a new handle. */
+ void *(*handle)(const char *path);
+ void (*close)(void *);
+};
+
+/* Handle state for the file-backed implementation of struct ops. */
+struct file_ops_info
+{
+ /* Directory that holds the file-backed store. */
+ const char *base;
+ /* "<base>.transact" copy used while a transaction is open, else NULL. */
+ char *transact_base;
+ /* Subtree the open transaction applies to, else NULL. */
+ char *transact;
+};
+
+/* Turn the regular file dirname into a directory of the same name: the
+ * old contents become "<dirname>/.DATA" and any "<dirname>.perms" file
+ * moves to "<dirname>/.perms".  dirname doubles as the talloc context for
+ * the temporary path strings.
+ */
+static void convert_to_dir(const char *dirname)
+{
+	char *tmpname = talloc_asprintf(dirname, "%s.tmp", dirname);
+	char *datafile, *oldperms, *newperms;
+
+	/* Park the file under a temporary name to free up the path. */
+	if (rename(dirname, tmpname) != 0)
+		barf_perror("Failed to rename %s to %s", dirname, tmpname);
+	if (mkdir(dirname, 0700) != 0)
+		barf_perror("Failed to mkdir %s", dirname);
+
+	datafile = talloc_asprintf(dirname, "%s/.DATA", dirname);
+	if (rename(tmpname, datafile) != 0)
+		barf_perror("Failed to rename into %s", dirname);
+
+	/* If perms exists, move it in; failure here is ignored. */
+	oldperms = talloc_asprintf(dirname, "%s.perms", dirname);
+	newperms = talloc_asprintf(dirname, "%s/.perms", dirname);
+	rename(oldperms, newperms);
+}
+
+/* Files can be used as dirs, too. Convert them when they are. */
+/* Files can be used as dirs, too: if the parent component of filename
+ * currently exists as a regular file, convert it into a directory.
+ * filename doubles as the talloc context for the temporary string.
+ */
+static void maybe_convert_to_directory(const char *filename)
+{
+	struct stat st;
+	const char *slash = strrchr(filename, '/');
+	char *dirname;
+
+	/* No parent component at all: nothing could need converting. */
+	if (!slash)
+		return;
+
+	/* The "%.*s" precision must be an int, not a ptrdiff_t. */
+	dirname = talloc_asprintf(filename, "%.*s",
+				  (int)(slash - filename), filename);
+	if (lstat(dirname, &st) == 0 && S_ISREG(st.st_mode))
+		convert_to_dir(dirname);
+}
+
+/* Map a store path to a filesystem name: the path is appended to the
+ * transaction copy when one is open, otherwise to the base directory.
+ * The result is allocated with path as its talloc context.
+ */
+static char *get_name(struct file_ops_info *info, const char *path)
+{
+	const char *root = info->transact_base ? info->transact_base
+					       : info->base;
+
+	return talloc_asprintf(path, "%s%s", root, path);
+}
+
+/* Like get_name(), but first makes sure the parent of the resulting name
+ * is a directory rather than a plain file.
+ */
+static char *path_to_name(struct file_ops_info *info, const char *path)
+{
+	char *name = get_name(info, path);
+
+	maybe_convert_to_directory(name);
+	return name;
+}
+
+/* Is child a subnode of parent, or equal? */
+static bool is_child(const char *child, const char *parent)
+{
+	unsigned int plen;
+
+	/* / should really be "" for this algorithm to work, but that's a
+	 * usability nightmare, so treat it as the ancestor of everything. */
+	if (streq(parent, "/"))
+		return true;
+
+	plen = strlen(parent);
+	if (strncmp(child, parent, plen) != 0)
+		return false;
+
+	/* The prefix must end on a path-component boundary. */
+	switch (child[plen]) {
+	case '/':
+	case '\0':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* May path be modified right now?  With a transaction open, only paths
+ * inside the transaction's subtree are writable; anything else fails
+ * with errno = EROFS.
+ */
+static bool write_ok(struct file_ops_info *info, const char *path)
+{
+	if (!info->transact || is_child(path, info->transact))
+		return true;
+
+	errno = EROFS;
+	return false;
+}
+
+/* List the children of path in the file-backed store.
+ * Returns one malloc()ed block holding *num string pointers followed by
+ * the strings themselves (release with a single free()), or NULL with
+ * errno set on failure.  Names containing a '.' are skipped: those are
+ * the bookkeeping files (.DATA, .perms, *.perms, *.tmp).
+ */
+static char **file_directory(struct file_ops_info *info,
+			     const char *path, unsigned int *num)
+{
+	char **ret;
+	DIR *dir;
+	struct dirent *dirent;
+	char *p, *dirname = path_to_name(info, path);
+	unsigned int i, len = 0;
+	size_t bytes;
+	struct stat st;
+
+	/* If it exists, but isn't a directory, we convert it. */
+	if (lstat(dirname, &st) == 0 && !S_ISDIR(st.st_mode))
+		convert_to_dir(dirname);
+
+	*num = 0;
+	dir = opendir(dirname);
+	if (!dir)
+		return NULL;
+
+	/* Once to count them. */
+	while ((dirent = readdir(dir)) != NULL) {
+		if (strchr(dirent->d_name, '.'))
+			continue;
+		len += strlen(dirent->d_name) + 1;
+		(*num)++;
+	}
+	rewinddir(dir);
+
+	/* Now allocate and fill in.  Never ask for zero bytes: an empty
+	 * directory must still yield a non-NULL result. */
+	bytes = sizeof(char *) * *num + len;
+	ret = malloc(bytes ? bytes : 1);
+	if (!ret) {
+		closedir(dir);
+		errno = ENOMEM;
+		return NULL;
+	}
+	p = (char *)&ret[*num];
+	i = 0;
+	while ((dirent = readdir(dir)) != NULL) {
+		if (strchr(dirent->d_name, '.'))
+			continue;
+		ret[i] = p;
+		strcpy(p, dirent->d_name);
+		p += strlen(p) + 1;
+		i++;
+	}
+	closedir(dir);
+
+	return ret;
+}
+
+/* Where a node's value lives: in "<filename>/.DATA" when filename is a
+ * directory, otherwise in filename itself (returned unchanged).
+ */
+static char *filename_to_data(const char *filename)
+{
+	struct stat st;
+
+	if (lstat(filename, &st) != 0 || !S_ISDIR(st.st_mode))
+		return (char *)filename;
+
+	return talloc_asprintf(filename, "%s/.DATA", filename);
+}
+
+/* Read the value of path from the file-backed store.
+ * Returns the buffer obtained from grab_file() and stores its size in
+ * *len, or NULL with errno set.  A directory without a .DATA file is
+ * reported as EISDIR.
+ */
+static void *file_read(struct file_ops_info *info,
+		       const char *path, unsigned int *len)
+{
+	void *ret;
+	char *filename = filename_to_data(path_to_name(info, path));
+	/* Start at 0 so *len is well defined even if grab_file() fails
+	 * without storing a size. */
+	unsigned long size = 0;
+
+	ret = grab_file(filename, &size);
+	/* Directory exists, .DATA doesn't. */
+	if (!ret && errno == ENOENT && strends(filename, ".DATA"))
+		errno = EISDIR;
+	*len = size;
+	return ret;
+}
+
+/* Fetch the permission list of path from the file-backed store.
+ * Returns an allocated array with *num entries, or NULL (errno from
+ * lstat()) when the node does not exist.  A node with no permission file
+ * gets a single default entry for id 0: readable for "/", no access
+ * otherwise.
+ */
+static struct xs_permissions *file_get_perms(struct file_ops_info *info,
+					     const char *path,
+					     unsigned int *num)
+{
+	void *perms;
+	struct xs_permissions *ret;
+	char *filename = path_to_name(info, path);
+	char *permfile;
+	unsigned long size;
+	struct stat st;
+
+	/* The node itself must exist. */
+	if (lstat(filename, &st) != 0)
+		return NULL;
+
+	if (S_ISDIR(st.st_mode))
+		permfile = talloc_asprintf(path, "%s/.perms", filename);
+	else
+		permfile = talloc_asprintf(path, "%s.perms", filename);
+
+	perms = grab_file(permfile, &size);
+	if (!perms) {
+		/* No permfile: we didn't bother, return defaults.  Nothing
+		 * was grabbed, so there is nothing to release here. */
+		ret = new(struct xs_permissions);
+		ret[0].id = 0;
+		/* Default for root is readable. */
+		if (streq(path, "/"))
+			ret[0].perms = XS_PERM_READ;
+		else
+			ret[0].perms = XS_PERM_NONE;
+		*num = 1;
+		return ret;
+	}
+	*num = count_strings(perms, size);
+
+	ret = new_array(struct xs_permissions, *num);
+	if (!strings_to_perms(ret, *num, perms))
+		barf("Reading permissions from %s", permfile);
+	release_file(perms, size);
+	return ret;
+}
+
+/* Store perms[0..num) as the permission list of path.  Each entry is
+ * written to the node's permission file as a nul-terminated string.
+ * Returns false with errno set on failure.
+ */
+static bool file_set_perms(struct file_ops_info *info,
+			   const char *path,
+			   struct xs_permissions *perms,
+			   unsigned int num)
+{
+	char *filename = path_to_name(info, path);
+	char *permfile;
+	struct stat st;
+	unsigned int n;
+	int fd;
+
+	/* At least one entry is required. */
+	if (num < 1) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (!write_ok(info, path))
+		return false;
+
+	/* Check non-perm file exists. */
+	if (lstat(filename, &st) != 0)
+		return false;
+
+	permfile = S_ISDIR(st.st_mode)
+		? talloc_asprintf(path, "%s/.perms", filename)
+		: talloc_asprintf(path, "%s.perms", filename);
+
+	fd = open(permfile, O_WRONLY|O_CREAT|O_TRUNC, 0600);
+	if (fd < 0)
+		return false;
+
+	for (n = 0; n < num; n++) {
+		char buffer[100];
+
+		if (!perm_to_string(&perms[n], buffer)) {
+			/* close() may clobber errno: keep the real cause. */
+			int saved_errno = errno;
+			close(fd);
+			errno = saved_errno;
+			return false;
+		}
+		/* Write the string including its terminating nul. */
+		if (write(fd, buffer, strlen(buffer) + 1)
+		    != (int)strlen(buffer) + 1)
+			barf_perror("Failed to write perm");
+	}
+	close(fd);
+	return true;
+}
+
+/* Write len bytes of data as the value of path in the file-backed store.
+ * createflags may be 0, O_CREAT or O_CREAT|O_EXCL; anything else fails
+ * with EINVAL.  Returns false with errno set on failure.
+ */
+static bool file_write(struct file_ops_info *info,
+ const char *path, const void *data,
+ unsigned int len, int createflags)
+{
+ char *filename = filename_to_data(path_to_name(info, path));
+ int fd;
+
+ /* Kernel isn't strict, but library is. */
+ if (createflags & ~(O_CREAT|O_EXCL)) {
+ errno = EINVAL;
+ return false;
+ }
+
+ if (!write_ok(info, path))
+ return false;
+
+ /* We regard it as existing if dir exists. */
+ if (strends(filename, ".DATA")) {
+ /* The .DATA file itself may be absent, so allow creating it. */
+ if (!createflags)
+ createflags = O_CREAT;
+ if (createflags & O_EXCL) {
+ errno = EEXIST;
+ return false;
+ }
+ }
+
+ fd = open(filename, createflags|O_TRUNC|O_WRONLY, 0600);
+ if (fd < 0) {
+ /* FIXME: Another hack. */
+ if (!(createflags & O_CREAT) && errno == EISDIR)
+ errno = EEXIST;
+ return false;
+ }
+
+ /* A short or failed write is treated as fatal. */
+ if (write(fd, data, len) != (int)len)
+ barf_perror("Bad write to %s", filename);
+
+ close(fd);
+ return true;
+}
+
+/* Create path as a directory in the file-backed store.
+ * Returns false with errno set on failure.
+ */
+static bool file_mkdir(struct file_ops_info *info, const char *path)
+{
+ char *dirname = path_to_name(info, path);
+
+ /* Same effective order as daemon, so error returns are right. */
+ if (mkdir(dirname, 0700) != 0) {
+ /* Called only for its errno side effect: a write-permission
+ * failure (EROFS) replaces mkdir's error unless the path
+ * itself was bad. */
+ if (errno != ENOENT && errno != ENOTDIR)
+ write_ok(info, path);
+ return false;
+ }
+
+ /* Created, but not allowed to write here: undo and report. */
+ if (!write_ok(info, path)) {
+ int saved_errno = errno;
+ rmdir(dirname);
+ errno = saved_errno;
+ return false;
+ }
+ return true;
+}
+
+/* Run cmd through system() and treat anything other than a normal exit
+ * with status 0 as fatal.
+ */
+static void do_command(const char *cmd)
+{
+	int status = system(cmd);
+
+	if (status != -1 && WIFEXITED(status) && WEXITSTATUS(status) == 0)
+		return;
+
+	barf_perror("Failed '%s': %i", cmd, status);
+}
+
+/* Remove path (and everything below it) from the file-backed store.
+ * Returns false with errno set on failure.  The order of the checks
+ * decides which errno is reported when several apply.
+ */
+static bool file_rm(struct file_ops_info *info, const char *path)
+{
+ char *filename = path_to_name(info, path);
+ struct stat st;
+
+ /* The root of an open transaction cannot be removed. */
+ if (info->transact && streq(info->transact, path)) {
+ errno = EINVAL;
+ return false;
+ }
+
+ if (lstat(filename, &st) != 0)
+ return false;
+
+ if (!write_ok(info, path))
+ return false;
+
+ /* Nor can the root of the store. */
+ if (streq(path, "/")) {
+ errno = EINVAL;
+ return false;
+ }
+
+ /* Remove the sibling permission file first, then the node itself. */
+ do_command(talloc_asprintf(path, "rm -f %s.perms; rm -r %s",
+ filename, filename));
+ return true;
+}
+
+/* Open a transaction on subtree: the whole base directory is copied to
+ * "<base>.transact" and later operations work on that copy.
+ * Fails with EBUSY if a transaction is already open, or with lstat()'s
+ * errno if subtree does not exist.
+ */
+static bool file_transaction_start(struct file_ops_info *info,
+ const char *subtree)
+{
+ char *cmd;
+ char *filename = path_to_name(info, subtree);
+ struct stat st;
+
+ if (info->transact) {
+ errno = EBUSY;
+ return false;
+ }
+
+ if (lstat(filename, &st) != 0)
+ return false;
+
+ cmd = talloc_asprintf(NULL, "cp -r %s %s.transact",
+ info->base, info->base);
+ do_command(cmd);
+ talloc_free(cmd);
+
+ /* From here on get_name() resolves paths inside the copy. */
+ info->transact_base = talloc_asprintf(NULL, "%s.transact", info->base);
+ info->transact = talloc_strdup(NULL, subtree);
+ return true;
+}
+
+/* Close the open transaction.  On abort the transaction copy is simply
+ * deleted; on commit it replaces the base directory.  Fails with ENOENT
+ * when no transaction is open.
+ */
+static bool file_transaction_end(struct file_ops_info *info, bool abort)
+{
+	char *cmd;
+
+	if (!info->transact) {
+		errno = ENOENT;
+		return false;
+	}
+
+	if (abort) {
+		cmd = talloc_asprintf(NULL, "rm -r %s", info->transact_base);
+		do_command(cmd);
+	} else {
+		/* Commit: drop the old tree, then move the copy into place. */
+		char *old = talloc_asprintf(NULL, "rm -rf %s", info->base);
+
+		do_command(old);
+		talloc_free(old);
+
+		cmd = talloc_asprintf(NULL, "mv %s %s",
+				      info->transact_base, info->base);
+		do_command(cmd);
+	}
+
+	talloc_free(cmd);
+	talloc_free(info->transact);
+	talloc_free(info->transact_base);
+	info->transact = NULL;
+	info->transact_base = NULL;
+	return true;
+}
+
+/* Create a handle for the file-backed store rooted at dir, with no
+ * transaction open.  dir is referenced, not copied.
+ */
+static struct file_ops_info *file_handle(const char *dir)
+{
+	struct file_ops_info *fresh = talloc(NULL, struct file_ops_info);
+
+	fresh->transact = NULL;
+	fresh->transact_base = NULL;
+	fresh->base = dir;
+	return fresh;
+}
+
+/* Destroy a handle created by file_handle(). */
+static void file_close(struct file_ops_info *handle)
+{
+ talloc_free(handle);
+}
+
+/* Open a connection to the store daemon; the directory argument is only
+ * there to match the struct ops signature.  Failure to connect is
+ * reported through barf_perror().
+ */
+static struct xs_handle *xs_handle(const char *dir __attribute__((unused)))
+{
+	struct xs_handle *conn = xs_daemon_open();
+
+	if (!conn)
+		barf_perror("Connecting to xs daemon");
+	return conn;
+}
+
+/* Close a connection opened by xs_handle(). */
+static void xs_close(struct xs_handle *handle)
+{
+ xs_daemon_close(handle);
+}
+
+/* Operation table backed by plain files.  The functions take a
+ * struct file_ops_info * handle, so each is cast through void * to fit
+ * the void *-handle signatures of struct ops.
+ */
+struct ops file_ops = {
+ .name = "FILE",
+ .dir = (void *)file_directory,
+ .read = (void *)file_read,
+ .write = (void *)file_write,
+ .mkdir = (void *)file_mkdir,
+ .rm = (void *)file_rm,
+ .get_perms = (void *)file_get_perms,
+ .set_perms = (void *)file_set_perms,
+ .transaction_start = (void *)file_transaction_start,
+ .transaction_end = (void *)file_transaction_end,
+ .handle = (void *)file_handle,
+ .close = (void *)file_close,
+};
+
+/* Operation table backed by the xs client library.  The functions take
+ * a struct xs_handle * handle, so each is cast through void * to fit the
+ * void *-handle signatures of struct ops.
+ */
+struct ops xs_ops = {
+ .name = "XS",
+ .dir = (void *)xs_directory,
+ .read = (void *)xs_read,
+ .write = (void *)xs_write,
+ .mkdir = (void *)xs_mkdir,
+ .rm = (void *)xs_rm,
+ .get_perms = (void *)xs_get_permissions,
+ .set_perms = (void *)xs_set_permissions,
+ .transaction_start = (void *)xs_transaction_start,
+ .transaction_end = (void *)xs_transaction_end,
+ .handle = (void *)xs_handle,
+ .close = (void *)xs_close,
+};
+
+/* qsort() comparator for an array of C strings: a and b each point at an
+ * element, i.e. at a char pointer.
+ */
+static int strptrcmp(const void *a, const void *b)
+{
+	const char *const *lhs = a;
+	const char *const *rhs = b;
+
+	return strcmp(*lhs, *rhs);
+}
+
+/* Sort the num strings of dir in place into strcmp() order. */
+static void sort_dir(char **dir, unsigned int num)
+{
+ qsort(dir, num, sizeof(char *), strptrcmp);
+}
+
+/* Render the subtree below node as text: for every entry of dir (sorted
+ * first) one line with its name and permissions, one line with its
+ * contents if it has any, then its own children one level deeper.
+ * The string is allocated with node as talloc context.  Returns NULL if
+ * any backend operation fails.
+ */
+static char *dump_dir(struct ops *ops,
+		      void *h,
+		      const char *node,
+		      char **dir,
+		      unsigned int numdirs,
+		      unsigned int depth)
+{
+	char *ret = talloc_strdup(node, "");
+	unsigned int i;
+	char spacing[depth+1];
+
+	/* One space of indentation per nesting level. */
+	memset(spacing, ' ', depth);
+	spacing[depth] = '\0';
+
+	sort_dir(dir, numdirs);
+
+	for (i = 0; i < numdirs; i++) {
+		struct xs_permissions *perms;
+		unsigned int j, numperms;
+		unsigned int len;
+		char *contents;
+		unsigned int subnum;
+		char **subdirs;
+		char *subret;
+		char *subnode = talloc_asprintf(node, "%s/%s", node, dir[i]);
+
+		perms = ops->get_perms(h, subnode, &numperms);
+		if (!perms)
+			return NULL;
+		ret = talloc_asprintf_append(ret, "%s%s: ", spacing, dir[i]);
+		for (j = 0; j < numperms; j++) {
+			char buffer[100];
+			if (!perm_to_string(&perms[j], buffer))
+				barf("perm to string");
+			ret = talloc_asprintf_append(ret, "%s ", buffer);
+		}
+		free(perms);
+		ret = talloc_asprintf_append(ret, "\n");
+
+		/* Even directories can have contents. */
+		contents = ops->read(h, subnode, &len);
+		if (!contents) {
+			if (errno != EISDIR)
+				return NULL;
+		} else {
+			/* The "%.*s" precision must be an int. */
+			ret = talloc_asprintf_append(ret, " %s(%.*s)\n",
+						     spacing, (int)len,
+						     contents);
+			free(contents);
+		}
+
+		/* Every node is a directory. */
+		subdirs = ops->dir(h, subnode, &subnum);
+		if (!subdirs)
+			return NULL;
+		subret = dump_dir(ops, h, subnode, subdirs, subnum, depth+1);
+		/* The listing is no longer needed whether or not the
+		 * recursion worked: free it before bailing out. */
+		free(subdirs);
+		if (!subret)
+			return NULL;
+		ret = talloc_asprintf_append(ret, "%s", subret);
+	}
+	return ret;
+}
+
+/* Render the whole store as text via dump_dir().  Returns a string with
+ * a NULL talloc parent, or NULL if any backend operation fails.
+ */
+static char *dump(struct ops *ops, void *h)
+{
+	char *root = talloc_strdup(NULL, "/");
+	char *text;
+	char **top;
+	unsigned int ntop;
+
+	top = ops->dir(h, root, &ntop);
+	if (!top) {
+		talloc_free(root);
+		return NULL;
+	}
+
+	/* Children of "/" are named "/x", so the node prefix is "". */
+	text = dump_dir(ops, h, talloc_strdup(root, ""), top, ntop, 0);
+	free(top);
+
+	/* Detach the result before its context goes away. */
+	if (text)
+		talloc_steal(NULL, text);
+	talloc_free(root);
+	return text;
+}
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ * hash(), hash2(), hash3, and mix() are externally useful functions.
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose. It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault. -DaveM
+ */
+
+/* Mixing step of the Jenkins hash: nine rounds of subtract/xor-shift
+ * over the three words.
+ * NOTE: Arguments are modified, and each is evaluated many times, so
+ * pass plain variables only.
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ratio: an arbitrary value */
+#define JHASH_GOLDEN_RATIO 0x9e3779b9
+
+/* The most generic version, hashes an arbitrary sequence
+ * of bytes. No alignment or length assumptions are made about
+ * the input key.
+ */
+/* Hash length bytes starting at key, seeded with initval.
+ * The key is consumed 12 bytes at a time, assembled byte by byte into
+ * three words, then the 0-11 byte tail is folded in.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c, len;
+ const u8 *k = key;
+
+ len = length;
+ a = b = JHASH_GOLDEN_RATIO;
+ c = initval;
+
+ while (len >= 12) {
+ a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
+ b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
+ c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
+
+ __jhash_mix(a,b,c);
+
+ k += 12;
+ len -= 12;
+ }
+
+ /* The low byte of c carries the length, so tail bytes 8-10 go into
+ * its upper three bytes. */
+ c += length;
+ /* Every case deliberately falls through to the ones below it. */
+ switch (len) {
+ case 11: c += ((u32)k[10]<<24);
+ case 10: c += ((u32)k[9]<<16);
+ case 9 : c += ((u32)k[8]<<8);
+ case 8 : b += ((u32)k[7]<<24);
+ case 7 : b += ((u32)k[6]<<16);
+ case 6 : b += ((u32)k[5]<<8);
+ case 5 : b += k[4];
+ case 4 : a += ((u32)k[3]<<24);
+ case 3 : a += ((u32)k[2]<<16);
+ case 2 : a += ((u32)k[1]<<8);
+ case 1 : a += k[0];
+ };
+
+ __jhash_mix(a,b,c);
+
+ return c;
+}
+
+/* A special optimized version that handles 1 or more of u32s.
+ * The length parameter here is the number of u32s in the key.
+ */
+/* Hash length u32 words starting at k, seeded with initval.  Words are
+ * consumed three at a time, then the 0-2 word tail is folded in.
+ */
+static inline u32 jhash2(u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c, len;
+
+ a = b = JHASH_GOLDEN_RATIO;
+ c = initval;
+ len = length;
+
+ while (len >= 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ k += 3; len -= 3;
+ }
+
+ /* Key length in bytes. */
+ c += length * 4;
+
+ /* Case 2 deliberately falls through to case 1. */
+ switch (len) {
+ case 2 : b += k[1];
+ case 1 : a += k[0];
+ };
+
+ __jhash_mix(a,b,c);
+
+ return c;
+}
+
+
+/* Special ultra-optimized versions that know they are hashing exactly
+ * 3, 2 or 1 word(s).
+ *
+ * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
+ * done at the end is not done here.
+ */
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += JHASH_GOLDEN_RATIO;
+ b += JHASH_GOLDEN_RATIO;
+ c += initval;
+
+ __jhash_mix(a, b, c);
+
+ return c;
+}
+
+/* Hash two words: jhash_3words() with the third word set to 0. */
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return jhash_3words(a, b, 0, initval);
+}
+
+/* Hash one word: jhash_3words() with the other two words set to 0. */
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+ return jhash_3words(a, 0, 0, initval);
+}
+
+/* Deterministic pseudo-random source: hashes the counter *state and then
+ * advances it by one, so the same starting state always produces the
+ * same sequence.
+ *
+ * The counter is read once and updated in a separate statement.
+ * Modifying and reading *state inside one argument list is unsequenced
+ * and therefore undefined.  The arithmetic is unsigned so wrap-around is
+ * well defined.  The seed is derived from the already-advanced counter,
+ * matching a left-to-right reading of the original expression.
+ */
+static unsigned int get_randomness(int *state)
+{
+	unsigned int cur = *state;
+	unsigned int next = cur + 1;
+
+	*state = next;
+	return jhash_1word(cur, next * 1103515243u);
+}
+
+/* Produce a random store path: "/" one time in twenty, otherwise one or
+ * more "/<0-14>" components, each further component being added on a
+ * coin flip.
+ */
+static char *random_path(int *state)
+{
+	char *path = NULL;
+
+	if (get_randomness(state) % 20 == 0)
+		return talloc_strdup(NULL, "/");
+
+	/* The first component is unconditional. */
+	do {
+		path = talloc_asprintf_append(path, "/%i",
+					      get_randomness(state) % 15);
+	} while (get_randomness(state) % 2);
+
+	return path;
+}
+
+/* Turn an operation result into a comparable string: "OK" on success,
+ * "FAILED:<strerror(errno)>" otherwise.
+ */
+static char *bool_to_errstring(bool result)
+{
+	if (!result) {
+		/* Real daemon can never return this. */
+		if (errno == ENOTDIR)
+			errno = ENOENT;
+		return talloc_asprintf(NULL, "FAILED:%s", strerror(errno));
+	}
+
+	return talloc_strdup(NULL, "OK");
+}
+
+/* Flatten a directory listing into one string, one sorted entry per
+ * line, and free the listing.  A NULL listing becomes the error string
+ * for the current errno; an empty one becomes "".
+ */
+static char *linearize_dir(char **dir, unsigned int *num)
+{
+	char *text;
+	unsigned int n;
+
+	if (!dir)
+		return bool_to_errstring(false);
+
+	if (*num == 0) {
+		text = talloc_strdup(NULL, "");
+	} else {
+		text = NULL;
+		sort_dir(dir, *num);
+		for (n = 0; n < *num; n++)
+			text = talloc_asprintf_append(text, "%s\n", dir[n]);
+	}
+
+	free(dir);
+	return text;
+}
+
+/* Flatten a read result into "<size>:<contents>" and free the buffer.
+ * A NULL buffer becomes the error string for the current errno.
+ */
+static char *linearize_read(char *read, unsigned int *size)
+{
+	char *text;
+
+	if (read == NULL)
+		return bool_to_errstring(false);
+
+	text = talloc_asprintf(NULL, "%i:%.*s", *size, *size, read);
+	free(read);
+	return text;
+}
+
+/* Flatten a permission list into "(id perms)(id perms)..." and free it.
+ * A NULL list becomes the error string for the current errno; a list of
+ * zero entries yields NULL.
+ */
+static char *linearize_perms(struct xs_permissions *perms, unsigned int *size)
+{
+	char *text = NULL;
+	unsigned int n = 0;
+
+	if (!perms)
+		return bool_to_errstring(false);
+
+	while (n < *size) {
+		text = talloc_asprintf_append(text, "(%u %u)",
+					      perms[n].id, perms[n].perms);
+		n++;
+	}
+
+	free(perms);
+	return text;
+}
+
+/* Pick create flags for a write: each of the three valid combinations
+ * one time in four, and an arbitrary (usually invalid) value the rest of
+ * the time.
+ */
+static int random_flags(int *state)
+{
+	static const int valid[] = { 0, O_CREAT, O_CREAT|O_EXCL };
+	unsigned int pick = get_randomness(state) % 4;
+
+	if (pick < 3)
+		return valid[pick];
+
+	return get_randomness(state);
+}
+
+/* Do the next operation, return the results. */
+/* Do the next operation, return the results.
+ *
+ * state seeds the random choices: it selects the path, one of nine
+ * operations, and that operation's arguments.  The operation runs
+ * through ops on handle h and its outcome is returned as a talloc
+ * string, so two backends given the same state can be compared by string
+ * equality.  With verbose set, the chosen operation is printed first.
+ */
+static char *do_next_op(struct ops *ops, void *h, int state, bool verbose)
+{
+	char *name;
+	unsigned int num;
+	char *ret;
+
+	if (verbose)
+		printf("State %i: ", state);
+
+	name = random_path(&state);
+	switch (get_randomness(&state) % 9) {
+	case 0:
+		if (verbose)
+			printf("DIR %s\n", name);
+		ret = linearize_dir(ops->dir(h, name, &num), &num);
+		break;
+	case 1:
+		if (verbose)
+			printf("READ %s\n", name);
+		ret = linearize_read(ops->read(h, name, &num), &num);
+		break;
+	case 2: {
+		int flags = random_flags(&state);
+		char *contents = talloc_asprintf(NULL, "%i",
+						 get_randomness(&state));
+		/* Write a random-length prefix of the random number. */
+		unsigned int len = get_randomness(&state)%(strlen(contents)+1);
+		if (verbose)
+			printf("WRITE %s %s %.*s\n", name,
+			       flags == O_CREAT ? "O_CREAT"
+			       : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
+			       : flags == 0 ? "0" : "CRAPFLAGS",
+			       len, contents);
+		ret = bool_to_errstring(ops->write(h, name, contents, len,
+						   flags));
+		talloc_steal(ret, contents);
+		break;
+	}
+	case 3:
+		if (verbose)
+			printf("MKDIR %s\n", name);
+		ret = bool_to_errstring(ops->mkdir(h, name));
+		break;
+	case 4:
+		if (verbose)
+			printf("RM %s\n", name);
+		ret = bool_to_errstring(ops->rm(h, name));
+		break;
+	case 5:
+		if (verbose)
+			printf("GETPERMS %s\n", name);
+		ret = linearize_perms(ops->get_perms(h, name, &num),
+				      &num);
+		break;
+	case 6: {
+		unsigned int i, num = get_randomness(&state)%8;
+		/* num may be 0, and a zero-length VLA is undefined, so
+		 * reserve at least one slot; num itself is still passed
+		 * on unchanged. */
+		struct xs_permissions perms[num ? num : 1];
+
+		if (verbose)
+			printf("SETPERMS %s: ", name);
+		for (i = 0; i < num; i++) {
+			perms[i].id = get_randomness(&state)%8;
+			perms[i].perms = get_randomness(&state)%4;
+			if (verbose)
+				printf("%i%c ", perms[i].id,
+				       perms[i].perms == XS_PERM_WRITE ? 'W'
+				       : perms[i].perms == XS_PERM_READ ? 'R'
+				       : perms[i].perms ==
+				       (XS_PERM_READ|XS_PERM_WRITE) ? 'B'
+				       : 'N');
+		}
+		if (verbose)
+			printf("\n");
+		ret = bool_to_errstring(ops->set_perms(h, name, perms,
+						       num));
+		break;
+	}
+	case 7: {
+		if (verbose)
+			printf("START %s\n", name);
+		ret = bool_to_errstring(ops->transaction_start(h, name));
+		if (streq(ret, "OK")) {
+			talloc_free(ret);
+			ret = talloc_asprintf(NULL, "OK:START-TRANSACT:%s",
+					      name);
+		}
+
+		break;
+	}
+	case 8: {
+		bool abort = (get_randomness(&state) % 2);
+
+		if (verbose)
+			printf("STOP %s\n", abort ? "ABORT" : "COMMIT");
+		ret = bool_to_errstring(ops->transaction_end(h, abort));
+		if (streq(ret, "OK")) {
+			talloc_free(ret);
+			ret = talloc_strdup(NULL, "OK:STOP-TRANSACT");
+		}
+		break;
+	}
+	default:
+		barf("Impossible randomness");
+	}
+
+	/* Tie the path's lifetime to the result string. */
+	talloc_steal(ret, name);
+	return ret;
+}
+
+static int daemon_pid;
+
+/* Stop the test daemon, if one was started, and wipe its working files.
+ * A clean shutdown request is tried first; SIGTERM is the fallback.
+ */
+static void cleanup_xs_ops(void)
+{
+ char *cmd;
+ if (daemon_pid) {
+ struct xs_handle *h;
+ h = xs_daemon_open();
+ if (h) {
+ if (xs_shutdown(h)) {
+ waitpid(daemon_pid, NULL, 0);
+ /* Reaped: nothing left to kill below. */
+ daemon_pid = 0;
+ }
+ xs_daemon_close(h);
+ }
+ /* Still set: the polite shutdown did not happen. */
+ if (daemon_pid) {
+ kill(daemon_pid, SIGTERM);
+ waitpid(daemon_pid, NULL, 0);
+ }
+ }
+
+ cmd = talloc_asprintf(NULL, "rm -rf testsuite/tmp/*");
+ do_command(cmd);
+ talloc_free(cmd);
+}
+
+ /*
+  * Remove the file backend's tree at dir together with "<dir>.transact".
+  * NOTE(review): dir is pasted unquoted into the command handed to
+  * do_command() - presumably a shell; confirm before using odd paths.
+  */
+ static void cleanup_file_ops(const char *dir)
+ {
+     char *rm_cmd = talloc_asprintf(NULL, "rm -rf %s %s.transact", dir, dir);
+
+     do_command(rm_cmd);
+     talloc_free(rm_cmd);
+ }
+
+ /* Tear down both backends: the daemon first, then the file tree at dir. */
+ static void cleanup(const char *dir)
+ {
+     cleanup_xs_ops();
+     cleanup_file_ops(dir);
+ }
+
+ /* Create the (owner-only) root directory used by the file backend. */
+ static void setup_file_ops(const char *dir)
+ {
+     if (mkdir(dir, 0700) == 0)
+         return;
+
+     barf_perror("Creating directory %s", dir);
+ }
+
+ /*
+  * Fork and exec ./xenstored_test, then block until it reports readiness.
+  *
+  * The child's stdout is redirected into a pipe and the daemon is started
+  * with --output-pid; the parent waits for the first bytes to arrive on that
+  * pipe.  daemon_pid holds the child's PID on return.  Any failure to create
+  * the pipe, fork, or hear from the child is fatal.
+  */
+ static void setup_xs_ops(void)
+ {
+     int fds[2];
+
+     /* Start daemon. */
+     if (pipe(fds) != 0)
+         barf_perror("Creating pipe for daemon");
+
+     daemon_pid = fork();
+     if (daemon_pid == -1) {
+         /* Never keep -1 around: kill(-1, ...) signals every process. */
+         daemon_pid = 0;
+         barf_perror("Forking daemon");
+     }
+
+     if (daemon_pid) {
+         /* Child writes PID when it's ready: we wait for that. */
+         char buffer[20];
+
+         close(fds[1]);
+         /* 0 means EOF: the child went away without reporting in. */
+         if (read(fds[0], buffer, sizeof(buffer)) <= 0)
+             barf("Failed to summon daemon");
+         close(fds[0]);
+     } else {
+         dup2(fds[1], STDOUT_FILENO);
+         close(fds[0]);
+ #if 0
+         execlp("valgrind", "valgrind", "xenstored_test", "--output-pid",
+                "--no-fork", NULL);
+ #else
+         execlp("./xenstored_test", "xenstored_test", "--output-pid",
+                "--no-fork", NULL);
+ #endif
+         /* exec failed.  Use _exit() so stdio data inherited from the
+          * parent is not flushed into the pipe and mistaken for a PID. */
+         _exit(1);
+     }
+ }
+
+ /* Bring up both backends: the file tree at dir, then the daemon. */
+ static void setup(const char *dir)
+ {
+     setup_file_ops(dir);
+     setup_xs_ops();
+ }
+
+ /* Parameters handed to try_simple() through its opaque data pointer. */
+ struct simple_data
+ {
+ /* Added to the iteration index to form the value passed to do_next_op(). */
+ unsigned int seed;
+ /* Print a '.' about every number/76 iterations. */
+ bool print_progress;
+ /* When set, skip the transaction snapshot comparison. */
+ bool fast;
+ /* Backend to exercise; try_simple() special-cases &xs_ops for setup. */
+ struct ops *ops;
+ /* Passed to the backend's handle() and to the file setup/cleanup helpers. */
+ const char *dir;
+ };
+
+ /*
+  * Just a random test.  Don't care about results, just that it doesn't
+  * go boom.
+  *
+  * Runs operations 0..number-1 (skipping index i when trymap is non-NULL and
+  * trymap[i] is false) against data->ops on a freshly set-up backend.
+  * Unless data->fast is set, a dump taken through a second handle when a
+  * transaction starts must stay unchanged until the transaction stops.
+  *
+  * Returns the index at which the run stopped; "number" means every
+  * operation completed.
+  */
+ static unsigned int try_simple(const bool *trymap,
+                                unsigned int number,
+                                bool verbose,
+                                void *_data)
+ {
+     unsigned int i, print;
+     void *h;
+     char *snapshot = NULL;
+     struct simple_data *data = _data;
+
+     if (data->ops == &xs_ops) {
+         cleanup_xs_ops();
+         setup_xs_ops();
+     } else {
+         cleanup_file_ops(data->dir);
+         setup_file_ops(data->dir);
+     }
+     h = data->ops->handle(data->dir);
+
+     print = number / 76;
+     if (!print)
+         print = 1;
+
+     for (i = 0; i < number; i++) {
+         char *ret;
+
+         if (data->print_progress) {
+             if (i % print == 0) {
+                 printf(".");
+                 fflush(stdout);
+             }
+         }
+
+         if (trymap && !trymap[i])
+             continue;
+
+         ret = do_next_op(data->ops, h, i + data->seed, verbose);
+         if (verbose) {
+             /* Show the first line only.  The precision must be an int,
+              * and the result may contain no newline at all. */
+             const char *nl = strchr(ret, '\n');
+             int len = nl ? (int)(nl - ret) : (int)strlen(ret);
+
+             printf("-> %.*s\n", len, ret);
+         }
+         if (streq(ret, "FAILED:Bad file descriptor")) {
+             talloc_free(ret);
+             goto out;
+         }
+         if (kill(daemon_pid, 0) != 0) {
+             talloc_free(ret);
+             goto out;
+         }
+
+         if (!data->fast) {
+             if (strstarts(ret, "OK:START-TRANSACT:")) {
+                 void *pre = data->ops->handle(data->dir);
+
+                 snapshot = dump(data->ops, pre);
+                 if (!snapshot) {
+                     talloc_free(ret);
+                     goto out;
+                 }
+                 data->ops->close(pre);
+             } else if (streq(ret, "OK:STOP-TRANSACT")) {
+                 talloc_free(snapshot);
+                 snapshot = NULL;
+             }
+         }
+
+         talloc_free(ret);
+
+         if (snapshot) {
+             /* Outside the transaction nothing may have changed yet. */
+             void *pre = data->ops->handle(data->dir);
+             char *contents;
+
+             contents = dump(data->ops, pre);
+             if (!contents)
+                 goto out;
+
+             if (!streq(contents, snapshot)) {
+                 talloc_free(contents);
+                 data->ops->close(pre);
+                 goto out;
+             }
+
+             talloc_free(contents);
+             data->ops->close(pre);
+         }
+     }
+     if (data->print_progress)
+         printf("\n");
+
+ out:
+     if (snapshot)
+         talloc_free(snapshot);
+     data->ops->close(h);
+     return i;
+ }
+
+ /*
+  * Binary elimination: try eliminating all of them, then reduce.
+  *
+  * Attempts to switch off the try_num operations starting at try_start.  If
+  * the run still stops at operation number-1 without them, they are cleared
+  * in map for good; otherwise each half of the range is tried in turn.
+  */
+ static void reduce(bool *map,
+                    unsigned int number,
+                    unsigned int try_start, unsigned int try_num,
+                    unsigned int (*try)(const bool *map,
+                                        unsigned int number,
+                                        bool verbose,
+                                        void *),
+                    void *data)
+ {
+     bool candidate[number];
+     unsigned int first_half;
+
+     if (try_num == 0)
+         return;
+
+     /* Candidate map: the current one minus the range under test. */
+     memcpy(candidate, map, sizeof(candidate));
+     memset(&candidate[try_start], 0, try_num * sizeof(bool));
+
+     /* We want the *same* failure: must fail at "number-1". */
+     if (try(candidate, number, false, data) == number - 1) {
+         memset(&map[try_start], 0, try_num * sizeof(bool));
+         return;
+     }
+
+     /* A single operation that is needed cannot be split further. */
+     if (try_num == 1)
+         return;
+
+     first_half = try_num / 2;
+     reduce(map, number, try_start, first_half, try, data);
+     reduce(map, number, try_start + first_half, try_num - first_half,
+            try, data);
+ }
+
+ /*
+  * Shrink a failing run of "failed" operations to a smaller reproducer, replay
+  * it verbosely and exit: status 1 if the reduced run still stops at the last
+  * operation, status 2 if it does not.
+  */
+ static void reduce_problem(unsigned int failed,
+                            unsigned int (*try)(const bool *map,
+                                                unsigned int number,
+                                                bool verbose,
+                                                void *data),
+                            void *data)
+ {
+     bool map[failed];
+     unsigned int stopped_at;
+
+     /* Start with every operation enabled; the last one is never dropped. */
+     memset(map, 1, sizeof(map));
+     reduce(map, failed, 0, failed-1, try, data);
+
+     printf("Cut down:\n");
+     stopped_at = try(map, failed, true, data);
+     if (stopped_at == failed - 1)
+         exit(1);
+
+     printf("Except, that didn't actually fail. Bugger!");
+     exit(2);
+ }
+
+ /*
+  * Run try_simple() once over "iters" operations against the xs backend.
+  * Exits 0 on success; on failure reports the failing iteration and hands
+  * over to reduce_problem(), which exits.
+  */
+ static void simple_test(const char *dir,
+                         unsigned int iters, unsigned int seed,
+                         bool fast, bool verbose)
+ {
+     struct simple_data data = {
+         .seed = seed,
+         .print_progress = !verbose,
+         .fast = fast,
+         .ops = &xs_ops,
+         .dir = dir,
+     };
+     unsigned int stopped_at = try_simple(NULL, iters, verbose, &data);
+
+     if (stopped_at != iters) {
+         printf("Failed on iteration %u\n", stopped_at + 1);
+         data.print_progress = false;
+         reduce_problem(stopped_at + 1, try_simple, &data);
+         return;
+     }
+
+     cleanup_xs_ops();
+     printf("Succeeded\n");
+     exit(0);
+ }
+
+ /*
+  * Compare the subtree rooted at "node" as seen through backend a (handle ah)
+  * and backend b (handle bh): permissions, data and directory listing of the
+  * node itself, then every child recursively.
+  *
+  * Returns true only if everything matches.  When a backend call itself
+  * fails, *fail is set to that backend before returning false; a false
+  * return with *fail left untouched means the two trees merely differ.
+  */
+ static bool ops_equal(struct ops *a, void *ah,
+ struct ops *b, void *bh,
+ const char *node,
+ struct ops **fail)
+ {
+ char **dira = NULL, **dirb = NULL;
+ char *dataa = NULL, *datab = NULL;
+ unsigned int i, numa, numb, lena, lenb;
+ struct xs_permissions *permsa = NULL, *permsb = NULL;
+ unsigned int numpermsa, numpermsb;
+ char *nodename;
+ bool ret = false;
+
+ /* FILE backend expects talloc'ed pointer. */
+ nodename = talloc_strdup(NULL, node);
+ permsa = a->get_perms(ah, nodename, &numpermsa);
+ if (!permsa) {
+ *fail = a;
+ goto out;
+ }
+ permsb = b->get_perms(bh, nodename, &numpermsb);
+ if (!permsb) {
+ *fail = b;
+ goto out;
+ }
+ /* Permission lists must match entry for entry, in order. */
+ if (numpermsa != numpermsb)
+ goto out;
+ for (i = 0; i < numpermsa; i++) {
+ if (permsa[i].perms != permsb[i].perms)
+ goto out;
+ if (permsa[i].id != permsb[i].id)
+ goto out;
+ }
+
+ /* Non-pure-directory nodes contain data. */
+ /* A NULL read with errno == EISDIR is not an error: it is treated as
+  * "no data" below.  Any other NULL read is a backend failure. */
+ dataa = a->read(ah, nodename, &lena);
+ if (!dataa && errno != EISDIR) {
+ *fail = a;
+ goto out;
+ }
+ datab = b->read(bh, nodename, &lenb);
+ if (!datab && errno != EISDIR) {
+ *fail = b;
+ goto out;
+ }
+
+ /* Either both have data of equal length and content, or neither has. */
+ if (dataa) {
+ if (!datab)
+ goto out;
+ if (lena != lenb)
+ goto out;
+
+ if (memcmp(dataa, datab, lena) != 0)
+ goto out;
+ } else
+ if (datab)
+ goto out;
+
+ /* Everything is a directory. */
+ dira = a->dir(ah, nodename, &numa);
+ if (!dira) {
+ *fail = a;
+ goto out;
+ }
+ dirb = b->dir(bh, nodename, &numb);
+ if (!dirb) {
+ *fail = b;
+ goto out;
+ }
+ if (numa != numb)
+ goto out;
+ /* Sort both listings so entries can be compared pairwise. */
+ sort_dir(dira, numa);
+ sort_dir(dirb, numb);
+ for (i = 0; i < numa; i++) {
+ char subnode[strlen(node) + 1 + strlen(dira[i]) + 1];
+
+ if (!streq(dira[i], dirb[i]))
+ goto out;
+
+ /* Build "<node>/<child>", avoiding a double slash under "/". */
+ strcpy(subnode, node);
+ if (!streq(node, "/"))
+ strcat(subnode, "/");
+ strcat(subnode, dira[i]);
+ if (!ops_equal(a, ah, b, bh, subnode, fail))
+ goto out;
+ }
+
+ ret = true;
+ out:
+ /* Backend results are released with free(); pointers still NULL are
+  * harmless here. */
+ free(permsa);
+ free(permsb);
+ free(dataa);
+ free(datab);
+ free(dira);
+ free(dirb);
+ talloc_free(nodename);
+ return ret;
+ }
+
+ /* Parameters handed to try_diff() through its opaque data pointer. */
+ struct diff_data
+ {
+ /* Added to the iteration index to form the value passed to do_next_op(). */
+ unsigned int seed;
+ /* Print a '.' about every number/76 iterations. */
+ bool print_progress;
+ /* When set, compare the two trees once at the end instead of per op. */
+ bool fast;
+ /* Directory used for setup/cleanup and for opening both handles. */
+ const char *dir;
+ };
+
+ /*
+  * Differential: try both file and xs backend, watch for differences.
+  *
+  * Applies operations 0..number-1 (skipping index i when trymap is non-NULL
+  * and trymap[i] is false) to both backends and requires identical result
+  * strings.  Unless data->fast is set, the full trees are compared after
+  * every operation and, while a transaction is open, the transaction's node
+  * is also compared through fresh handles.
+  *
+  * Returns the index at which the run stopped; "number" means every
+  * operation completed.
+  */
+ static unsigned int try_diff(const bool *trymap,
+                              unsigned int number,
+                              bool verbose,
+                              void *_data)
+ {
+     void *fileh, *xsh;
+     char *transact = NULL;
+     struct ops *fail;
+     struct diff_data *data = _data;
+     unsigned int i, print;
+
+     cleanup(data->dir);
+     setup(data->dir);
+
+     fileh = file_handle(data->dir);
+     xsh = xs_handle(data->dir);
+
+     print = number / 76;
+     if (!print)
+         print = 1;
+
+     for (i = 0; i < number; i++) {
+         char *file, *xs;
+
+         if (data->print_progress) {
+             if (i % print == 0) {
+                 printf(".");
+                 fflush(stdout);
+             }
+         }
+         if (trymap && !trymap[i])
+             continue;
+
+         if (verbose)
+             printf("FILE: ");
+
+         file = do_next_op(&file_ops, fileh, i+data->seed, verbose);
+         if (verbose) {
+             /* Print up to the first '/', or everything if there is
+              * none; the precision argument must be an int. */
+             const char *cut = strchr(file, '/');
+             int len = cut ? (int)(cut - file) : (int)strlen(file);
+
+             printf("-> %.*s\n", len, file);
+         }
+
+         if (verbose)
+             printf("XS: ");
+         xs = do_next_op(&xs_ops, xsh, i+data->seed, verbose);
+         if (verbose) {
+             const char *cut = strchr(xs, '/');
+             int len = cut ? (int)(cut - xs) : (int)strlen(xs);
+
+             printf("-> %.*s\n", len, xs);
+         }
+
+         if (!streq(file, xs)) {
+             talloc_free(file);
+             talloc_free(xs);
+             goto out;
+         }
+
+         if (strstarts(file, "OK:START-TRANSACT:"))
+             transact = talloc_strdup(NULL,
+                                      file +
+                                      strlen("OK:START-TRANSACT:"));
+         else if (streq(file, "OK:STOP-TRANSACT")) {
+             talloc_free(transact);
+             transact = NULL;
+         }
+
+         talloc_free(file);
+         talloc_free(xs);
+
+         if (data->fast)
+             continue;
+
+         fail = NULL;
+         if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail)) {
+             if (fail)
+                 barf("%s failed during test\n", fail->name);
+             if (verbose)
+                 printf("Trees differ:\nXS:%s\nFILE%s\n",
+                        dump(&xs_ops, xsh),
+                        dump(&file_ops, fileh));
+             goto out;
+         }
+
+         if (transact) {
+             /* Fresh handles see the store from outside the transaction. */
+             void *fileh_pre = file_handle(data->dir);
+             void *xsh_pre = xs_handle(data->dir);
+
+             fail = NULL;
+             if (!ops_equal(&xs_ops, xsh_pre, &file_ops, fileh_pre,
+                            transact, &fail)) {
+                 if (fail)
+                     barf("%s failed during transact\n",
+                          fail->name);
+
+                 xs_daemon_close(xsh_pre);
+                 talloc_free(fileh_pre);
+                 goto out;
+             }
+             xs_daemon_close(xsh_pre);
+             talloc_free(fileh_pre);
+         }
+     }
+     if (data->print_progress)
+         printf("\n");
+
+     fail = NULL;
+     if (data->fast)
+         if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail))
+             barf("Final result not the same: try without --fast");
+ out:
+     if (transact)
+         talloc_free(transact);
+     file_ops.close(fileh);
+     xs_ops.close(xsh);
+     return i;
+ }
+
+ /*
+  * Differential random test: compare results against file backend.
+  * Exits 0 on success; on failure reports the failing iteration and hands
+  * over to reduce_problem(), which exits.
+  */
+ static void diff_test(const char *dir,
+                       unsigned int iters, unsigned int seed, bool fast,
+                       bool verbose)
+ {
+     struct diff_data data = {
+         .seed = seed,
+         .print_progress = !verbose,
+         .fast = fast,
+         .dir = dir,
+     };
+     unsigned int stopped_at = try_diff(NULL, iters, verbose, &data);
+
+     if (stopped_at != iters) {
+         printf("Failed on iteration %u\n", stopped_at + 1);
+         data.print_progress = false;
+         reduce_problem(stopped_at + 1, try_diff, &data);
+         return;
+     }
+
+     cleanup_xs_ops();
+     printf("Succeeded\n");
+     exit(0);
+ }
+
+ /* Parameters handed to try_fail() through its opaque data pointer. */
+ struct fail_data
+ {
+ /* Added to the iteration index for do_next_op(); also sent to the daemon
+  * as the argument of the "failtest" debug command. */
+ unsigned int seed;
+ /* Print a '.' about every number/76 iterations. */
+ bool print_progress;
+ /* Directory used for setup/cleanup and for opening handles. */
+ const char *dir;
+ };
+
+ /*
+  * Try xs with inserted failures: every op should either succeed or fail.
+  *
+  * The daemon is put into "failtest" mode, then operations 0..number-1 are
+  * run against it (skipping index i when trymap is non-NULL and trymap[i] is
+  * false).  After each one, failures are switched off with SIGUSR1, the
+  * operation is mirrored on the file backend if it succeeded, and both trees
+  * are compared through fresh handles.  A lost connection is treated as
+  * "maybe succeeded": if the trees differ, the op is applied to the file
+  * backend and the comparison is retried.
+  *
+  * Returns the index at which the run stopped; "number" means every
+  * operation completed.
+  */
+ static unsigned int try_fail(const bool *trymap,
+                              unsigned int number,
+                              bool verbose,
+                              void *_data)
+ {
+     unsigned int i, print, tried = 0, aborted = 0;
+     struct fail_data *data = _data;
+     struct xs_handle *tmpxsh;
+     struct file_ops_info *tmpfileh;
+     void *fileh, *xsh;
+     struct ops *fail;
+     char seed[20];
+
+     /* Make sure failures off to shut down. */
+     if (daemon_pid)
+         kill(daemon_pid, SIGUSR1);
+     cleanup(data->dir);
+     setup(data->dir);
+
+     fileh = file_handle(data->dir);
+     xsh = xs_handle(data->dir);
+
+     sprintf(seed, "%i", data->seed);
+     free(xs_debug_command(xsh, "failtest", seed, strlen(seed)+1));
+
+     print = number / 76;
+     if (!print)
+         print = 1;
+
+     for (i = 0; i < number; i++) {
+         unsigned int limit, failed;
+         char *ret;
+
+         /* A few times we fail due to other end OOM. */
+         limit = 0;
+         while (!xsh) {
+             xsh = xs_handle(data->dir);
+             if (!xsh && errno == ECONNREFUSED) {
+                 if (verbose)
+                     printf("Daemon refused connection\n");
+                 goto out;
+             }
+             if (!xsh && limit++ == 5) {
+                 printf("Daemon failed conn 5 times\n");
+                 goto out;
+             }
+         }
+
+         if (data->print_progress) {
+             if (i % print == 0) {
+                 printf(".");
+                 fflush(stdout);
+             }
+         }
+         if (trymap && !trymap[i])
+             continue;
+
+         if (verbose)
+             printf("(%i) ", i);
+         ret = do_next_op(&xs_ops, xsh, i + data->seed, verbose);
+         /* failed: 0 = succeeded, 1 = clean error, 2 = connection lost. */
+         if (streq(ret, "FAILED:Connection reset by peer")
+             || streq(ret, "FAILED:Bad file descriptor")
+             || streq(ret, "FAILED:Broken pipe")) {
+             xs_close(xsh);
+             xsh = NULL;
+             failed = 2;
+         } else if (strstarts(ret, "OK"))
+             failed = 0;
+         else
+             failed = 1;
+
+         tried++;
+         /* An op counts as aborted when it cost us the connection. */
+         if (!xsh)
+             aborted++;
+
+         if (verbose) {
+             /* Show the first line only.  The precision must be an int,
+              * and the result may contain no newline at all. */
+             const char *nl = strchr(ret, '\n');
+             int len = nl ? (int)(nl - ret) : (int)strlen(ret);
+
+             printf("-> %.*s\n", len, ret);
+         }
+
+         talloc_free(ret);
+
+         /* Turn off failures using signal. */
+         if (kill(daemon_pid, SIGUSR1) != 0) {
+             if (verbose)
+                 printf("Failed to signal daemon\n");
+             goto out;
+         }
+
+         if (failed == 0) {
+             /* Succeeded?  Do same thing to file backend
+              * to compare */
+         try_applying:
+             ret = do_next_op(&file_ops, fileh, i + data->seed,
+                              false);
+             if (!strstarts(ret, "OK")) {
+                 if (!verbose)
+                     printf("File op failed on %i\n",
+                            i + data->seed);
+                 talloc_free(ret);
+                 goto out;
+             }
+             talloc_free(ret);
+         }
+
+         tmpxsh = xs_handle(data->dir);
+         if (!tmpxsh) {
+             if (verbose)
+                 printf("Failed to open signalled daemon");
+             goto out;
+         }
+         tmpfileh = file_handle(data->dir);
+
+         fail = NULL;
+         if (!ops_equal(&xs_ops, tmpxsh, &file_ops, tmpfileh, "/",
+                        &fail)) {
+             xs_close(tmpxsh);
+             file_close(tmpfileh);
+             if (fail) {
+                 if (verbose)
+                     printf("%s failed\n", fail->name);
+                 goto out;
+             }
+             /* Maybe op succeeded: try comparing after local op? */
+             if (failed == 2) {
+                 failed = 0;
+                 if (verbose)
+                     printf("(Looks like it succeeded)\n");
+                 goto try_applying;
+             }
+             if (verbose)
+                 printf("Two backends not equal\n");
+             goto out;
+         }
+
+         /* If we lost the xs handle, that ended the transaction */
+         if (!xsh)
+             file_transaction_end(fileh, true);
+
+         /* Turn failures back on. */
+         free(xs_debug_command(tmpxsh, "failtest", NULL, 0));
+         xs_close(tmpxsh);
+         file_close(tmpfileh);
+     }
+
+     printf("Total %u of %u not aborted\n", tried - aborted, tried);
+ out:
+     if (xsh)
+         xs_close(xsh);
+     return i;
+ }
+
+ /*
+  * Failure-injection test driver.  The "fast" argument is accepted for a
+  * uniform signature but not used.  Exits 0 on success; on failure reports
+  * the failing iteration and hands over to reduce_problem(), which exits.
+  */
+ static void fail_test(const char *dir,
+                       unsigned int iters, unsigned int seed,
+                       bool fast __attribute__((unused)), bool verbose)
+ {
+     struct fail_data data = {
+         .seed = seed,
+         .print_progress = !verbose,
+         .dir = dir,
+     };
+     unsigned int stopped_at = try_fail(NULL, iters, verbose, &data);
+
+     if (stopped_at != iters) {
+         printf("Failed on iteration %u\n", stopped_at + 1);
+         fflush(stdout);
+         data.print_progress = false;
+         reduce_problem(stopped_at + 1, try_fail, &data);
+         return;
+     }
+
+     cleanup_xs_ops();
+     printf("Succeeded\n");
+     exit(0);
+ }
+
+ /*
+  * xs_random [--fail|--simple] [--fast] [--verbose] <directory> <iterations> <seed>
+  *
+  * The leading option flags may be given in any order (the documented order
+  * keeps working).  --fail takes precedence over --simple; with neither, the
+  * differential test runs.  Each test function exits the process itself.
+  */
+ int main(int argc, char *argv[])
+ {
+     bool verbose = false;
+     bool simple = false;
+     bool fast = false;
+     bool fail = false;
+
+     /* Consume recognised flags until the first non-flag argument. */
+     while (argv[1]) {
+         if (streq(argv[1], "--fail"))
+             fail = true;
+         else if (streq(argv[1], "--simple"))
+             simple = true;
+         else if (streq(argv[1], "--fast"))
+             fast = true;
+         else if (streq(argv[1], "--verbose"))
+             verbose = true;
+         else
+             break;
+         argv++;
+         argc--;
+     }
+
+     if (argc != 4)
+         barf("Usage: xs_random [--fail|--simple] [--fast] [--verbose] <directory> <iterations> <seed>");
+
+     talloc_enable_null_tracking();
+
+     if (fail)
+         fail_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+     else if (simple)
+         simple_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+     else
+         diff_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+     exit(2);
+ }
diff --git a/tools/xenstore/xs_stress.c b/tools/xenstore/xs_stress.c
new file mode 100644
index 0000000000..9c480b1553
--- /dev/null
+++ b/tools/xenstore/xs_stress.c
@@ -0,0 +1,207 @@
+/* Stress test for Xen Store: multiple people hammering transactions */
+#include "xs.h"
+#include "utils.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+ /* Connections each worker opens; one is picked at random per cycle. */
+ #define NUM_HANDLES 2
+ /* Numbered subdirectories created at every level of the tree. */
+ #define DIR_FANOUT 3
+ /* Numbered directory levels above each "count" node. */
+ #define DIR_DEPTH 3
+
+ /* How often to print progress */
+ static int print;
+
+ /*
+  * Worker body run in each child.
+  *
+  * Layout looks like /<num>/<num>/<num>/count (DIR_DEPTH numbered levels).
+  * Every cycle picks a random count node, starts a transaction on a random
+  * ancestor of it, increments the counter and then commits - or, one time in
+  * ten, aborts and repeats the cycle.  "cycles" is therefore the number of
+  * committed increments.  Any xs failure is fatal.
+  */
+ static void work(unsigned int cycles, unsigned int childnum)
+ {
+     unsigned int i;
+     struct xs_handle *handles[NUM_HANDLES];
+     char id;
+
+     /* One-character tag printed as this child's progress marker. */
+     if (childnum < 10)
+         id = '0' + childnum;
+     else
+         id = 'A' + childnum - 10;
+
+     for (i = 0; i < NUM_HANDLES; i++) {
+         handles[i] = xs_daemon_open();
+         if (!handles[i])
+             barf_perror("Opening handle %i", i);
+     }
+
+     srandom(childnum);
+     for (i = 0; i < cycles; i++) {
+         unsigned int lockdepth, j, len;
+         char file[100] = "", lockdir[100];
+         char *contents, tmp[100];
+         struct xs_handle *h = handles[random() % NUM_HANDLES];
+
+         /* lockdir is the path prefix that exists at depth lockdepth. */
+         lockdepth = random() % DIR_DEPTH;
+         for (j = 0; j < DIR_DEPTH; j++) {
+             if (j == lockdepth)
+                 strcpy(lockdir, file);
+             sprintf(file + strlen(file), "/%li",
+                     random()%DIR_FANOUT);
+         }
+         if (streq(lockdir, ""))
+             strcpy(lockdir, "/");
+
+         if (!xs_transaction_start(h, lockdir))
+             barf_perror("%i: starting transaction %i on %s",
+                         childnum, i, lockdir);
+
+         sprintf(file + strlen(file), "/count");
+         contents = xs_read(h, file, &len);
+         if (!contents)
+             barf_perror("%i: can't read %s iter %i",
+                         childnum, file, i);
+         sprintf(tmp, "%i", atoi(contents) + 1);
+         /* The read buffer is ours to release (add_count() frees its
+          * xs_read() result the same way). */
+         free(contents);
+         if (!xs_write(h, file, tmp, strlen(tmp)+1, 0))
+             barf_perror("%i: can't write %s iter %i",
+                         childnum, file, i);
+
+         /* Abandon 1 in 10 */
+         if (random() % 10 == 0) {
+             if (!xs_transaction_end(h, true))
+                 barf_perror("%i: can't abort transact %s",
+                             childnum, lockdir);
+             /* Redo this cycle: aborted work does not count. */
+             i--;
+         } else {
+             if (!xs_transaction_end(h, false))
+                 barf_perror("%i: can't commit transact %s",
+                             childnum, lockdir);
+
+             /* Offset when we print . so kids don't all
+              * print at once. */
+             if ((i + print/(childnum+1)) % print == 0)
+                 write(STDOUT_FILENO, &id, 1);
+         }
+     }
+ }
+
+ /*
+  * Recursively build the test tree below "base": DIR_FANOUT numbered
+  * directories per level for "togo" levels, each leaf holding a "count" node
+  * initialised to "0".  Any xs failure is fatal.
+  */
+ static void create_dirs(struct xs_handle *h, const char *base, int togo)
+ {
+     char filename[100];
+     unsigned int i;
+
+     if (togo != 0) {
+         for (i = 0; i < DIR_FANOUT; i++) {
+             sprintf(filename, "%s/%i", base, i);
+             if (!xs_mkdir(h, filename))
+                 barf_perror("xs_mkdir %s", filename);
+             create_dirs(h, filename, togo-1);
+         }
+         return;
+     }
+
+     /* Leaf level: create the counter, which must not exist yet. */
+     sprintf(filename, "%s/count", base);
+     if (!xs_write(h, filename, "0", 2, O_EXCL|O_CREAT))
+         barf_perror("Writing to %s", filename);
+ }
+
+ /*
+  * Return the sum of every "count" node found "togo" levels below "base".
+  * A failed read is fatal.
+  */
+ static unsigned int add_count(struct xs_handle *h, const char *base, int togo)
+ {
+     char filename[100];
+     unsigned int i, total = 0;
+
+     if (togo == 0) {
+         unsigned int len;
+         char *answer;
+
+         sprintf(filename, "%s/count", base);
+         answer = xs_read(h, filename, &len);
+         if (!answer)
+             barf_perror("Reading %s", filename);
+         total = atoi(answer);
+         free(answer);
+         return total;
+     }
+
+     for (i = 0; i < DIR_FANOUT; i++) {
+         sprintf(filename, "%s/%i", base, i);
+         total += add_count(h, filename, togo-1);
+     }
+     return total;
+ }
+
+ /* Connect to the daemon and create the whole counter tree. */
+ static void setup(void)
+ {
+     struct xs_handle *conn = xs_daemon_open();
+
+     if (!conn)
+         barf_perror("Contacting daemon");
+
+     /* Do setup. */
+     create_dirs(conn, "", DIR_DEPTH);
+     xs_daemon_close(conn);
+ }
+
+ /* Connect to the daemon and return the sum of all counters in the tree. */
+ static unsigned int tally_counts(void)
+ {
+     unsigned int total;
+     struct xs_handle *conn = xs_daemon_open();
+
+     if (!conn)
+         barf_perror("Contacting daemon");
+
+     total = add_count(conn, "", DIR_DEPTH);
+     xs_daemon_close(conn);
+     return total;
+ }
+
+ /*
+  * xs_stress <iterations>
+  *
+  * Builds the counter tree, forks ARRAY_SIZE(kids) workers which share the
+  * iterations equally, waits for them, and checks that the counters add up to
+  * the number of increments actually handed out.  Exits 0 on success, 1 if
+  * any child exited non-zero; other problems are fatal via barf().
+  */
+ int main(int argc, char *argv[])
+ {
+     unsigned int i, per_kid, expected;
+     bool failed = false;
+     int kids[10];
+
+     if (argc != 2)
+         barf("Usage: xs_stress <iterations>");
+
+     printf("Setting up directories...\n");
+     setup();
+
+     print = atoi(argv[1]) / 76;
+     if (!print)
+         print = 1;
+
+     /* Integer division may drop a remainder, so the total the workers
+      * perform can be smaller than the number requested. */
+     per_kid = atoi(argv[1]) / ARRAY_SIZE(kids);
+     expected = per_kid * ARRAY_SIZE(kids);
+
+     printf("Running %i children...\n", (int)ARRAY_SIZE(kids));
+     for (i = 0; i < ARRAY_SIZE(kids); i++) {
+         kids[i] = fork();
+         if (kids[i] == -1)
+             barf_perror("fork");
+         if (kids[i] == 0) {
+             work(per_kid, i);
+             exit(0);
+         }
+     }
+
+     for (i = 0; i < ARRAY_SIZE(kids); i++) {
+         int status;
+         if (waitpid(kids[i], &status, 0) == -1)
+             barf_perror("waitpid");
+         if (!WIFEXITED(status))
+             barf("Kid %i died via signal %i\n",
+                  i, WTERMSIG(status));
+         if (WEXITSTATUS(status) != 0) {
+             printf("Child %i exited %i\n", i, WEXITSTATUS(status));
+             failed = true;
+         }
+     }
+     if (failed)
+         exit(1);
+
+     printf("\nCounting results...\n");
+     i = tally_counts();
+     if (i != expected)
+         barf("Total counts %u not %u", i, expected);
+     printf("Success!\n");
+     exit(0);
+ }
diff --git a/tools/xenstore/xs_test.c b/tools/xenstore/xs_test.c
new file mode 100644
index 0000000000..f1e66cbe28
--- /dev/null
+++ b/tools/xenstore/xs_test.c
@@ -0,0 +1,647 @@
+/*
+ Xen Store Daemon Test tool
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include "utils.h"
+#include "xs_lib.h"
+
+ /* NOTE(review): presumably consumed by the xs.c included further down -
+  * confirm. */
+ #define XSTEST
+
+ /* Open connections, indexed by the numeric handle prefix of each command. */
+ static struct xs_handle *handles[10] = { NULL };
+
+ /* Header of one ring buffer; the data area (ringbuf_datasize bytes, see
+  * main()) follows it directly as buf. */
+ struct ringbuf_head
+ {
+ uint32_t write; /* Next place to write to */
+ uint32_t read; /* Next place to read from */
+ uint8_t flags;
+ char buf[0];
+ } __attribute__((packed));
+
+ /* Rings set up by do_introduce(): "out" is the start of the mapped page,
+  * "in" starts half a page further on. */
+ static struct ringbuf_head *out, *in;
+ /* Data bytes per ring; main() sets it to half a page minus the header. */
+ static unsigned int ringbuf_datasize;
+ /* Process sent SIGUSR2 after ring traffic; do_introduce() reads it back
+  * from the shared page. */
+ static int daemon_pid;
+
+ /* FIXME: Mark connection as broken (close it?) when this happens. */
+ /* True when both ring indices lie inside the data area. */
+ static bool check_buffer(const struct ringbuf_head *h)
+ {
+     if (h->write >= ringbuf_datasize)
+         return false;
+     if (h->read >= ringbuf_datasize)
+         return false;
+     return true;
+ }
+
+ /*
+  * Locate the next contiguous writable region of the ring.
+  * We can't fill last byte: would look like empty buffer.
+  *
+  * Returns a pointer into buf at the write index and stores the number of
+  * bytes that may be written there in *len.
+  */
+ static void *get_output_chunk(const struct ringbuf_head *h,
+                               void *buf, uint32_t *len)
+ {
+     /* Last slot the writer may reach: one before the read index. */
+     uint32_t limit = (h->read == 0) ? ringbuf_datasize - 1 : h->read - 1;
+
+     if (limit >= h->write)
+         /* Unread data lies ahead: stop just short of it. */
+         *len = limit - h->write;
+     else
+         /* Free space runs to the end of the buffer. */
+         *len = ringbuf_datasize - h->write;
+     return buf + h->write;
+ }
+
+ /*
+  * Locate the next contiguous readable region of the ring.  Returns a pointer
+  * into buf at the read index and stores the byte count in *len.
+  */
+ static const void *get_input_chunk(const struct ringbuf_head *h,
+                                    const void *buf, uint32_t *len)
+ {
+     if (h->write >= h->read)
+         /* Writer is ahead of us: data ends at the write index. */
+         *len = h->write - h->read;
+     else
+         /* Writer has wrapped: data runs to the end of the buffer. */
+         *len = ringbuf_datasize - h->read;
+     return buf + h->read;
+ }
+
+ /* Advance the write index by len, wrapping to 0 at the end of the data. */
+ static void update_output_chunk(struct ringbuf_head *h, uint32_t len)
+ {
+     h->write += len;
+     if (h->write != ringbuf_datasize)
+         return;
+     h->write = 0;
+ }
+
+ /* Advance the read index by len, wrapping to 0 at the end of the data. */
+ static void update_input_chunk(struct ringbuf_head *h, uint32_t len)
+ {
+     h->read += len;
+     if (h->read != ringbuf_datasize)
+         return;
+     h->read = 0;
+ }
+
+ /* FIXME: We spin, and we're sloppy. */
+ /*
+  * Copy exactly len bytes from the "in" ring into data, looping until enough
+  * has arrived, then signal daemon_pid with SIGUSR2.  The fd argument is
+  * ignored.  Always returns true; a corrupt ring header is fatal.
+  */
+ static bool read_all_shmem(int fd __attribute__((unused)),
+                            void *data, unsigned int len)
+ {
+     unsigned int chunk;
+
+     if (!check_buffer(in))
+         barf("Corrupt buffer");
+
+     for (; len != 0; len -= chunk, data += chunk) {
+         const void *src = get_input_chunk(in, in->buf, &chunk);
+
+         if (chunk > len)
+             chunk = len;
+         memcpy(data, src, chunk);
+         update_input_chunk(in, chunk);
+     }
+
+     /* Tell other end we read something. */
+     kill(daemon_pid, SIGUSR2);
+     return true;
+ }
+
+ /*
+  * Copy exactly len bytes from data into the "out" ring, looping until all of
+  * it fits, then signal daemon_pid with SIGUSR2.  The fd argument is ignored.
+  * Always returns true; a corrupt ring header is fatal.
+  */
+ static bool write_all_shmem(int fd __attribute__((unused)),
+                             const void *data, unsigned int len)
+ {
+     uint32_t chunk;
+
+     if (!check_buffer(out))
+         barf("Corrupt buffer");
+
+     for (; len != 0; len -= chunk, data += chunk) {
+         void *dst = get_output_chunk(out, out->buf, &chunk);
+
+         if (chunk > len)
+             chunk = len;
+         memcpy(dst, data, chunk);
+         update_output_chunk(out, chunk);
+     }
+
+     /* Tell other end we wrote something. */
+     kill(daemon_pid, SIGUSR2);
+     return true;
+ }
+
+ static bool read_all(int fd, void *data, unsigned int len);
+ /* Read via the shared-memory ring when fd is the -2 marker that
+  * do_introduce() stores in its handle, otherwise via read_all(). */
+ static bool read_all_choice(int fd, void *data, unsigned int len)
+ {
+     if (fd != -2)
+         return read_all(fd, data, len);
+     return read_all_shmem(fd, data, len);
+ }
+
+ /* Write via the shared-memory ring when fd is the -2 marker that
+  * do_introduce() stores in its handle, otherwise via write_all(). */
+ static bool write_all_choice(int fd, const void *data, unsigned int len)
+ {
+     if (fd != -2)
+         return write_all(fd, data, len);
+     return write_all_shmem(fd, data, len);
+ }
+
+/* We want access to internal functions. */
+#include "xs.c"
+
+ /* Print the command-line and command-language summary via barf(); does not
+  * return. */
+ static void __attribute__((noreturn)) usage(void)
+ {
+     barf("Usage:\n"
+          " xs_test [--readonly] [--notimeout]\n"
+          "Reads commands from stdin, one per line:\n"
+          " dir <path>\n"
+          " read <path>\n"
+          " write <path> <flags> <value>...\n"
+          " setid <id>\n"
+          " mkdir <path>\n"
+          " rm <path>\n"
+          " getperm <path>\n"
+          " setperm <path> <id> <flags> ...\n"
+          " shutdown\n"
+          " watch <path> <prio>\n"
+          " waitwatch\n"
+          " ackwatch\n"
+          " unwatch <path>\n"
+          " close\n"
+          " start <node>\n"
+          " abort\n"
+          " introduce <domid> <mfn> <eventchn>\n"
+          " commit\n"
+          " sleep <seconds>\n"
+          " dump\n");
+ }
+
+ /*
+  * Return a NUL-terminated copy of whitespace-separated word number "num"
+  * (0-based) of line.
+  *
+  * The copy lives in a per-index static slot: it stays valid until arg() is
+  * called again with the same num, and must not be freed by the caller.  A
+  * missing word, an index beyond the slot table, or allocation failure is
+  * fatal.
+  */
+ static char *arg(char *line, unsigned int num)
+ {
+     static char *args[10];
+     unsigned int i, len = 0;
+
+     if (num >= ARRAY_SIZE(args))
+         barf("Can't get arg %u", num);
+
+     /* Step over num words; line/len end up describing word "num". */
+     for (i = 0; i <= num; i++) {
+         line += len;
+         line += strspn(line, " \t\n");
+         len = strcspn(line, " \t\n");
+         if (!len)
+             barf("Can't get arg %u", num);
+     }
+
+     free(args[num]);
+     args[num] = malloc(len + 1);
+     if (!args[num])
+         barf_perror("Allocating arg %u", num);
+     memcpy(args[num], line, len);
+     args[num][len] = '\0';
+     return args[num];
+ }
+
+ /* Name of the command currently being executed (word 0 of the line). */
+ static char *command;
+ /* Report the current command as failed, prefixed with the handle number
+  * when it is non-zero.  Does not return. */
+ static void __attribute__((noreturn)) failed(int handle)
+ {
+     if (!handle)
+         barf_perror("%s", command);
+     barf_perror("%i: %s", handle, command);
+ }
+
+ /* "dir": print each entry of path on its own line, prefixed with
+  * "<handle>:" for non-zero handles. */
+ static void do_dir(unsigned int handle, char *path)
+ {
+     unsigned int idx, count;
+     char **entries = xs_directory(handles[handle], path, &count);
+
+     if (!entries)
+         failed(handle);
+
+     for (idx = 0; idx < count; idx++) {
+         if (handle)
+             printf("%i:%s\n", handle, entries[idx]);
+         else
+             printf("%s\n", entries[idx]);
+     }
+     free(entries);
+ }
+
+ /* "read": print the value stored at path, prefixed with "<handle>:" for
+  * non-zero handles. */
+ static void do_read(unsigned int handle, char *path)
+ {
+     char *value;
+     unsigned int len;
+
+     value = xs_read(handles[handle], path, &len);
+     if (!value)
+         failed(handle);
+
+     if (handle)
+         printf("%i:%.*s\n", handle, len, value);
+     else
+         printf("%.*s\n", len, value);
+     /* The buffer is ours to release (dump_dir() frees its xs_read()
+      * result the same way). */
+     free(value);
+ }
+
+ /*
+  * "write": store data (including its terminating NUL) at path.  The flags
+  * word selects the open-style flags; "crap" passes a deliberately odd value
+  * (100).  An unknown flags word is fatal.
+  */
+ static void do_write(unsigned int handle, char *path, char *flags, char *data)
+ {
+     static const struct {
+         const char *name;
+         int value;
+     } known[] = {
+         { "none", 0 },
+         { "create", O_CREAT },
+         { "excl", O_CREAT | O_EXCL },
+         { "crap", 100 },
+     };
+     unsigned int k;
+
+     for (k = 0; k < ARRAY_SIZE(known); k++)
+         if (streq(flags, known[k].name))
+             break;
+     if (k == ARRAY_SIZE(known))
+         barf("write flags 'none', 'create' or 'excl' only");
+
+     if (!xs_write(handles[handle], path, data, strlen(data)+1,
+                   known[k].value))
+         failed(handle);
+ }
+
+ /* "setid": send the "setid" debug command with id (NUL included). */
+ static void do_setid(unsigned int handle, char *id)
+ {
+     if (xs_bool(xs_debug_command(handles[handle], "setid", id,
+                                  strlen(id)+1)))
+         return;
+     failed(handle);
+ }
+
+ /* "mkdir": create directory path; failure is fatal. */
+ static void do_mkdir(unsigned int handle, char *path)
+ {
+     if (xs_mkdir(handles[handle], path))
+         return;
+     failed(handle);
+ }
+
+ /* "rm": remove path; failure is fatal. */
+ static void do_rm(unsigned int handle, char *path)
+ {
+     if (xs_rm(handles[handle], path))
+         return;
+     failed(handle);
+ }
+
+ /* "getperm": print one "<id> <PERM>" line per permission entry of path,
+  * prefixed with "<handle>:" for non-zero handles. */
+ static void do_getperm(unsigned int handle, char *path)
+ {
+     unsigned int idx, count;
+     struct xs_permissions *perms;
+
+     perms = xs_get_permissions(handles[handle], path, &count);
+     if (!perms)
+         failed(handle);
+
+     for (idx = 0; idx < count; idx++) {
+         const char *label;
+
+         switch (perms[idx].perms) {
+         case XS_PERM_NONE:
+             label = "NONE";
+             break;
+         case XS_PERM_WRITE:
+             label = "WRITE";
+             break;
+         case XS_PERM_READ:
+             label = "READ";
+             break;
+         case XS_PERM_READ|XS_PERM_WRITE:
+             label = "READ/WRITE";
+             break;
+         default:
+             barf("bad perm value %i", perms[idx].perms);
+         }
+
+         if (handle)
+             printf("%i:%i %s\n", handle, perms[idx].id, label);
+         else
+             printf("%i %s\n", perms[idx].id, label);
+     }
+     free(perms);
+ }
+
+ /*
+  * "setperm": parse the "<id> <flags>" pairs that follow the command and path
+  * words on line and apply them to path.
+  *
+  * line is tokenised in place with strtok().  A trailing id with no flags
+  * word is ignored.  Unknown flags, or more pairs than the local table holds,
+  * are fatal.
+  */
+ static void do_setperm(unsigned int handle, char *path, char *line)
+ {
+     unsigned int i;
+     struct xs_permissions perms[100];
+
+     /* Skip the command word and the path word. */
+     strtok(line, " \t\n");
+     strtok(NULL, " \t\n");
+     for (i = 0; ; i++) {
+         char *tok = strtok(NULL, " \t\n");
+         if (!tok)
+             break;
+         if (i == ARRAY_SIZE(perms))
+             barf("too many permissions (max %u)",
+                  (unsigned int)ARRAY_SIZE(perms));
+         perms[i].id = atoi(tok);
+         tok = strtok(NULL, " \t\n");
+         if (!tok)
+             break;
+         if (streq(tok, "WRITE"))
+             perms[i].perms = XS_PERM_WRITE;
+         else if (streq(tok, "READ"))
+             perms[i].perms = XS_PERM_READ;
+         else if (streq(tok, "READ/WRITE"))
+             perms[i].perms = XS_PERM_READ|XS_PERM_WRITE;
+         else if (streq(tok, "NONE"))
+             perms[i].perms = XS_PERM_NONE;
+         else
+             barf("bad flags %s\n", tok);
+     }
+
+     if (!xs_set_permissions(handles[handle], path, perms, i))
+         failed(handle);
+ }
+
+ /* "shutdown": ask the daemon to shut down; failure is fatal. */
+ static void do_shutdown(unsigned int handle)
+ {
+     if (xs_shutdown(handles[handle]))
+         return;
+     failed(handle);
+ }
+
+ /* "watch": register a watch on node with the numeric priority in pri. */
+ static void do_watch(unsigned int handle, const char *node, const char *pri)
+ {
+     if (xs_watch(handles[handle], node, atoi(pri)))
+         return;
+     failed(handle);
+ }
+
+ /* "waitwatch": read the next watch event and print its node, prefixed with
+  * "<handle>:" for non-zero handles. */
+ static void do_waitwatch(unsigned int handle)
+ {
+     char *fired = xs_read_watch(handles[handle]);
+
+     if (!fired)
+         failed(handle);
+
+     if (!handle)
+         printf("%s\n", fired);
+     else
+         printf("%i:%s\n", handle, fired);
+     free(fired);
+ }
+
+ /* "ackwatch": acknowledge the last watch event; failure is fatal. */
+ static void do_ackwatch(unsigned int handle)
+ {
+     if (xs_acknowledge_watch(handles[handle]))
+         return;
+     failed(handle);
+ }
+
+ /* "unwatch": remove the watch on node; failure is fatal. */
+ static void do_unwatch(unsigned int handle, const char *node)
+ {
+     if (xs_unwatch(handles[handle], node))
+         return;
+     failed(handle);
+ }
+
+ /* "start": begin a transaction on node; failure is fatal. */
+ static void do_start(unsigned int handle, const char *node)
+ {
+     if (xs_transaction_start(handles[handle], node))
+         return;
+     failed(handle);
+ }
+
+ /* End the current transaction, aborting it when abort is true; failure is
+  * fatal. */
+ static void do_end(unsigned int handle, bool abort)
+ {
+     if (xs_transaction_end(handles[handle], abort))
+         return;
+     failed(handle);
+ }
+
+ /*
+  * "introduce": map the shared page in /tmp/xcmap, publish our PID and the
+  * event channel in it, create a new handle that talks over the shared rings
+  * (fd marker -2) and announce the domain to the daemon through "handle".
+  *
+  * Prints the index of the new handle.  Running out of handle slots, failing
+  * to open or map the page, or a failed introduce request is fatal.
+  */
+ static void do_introduce(unsigned int handle,
+                          const char *domid,
+                          const char *mfn,
+                          const char *eventchn,
+                          const char *path)
+ {
+     unsigned int i;
+     int fd;
+
+     /* We poll, so ignore signal */
+     signal(SIGUSR2, SIG_IGN);
+     for (i = 0; i < ARRAY_SIZE(handles); i++)
+         if (!handles[i])
+             break;
+     if (i == ARRAY_SIZE(handles))
+         barf("No free handle for introduced domain");
+
+     fd = open("/tmp/xcmap", O_RDWR);
+     if (fd < 0)
+         barf_perror("Failed to open /tmp/xcmap");
+     /* Set in and out pointers. */
+     out = mmap(NULL, getpagesize(), PROT_WRITE|PROT_READ, MAP_SHARED,fd,0);
+     if (out == MAP_FAILED)
+         barf_perror("Failed to map /tmp/xcmap page");
+     in = (void *)out + getpagesize() / 2;
+     close(fd);
+
+     /* Tell them the event channel and our PID. */
+     *(int *)((void *)out + 32) = getpid();
+     *(u16 *)((void *)out + 36) = atoi(eventchn);
+
+     /* Create new handle. */
+     handles[i] = new(struct xs_handle);
+     handles[i]->fd = -2;
+
+     if (!xs_introduce_domain(handles[handle], atoi(domid),
+                              atol(mfn), atoi(eventchn), path))
+         failed(handle);
+     printf("handle is %i\n", i);
+
+     /* Read in daemon pid. */
+     daemon_pid = *(int *)((void *)out + 32);
+ }
+
+ /* Release the domain whose numeric id is given in domid; failure is fatal. */
+ static void do_release(unsigned int handle, const char *domid)
+ {
+     if (xs_release_domain(handles[handle], atoi(domid)))
+         return;
+     failed(handle);
+ }
+
+ /* qsort() comparator for an array of C strings: a and b each point at a
+  * char * element. */
+ static int strptrcmp(const void *a, const void *b)
+ {
+     const char *lhs = *(char *const *)a;
+     const char *rhs = *(char *const *)b;
+
+     return strcmp(lhs, rhs);
+ }
+
+ /* Sort the num entries of dir into strcmp() order, in place. */
+ static void sort_dir(char **dir, unsigned int num)
+ {
+     qsort(dir, num, sizeof(dir[0]), strptrcmp);
+ }
+
+ /*
+  * Print the numdirs children of node (names in dir), sorted, each indented
+  * by depth spaces: name and permissions, then the node's contents if it has
+  * any, then its own children recursively one level deeper.
+  * dir is sorted in place.  Any xs failure other than a read reporting
+  * EISDIR is fatal.
+  */
+ static void dump_dir(unsigned int handle,
+ const char *node,
+ char **dir,
+ unsigned int numdirs,
+ unsigned int depth)
+ {
+ unsigned int i;
+ char spacing[depth+1];
+
+ /* Indentation prefix: depth spaces. */
+ memset(spacing, ' ', depth);
+ spacing[depth] = '\0';
+
+ sort_dir(dir, numdirs);
+
+ for (i = 0; i < numdirs; i++) {
+ struct xs_permissions *perms;
+ unsigned int j, numperms;
+ unsigned int len;
+ char *contents;
+ unsigned int subnum;
+ char **subdirs;
+ char subnode[strlen(node) + 1 + strlen(dir[i]) + 1];
+
+ /* Full path of this child. */
+ sprintf(subnode, "%s/%s", node, dir[i]);
+
+ perms = xs_get_permissions(handles[handle], subnode,&numperms);
+ if (!perms)
+ failed(handle);
+
+ printf("%s%s: ", spacing, dir[i]);
+ for (j = 0; j < numperms; j++) {
+ char buffer[100];
+ if (!perm_to_string(&perms[j], buffer))
+ barf("perm to string");
+ printf("%s ", buffer);
+ }
+ free(perms);
+ printf("\n");
+
+ /* Even directories can have contents. */
+ contents = xs_read(handles[handle], subnode, &len);
+ if (!contents) {
+ /* EISDIR just means there is nothing to print for this node. */
+ if (errno != EISDIR)
+ failed(handle);
+ } else {
+ printf(" %s(%.*s)\n", spacing, len, contents);
+ free(contents);
+ }
+
+ /* Every node is a directory. */
+ subdirs = xs_directory(handles[handle], subnode, &subnum);
+ if (!subdirs)
+ failed(handle);
+ dump_dir(handle, subnode, subdirs, subnum, depth+1);
+ free(subdirs);
+ }
+ }
+
+ /* "dump": print the whole store as seen through handle, starting at "/". */
+ static void dump(int handle)
+ {
+     unsigned int count;
+     char **top = xs_directory(handles[handle], "/", &count);
+
+     if (!top)
+         failed(handle);
+
+     dump_dir(handle, "", top, count, 0);
+     free(top);
+ }
+
+/* Handle of the command currently being run; read by alarmed() on timeout. */
+static int cur_handle;
+
+/*
+ * SIGALRM handler: the current command overran its time limit.
+ * Writes "[<handle>:]<command> timeout\n" to stdout (the handle prefix is
+ * omitted for handle 0) and exits with status 1.
+ *
+ * Defined at file scope rather than nested inside main(): the GCC manual
+ * states that declaring a nested function "static" is erroneous.
+ *
+ * NOTE(review): snprintf() and exit() are not on the POSIX
+ * async-signal-safe list.  exit() is kept so that buffered stdout is
+ * still flushed on the way out, as before.
+ */
+static void alarmed(int sig __attribute__((unused)))
+{
+	if (cur_handle) {
+		/* Bounded write; 16 bytes holds a 32-bit value, ':' and NUL. */
+		char handlename[16];
+		snprintf(handlename, sizeof(handlename), "%u:",
+			 (unsigned int)cur_handle);
+		write(STDOUT_FILENO, handlename, strlen(handlename));
+	}
+	write(STDOUT_FILENO, command, strlen(command));
+	write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n"));
+	exit(1);
+}
+
+/*
+ * Read commands from stdin, one per line, and run each against the store
+ * daemon.
+ *
+ * Options (if both are given they must appear in this order):
+ *   --readonly   open connections with xs_daemon_open_readonly()
+ *   --notimeout  do not arm the 5 second per-command alarm
+ * Any other argument leads to usage().
+ *
+ * Lines consisting only of spaces/newlines and lines starting with '#'
+ * are skipped.  A line may begin with a decimal handle number followed by
+ * one separator character; without a number, handle 0 is used.  The
+ * connection for a handle is opened the first time that handle is used.
+ *
+ * Returns 0 once stdin is exhausted.
+ */
+int main(int argc, char *argv[])
+{
+	char line[1024];
+	bool readonly = false, timeout = true;
+
+	if (argc > 1 && streq(argv[1], "--readonly")) {
+		readonly = true;
+		argc--;
+		argv++;
+	}
+
+	if (argc > 1 && streq(argv[1], "--notimeout")) {
+		timeout = false;
+		argc--;
+		argv++;
+	}
+
+	if (argc != 1)
+		usage();
+
+	/* The size of the ringbuffer: half a page minus head structure. */
+	ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
+
+	signal(SIGALRM, alarmed);
+	while (fgets(line, sizeof(line), stdin)) {
+		char *endp;
+
+		if (strspn(line, " \n") == strlen(line))
+			continue;
+		if (strstarts(line, "#"))
+			continue;
+
+		/*
+		 * Optional leading handle number: strip it together with the
+		 * single character that follows it.
+		 * NOTE(review): the value is not range-checked against
+		 * handles[]; the array's size is not visible here - confirm.
+		 */
+		cur_handle = strtoul(line, &endp, 10);
+		if (endp != line)
+			memmove(line, endp+1, strlen(endp));
+		else
+			cur_handle = 0;
+
+		/* Open the connection for this handle on first use. */
+		if (!handles[cur_handle]) {
+			if (readonly)
+				handles[cur_handle] = xs_daemon_open_readonly();
+			else
+				handles[cur_handle] = xs_daemon_open();
+			if (!handles[cur_handle])
+				barf_perror("Opening connection to daemon");
+		}
+		command = arg(line, 0);
+
+		/* Arm the watchdog for this command only; disarmed below. */
+		if (timeout)
+			alarm(5);
+		if (streq(command, "dir"))
+			do_dir(cur_handle, arg(line, 1));
+		else if (streq(command, "read"))
+			do_read(cur_handle, arg(line, 1));
+		else if (streq(command, "write"))
+			do_write(cur_handle,
+				 arg(line, 1), arg(line, 2), arg(line, 3));
+		else if (streq(command, "setid"))
+			do_setid(cur_handle, arg(line, 1));
+		else if (streq(command, "mkdir"))
+			do_mkdir(cur_handle, arg(line, 1));
+		else if (streq(command, "rm"))
+			do_rm(cur_handle, arg(line, 1));
+		else if (streq(command, "getperm"))
+			do_getperm(cur_handle, arg(line, 1));
+		else if (streq(command, "setperm"))
+			do_setperm(cur_handle, arg(line, 1), line);
+		else if (streq(command, "shutdown"))
+			do_shutdown(cur_handle);
+		else if (streq(command, "watch"))
+			do_watch(cur_handle, arg(line, 1), arg(line, 2));
+		else if (streq(command, "waitwatch"))
+			do_waitwatch(cur_handle);
+		else if (streq(command, "ackwatch"))
+			do_ackwatch(cur_handle);
+		else if (streq(command, "unwatch"))
+			do_unwatch(cur_handle, arg(line, 1));
+		else if (streq(command, "close")) {
+			xs_daemon_close(handles[cur_handle]);
+			handles[cur_handle] = NULL;
+		} else if (streq(command, "start"))
+			do_start(cur_handle, arg(line, 1));
+		else if (streq(command, "commit"))
+			do_end(cur_handle, false);
+		else if (streq(command, "abort"))
+			do_end(cur_handle, true);
+		else if (streq(command, "introduce"))
+			do_introduce(cur_handle, arg(line, 1), arg(line, 2),
+				     arg(line, 3), arg(line, 4));
+		else if (streq(command, "release"))
+			do_release(cur_handle, arg(line, 1));
+		else if (streq(command, "dump"))
+			dump(cur_handle);
+		else if (streq(command, "sleep"))
+			sleep(atoi(arg(line, 1)));
+		else
+			barf("Unknown command %s", command);
+		fflush(stdout);
+		alarm(0);
+	}
+	return 0;
+}