From 29c9e570b1eddfd6df789e08da65cf4ddec5f6fe Mon Sep 17 00:00:00 2001 From: "cl349@firebug.cl.cam.ac.uk" Date: Tue, 7 Jun 2005 12:43:58 +0000 Subject: bitkeeper revision 1.1662.1.15 (42a5968eiZE_DjdIFPjxvzLw6ACvCQ) Add xenstore daemon and library. Makefile: Add xenstore subdirectory. Remove xs_stress on clean. Many files: new file ignore: Update ignore list for xenstore. Signed-off-by: Rusty Russell (authored) Signed-off-by: Christian Limpach --- .rootkeys | 37 + BitKeeper/etc/ignore | 7 +- tools/Makefile | 1 + tools/xenstore/.gdbinit | 4 + tools/xenstore/Makefile | 97 ++ tools/xenstore/TODO | 7 + tools/xenstore/fake_libxc.c | 119 ++ tools/xenstore/list.h | 508 ++++++++ tools/xenstore/talloc.c | 1143 +++++++++++++++++ tools/xenstore/talloc.h | 134 ++ tools/xenstore/talloc_guide.txt | 569 +++++++++ tools/xenstore/testsuite/01simple.sh | 4 + tools/xenstore/testsuite/02directory.sh | 31 + tools/xenstore/testsuite/03write.sh | 17 + tools/xenstore/testsuite/04rm.sh | 18 + tools/xenstore/testsuite/05filepermissions.sh | 49 + tools/xenstore/testsuite/06dirpermissions.sh | 61 + tools/xenstore/testsuite/07watch.sh | 32 + tools/xenstore/testsuite/08transaction.sh | 54 + tools/xenstore/testsuite/09domain.sh | 15 + tools/xenstore/testsuite/test.sh | 44 + tools/xenstore/utils.c | 143 +++ tools/xenstore/utils.h | 61 + tools/xenstore/xenstored.h | 81 ++ tools/xenstore/xenstored_core.c | 1354 ++++++++++++++++++++ tools/xenstore/xenstored_core.h | 123 ++ tools/xenstore/xenstored_domain.c | 387 ++++++ tools/xenstore/xenstored_domain.h | 38 + tools/xenstore/xenstored_test.h | 37 + tools/xenstore/xenstored_transaction.c | 284 +++++ tools/xenstore/xenstored_transaction.h | 50 + tools/xenstore/xenstored_watch.c | 279 +++++ tools/xenstore/xenstored_watch.h | 42 + tools/xenstore/xs.c | 551 +++++++++ tools/xenstore/xs.h | 146 +++ tools/xenstore/xs_lib.c | 141 +++ tools/xenstore/xs_lib.h | 63 + tools/xenstore/xs_random.c | 1646 +++++++++++++++++++++++++ tools/xenstore/xs_stress.c | 207 
++++ tools/xenstore/xs_test.c | 647 ++++++++++ 40 files changed, 9230 insertions(+), 1 deletion(-) create mode 100644 tools/xenstore/.gdbinit create mode 100644 tools/xenstore/Makefile create mode 100644 tools/xenstore/TODO create mode 100644 tools/xenstore/fake_libxc.c create mode 100644 tools/xenstore/list.h create mode 100644 tools/xenstore/talloc.c create mode 100644 tools/xenstore/talloc.h create mode 100644 tools/xenstore/talloc_guide.txt create mode 100644 tools/xenstore/testsuite/01simple.sh create mode 100644 tools/xenstore/testsuite/02directory.sh create mode 100644 tools/xenstore/testsuite/03write.sh create mode 100644 tools/xenstore/testsuite/04rm.sh create mode 100644 tools/xenstore/testsuite/05filepermissions.sh create mode 100644 tools/xenstore/testsuite/06dirpermissions.sh create mode 100644 tools/xenstore/testsuite/07watch.sh create mode 100644 tools/xenstore/testsuite/08transaction.sh create mode 100644 tools/xenstore/testsuite/09domain.sh create mode 100755 tools/xenstore/testsuite/test.sh create mode 100644 tools/xenstore/utils.c create mode 100644 tools/xenstore/utils.h create mode 100644 tools/xenstore/xenstored.h create mode 100644 tools/xenstore/xenstored_core.c create mode 100644 tools/xenstore/xenstored_core.h create mode 100644 tools/xenstore/xenstored_domain.c create mode 100644 tools/xenstore/xenstored_domain.h create mode 100644 tools/xenstore/xenstored_test.h create mode 100644 tools/xenstore/xenstored_transaction.c create mode 100644 tools/xenstore/xenstored_transaction.h create mode 100644 tools/xenstore/xenstored_watch.c create mode 100644 tools/xenstore/xenstored_watch.h create mode 100644 tools/xenstore/xs.c create mode 100644 tools/xenstore/xs.h create mode 100644 tools/xenstore/xs_lib.c create mode 100644 tools/xenstore/xs_lib.h create mode 100644 tools/xenstore/xs_random.c create mode 100644 tools/xenstore/xs_stress.c create mode 100644 tools/xenstore/xs_test.c diff --git a/.rootkeys b/.rootkeys index 77b06993c5..a14deeaee8 
100644 --- a/.rootkeys +++ b/.rootkeys @@ -996,6 +996,43 @@ 4292540couq-V0TPwyQ6bspNEWNcvw tools/xcutils/Makefile 42925407VysDb9O06OK_RUzTZxfLoA tools/xcutils/xc_restore.c 42936745WTLYamYsmXm_JGJ72JX-_Q tools/xcutils/xc_save.c +42a57d97mxMTlPnxBKep6R4ViI5rjg tools/xenstore/.gdbinit +42a57d97ZEoHuhMAFTuBMlLzA9v_ng tools/xenstore/Makefile +42a57d97ccA4uY-RxONvIH0P8U0gqg tools/xenstore/TODO +42a57d972RzmyLgsoH9b8qqk-UjcCA tools/xenstore/fake_libxc.c +42a57d97IjoPvbIVc4BUzwoKyM0VSw tools/xenstore/list.h +42a57d97fKgtf0HQLiQkAkVsOvuSyA tools/xenstore/talloc.c +42a57d98U3p0XP6xzCybTuaVQscUdw tools/xenstore/talloc.h +42a57d98LFN6Mug-uR4xgAxCE7lwUg tools/xenstore/talloc_guide.txt +42a57d98S69vKJYwO_WUjoFQZ6KzQg tools/xenstore/testsuite/01simple.sh +42a57d98BHcFpZz_fXHweylUEUU97Q tools/xenstore/testsuite/02directory.sh +42a57d98ua4Xeb6pmtbFNTAI833dyw tools/xenstore/testsuite/03write.sh +42a57d98nbuCUsVT0RJj1zA1JyMDsw tools/xenstore/testsuite/04rm.sh +42a57d98_ULKHP3_uX1PK2nPMTzWSQ tools/xenstore/testsuite/05filepermissions.sh +42a57d98YGCLyTDSGmoyFqRqQUlagQ tools/xenstore/testsuite/06dirpermissions.sh +42a57d98fdO519YyATk4_Zwr1STNfQ tools/xenstore/testsuite/07watch.sh +42a57d98zZUtvirUMjmHxFphJjmO7Q tools/xenstore/testsuite/08transaction.sh +42a57d98sn9RbpBgHRv1D99Kt7LwYA tools/xenstore/testsuite/09domain.sh +42a57d98tSuoFCHnnM2GgENXJrRQmw tools/xenstore/testsuite/test.sh +42a57d98zxDP2Ti7dTznGROi66rUGw tools/xenstore/utils.c +42a57d98SDvOYCEjmCjwHSk6390GLA tools/xenstore/utils.h +42a57d98hFKbOY9D0mCE4H4NDoKr1w tools/xenstore/xenstored.h +42a57d981KFHLmJ0CjKkn1_gZhYvdw tools/xenstore/xenstored_core.c +42a57d98bcgE13vYaFxGTusmWbrFDA tools/xenstore/xenstored_core.h +42a57d98cD9wOFyRYfaEP0QgtqL1Xw tools/xenstore/xenstored_domain.c +42a57d98noLWvXU8ePbcqvvmu4p2Gw tools/xenstore/xenstored_domain.h +42a57d98kxHaQ1ApS7RpqmFoEnDmbg tools/xenstore/xenstored_test.h +42a57d981c9P3aFkWtxWEIRUapt_FQ tools/xenstore/xenstored_transaction.c +42a57d99pVo__10bbckp_b_rm6i59A 
tools/xenstore/xenstored_transaction.h +42a57d99izTIjWfG-IjQAPqYlDWJNg tools/xenstore/xenstored_watch.c +42a57d99-zLxBjzC7rfj_perV-orUg tools/xenstore/xenstored_watch.h +42a57d99BnkhISKgCCRcUqhteyuxCw tools/xenstore/xs.c +42a57d99FyiYSz9AkKKROrRydnA-gQ tools/xenstore/xs.h +42a57d99SrtsJCDUlKyRPf3EX86A1Q tools/xenstore/xs_lib.c +42a57d99L2pYeMFyjQ_4Rnb17xTSMg tools/xenstore/xs_lib.h +42a57d99Kl6Ba8oCHv2fggl7QN9QZA tools/xenstore/xs_random.c +42a57d99SHYR1lQOD0shuErPDg9NKQ tools/xenstore/xs_stress.c +42a57d996aBawpkQNOWkNWXD6LrhPg tools/xenstore/xs_test.c 403a3edbrr8RE34gkbR40zep98SXbg tools/xentrace/Makefile 40a107afN60pFdURgBv9KwEzgRl5mQ tools/xentrace/formats 420d52d2_znVbT4JAPIU36vQOme83g tools/xentrace/xenctx.c diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore index d996d45a72..b591ce7458 100644 --- a/BitKeeper/etc/ignore +++ b/BitKeeper/etc/ignore @@ -128,8 +128,13 @@ tools/xcs/xcs tools/xcs/xcsdump tools/xcutils/xc_restore tools/xcutils/xc_save +tools/xenstore/testsuite/tmp/* +tools/xenstore/xen +tools/xenstore/xenstored_test +tools/xenstore/xs_random +tools/xenstore/xs_stress +tools/xenstore/xs_test tools/xentrace/xentrace -tools/xfrd/xfrd xen/BLOG xen/TAGS xen/arch/x86/asm-offsets.s diff --git a/tools/Makefile b/tools/Makefile index 95e8989d4c..5e4a2bd586 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -9,6 +9,7 @@ SUBDIRS += xentrace SUBDIRS += python SUBDIRS += xcs SUBDIRS += xcutils +SUBDIRS += xenstore SUBDIRS += pygrub .PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean diff --git a/tools/xenstore/.gdbinit b/tools/xenstore/.gdbinit new file mode 100644 index 0000000000..9a71b20ac4 --- /dev/null +++ b/tools/xenstore/.gdbinit @@ -0,0 +1,4 @@ +set environment XENSTORED_RUNDIR=testsuite/tmp +set environment XENSTORED_ROOTDIR=testsuite/tmp +handle SIGUSR1 noprint nostop +handle SIGPIPE noprint nostop diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile new file mode 100644 index 0000000000..cd4a7b3079 --- /dev/null 
+++ b/tools/xenstore/Makefile @@ -0,0 +1,97 @@ +XEN_ROOT=../.. +# This does something wrong to TARGET_ARCH. +#include $(XEN_ROOT)/tools/Rules.mk +LIBDIR = lib +XEN_LIBXC = $(XEN_ROOT)/tools/libxc + +INSTALL = install +INSTALL_DATA = $(INSTALL) -m0644 +INSTALL_PROG = $(INSTALL) -m0755 +INSTALL_DIR = $(INSTALL) -d -m0755 + +PROFILE=#-pg +BASECFLAGS=-Wall -W -g +# Make gcc generate dependencies. +BASECFLAGS += -Wp,-MD,.$(@F).d +PROG_DEP = .*.d +#BASECFLAGS+= -O3 $(PROFILE) +#BASECFLAGS+= -I$(XEN_ROOT)/tools +BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc +BASECFLAGS+= -I$(XEN_ROOT)/xen/include/public +BASECFLAGS+= -I. + +CFLAGS+=$(BASECFLAGS) +LDFLAGS=$(PROFILE) -L$(XEN_LIBXC) +TESTDIR=`pwd`/testsuite/tmp +TESTFLAGS=-DTESTING +TESTENV=XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR) + +all: xen xenstored libxenstore.a + +testcode: xen xs_test xenstored_test xs_random + +xen: + ln -sf $(XEN_ROOT)/xen/include/public $@ + +xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@ + +xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ + +xs_test: xs_test.o xs_lib.o utils.o +xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o +xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o + +xs_test.o xs_stress.o xenstored_core_test.o xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS) + +xenstored_%_test.o: xenstored_%.c + $(COMPILE.c) -o $@ $< + +xs_test_lib.o: xs.c + $(COMPILE.c) -o $@ $< + +talloc_test.o: talloc.c + $(COMPILE.c) -o $@ $< + +libxenstore.a: libxenstore.a(xs.o) libxenstore.a(xs_lib.o) + +clean: testsuite-clean + rm -f *.o *.a xs_test xenstored xenstored_test xs_random xs_stress xen + 
-$(RM) $(PROG_DEP) + +check: testsuite-run randomcheck stresstest + +testsuite-run: xen xenstored_test xs_test + $(TESTENV) testsuite/test.sh + +testsuite-clean: + rm -rf $(TESTDIR) + +# Make this visible so they can see repeat tests without --fast if they +# fail. +RANDSEED=$(shell date +%s) +randomcheck: xs_random xenstored_test + $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED) + $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED) + $(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED) + +stresstest: xs_stress xenstored_test + rm -rf $(TESTDIR)/store + export $(TESTENV); PID=`./xenstored_test --output-pid`; ./xs_stress 10000; ret=$$?; kill $$PID; exit $$ret + +TAGS: + etags `find . -name '*.[ch]'` + +tarball: clean + cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/ + +install: xenstored libxenstore.a + $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored + $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored + $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin + $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin + $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) + $(INSTALL_DATA) libxenstore.a $(DESTDIR)/usr/$(LIBDIR) + +-include $(PROG_DEP) diff --git a/tools/xenstore/TODO b/tools/xenstore/TODO new file mode 100644 index 0000000000..9e22afe536 --- /dev/null +++ b/tools/xenstore/TODO @@ -0,0 +1,7 @@ +TODO in no particular order. Some of these will never be done. There +are omissions of important but necessary things. It is up to the +reader to fill in the blanks. + +- Remove calls to system() from daemon +- Timeout failed watch responses +- Timeout blocking transactions diff --git a/tools/xenstore/fake_libxc.c b/tools/xenstore/fake_libxc.c new file mode 100644 index 0000000000..decfb4001d --- /dev/null +++ b/tools/xenstore/fake_libxc.c @@ -0,0 +1,119 @@ +/* + Fake libxc which doesn't require hypervisor but talks to xs_test. 
+ Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "xenstored_core.h" +#include "xenstored_domain.h" +#include "xenstored_test.h" + +static int sigfd; +static int xs_test_pid; +static u16 port; + +/* The event channel maps to a signal, shared page to an mmapped file. */ +int xc_evtchn_send(int xc_handle __attribute__((unused)), int local_port) +{ + assert(local_port == port); + if (kill(xs_test_pid, SIGUSR2) != 0) + barf_perror("fake event channel failed"); + return 0; +} + +void *xc_map_foreign_range(int xc_handle, u32 dom __attribute__((unused)), + int size, int prot, + unsigned long mfn __attribute__((unused))) +{ + void *ret; + + ret = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0); + if (ret == MAP_FAILED) + return NULL; + + /* xs_test tells us pid and port by putting it in buffer, we reply. 
*/ + xs_test_pid = *(int *)(ret + 32); + port = *(int *)(ret + 36); + *(int *)(ret + 32) = getpid(); + return ret; +} + +int xc_interface_open(void) +{ + int fd; + char page[getpagesize()]; + + fd = open("/tmp/xcmap", O_RDWR|O_CREAT|O_TRUNC, 0600); + if (fd < 0) + return fd; + + memset(page, 0, sizeof(page)); + if (!write_all(fd, page, sizeof(page))) + barf_perror("Failed to write /tmp/xcmap page"); + + return fd; +} + +int xc_interface_close(int xc_handle) +{ + close(xc_handle); + return 0; +} + +static void send_to_fd(int signo __attribute__((unused))) +{ + int saved_errno = errno; + write(sigfd, &port, sizeof(port)); + errno = saved_errno; +} + +void fake_block_events(void) +{ + signal(SIGUSR2, SIG_IGN); +} + +void fake_ack_event(void) +{ + signal(SIGUSR2, send_to_fd); +} + +int fake_open_eventchn(void) +{ + int fds[2]; + + if (pipe(fds) != 0) + return -1; + + if (signal(SIGUSR2, send_to_fd) == SIG_ERR) { + int saved_errno = errno; + close(fds[0]); + close(fds[1]); + errno = saved_errno; + return -1; + } + sigfd = fds[1]; + return fds[0]; +} diff --git a/tools/xenstore/list.h b/tools/xenstore/list.h new file mode 100644 index 0000000000..eb35293d7f --- /dev/null +++ b/tools/xenstore/list.h @@ -0,0 +1,508 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H +/* Taken from Linux kernel code, but de-kernelized for userspace. */ +#include + +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +/* + * Simple doubly linked list implementation. 
+ * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +#define list_top(head, type, member) \ +({ \ + struct list_head *_head = (head); \ + list_empty(_head) ? NULL : list_entry(_head->next, type, member); \ +}) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */ +static __inline__ void __list_add_rcu(struct list_head * new, + struct list_head * prev, + struct list_head * next) +{ + new->next = next; + new->prev = prev; + next->prev = new; + prev->next = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static __inline__ void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static __inline__ void list_add_tail_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. 
+ * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = LIST_POISON2; +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_splice(struct list_head *list, + struct list_head *head) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. 
+ * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_reverse - iterate backwards over list of given type. + * @pos: the type * to use as a loop counter. 
+ * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.prev, typeof(*pos), member)) + + +/** + * list_for_each_entry_continue - iterate over list of given type + * continuing after existing point + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_continue(pos, head, member) \ + for (pos = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + + +/* + * Double linked lists with a single pointer list head. + * Mostly useful for hash tables where the two pointer list head is + * too wasteful. + * You lose the ability to access the tail in O(1). 
+ */ + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL) + +static __inline__ int hlist_unhashed(struct hlist_node *h) +{ + return !h->pprev; +} + +static __inline__ int hlist_empty(struct hlist_head *h) +{ + return !h->first; +} + +static __inline__ void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static __inline__ void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = LIST_POISON1; + n->pprev = LIST_POISON2; +} + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @entry: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. 
+ */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +static __inline__ void hlist_del_init(struct hlist_node *n) +{ + if (n->pprev) { + __hlist_del(n); + INIT_HLIST_NODE(n); + } +} + +#define hlist_del_rcu_init hlist_del_init + +static __inline__ void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +static __inline__ void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + n->pprev = &h->first; + if (first) + first->pprev = &n->next; + h->first = n; +} + +/* next must be != NULL */ +static __inline__ void hlist_add_before(struct hlist_node *n, struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + next->pprev = &n->next; + *(n->pprev) = n; +} +/* Link new node @next directly after @n (n must be != NULL and on a list); @next need not be initialised. */ +static __inline__ void hlist_add_after(struct hlist_node *n, struct hlist_node *next) +{ + next->next = n->next; + n->next = next; + next->pprev = &n->next; + if (next->next) next->next->pprev = &next->next; /* old successor now hangs off @next */ +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +/* Cannot easily do prefetch unfortunately */ +#define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos; pos = pos->next) + +#define hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; n = pos ? pos->next : 0, pos; \ + pos = n) + +/** + * hlist_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct.
+ */ +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_continue(tpos, pos, member) \ + for (pos = (pos)->next; \ + pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_from - iterate over a hlist continuing from existing point + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from(tpos, pos, member) \ + for (; pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @n: another &struct hlist_node to use as temporary storage + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +#endif diff --git a/tools/xenstore/talloc.c b/tools/xenstore/talloc.c new file mode 100644 index 0000000000..8e93c28fe3 --- /dev/null +++ b/tools/xenstore/talloc.c @@ -0,0 +1,1143 @@ +/* + Samba Unix SMB/CIFS implementation. 
+ + Samba trivial allocation library - new interface + + NOTE: Please read talloc_guide.txt for full documentation + + Copyright (C) Andrew Tridgell 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + inspired by http://swapped.cc/halloc/ +*/ + + +#ifdef _SAMBA_BUILD_ +#include "includes.h" +#if ((SAMBA_VERSION_MAJOR==3)&&(SAMBA_VERSION_MINOR<9)) +/* This is to circumvent SAMBA3's paranoid malloc checker. Here in this file + * we trust ourselves... 
*/ +#ifdef malloc +#undef malloc +#endif +#ifdef realloc +#undef realloc +#endif +#endif +#else +#include +#include +#include +#include +#include +#include "talloc.h" +/* assume a modern system */ +#define HAVE_VA_COPY +#endif + +/* use this to force every realloc to change the pointer, to stress test + code that might not cope */ +#ifdef TESTING +#define ALWAYS_REALLOC 1 +void *test_malloc(size_t size); +#define malloc test_malloc +#endif + +#define MAX_TALLOC_SIZE 0x10000000 +#define TALLOC_MAGIC 0xe814ec4f +#define TALLOC_MAGIC_FREE 0x7faebef3 +#define TALLOC_MAGIC_REFERENCE ((const char *)1) + +/* by default we abort when given a bad pointer (such as when talloc_free() is called + on a pointer that came from malloc() */ +#ifndef TALLOC_ABORT +#define TALLOC_ABORT(reason) abort() +#endif + +#ifndef discard_const_p +#if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T) +# define discard_const_p(type, ptr) ((type *)((intptr_t)(ptr))) +#else +# define discard_const_p(type, ptr) ((type *)(ptr)) +#endif +#endif + +/* this null_context is only used if talloc_enable_leak_report() or + talloc_enable_leak_report_full() is called, otherwise it remains + NULL +*/ +static const void *null_context; +static void *cleanup_context; +static int (*malloc_fail_handler)(void *); +static void *malloc_fail_data; + +struct talloc_reference_handle { + struct talloc_reference_handle *next, *prev; + void *ptr; +}; + +typedef int (*talloc_destructor_t)(void *); + +struct talloc_chunk { + struct talloc_chunk *next, *prev; + struct talloc_chunk *parent, *child; + struct talloc_reference_handle *refs; + size_t size; + unsigned magic; + talloc_destructor_t destructor; + const char *name; +}; + +/* panic if we get a bad magic value */ +static struct talloc_chunk *talloc_chunk_from_ptr(const void *ptr) +{ + struct talloc_chunk *tc = discard_const_p(struct talloc_chunk, ptr)-1; + if (tc->magic != TALLOC_MAGIC) { + if (tc->magic == TALLOC_MAGIC_FREE) { + TALLOC_ABORT("Bad talloc magic value - 
double free"); + } else { + TALLOC_ABORT("Bad talloc magic value - unknown value"); + } + } + + return tc; +} + +/* hook into the front of the list */ +#define _TLIST_ADD(list, p) \ +do { \ + if (!(list)) { \ + (list) = (p); \ + (p)->next = (p)->prev = NULL; \ + } else { \ + (list)->prev = (p); \ + (p)->next = (list); \ + (p)->prev = NULL; \ + (list) = (p); \ + }\ +} while (0) + +/* remove an element from a list - element doesn't have to be in list. */ +#define _TLIST_REMOVE(list, p) \ +do { \ + if ((p) == (list)) { \ + (list) = (p)->next; \ + if (list) (list)->prev = NULL; \ + } else { \ + if ((p)->prev) (p)->prev->next = (p)->next; \ + if ((p)->next) (p)->next->prev = (p)->prev; \ + } \ + if ((p) && ((p) != (list))) (p)->next = (p)->prev = NULL; \ +} while (0) + + +/* + return the parent chunk of a pointer +*/ +static struct talloc_chunk *talloc_parent_chunk(const void *ptr) +{ + struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr); + while (tc->prev) tc=tc->prev; + return tc->parent; +} + +void *talloc_parent(const void *ptr) +{ + struct talloc_chunk *tc = talloc_parent_chunk(ptr); + return (void *)(tc+1); +} + +/* + Allocate a bit of memory as a child of an existing pointer +*/ +void *_talloc(const void *context, size_t size) +{ + struct talloc_chunk *tc; + + if (context == NULL) { + context = null_context; + } + + if (size >= MAX_TALLOC_SIZE) { + return NULL; + } + + tc = malloc(sizeof(*tc)+size); + if (tc == NULL) { + if (malloc_fail_handler) + if (malloc_fail_handler(malloc_fail_data)) + tc = malloc(sizeof(*tc)+size); + if (!tc) + return NULL; + } + + tc->size = size; + tc->magic = TALLOC_MAGIC; + tc->destructor = NULL; + tc->child = NULL; + tc->name = NULL; + tc->refs = NULL; + + if (context) { + struct talloc_chunk *parent = talloc_chunk_from_ptr(context); + + tc->parent = parent; + + if (parent->child) { + parent->child->parent = NULL; + } + + _TLIST_ADD(parent->child, tc); + } else { + tc->next = tc->prev = tc->parent = NULL; + } + + return (void 
*)(tc+1); +} + + +/* + setup a destructor to be called on free of a pointer + the destructor should return 0 on success, or -1 on failure. + if the destructor fails then the free is failed, and the memory can + be continued to be used +*/ +void talloc_set_destructor(const void *ptr, int (*destructor)(void *)) +{ + struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr); + tc->destructor = destructor; +} + +/* + increase the reference count on a piece of memory. +*/ +void talloc_increase_ref_count(const void *ptr) +{ + talloc_reference(null_context, ptr); +} + +/* + helper for talloc_reference() +*/ +static int talloc_reference_destructor(void *ptr) +{ + struct talloc_reference_handle *handle = ptr; + struct talloc_chunk *tc1 = talloc_chunk_from_ptr(ptr); + struct talloc_chunk *tc2 = talloc_chunk_from_ptr(handle->ptr); + if (tc1->destructor != (talloc_destructor_t)-1) { + tc1->destructor = NULL; + } + _TLIST_REMOVE(tc2->refs, handle); + talloc_free(handle); + return 0; +} + +/* + make a secondary reference to a pointer, hanging off the given context. + the pointer remains valid until both the original caller and this given + context are freed. 
+ + the major use for this is when two different structures need to reference the + same underlying data, and you want to be able to free the two instances separately, + and in either order +*/ +void *talloc_reference(const void *context, const void *ptr) +{ + struct talloc_chunk *tc; + struct talloc_reference_handle *handle; + if (ptr == NULL) return NULL; + + tc = talloc_chunk_from_ptr(ptr); + handle = talloc_named_const(context, sizeof(*handle), TALLOC_MAGIC_REFERENCE); + + if (handle == NULL) return NULL; + + /* note that we hang the destructor off the handle, not the + main context as that allows the caller to still setup their + own destructor on the context if they want to */ + talloc_set_destructor(handle, talloc_reference_destructor); + handle->ptr = discard_const_p(void, ptr); + _TLIST_ADD(tc->refs, handle); + return handle->ptr; +} + +/* + remove a secondary reference to a pointer. This undo's what + talloc_reference() has done. The context and pointer arguments + must match those given to a talloc_reference() +*/ +static int talloc_unreference(const void *context, const void *ptr) +{ + struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr); + struct talloc_reference_handle *h; + + if (context == NULL) { + context = null_context; + } + + for (h=tc->refs;h;h=h->next) { + struct talloc_chunk *p = talloc_parent_chunk(h); + if ((p==NULL && context==NULL) || p+1 == context) break; + } + if (h == NULL) { + return -1; + } + + talloc_set_destructor(h, NULL); + _TLIST_REMOVE(tc->refs, h); + talloc_free(h); + return 0; +} + +/* + remove a specific parent context from a pointer. 
This is a more + controlled varient of talloc_free() +*/ +int talloc_unlink(const void *context, void *ptr) +{ + struct talloc_chunk *tc_p, *new_p; + void *new_parent; + + if (ptr == NULL) { + return -1; + } + + if (context == NULL) { + context = null_context; + } + + if (talloc_unreference(context, ptr) == 0) { + return 0; + } + + if (context == NULL) { + if (talloc_parent_chunk(ptr) != NULL) { + return -1; + } + } else { + if (talloc_chunk_from_ptr(context) != talloc_parent_chunk(ptr)) { + return -1; + } + } + + tc_p = talloc_chunk_from_ptr(ptr); + + if (tc_p->refs == NULL) { + return talloc_free(ptr); + } + + new_p = talloc_parent_chunk(tc_p->refs); + if (new_p) { + new_parent = new_p+1; + } else { + new_parent = NULL; + } + + if (talloc_unreference(new_parent, ptr) != 0) { + return -1; + } + + talloc_steal(new_parent, ptr); + + return 0; +} + +/* + add a name to an existing pointer - va_list version +*/ +static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0); + +static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap) +{ + struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr); + tc->name = talloc_vasprintf(ptr, fmt, ap); + if (tc->name) { + talloc_set_name_const(tc->name, ".name"); + } +} + +/* + add a name to an existing pointer +*/ +void talloc_set_name(const void *ptr, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + talloc_set_name_v(ptr, fmt, ap); + va_end(ap); +} + +/* + more efficient way to add a name to a pointer - the name must point to a + true string constant +*/ +void talloc_set_name_const(const void *ptr, const char *name) +{ + struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr); + tc->name = name; +} + +/* + create a named talloc pointer. Any talloc pointer can be named, and + talloc_named() operates just like talloc() except that it allows you + to name the pointer. +*/ +void *talloc_named(const void *context, size_t size, const char *fmt, ...) 
+{ + va_list ap; + void *ptr; + + ptr = _talloc(context, size); + if (ptr == NULL) return NULL; + + va_start(ap, fmt); + talloc_set_name_v(ptr, fmt, ap); + va_end(ap); + + return ptr; +} + +/* + create a named talloc pointer. Any talloc pointer can be named, and + talloc_named() operates just like talloc() except that it allows you + to name the pointer. +*/ +void *talloc_named_const(const void *context, size_t size, const char *name) +{ + void *ptr; + + ptr = _talloc(context, size); + if (ptr == NULL) { + return NULL; + } + + talloc_set_name_const(ptr, name); + + return ptr; +} + +/* + return the name of a talloc ptr, or "UNNAMED" +*/ +const char *talloc_get_name(const void *ptr) +{ + struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr); + if (tc->name == TALLOC_MAGIC_REFERENCE) { + return ".reference"; + } + if (tc->name) { + return tc->name; + } + return "UNNAMED"; +} + + +/* + check if a pointer has the given name. If it does, return the pointer, + otherwise return NULL +*/ +void *talloc_check_name(const void *ptr, const char *name) +{ + const char *pname; + if (ptr == NULL) return NULL; + pname = talloc_get_name(ptr); + if (pname == name || strcmp(pname, name) == 0) { + return discard_const_p(void, ptr); + } + return NULL; +} + + +/* + this is for compatibility with older versions of talloc +*/ +void *talloc_init(const char *fmt, ...) +{ + va_list ap; + void *ptr; + + ptr = _talloc(NULL, 0); + if (ptr == NULL) return NULL; + + va_start(ap, fmt); + talloc_set_name_v(ptr, fmt, ap); + va_end(ap); + + return ptr; +} + +/* + this is a replacement for the Samba3 talloc_destroy_pool functionality. It + should probably not be used in new code. It's in here to keep the talloc + code consistent across Samba 3 and 4. +*/ +void talloc_free_children(void *ptr) +{ + struct talloc_chunk *tc; + + if (ptr == NULL) { + return; + } + + tc = talloc_chunk_from_ptr(ptr); + + while (tc->child) { + /* we need to work out who will own an abandoned child + if it cannot be freed. 
In priority order, the first + choice is owner of any remaining reference to this + pointer, the second choice is our parent, and the + final choice is the null context. */ + void *child = tc->child+1; + const void *new_parent = null_context; + if (tc->child->refs) { + struct talloc_chunk *p = talloc_parent_chunk(tc->child->refs); + if (p) new_parent = p+1; + } + if (talloc_free(child) == -1) { + if (new_parent == null_context) { + struct talloc_chunk *p = talloc_parent_chunk(ptr); + if (p) new_parent = p+1; + } + talloc_steal(new_parent, child); + } + } +} + +/* + free a talloc pointer. This also frees all child pointers of this + pointer recursively + + return 0 if the memory is actually freed, otherwise -1. The memory + will not be freed if the ref_count is > 1 or the destructor (if + any) returns non-zero +*/ +int talloc_free(void *ptr) +{ + struct talloc_chunk *tc; + + if (ptr == NULL) { + return -1; + } + + tc = talloc_chunk_from_ptr(ptr); + + if (tc->refs) { + talloc_reference_destructor(tc->refs); + return -1; + } + + if (tc->destructor) { + talloc_destructor_t d = tc->destructor; + if (d == (talloc_destructor_t)-1) { + return -1; + } + tc->destructor = (talloc_destructor_t)-1; + if (d(ptr) == -1) { + tc->destructor = d; + return -1; + } + tc->destructor = NULL; + } + + talloc_free_children(ptr); + + if (tc->parent) { + _TLIST_REMOVE(tc->parent->child, tc); + if (tc->parent->child) { + tc->parent->child->parent = tc->parent; + } + } else { + if (tc->prev) tc->prev->next = tc->next; + if (tc->next) tc->next->prev = tc->prev; + } + + tc->magic = TALLOC_MAGIC_FREE; + + free(tc); + return 0; +} + + + +/* + A talloc version of realloc. 
The context argument is only used if + ptr is NULL +*/ +void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name) +{ + struct talloc_chunk *tc; + void *new_ptr; + + /* size zero is equivalent to free() */ + if (size == 0) { + talloc_free(ptr); + return NULL; + } + + if (size >= MAX_TALLOC_SIZE) { + return NULL; + } + + /* realloc(NULL) is equavalent to malloc() */ + if (ptr == NULL) { + return talloc_named_const(context, size, name); + } + + tc = talloc_chunk_from_ptr(ptr); + + /* don't allow realloc on referenced pointers */ + if (tc->refs) { + return NULL; + } + + /* by resetting magic we catch users of the old memory */ + tc->magic = TALLOC_MAGIC_FREE; + +#if ALWAYS_REALLOC + new_ptr = malloc(size + sizeof(*tc)); + if (!new_ptr) { + tc->magic = TALLOC_MAGIC; + if (malloc_fail_handler) + if (malloc_fail_handler(malloc_fail_data)) + new_ptr = malloc(size + sizeof(*tc)); + } + if (new_ptr) { + memcpy(new_ptr, tc, tc->size + sizeof(*tc)); + free(tc); + } +#else + new_ptr = realloc(tc, size + sizeof(*tc)); + if (!new_ptr) { + tc->magic = TALLOC_MAGIC; + if (malloc_fail_handler) + if (malloc_fail_handler(malloc_fail_data)) + new_ptr = realloc(tc, size + sizeof(*tc)); + } +#endif + if (!new_ptr) { + tc->magic = TALLOC_MAGIC; + return NULL; + } + + tc = new_ptr; + tc->magic = TALLOC_MAGIC; + if (tc->parent) { + tc->parent->child = new_ptr; + } + if (tc->child) { + tc->child->parent = new_ptr; + } + + if (tc->prev) { + tc->prev->next = tc; + } + if (tc->next) { + tc->next->prev = tc; + } + + tc->size = size; + talloc_set_name_const(tc+1, name); + + return (void *)(tc+1); +} + +/* + move a lump of memory from one talloc context to another return the + ptr on success, or NULL if it could not be transferred. + passing NULL as ptr will always return NULL with no side effects. 
+*/ +void *talloc_steal(const void *new_ctx, const void *ptr) +{ + struct talloc_chunk *tc, *new_tc; + + if (!ptr) { + return NULL; + } + + if (new_ctx == NULL) { + new_ctx = null_context; + } + + tc = talloc_chunk_from_ptr(ptr); + + if (new_ctx == NULL) { + if (tc->parent) { + _TLIST_REMOVE(tc->parent->child, tc); + if (tc->parent->child) { + tc->parent->child->parent = tc->parent; + } + } else { + if (tc->prev) tc->prev->next = tc->next; + if (tc->next) tc->next->prev = tc->prev; + } + + tc->parent = tc->next = tc->prev = NULL; + return discard_const_p(void, ptr); + } + + new_tc = talloc_chunk_from_ptr(new_ctx); + + if (tc == new_tc) { + return discard_const_p(void, ptr); + } + + if (tc->parent) { + _TLIST_REMOVE(tc->parent->child, tc); + if (tc->parent->child) { + tc->parent->child->parent = tc->parent; + } + } else { + if (tc->prev) tc->prev->next = tc->next; + if (tc->next) tc->next->prev = tc->prev; + } + + tc->parent = new_tc; + if (new_tc->child) new_tc->child->parent = NULL; + _TLIST_ADD(new_tc->child, tc); + + return discard_const_p(void, ptr); +} + +/* + return the total size of a talloc pool (subtree) +*/ +off_t talloc_total_size(const void *ptr) +{ + off_t total = 0; + struct talloc_chunk *c, *tc; + + if (ptr == NULL) { + ptr = null_context; + } + if (ptr == NULL) { + return 0; + } + + tc = talloc_chunk_from_ptr(ptr); + + total = tc->size; + for (c=tc->child;c;c=c->next) { + total += talloc_total_size(c+1); + } + return total; +} + +/* + return the total number of blocks in a talloc pool (subtree) +*/ +off_t talloc_total_blocks(const void *ptr) +{ + off_t total = 0; + struct talloc_chunk *c, *tc; + + if (ptr == NULL) { + ptr = null_context; + } + if (ptr == NULL) { + return 0; + } + tc = talloc_chunk_from_ptr(ptr); + + total++; + for (c=tc->child;c;c=c->next) { + total += talloc_total_blocks(c+1); + } + return total; +} + +/* + return the number of external references to a pointer +*/ +static int talloc_reference_count(const void *ptr) +{ + struct 
talloc_chunk *tc = talloc_chunk_from_ptr(ptr); + struct talloc_reference_handle *h; + int ret = 0; + + for (h=tc->refs;h;h=h->next) { + ret++; + } + return ret; +} + +/* + report on memory usage by all children of a pointer, giving a full tree view +*/ +void talloc_report_depth(const void *ptr, FILE *f, int depth) +{ + struct talloc_chunk *c, *tc = talloc_chunk_from_ptr(ptr); + + for (c=tc->child;c;c=c->next) { + if (c->name == TALLOC_MAGIC_REFERENCE) { + struct talloc_reference_handle *handle = (void *)(c+1); + const char *name2 = talloc_get_name(handle->ptr); + fprintf(f, "%*sreference to: %s\n", depth*4, "", name2); + } else { + const char *name = talloc_get_name(c+1); + fprintf(f, "%*s%-30s contains %6lu bytes in %3lu blocks (ref %d)\n", + depth*4, "", + name, + (unsigned long)talloc_total_size(c+1), + (unsigned long)talloc_total_blocks(c+1), + talloc_reference_count(c+1)); + talloc_report_depth(c+1, f, depth+1); + } + } + +} + +/* + report on memory usage by all children of a pointer, giving a full tree view +*/ +void talloc_report_full(const void *ptr, FILE *f) +{ + if (ptr == NULL) { + ptr = null_context; + } + if (ptr == NULL) return; + + fprintf(f,"full talloc report on '%s' (total %lu bytes in %lu blocks)\n", + talloc_get_name(ptr), + (unsigned long)talloc_total_size(ptr), + (unsigned long)talloc_total_blocks(ptr)); + + talloc_report_depth(ptr, f, 1); + fflush(f); +} + +/* + report on memory usage by all children of a pointer +*/ +void talloc_report(const void *ptr, FILE *f) +{ + struct talloc_chunk *c, *tc; + + if (ptr == NULL) { + ptr = null_context; + } + if (ptr == NULL) return; + + fprintf(f,"talloc report on '%s' (total %lu bytes in %lu blocks)\n", + talloc_get_name(ptr), + (unsigned long)talloc_total_size(ptr), + (unsigned long)talloc_total_blocks(ptr)); + + tc = talloc_chunk_from_ptr(ptr); + + for (c=tc->child;c;c=c->next) { + fprintf(f, "\t%-30s contains %6lu bytes in %3lu blocks\n", + talloc_get_name(c+1), + (unsigned 
long)talloc_total_size(c+1), + (unsigned long)talloc_total_blocks(c+1)); + } + fflush(f); +} + +/* + report on any memory hanging off the null context +*/ +static void talloc_report_null(void) +{ + if (talloc_total_size(null_context) != 0) { + talloc_report(null_context, stderr); + } +} + +/* + report on any memory hanging off the null context +*/ +static void talloc_report_null_full(void) +{ + if (talloc_total_size(null_context) != 0) { + talloc_report_full(null_context, stderr); + } +} + +/* + enable tracking of the NULL context +*/ +void talloc_enable_null_tracking(void) +{ + if (null_context == NULL) { + null_context = talloc_named_const(NULL, 0, "null_context"); + } +} + +/* + enable leak reporting on exit +*/ +void talloc_enable_leak_report(void) +{ + talloc_enable_null_tracking(); + atexit(talloc_report_null); +} + +/* + enable full leak reporting on exit +*/ +void talloc_enable_leak_report_full(void) +{ + talloc_enable_null_tracking(); + atexit(talloc_report_null_full); +} + +/* + talloc and zero memory. +*/ +void *_talloc_zero(const void *ctx, size_t size, const char *name) +{ + void *p = talloc_named_const(ctx, size, name); + + if (p) { + memset(p, '\0', size); + } + + return p; +} + + +/* + memdup with a talloc. 
+*/ +void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name) +{ + void *newp = talloc_named_const(t, size, name); + + if (newp) { + memcpy(newp, p, size); + } + + return newp; +} + +/* + strdup with a talloc +*/ +char *talloc_strdup(const void *t, const char *p) +{ + char *ret; + if (!p) { + return NULL; + } + ret = talloc_memdup(t, p, strlen(p) + 1); + if (ret) { + talloc_set_name_const(ret, ret); + } + return ret; +} + +/* + strndup with a talloc +*/ +char *talloc_strndup(const void *t, const char *p, size_t n) +{ + size_t len; + char *ret; + + for (len=0; p[len] && lensize - 1; + len = vsnprintf(NULL, 0, fmt, ap2); + + s = talloc_realloc(NULL, s, char, s_len + len+1); + if (!s) return NULL; + + VA_COPY(ap2, ap); + + vsnprintf(s+s_len, len+1, fmt, ap2); + talloc_set_name_const(s, s); + + return s; +} + +/* + Realloc @p s to append the formatted result of @p fmt and return @p + s, which may have moved. Good for gradually accumulating output + into a string buffer. + */ +char *talloc_asprintf_append(char *s, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + s = talloc_vasprintf_append(s, fmt, ap); + va_end(ap); + return s; +} + +/* + alloc an array, checking for integer overflow in the array size +*/ +void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name) +{ + if (count >= MAX_TALLOC_SIZE/el_size) { + return NULL; + } + return talloc_named_const(ctx, el_size * count, name); +} + +/* + alloc an zero array, checking for integer overflow in the array size +*/ +void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name) +{ + if (count >= MAX_TALLOC_SIZE/el_size) { + return NULL; + } + return _talloc_zero(ctx, el_size * count, name); +} + + +/* + realloc an array, checking for integer overflow in the array size +*/ +void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name) +{ + if (count >= MAX_TALLOC_SIZE/el_size) { + return NULL; + } + return _talloc_realloc(ctx, ptr, el_size * count, name); +} + +/* + a function version of talloc_realloc(), so it can be passed as a function pointer + to libraries that want a realloc function (a realloc function encapsulates + all the basic capabilities of an allocation library, which is why this is useful) +*/ +void *talloc_realloc_fn(const void *context, void *ptr, size_t size) +{ + return _talloc_realloc(context, ptr, size, NULL); +} + + +static void talloc_autofree(void) +{ + talloc_free(cleanup_context); + cleanup_context = NULL; +} + +/* + return a context which will be auto-freed on exit + this is useful for reducing the noise in leak reports +*/ +void *talloc_autofree_context(void) +{ + if (cleanup_context == NULL) { + cleanup_context = talloc_named_const(NULL, 0, "autofree_context"); + atexit(talloc_autofree); + } + return cleanup_context; +} + +size_t talloc_get_size(const void *context) +{ + struct talloc_chunk *tc; + + if (context == NULL) + return 0; + + tc = talloc_chunk_from_ptr(context); + + return tc->size; +} + 
+talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *handler, + void *data) +{ + talloc_fail_handler *old = malloc_fail_handler; + malloc_fail_handler = handler; + malloc_fail_data = data; + return old; +} diff --git a/tools/xenstore/talloc.h b/tools/xenstore/talloc.h new file mode 100644 index 0000000000..39bcb53fb7 --- /dev/null +++ b/tools/xenstore/talloc.h @@ -0,0 +1,134 @@ +#ifndef _TALLOC_H_ +#define _TALLOC_H_ +/* + Unix SMB/CIFS implementation. + Samba temporary memory allocation functions + + Copyright (C) Andrew Tridgell 2004-2005 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ + +/* this is only needed for compatibility with the old talloc */ +typedef void TALLOC_CTX; + +/* + this uses a little trick to allow __LINE__ to be stringified +*/ +#define _STRING_LINE_(s) #s +#define _STRING_LINE2_(s) _STRING_LINE_(s) +#define __LINESTR__ _STRING_LINE2_(__LINE__) +#define __location__ __FILE__ ":" __LINESTR__ + +#ifndef TALLOC_DEPRECATED +#define TALLOC_DEPRECATED 0 +#endif + +/* useful macros for creating type checked pointers */ +#define talloc(ctx, type) (type *)talloc_named_const(ctx, sizeof(type), #type) +#define talloc_size(ctx, size) talloc_named_const(ctx, size, __location__) + +#define talloc_new(ctx) talloc_named_const(ctx, 0, "talloc_new: " __location__) + +#define talloc_zero(ctx, type) (type *)_talloc_zero(ctx, sizeof(type), #type) +#define talloc_zero_size(ctx, size) _talloc_zero(ctx, size, __location__) + +#define talloc_zero_array(ctx, type, count) (type *)_talloc_zero_array(ctx, sizeof(type), count, #type) +#define talloc_array(ctx, type, count) (type *)_talloc_array(ctx, sizeof(type), count, #type) +#define talloc_array_size(ctx, size, count) _talloc_array(ctx, size, count, __location__) + +#define talloc_realloc(ctx, p, type, count) (type *)_talloc_realloc_array(ctx, p, sizeof(type), count, #type) +#define talloc_realloc_size(ctx, ptr, size) _talloc_realloc(ctx, ptr, size, __location__) + +#define talloc_memdup(t, p, size) _talloc_memdup(t, p, size, __location__) + +#define malloc_p(type) (type *)malloc(sizeof(type)) +#define malloc_array_p(type, count) (type *)realloc_array(NULL, sizeof(type), count) +#define realloc_p(p, type, count) (type *)realloc_array(p, sizeof(type), count) + +#define data_blob(ptr, size) data_blob_named(ptr, size, "DATA_BLOB: "__location__) +#define data_blob_talloc(ctx, ptr, size) data_blob_talloc_named(ctx, ptr, size, "DATA_BLOB: "__location__) +#define data_blob_dup_talloc(ctx, blob) data_blob_talloc_named(ctx, (blob)->data, (blob)->length, "DATA_BLOB: "__location__) + +#define 
talloc_set_type(ptr, type) talloc_set_name_const(ptr, #type) +#define talloc_get_type(ptr, type) (type *)talloc_check_name(ptr, #type) + + +#if TALLOC_DEPRECATED +#define talloc_zero_p(ctx, type) talloc_zero(ctx, type) +#define talloc_p(ctx, type) talloc(ctx, type) +#define talloc_array_p(ctx, type, count) talloc_array(ctx, type, count) +#define talloc_realloc_p(ctx, p, type, count) talloc_realloc(ctx, p, type, count) +#define talloc_destroy(ctx) talloc_free(ctx) +#endif + +#ifndef PRINTF_ATTRIBUTE +#if (__GNUC__ >= 3) +/** Use gcc attribute to check printf fns. a1 is the 1-based index of + * the parameter containing the format, and a2 the index of the first + * argument. Note that some gcc 2.x versions don't handle this + * properly **/ +#define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (__printf__, a1, a2))) +#else +#define PRINTF_ATTRIBUTE(a1, a2) +#endif +#endif + + +/* The following definitions come from talloc.c */ +void *_talloc(const void *context, size_t size); +void talloc_set_destructor(const void *ptr, int (*destructor)(void *)); +void talloc_increase_ref_count(const void *ptr); +void *talloc_reference(const void *context, const void *ptr); +int talloc_unlink(const void *context, void *ptr); +void talloc_set_name(const void *ptr, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3); +void talloc_set_name_const(const void *ptr, const char *name); +void *talloc_named(const void *context, size_t size, + const char *fmt, ...) PRINTF_ATTRIBUTE(3,4); +void *talloc_named_const(const void *context, size_t size, const char *name); +const char *talloc_get_name(const void *ptr); +void *talloc_check_name(const void *ptr, const char *name); +void talloc_report_depth(const void *ptr, FILE *f, int depth); +void *talloc_parent(const void *ptr); +void *talloc_init(const char *fmt, ...) 
PRINTF_ATTRIBUTE(1,2); +int talloc_free(void *ptr); +void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name); +void *talloc_steal(const void *new_ctx, const void *ptr); +off_t talloc_total_size(const void *ptr); +off_t talloc_total_blocks(const void *ptr); +void talloc_report_full(const void *ptr, FILE *f); +void talloc_report(const void *ptr, FILE *f); +void talloc_enable_null_tracking(void); +void talloc_enable_leak_report(void); +void talloc_enable_leak_report_full(void); +void *_talloc_zero(const void *ctx, size_t size, const char *name); +void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name); +char *talloc_strdup(const void *t, const char *p); +char *talloc_strndup(const void *t, const char *p, size_t n); +char *talloc_vasprintf(const void *t, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0); +char *talloc_asprintf(const void *t, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3); +char *talloc_asprintf_append(char *s, + const char *fmt, ...) 
PRINTF_ATTRIBUTE(2,3); +void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name); +void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name); +void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name); +void *talloc_realloc_fn(const void *context, void *ptr, size_t size); +void *talloc_autofree_context(void); +size_t talloc_get_size(const void *ctx); + +typedef int talloc_fail_handler(void *); +talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *, void *); +#endif + diff --git a/tools/xenstore/talloc_guide.txt b/tools/xenstore/talloc_guide.txt new file mode 100644 index 0000000000..c23ac77cad --- /dev/null +++ b/tools/xenstore/talloc_guide.txt @@ -0,0 +1,569 @@ +Using talloc in Samba4 +---------------------- + +Andrew Tridgell +September 2004 + +The most current version of this document is available at + http://samba.org/ftp/unpacked/samba4/source/lib/talloc/talloc_guide.txt + +If you are used to talloc from Samba3 then please read this carefully, +as talloc has changed a lot. + +The new talloc is a hierarchical, reference counted memory pool system +with destructors. Quite a mouthful really, but not too bad once you +get used to it. + +Perhaps the biggest change from Samba3 is that there is no distinction +between a "talloc context" and a "talloc pointer". Any pointer +returned from talloc() is itself a valid talloc context. This means +you can do this: + + struct foo *X = talloc(mem_ctx, struct foo); + X->name = talloc_strdup(X, "foo"); + +and the pointer X->name would be a "child" of the talloc context "X" +which is itself a child of mem_ctx. So if you do talloc_free(mem_ctx) +then it is all destroyed, whereas if you do talloc_free(X) then just X +and X->name are destroyed, and if you do talloc_free(X->name) then +just the name element of X is destroyed. 
+ +If you think about this, then what this effectively gives you is an +n-ary tree, where you can free any part of the tree with +talloc_free(). + +If you find this confusing, then I suggest you run the testsuite to +watch talloc in action. You may also like to add your own tests to +testsuite.c to clarify how some particular situation is handled. + + +Performance +----------- + +All the additional features of talloc() over malloc() do come at a +price. We have a simple performance test in Samba4 that measures +talloc() versus malloc() performance, and it seems that talloc() is +about 10% slower than malloc() on my x86 Debian Linux box. For Samba, +the great reduction in code complexity that we get by using talloc +makes this worthwhile, especially as the total overhead of +talloc/malloc in Samba is already quite small. + + +talloc API +---------- + +The following is a complete guide to the talloc API. Read it all at +least twice. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +(type *)talloc(const void *context, type); + +The talloc() macro is the core of the talloc library. It takes a +memory context and a type, and returns a pointer to a new area of +memory of the given type. + +The returned pointer is itself a talloc context, so you can use it as +the context argument to more calls to talloc if you wish. + +The returned pointer is a "child" of the supplied context. This means +that if you talloc_free() the context then the new child disappears as +well. Alternatively you can free just the child. + +The context argument to talloc() can be NULL, in which case a new top +level context is created. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_size(const void *context, size_t size); + +The function talloc_size() should be used when you don't have a +convenient type to pass to talloc(). Unlike talloc(), it is not type +safe (as it returns a void *), so you are on your own for type checking. 
+ + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +int talloc_free(void *ptr); + +The talloc_free() function frees a piece of talloc memory, and all its +children. You can call talloc_free() on any pointer returned by +talloc(). + +The return value of talloc_free() indicates success or failure, with 0 +returned for success and -1 for failure. Failure is reported if "ptr" +is NULL, if the pointer still has an additional parent (see below), or +if the pointer had a destructor attached to it and the destructor +returned -1. See talloc_set_destructor() for details on destructors. + +If this pointer has an additional parent when talloc_free() is called +then the memory is not actually released, but instead the most +recently established parent is destroyed. See talloc_reference() for +details on establishing additional parents. + +For more control on which parent is removed, see talloc_unlink(). + +talloc_free() operates recursively on its children. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_free_children(void *ptr); + +The talloc_free_children() function walks along the list of all +children of a talloc context and talloc_free()s only the children, not +the context itself. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_reference(const void *context, const void *ptr); + +The talloc_reference() function makes "context" an additional parent +of "ptr". + +The return value of talloc_reference() is always the original pointer +"ptr", unless talloc ran out of memory in creating the reference in +which case it will return NULL (each additional reference consumes +around 48 bytes of memory on intel x86 platforms). + +If "ptr" is NULL, then the function is a no-op, and simply returns NULL. + +After creating a reference you can free it in one of the following +ways: + + - you can talloc_free() any parent of the original pointer. 
That + will reduce the number of parents of this pointer by 1, and will + cause this pointer to be freed if it runs out of parents. + + - you can talloc_free() the pointer itself. That will destroy the + most recently established parent to the pointer and leave the + pointer as a child of its current parent. + +For more control on which parent to remove, see talloc_unlink() + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +int talloc_unlink(const void *context, const void *ptr); + +The talloc_unlink() function removes a specific parent from ptr. The +context passed must either be a context used in talloc_reference() +with this pointer, or must be a direct parent of ptr. + +Note that if the parent has already been removed using talloc_free() +then this function will fail and will return -1. Likewise, if "ptr" +is NULL, then the function will make no modifications and return -1. + +Usually you can just use talloc_free() instead of talloc_unlink(), but +sometimes it is useful to have the additional control on which parent +is removed. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_set_destructor(const void *ptr, int (*destructor)(void *)); + +The function talloc_set_destructor() sets the "destructor" for the +pointer "ptr". A destructor is a function that is called when the +memory used by a pointer is about to be released. The destructor +receives the pointer as an argument, and should return 0 for success +and -1 for failure. + +The destructor can do anything it wants to, including freeing other +pieces of memory. A common use for destructors is to clean up +operating system resources (such as open file descriptors) contained +in the structure the destructor is placed on. + +You can only place one destructor on a pointer. If you need more than +one destructor then you can create a zero-length child of the pointer +and place an additional destructor on that. 
+ +To remove a destructor call talloc_set_destructor() with NULL for the +destructor. + +If your destructor attempts to talloc_free() the pointer that it is +the destructor for then talloc_free() will return -1 and the free will +be ignored. This would be a pointless operation anyway, as the +destructor is only called when the memory is just about to go away. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_increase_ref_count(const void *ptr); + +The talloc_increase_ref_count(ptr) function is exactly equivalent to: + + talloc_reference(NULL, ptr); + +You can use either syntax, depending on which you think is clearer in +your code. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_set_name(const void *ptr, const char *fmt, ...); + +Each talloc pointer has a "name". The name is used principally for +debugging purposes, although it is also possible to set and get the +name on a pointer as a way of "marking" pointers in your code. + +The main use for names on pointers is for "talloc reports". See +talloc_report() and talloc_report_full() for details. Also see +talloc_enable_leak_report() and talloc_enable_leak_report_full(). + +The talloc_set_name() function allocates memory as a child of the +pointer. It is logically equivalent to: + talloc_set_name_const(ptr, talloc_asprintf(ptr, fmt, ...)); + +Note that multiple calls to talloc_set_name() will allocate more +memory without releasing the name. All of the memory is released when +the ptr is freed using talloc_free(). + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_set_name_const(const void *ptr, const char *name); + +The function talloc_set_name_const() is just like talloc_set_name(), +but it takes a string constant, and is much faster. It is extensively +used by the "auto naming" macros, such as talloc_p(). + +This function does not allocate any memory.
It just copies the +supplied pointer into the internal representation of the talloc +ptr. This means you must not pass a name pointer to memory that will +disappear before the ptr is freed with talloc_free(). + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_named(const void *context, size_t size, const char *fmt, ...); + +The talloc_named() function creates a named talloc pointer. It is +equivalent to: + + ptr = talloc_size(context, size); + talloc_set_name(ptr, fmt, ...); + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_named_const(const void *context, size_t size, const char *name); + +This is equivalent to: + + ptr = talloc_size(context, size); + talloc_set_name_const(ptr, name); + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +const char *talloc_get_name(const void *ptr); + +This returns the current name for the given talloc pointer. See +talloc_set_name() for details. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_init(const char *fmt, ...); + +This function creates a zero length named talloc context as a top +level context. It is equivalent to: + + talloc_named(NULL, 0, fmt, ...); + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_new(void *ctx); + +This is a utility macro that creates a new memory context hanging +off an existing context, automatically naming it "talloc_new: __location__" +where __location__ is the source line it is called from. It is +particularly useful for creating a new temporary working context. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +(type *)talloc_realloc(const void *context, void *ptr, type, count); + +The talloc_realloc() macro changes the size of a talloc +pointer. The "count" argument is the number of elements of type "type" +that you want the resulting pointer to hold.
+ +talloc_realloc() has the following equivalences: + + talloc_realloc(context, NULL, type, 1) ==> talloc(context, type); + talloc_realloc(context, NULL, type, N) ==> talloc_array(context, type, N); + talloc_realloc(context, ptr, type, 0) ==> talloc_free(ptr); + +The "context" argument is only used if "ptr" is NULL, otherwise it +is ignored. + +talloc_realloc() returns the new pointer, or NULL on failure. The call +will fail either due to a lack of memory, or because the pointer has +more than one parent (see talloc_reference()). + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_realloc_size(const void *context, void *ptr, size_t size); + +The talloc_realloc_size() function is useful when the type is not +known so the typesafe talloc_realloc() cannot be used. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_steal(const void *new_ctx, const void *ptr); + +The talloc_steal() function changes the parent context of a talloc +pointer. It is typically used when the context that the pointer is +currently a child of is going to be freed and you wish to keep the +memory for a longer time. + +The talloc_steal() function returns the pointer that you pass it. It +does not have any failure modes. + +NOTE: It is possible to produce loops in the parent/child relationship +if you are not careful with talloc_steal(). No guarantees are provided +as to your sanity or the safety of your data if you do this. + +talloc_steal(new_ctx, NULL) will return NULL with no side effects. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +off_t talloc_total_size(const void *ptr); + +The talloc_total_size() function returns the total size in bytes used +by this pointer and all child pointers. Mostly useful for debugging. + +Passing NULL is allowed, but it will only give a meaningful result if +talloc_enable_leak_report() or talloc_enable_leak_report_full() has +been called.
+ + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +off_t talloc_total_blocks(const void *ptr); + +The talloc_total_blocks() function returns the total memory block +count used by this pointer and all child pointers. Mostly useful for +debugging. + +Passing NULL is allowed, but it will only give a meaningful result if +talloc_enable_leak_report() or talloc_enable_leak_report_full() has +been called. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_report(const void *ptr, FILE *f); + +The talloc_report() function prints a summary report of all memory +used by ptr. One line of report is printed for each immediate child of +ptr, showing the total memory and number of blocks used by that child. + +You can pass NULL for the pointer, in which case a report is printed +for the top level memory context, but only if +talloc_enable_leak_report() or talloc_enable_leak_report_full() has +been called. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_report_full(const void *ptr, FILE *f); + +This provides a more detailed report than talloc_report(). It will +recursively print the entire tree of memory referenced by the +pointer. References in the tree are shown by giving the name of the +pointer that is referenced. + +You can pass NULL for the pointer, in which case a report is printed +for the top level memory context, but only if +talloc_enable_leak_report() or talloc_enable_leak_report_full() has +been called. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_enable_leak_report(void); + +This enables calling of talloc_report(NULL, stderr) when the program +exits. In Samba4 this is enabled by using the --leak-report command +line option. + +For it to be useful, this function must be called before any other +talloc function as it establishes a "null context" that acts as the +top of the tree.
If you don't call this function first then passing +NULL to talloc_report() or talloc_report_full() won't give you the +full tree printout. + +Here is a typical talloc report: + +talloc report on 'null_context' (total 267 bytes in 15 blocks) + libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks + libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks + iconv(UTF8,CP850) contains 42 bytes in 2 blocks + libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks + iconv(CP850,UTF8) contains 42 bytes in 2 blocks + iconv(UTF8,UTF-16LE) contains 45 bytes in 2 blocks + iconv(UTF-16LE,UTF8) contains 45 bytes in 2 blocks + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_enable_leak_report_full(void); + +This enables calling of talloc_report_full(NULL, stderr) when the +program exits. In Samba4 this is enabled by using the +--leak-report-full command line option. + +For it to be useful, this function must be called before any other +talloc function as it establishes a "null context" that acts as the +top of the tree. If you don't call this function first then passing +NULL to talloc_report() or talloc_report_full() won't give you the +full tree printout. + +Here is a typical full report: + +full talloc report on 'root' (total 18 bytes in 8 blocks) + p1 contains 18 bytes in 7 blocks (ref 0) + r1 contains 13 bytes in 2 blocks (ref 0) + reference to: p2 + p2 contains 1 bytes in 1 blocks (ref 1) + x3 contains 1 bytes in 1 blocks (ref 0) + x2 contains 1 bytes in 1 blocks (ref 0) + x1 contains 1 bytes in 1 blocks (ref 0) + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void talloc_enable_null_tracking(void); + +This enables tracking of the NULL memory context without enabling leak +reporting on exit. 
Useful for when you want to do your own leak +reporting call via talloc_report_null_full(). + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +(type *)talloc_zero(const void *ctx, type); + +The talloc_zero() macro is equivalent to: + + ptr = talloc(ctx, type); + if (ptr) memset(ptr, 0, sizeof(type)); + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_zero_size(const void *ctx, size_t size); + +The talloc_zero_size() function is useful when you don't have a known type. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_memdup(const void *ctx, const void *p, size_t size); + +The talloc_memdup() function is equivalent to: + + ptr = talloc_size(ctx, size); + if (ptr) memcpy(ptr, p, size); + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +char *talloc_strdup(const void *ctx, const char *p); + +The talloc_strdup() function is equivalent to: + + ptr = talloc_size(ctx, strlen(p)+1); + if (ptr) memcpy(ptr, p, strlen(p)+1); + +This function sets the name of the new pointer to the passed +string. This is equivalent to: + talloc_set_name_const(ptr, ptr) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +char *talloc_strndup(const void *t, const char *p, size_t n); + +The talloc_strndup() function is the talloc equivalent of the C +library function strndup() + +This function sets the name of the new pointer to the passed +string.
This is equivalent to: + talloc_set_name_const(ptr, ptr) + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +char *talloc_vasprintf(const void *t, const char *fmt, va_list ap); + +The talloc_vasprintf() function is the talloc equivalent of the C +library function vasprintf() + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +char *talloc_asprintf(const void *t, const char *fmt, ...); + +The talloc_asprintf() function is the talloc equivalent of the C +library function asprintf() + +This function sets the name of the new pointer to the passed +string. This is equivalent to: + talloc_set_name_const(ptr, ptr) + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +char *talloc_asprintf_append(char *s, const char *fmt, ...); + +The talloc_asprintf_append() function appends the given formatted +string to the given string. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +(type *)talloc_array(const void *ctx, type, uint_t count); + +The talloc_array() macro is equivalent to: + + (type *)talloc_size(ctx, sizeof(type) * count); + +except that it provides integer overflow protection for the multiply, +returning NULL if the multiply overflows. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_array_size(const void *ctx, size_t size, uint_t count); + +The talloc_array_size() function is useful when the type is not +known. It operates in the same way as talloc_array(), but takes a size +instead of a type. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_realloc_fn(const void *ctx, void *ptr, size_t size); + +This is a non-macro version of talloc_realloc(), which is useful +as libraries sometimes want a realloc function pointer.
A realloc() +implementation encapsulates the functionality of malloc(), free() and +realloc() in one call, which is why it is useful to be able to pass +around a single function pointer. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_autofree_context(void); + +This is a handy utility function that returns a talloc context +which will be automatically freed on program exit. This can be used +to reduce the noise in memory leak reports. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +void *talloc_check_name(const void *ptr, const char *name); + +This function checks if a pointer has the specified name. If it does +then the pointer is returned. If it doesn't then NULL is returned. + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +(type *)talloc_get_type(const void *ptr, type); + +This macro allows you to do type checking on talloc pointers. It is +particularly useful for void* private pointers. It is equivalent to +this: + + (type *)talloc_check_name(ptr, #type) + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +talloc_set_type(const void *ptr, type); + +This macro allows you to force the name of a pointer to be a +particular type. This can be used in conjunction with +talloc_get_type() to do type checking on void* pointers. + +It is equivalent to this: + talloc_set_name_const(ptr, #type) + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +talloc_get_size(const void *ctx); + +This function lets you know the amount of memory allocated so far by +this context. It does NOT account for subcontext memory. +This can be used to calculate the size of an array. + diff --git a/tools/xenstore/testsuite/01simple.sh b/tools/xenstore/testsuite/01simple.sh new file mode 100644 index 0000000000..9b1eb8f5c3 --- /dev/null +++ b/tools/xenstore/testsuite/01simple.sh @@ -0,0 +1,4 @@ +#! /bin/sh + +# Create an entry, read it.
+[ "`echo -e 'write /test create contents\nread /test' | ./xs_test 2>&1`" = "contents" ] diff --git a/tools/xenstore/testsuite/02directory.sh b/tools/xenstore/testsuite/02directory.sh new file mode 100644 index 0000000000..f63ef1ff3d --- /dev/null +++ b/tools/xenstore/testsuite/02directory.sh @@ -0,0 +1,31 @@ +#! /bin/sh + +# Root directory has nothing in it. +[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "" ] + +# Create a file. +[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ] + +# Directory shows it. +[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "test" ] + +# Make a new directory. +[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] + +# Check it's there. +DIR="`echo -e 'dir /' | ./xs_test 2>&1`" +[ "$DIR" = "test +dir" ] || [ "$DIR" = "dir +test" ] + +# Check it's empty. +[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "" ] + +# Create a file, check it exists. +[ "`echo -e 'write /dir/test2 create contents2' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "test2" ] +[ "`echo -e 'read /dir/test2' | ./xs_test 2>&1`" = "contents2" ] + +# Creating dir over the top should fail. +[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ] +[ "`echo -e 'mkdir /dir/test2' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ] diff --git a/tools/xenstore/testsuite/03write.sh b/tools/xenstore/testsuite/03write.sh new file mode 100644 index 0000000000..cf5f897c54 --- /dev/null +++ b/tools/xenstore/testsuite/03write.sh @@ -0,0 +1,17 @@ +#! /bin/sh + +# Write without create fails. +[ "`echo -e 'write /test none contents' | ./xs_test 2>&1`" = "FATAL: write: No such file or directory" ] + +# Exclusive write succeeds +[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents" ] + +# Exclusive write fails to overwrite. +[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "FATAL: write: File exists" ] + +# Non-exclusive overwrite succeeds. 
+[ "`echo -e 'write /test none contents2' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ] +[ "`echo -e 'write /test create contents3' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents3" ] diff --git a/tools/xenstore/testsuite/04rm.sh b/tools/xenstore/testsuite/04rm.sh new file mode 100644 index 0000000000..abadd6110a --- /dev/null +++ b/tools/xenstore/testsuite/04rm.sh @@ -0,0 +1,18 @@ +#! /bin/sh + +# Remove non-existant fails. +[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ] +[ "`echo -e 'rm /dir/test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ] + +# Create file and remove it +[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "" ] + +# Create directory and remove it. +[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ] + +# Create directory, create file, remove all. +[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'write /dir/test excl contents' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ] diff --git a/tools/xenstore/testsuite/05filepermissions.sh b/tools/xenstore/testsuite/05filepermissions.sh new file mode 100644 index 0000000000..9d9043f191 --- /dev/null +++ b/tools/xenstore/testsuite/05filepermissions.sh @@ -0,0 +1,49 @@ +#! /bin/sh + +# Fail to get perms on non-existent file. +[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ] +[ "`echo -e 'getperm /dir/test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ] + +# Create file: we own it, noone has access. 
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "0 NONE" ] +[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ] +[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] + +# Grant everyone read access to file. +[ "`echo -e 'setperm /test 0 READ' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ" ] +[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents" ] +[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] + +# Grant everyone write access to file. +[ "`echo -e 'setperm /test 0 WRITE' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ] +[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ] + +# Grant everyone both read and write access. +[ "`echo -e 'setperm /test 0 READ/WRITE' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ/WRITE" ] +[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents2" ] +[ "`echo -e 'setid 1\nwrite /test none contents3' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ] + +# Change so that user 1 owns it, noone else can do anything. +[ "`echo -e 'setperm /test 1 NONE' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "1 NONE" ] +[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ] +[ "`echo -e 'setid 1\nwrite /test none contents4' | ./xs_test 2>&1`" = "" ] + +# User 2 can do nothing. 
+[ "`echo -e 'setid 2\nsetperm /test 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ] +[ "`echo -e 'setid 2\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ] +[ "`echo -e 'setid 2\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 2\nwrite /test none contents4' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] + +# Tools can always access things. +[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "1 NONE" ] +[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents4" ] +[ "`echo -e 'write /test none contents5' | ./xs_test 2>&1`" = "" ] diff --git a/tools/xenstore/testsuite/06dirpermissions.sh b/tools/xenstore/testsuite/06dirpermissions.sh new file mode 100644 index 0000000000..922a794f04 --- /dev/null +++ b/tools/xenstore/testsuite/06dirpermissions.sh @@ -0,0 +1,61 @@ +#! /bin/sh + +# Root directory: owned by tool, everyone has read access. +[ "`echo -e 'getperm /' | ./xs_test 2>&1`" = "0 READ" ] + +# Create directory: we own it, noone has access. +[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "0 NONE" ] +[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ] +[ "`echo -e 'setid 1\nread /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] + +# Grant everyone read access to directoy. +[ "`echo -e 'setperm /dir 0 READ' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ" ] +[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] + +# Grant everyone write access to directory. 
+[ "`echo -e 'setperm /dir 0 WRITE' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ] +[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ] +[ "`echo -e 'setid 1\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'read /dir/test' | ./xs_test 2>&1`" = "contents" ] + +# Grant everyone both read and write access. +[ "`echo -e 'setperm /dir 0 READ/WRITE' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ/WRITE" ] +[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "test" ] +[ "`echo -e 'setid 1\nwrite /dir/test2 create contents' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\nread /dir/test2' | ./xs_test 2>&1`" = "contents" ] + +# Change so that user 1 owns it, noone else can do anything. +[ "`echo -e 'setperm /dir 1 NONE' | ./xs_test 2>&1`" = "" ] +[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "1 NONE" ] +[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1 | sort`" = "test +test2" ] +[ "`echo -e 'setid 1\nwrite /dir/test3 create contents' | ./xs_test 2>&1`" = "" ] + +# User 2 can do nothing. Can't even tell if file exists. 
+[ "`echo -e 'setid 2\nsetperm /dir 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ] +[ "`echo -e 'setid 2\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ] +[ "`echo -e 'setid 2\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ] +[ "`echo -e 'setid 2\nread /dir/test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 2\nread /dir/test2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 2\nread /dir/test3' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 2\nread /dir/test4' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ] +[ "`echo -e 'setid 2\nwrite /dir/test none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] +[ "`echo -e 'setid 2\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] +[ "`echo -e 'setid 2\nwrite /dir/test excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] +[ "`echo -e 'setid 2\nwrite /dir/test4 none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] +[ "`echo -e 'setid 2\nwrite /dir/test4 create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] +[ "`echo -e 'setid 2\nwrite /dir/test4 excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ] + +# Tools can always access things. +[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "1 NONE" ] +[ "`echo -e 'dir /dir' | ./xs_test 2>&1 | sort`" = "test +test2 +test3" ] +[ "`echo -e 'write /dir/test4 create contents' | ./xs_test 2>&1`" = "" ] + diff --git a/tools/xenstore/testsuite/07watch.sh b/tools/xenstore/testsuite/07watch.sh new file mode 100644 index 0000000000..bedce6ad5b --- /dev/null +++ b/tools/xenstore/testsuite/07watch.sh @@ -0,0 +1,32 @@ +#! /bin/sh + +# Watch something, write to it, check watch has fired. 
+[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ] + +[ "`echo -e '1 watch /test 100\n2 write /test create contents2\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/test" ] + +# Check that reads don't set it off. +[ "`echo -e '1 watch /test 100\n2 read /test\n1 waitwatch' | ./xs_test 2>&1`" = "2:contents2 +1:waitwatch timeout" ] + +# mkdir, setperm and rm should (also /tests watching dirs) +[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ] +[ "`echo -e '1 watch /dir 100\n2 mkdir /dir/newdir\n1 waitwatch\n1 ackwatch\n2 setperm /dir/newdir 0 READ\n1 waitwatch\n1 ackwatch\n2 rm /dir/newdir\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/newdir +1:/dir/newdir +1:/dir/newdir" ] + +# ignore watches while doing commands, should work. +[ "`echo -e 'watch /dir 100\nwrite /dir/test create contents\nread /dir/test\nwaitwatch\nackwatch' | ./xs_test 2>&1`" = "contents +/dir/test" ] + +# watch priority /test. +[ "`echo -e '1 watch /dir 1\n3 watch /dir 3\n2 watch /dir 2\nwrite /dir/test create contents\n3 waitwatch\n3 ackwatch\n2 waitwatch\n2 ackwatch\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "3:/dir/test +2:/dir/test +1:/dir/test" ] + +# If one dies (without acking), the other should still get ack. +[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 waitwatch\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "2:/dir/test +1:/dir/test" ] + +# If one dies (without reading at all), the other should still get ack. +[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/test" ] diff --git a/tools/xenstore/testsuite/08transaction.sh b/tools/xenstore/testsuite/08transaction.sh new file mode 100644 index 0000000000..2c23ed2496 --- /dev/null +++ b/tools/xenstore/testsuite/08transaction.sh @@ -0,0 +1,54 @@ +#! /bin/sh +# Test transactions. + +# Simple transaction: create a file inside transaction. 
+[ "`echo -e '1 start / +1 write /entry1 create contents +2 dir / +1 dir / +1 commit +2 read /entry1' | ./xs_test`" = "1:entry1 +2:contents" ] +echo rm /entry1 | ./xs_test + +# Create a file and abort transaction. +[ "`echo -e '1 start / +1 write /entry1 create contents +2 dir / +1 dir / +1 abort +2 dir /' | ./xs_test`" = "1:entry1" ] + +echo write /entry1 create contents | ./xs_test +# Delete in transaction, commit +[ "`echo -e '1 start / +1 rm /entry1 +2 dir / +1 dir / +1 commit +2 dir /' | ./xs_test`" = "2:entry1" ] + +# Delete in transaction, abort. +echo write /entry1 create contents | ./xs_test +[ "`echo -e '1 start / +1 rm /entry1 +2 dir / +1 dir / +1 abort +2 dir /' | ./xs_test`" = "2:entry1 +2:entry1" ] + +# Transactions can take as long as the want... +[ "`echo -e 'start / +sleep 1 +rm /entry1 +commit +dir /' | ./xs_test`" = "" ] + +# ... as long as noone is waiting. +[ "`echo -e '1 start / +2 mkdir /dir +1 mkdir /dir +1 dir / +1 commit' | ./xs_test 2>&1`" = "1:dir +FATAL: 1: commit: Connection timed out" ] diff --git a/tools/xenstore/testsuite/09domain.sh b/tools/xenstore/testsuite/09domain.sh new file mode 100644 index 0000000000..9208dda0ec --- /dev/null +++ b/tools/xenstore/testsuite/09domain.sh @@ -0,0 +1,15 @@ +#! /bin/sh +# Test domain communication. + +# Create a domain, write an entry. +[ "`echo -e 'introduce 1 100 7 /my/home +1 write /entry1 create contents +dir /' | ./xs_test 2>&1`" = "handle is 1 +entry1" ] + +# Release that domain. +[ "`echo -e 'release 1' | ./xs_test`" = "" ] + +# Introduce and release by same connection. +[ "`echo -e 'introduce 1 100 7 /my/home +release 1' | ./xs_test 2>&1`" = "handle is 1" ] diff --git a/tools/xenstore/testsuite/test.sh b/tools/xenstore/testsuite/test.sh new file mode 100755 index 0000000000..5718e84a15 --- /dev/null +++ b/tools/xenstore/testsuite/test.sh @@ -0,0 +1,44 @@ +#! /bin/sh + +set -e +set -m + +run_test() +{ + rm -rf $XENSTORED_ROOTDIR + mkdir $XENSTORED_ROOTDIR +# Weird failures with this. 
+ if type valgrind >/dev/null 2>&1; then + valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --no-fork 3>testsuite/tmp/vgout > /tmp/pid & + while [ ! -s /tmp/pid ]; do sleep 0; done + PID=`cat /tmp/pid` + rm /tmp/pid + else + PID=`./xenstored_test --output-pid` + fi + if sh -e $2 $1; then + if [ -s testsuite/tmp/vgout ]; then + kill $PID + echo VALGRIND errors: + cat testsuite/tmp/vgout + return 1 + fi + echo shutdown | ./xs_test + return 0 + else + # In case daemon is wedged. + kill $PID + sleep 1 + return 1 + fi +} + +for f in testsuite/[0-9]*.sh; do + if run_test $f; then + echo Test $f passed... + else + echo Test $f failed, running verbosely... + run_test $f -x + exit 1 + fi +done diff --git a/tools/xenstore/utils.c b/tools/xenstore/utils.c new file mode 100644 index 0000000000..2345021f70 --- /dev/null +++ b/tools/xenstore/utils.c @@ -0,0 +1,143 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +void xprintf(const char *fmt, ...) +{ + static FILE *out = NULL; + va_list args; + if (!out) + out = fopen("/dev/console", "w"); + if (!out) + out = stderr; + + va_start(args, fmt); + vfprintf(out, fmt, args); + va_end(args); + fflush(out); +} + +void barf(const char *fmt, ...) +{ + char *str; + va_list arglist; + + xprintf("FATAL: "); + + va_start(arglist, fmt); + vasprintf(&str, fmt, arglist); + va_end(arglist); + + xprintf("%s\n", str); + free(str); + exit(1); +} + +void barf_perror(const char *fmt, ...) 
+{ + char *str; + int err = errno; + va_list arglist; + + xprintf("FATAL: "); + + va_start(arglist, fmt); + vasprintf(&str, fmt, arglist); + va_end(arglist); + + xprintf("%s: %s\n", str, strerror(err)); + free(str); + exit(1); +} + +void *_realloc_array(void *ptr, size_t size, size_t num) +{ + if (num >= SIZE_MAX/size) + return NULL; + return realloc_nofail(ptr, size * num); +} + +void *realloc_nofail(void *ptr, size_t size) +{ + ptr = realloc(ptr, size); + if (ptr) + return ptr; + barf("realloc of %zu failed", size); +} + +void *malloc_nofail(size_t size) +{ + void *ptr = malloc(size); + if (ptr) + return ptr; + barf("malloc of %zu failed", size); +} + +/* Stevens. */ +void daemonize(void) +{ + pid_t pid; + + /* Separate from our parent via fork, so init inherits us. */ + if ((pid = fork()) < 0) + barf_perror("Failed to fork daemon"); + if (pid != 0) + exit(0); + + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); + + /* Session leader so ^C doesn't whack us. */ + setsid(); + /* Move off any mount points we might be in. */ + chdir("/"); + /* Discard our parent's old-fashioned umask prejudices. 
*/ + umask(0); +} + + +/* This version adds one byte (for nul term) */ +void *grab_file(const char *filename, unsigned long *size) +{ + unsigned int max = 16384; + int ret, fd; + void *buffer; + + if (streq(filename, "-")) + fd = dup(STDIN_FILENO); + else + fd = open(filename, O_RDONLY, 0); + + if (fd < 0) + return NULL; + + buffer = malloc(max+1); + *size = 0; + while ((ret = read(fd, buffer + *size, max - *size)) > 0) { + *size += ret; + if (*size == max) + buffer = realloc(buffer, max *= 2 + 1); + } + if (ret < 0) { + free(buffer); + buffer = NULL; + } else + ((char *)buffer)[*size] = '\0'; + close(fd); + return buffer; +} + +void release_file(void *data, unsigned long size __attribute__((unused))) +{ + free(data); +} diff --git a/tools/xenstore/utils.h b/tools/xenstore/utils.h new file mode 100644 index 0000000000..a84f19a22a --- /dev/null +++ b/tools/xenstore/utils.h @@ -0,0 +1,61 @@ +#ifndef _UTILS_H +#define _UTILS_H +#include +#include +#include + +/* Is A == B ? */ +#define streq(a,b) (strcmp((a),(b)) == 0) + +/* Does A start with B ? */ +#define strstarts(a,b) (strncmp((a),(b),strlen(b)) == 0) + +/* Does A end in B ? */ +static inline bool strends(const char *a, const char *b) +{ + if (strlen(a) < strlen(b)) + return false; + + return streq(a + strlen(a) - strlen(b), b); +} + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define ___stringify(x) #x +#define __stringify(x) ___stringify(x) + +/* Convenient wrappers for malloc and realloc. Use them. */ +#define new(type) ((type *)malloc_nofail(sizeof(type))) +#define new_array(type, num) realloc_array((type *)0, (num)) +#define realloc_array(ptr, num) ((__typeof__(ptr))_realloc_array((ptr), sizeof((*ptr)), (num))) + +void *malloc_nofail(size_t size); +void *realloc_nofail(void *ptr, size_t size); +void *_realloc_array(void *ptr, size_t size, size_t num); + +void barf(const char *fmt, ...) __attribute__((noreturn)); +void barf_perror(const char *fmt, ...) 
__attribute__((noreturn)); + +/* This version adds one byte (for nul term) */ +void *grab_file(const char *filename, unsigned long *size); +void release_file(void *data, unsigned long size); + +/* For writing daemons, based on Stevens. */ +void daemonize(void); + +/* Signal handling: returns fd to listen on. */ +int signal_to_fd(int signal); +void close_signal(int fd); + +void xprintf(const char *fmt, ...); + +#define eprintf(_fmt, _args...) xprintf("[ERR] %s" _fmt, __FUNCTION__, ##_args) +#define iprintf(_fmt, _args...) xprintf("[INF] %s" _fmt, __FUNCTION__, ##_args) + +#ifdef DEBUG +#define dprintf(_fmt, _args...) xprintf("[DBG] %s" _fmt, __FUNCTION__, ##_args) +#else +#define dprintf(_fmt, _args...) ((void)0) +#endif + +#endif /* _UTILS_H */ diff --git a/tools/xenstore/xenstored.h b/tools/xenstore/xenstored.h new file mode 100644 index 0000000000..784ec987a8 --- /dev/null +++ b/tools/xenstore/xenstored.h @@ -0,0 +1,81 @@ +/* + Simple prototyle Xen Store Daemon providing simple tree-like database. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#ifndef _XENSTORED_H
+#define _XENSTORED_H
+
+enum xsd_sockmsg_type
+{ /* Message types; numbered implicitly from 0 in declaration order. */
+	XS_DEBUG, /* Only handled by xenstored_core.c when built with TESTING. */
+	XS_SHUTDOWN,
+	XS_DIRECTORY,
+	XS_READ,
+	XS_GET_PERMS,
+	XS_WATCH,
+	XS_WATCH_ACK,
+	XS_UNWATCH,
+	XS_TRANSACTION_START,
+	XS_TRANSACTION_END,
+	XS_OP_READ_ONLY = XS_TRANSACTION_END, /* Alias of the value above; presumably the last read-only op -- TODO confirm. */
+	XS_INTRODUCE,
+	XS_RELEASE,
+	XS_GETDOMAINPATH,
+	XS_WRITE,
+	XS_MKDIR,
+	XS_RM,
+	XS_SET_PERMS,
+	XS_WATCH_EVENT, /* process_message() treats this as an unknown operation (ENOSYS). */
+	XS_ERROR, /* Reply type used by send_error(); payload is an errno name. */
+};
+
+#define XS_WRITE_NONE "NONE" /* do_write(): overwrite only, ENOENT if the node is missing. */
+#define XS_WRITE_CREATE "CREATE" /* do_write(): create the node if missing, else overwrite. */
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" /* do_write(): create only, EEXIST if the node exists. */
+
+/* We hand errors as strings, for portability: the name of the errno
+ * constant is sent rather than its platform-specific number. */
+struct xsd_errors
+{
+	int errnum; /* errno value on this platform */
+	const char *errstring; /* its symbolic name, e.g. "EINVAL" */
+};
+#define XSD_ERROR(x) { x, #x } /* pairs an errno constant with its stringified name */
+static struct xsd_errors xsd_errors[] __attribute__((unused)) = { /* static: one private copy per includer */
+	XSD_ERROR(EINVAL),
+	XSD_ERROR(EACCES),
+	XSD_ERROR(EEXIST),
+	XSD_ERROR(EISDIR),
+	XSD_ERROR(ENOENT),
+	XSD_ERROR(ENOMEM),
+	XSD_ERROR(ENOSPC),
+	XSD_ERROR(EIO),
+	XSD_ERROR(ENOTEMPTY),
+	XSD_ERROR(ENOSYS),
+	XSD_ERROR(EROFS),
+	XSD_ERROR(EBUSY),
+	XSD_ERROR(ETIMEDOUT),
+	XSD_ERROR(EISCONN),
+};
+struct xsd_sockmsg
+{
+	u32 type; /* an enum xsd_sockmsg_type value */
+	u32 len; /* Length of data following this. */
+
+	/* Generally followed by nul-terminated string(s). */
+};
+
+#endif /* _XENSTORED_H */
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
new file mode 100644
index 0000000000..9d15848463
--- /dev/null
+++ b/tools/xenstore/xenstored_core.c
@@ -0,0 +1,1354 @@
+/*
+    Simple prototype Xen Store Daemon providing simple tree-like database.
+    Copyright (C) 2005 Rusty Russell IBM Corporation
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG +#include "utils.h" +#include "list.h" +#include "talloc.h" +#include "xs_lib.h" +#include "xenstored.h" +#include "xenstored_core.h" +#include "xenstored_watch.h" +#include "xenstored_transaction.h" +#include "xenstored_domain.h" + +static bool verbose; +static LIST_HEAD(connections); + +#ifdef TESTING +static bool failtest = false; + +/* We override talloc's malloc. */ +void *test_malloc(size_t size) +{ + /* 1 in 20 means only about 50% of connections establish. 
*/ + if (failtest && (random() % 32) == 0) + return NULL; + return malloc(size); +} + +static void stop_failtest(int signum __attribute__((unused))) +{ + failtest = false; +} + +/* Need these before we #define away write_all/mkdir in testing.h */ +bool test_write_all(int fd, void *contents, unsigned int len); +bool test_write_all(int fd, void *contents, unsigned int len) +{ + if (failtest && (random() % 8) == 0) { + if (len) + len = random() % len; + write(fd, contents, len); + errno = ENOSPC; + return false; + } + return write_all(fd, contents, len); +} + +int test_mkdir(const char *dir, int perms); +int test_mkdir(const char *dir, int perms) +{ + if (failtest && (random() % 8) == 0) { + errno = ENOSPC; + return -1; + } + return mkdir(dir, perms); +} +#endif /* TESTING */ + +#include "xenstored_test.h" + +/* FIXME: Ideally, this should never be called. Some can be eliminated. */ +/* Something is horribly wrong: shutdown immediately. */ +void __attribute__((noreturn)) corrupt(struct connection *conn, + const char *fmt, ...) +{ + va_list arglist; + char *str; + int saved_errno = errno; + + va_start(arglist, fmt); + str = talloc_vasprintf(NULL, fmt, arglist); + va_end(arglist); + + eprintf("xenstored corruption: connection id %i: err %s: %s", + conn ? (int)conn->id : -1, strerror(saved_errno), str); +#ifdef TESTING + /* Allow them to attach debugger. */ + sleep(30); +#endif + syslog(LOG_DAEMON, + "xenstored corruption: connection id %i: err %s: %s", + conn ? 
(int)conn->id : -1, strerror(saved_errno), str); + _exit(2); +} + +static bool write_message(struct connection *conn) +{ + int ret; + struct buffered_data *out = conn->out; + + if (out->inhdr) { + if (verbose) + xprintf("Writing msg %i out to %p\n", + out->hdr.msg.type, conn); + ret = conn->write(conn, out->hdr.raw + out->used, + sizeof(out->hdr) - out->used); + if (ret < 0) + return false; + + out->used += ret; + if (out->used < sizeof(out->hdr)) + return true; + + out->inhdr = false; + out->used = 0; + + /* Second write might block if non-zero. */ + if (out->hdr.msg.len) + return true; + } + + if (verbose) + xprintf("Writing data len %i out to %p\n", + out->hdr.msg.len, conn); + ret = conn->write(conn, out->buffer + out->used, + out->hdr.msg.len - out->used); + + if (ret < 0) + return false; + + out->used += ret; + if (out->used != out->hdr.msg.len) + return true; + + conn->out = NULL; + + /* If this was an event, we wait for ack, otherwise we're done. */ + if (!is_watch_event(conn, out)) + talloc_free(out); + + queue_next_event(conn); + return true; +} + +static int destroy_conn(void *_conn) +{ + struct connection *conn = _conn; + + /* Flush outgoing if possible, but don't block. 
*/ + if (!conn->domain) { + fd_set set; + struct timeval none; + + FD_ZERO(&set); + FD_SET(conn->fd, &set); + none.tv_sec = none.tv_usec = 0; + + while (conn->out + && select(conn->fd+1, NULL, &set, NULL, &none) == 1) + if (!write_message(conn)) + break; + close(conn->fd); + } + list_del(&conn->list); + return 0; +} + +static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock, + int event_fd) +{ + struct connection *i; + int max; + + FD_ZERO(inset); + FD_ZERO(outset); + FD_SET(sock, inset); + max = sock; + FD_SET(ro_sock, inset); + if (ro_sock > max) + max = ro_sock; + FD_SET(event_fd, inset); + if (event_fd > max) + max = event_fd; + list_for_each_entry(i, &connections, list) { + if (i->domain) + continue; + if (!i->blocked) + FD_SET(i->fd, inset); + if (i->out) + FD_SET(i->fd, outset); + if (i->fd > max) + max = i->fd; + } + return max; +} + +/* Read everything from a talloc_open'ed fd. */ +static void *read_all(int *fd, unsigned int *size) +{ + unsigned int max = 4; + int ret; + void *buffer = talloc_size(fd, max); + + *size = 0; + while ((ret = read(*fd, buffer + *size, max - *size)) > 0) { + *size += ret; + if (*size == max) + buffer = talloc_realloc_size(fd, buffer, max *= 2); + } + if (ret < 0) + return NULL; + return buffer; +} + +static int destroy_fd(void *_fd) +{ + int *fd = _fd; + close(*fd); + return 0; +} + +/* Return a pointer to an fd, self-closing and attached to this pathname. */ +static int *talloc_open(const char *pathname, int flags, int mode) +{ + int *fd; + + fd = talloc(pathname, int); + *fd = open(pathname, flags, mode); + if (*fd < 0) { + int saved_errno = errno; + talloc_free(fd); + errno = saved_errno; + return NULL; + } + talloc_set_destructor(fd, destroy_fd); + return fd; +} + +/* Is child a subnode of parent, or equal? */ +bool is_child(const char *child, const char *parent) +{ + unsigned int len = strlen(parent); + + /* / should really be "" for this algorithm to work, but that's a + * usability nightmare. 
*/ + if (streq(parent, "/")) + return true; + + if (strncmp(child, parent, len) != 0) + return false; + + return child[len] == '/' || child[len] == '\0'; +} + +/* Answer never ends in /. */ +char *node_dir_outside_transaction(const char *node) +{ + if (streq(node, "/")) + return talloc_strdup(node, xs_daemon_store()); + return talloc_asprintf(node, "%s%s", xs_daemon_store(), node); +} + +static char *node_dir(struct transaction *trans, const char *node) +{ + if (!trans || !within_transaction(trans, node)) + return node_dir_outside_transaction(node); + return node_dir_inside_transaction(trans, node); +} + +static char *node_datafile(struct transaction *trans, const char *node) +{ + return talloc_asprintf(node, "%s/.data", node_dir(trans, node)); +} + +static char *node_permfile(struct transaction *trans, const char *node) +{ + return talloc_asprintf(node, "%s/.perms", node_dir(trans, node)); +} + +struct buffered_data *new_buffer(void *ctx) +{ + struct buffered_data *data; + + data = talloc(ctx, struct buffered_data); + data->inhdr = true; + data->used = 0; + data->buffer = NULL; + + return data; +} + +/* Return length of string (including nul) at this offset. */ +unsigned int get_string(const struct buffered_data *data, unsigned int offset) +{ + const char *nul; + + if (offset >= data->used) + return 0; + + nul = memchr(data->buffer + offset, 0, data->used - offset); + if (!nul) + return 0; + + return nul - (data->buffer + offset) + 1; +} + +/* Break input into vectors, return the number, fill in up to num of them. */ +unsigned int get_strings(struct buffered_data *data, + char *vec[], unsigned int num) +{ + unsigned int off, i, len; + + off = i = 0; + while ((len = get_string(data, off)) != 0) { + if (i < num) + vec[i] = data->buffer + off; + i++; + off += len; + } + return i; +} + +/* Returns "false", meaning "connection is not blocked". 
*/ +bool send_reply(struct connection *conn, enum xsd_sockmsg_type type, + const void *data, unsigned int len) +{ + struct buffered_data *bdata; + + /* When data gets freed, we want list entry is destroyed (so + * list entry is a child). */ + bdata = new_buffer(conn); + bdata->buffer = talloc_array(bdata, char, len); + + bdata->hdr.msg.type = type; + bdata->hdr.msg.len = len; + memcpy(bdata->buffer, data, len); + + /* There might be an event going out now. Queue behind it. */ + if (conn->out) { + assert(conn->out->hdr.msg.type == XS_WATCH_EVENT); + assert(!conn->waiting_reply); + conn->waiting_reply = bdata; + } else + conn->out = bdata; + return false; +} + +/* Some routines (write, mkdir, etc) just need a non-error return */ +bool send_ack(struct connection *conn, enum xsd_sockmsg_type type) +{ + return send_reply(conn, type, "OK", sizeof("OK")); +} + +bool send_error(struct connection *conn, int error) +{ + unsigned int i; + + for (i = 0; error != xsd_errors[i].errnum; i++) + if (i == ARRAY_SIZE(xsd_errors) - 1) + corrupt(conn, "Unknown error %i (%s)", error, + strerror(error)); + + return send_reply(conn, XS_ERROR, xsd_errors[i].errstring, + strlen(xsd_errors[i].errstring) + 1); +} + +static bool valid_chars(const char *node) +{ + /* Nodes can have lots of crap. */ + return (strspn(node, + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-/_@") == strlen(node)); +} + +static bool is_valid_nodename(const char *node) +{ + /* Must start in /. */ + if (!strstarts(node, "/")) + return false; + + /* Cannot end in / (unless it's just "/"). */ + if (strends(node, "/") && !streq(node, "/")) + return false; + + /* No double //. */ + if (strstr(node, "//")) + return false; + + return valid_chars(node); +} + +/* We expect one arg in the input: return NULL otherwise. */ +static const char *onearg(struct buffered_data *in) +{ + if (get_string(in, 0) != in->used) + return NULL; + return in->buffer; +} + +/* If it fails, returns NULL and sets errno. 
*/
+static struct xs_permissions *get_perms(struct transaction *transaction,
+					const char *node, unsigned int *num)
+{
+	struct xs_permissions *perms;
+	unsigned int bytes;
+	char *raw;
+	int *permfd;
+
+	permfd = talloc_open(node_permfile(transaction, node), O_RDONLY, 0);
+	if (permfd == NULL)
+		return NULL;
+
+	raw = read_all(permfd, &bytes);
+	if (raw == NULL)
+		return NULL;
+
+	/* One permission entry per string stored in the file. */
+	*num = count_strings(raw, bytes);
+	perms = talloc_array(node, struct xs_permissions, *num);
+	if (!strings_to_perms(perms, *num, raw))
+		corrupt(NULL, "Permissions corrupt for %s", node);
+
+	return perms;
+}
+
+/* Serialise num permissions as consecutive nul-terminated strings,
+ * allocated off node.  *len receives the total byte count.  Returns
+ * NULL if an entry cannot be converted, and also when num is 0. */
+static char *perms_to_strings(const char *node,
+			      struct xs_permissions *perms, unsigned int num,
+			      unsigned int *len)
+{
+	char one[MAX_STRLEN(domid_t) + 1];
+	char *out = NULL;
+	unsigned int idx = 0;
+
+	*len = 0;
+	while (idx < num) {
+		unsigned int chunk;
+
+		if (!perm_to_string(&perms[idx], one))
+			return NULL;
+
+		/* Append this entry, terminator included. */
+		chunk = strlen(one) + 1;
+		out = talloc_realloc(node, out, char, *len + chunk);
+		memcpy(out + *len, one, chunk);
+		*len += chunk;
+		idx++;
+	}
+	return out;
+}
+
+/* Destroy this, and its children, and its children's children.
*/ +int destroy_path(void *path) +{ + DIR *dir; + struct dirent *dirent; + + dir = opendir(path); + if (!dir) { + if (unlink(path) == 0 || errno == ENOENT) + return 0; + corrupt(NULL, "Destroying path %s", path); + } + + while ((dirent = readdir(dir)) != NULL) { + char fullpath[strlen(path) + 1 + strlen(dirent->d_name) + 1]; + sprintf(fullpath, "%s/%s", (char *)path, dirent->d_name); + if (!streq(dirent->d_name,".") && !streq(dirent->d_name,"..")) + destroy_path(fullpath); + } + closedir(dir); + if (rmdir(path) != 0) + corrupt(NULL, "Destroying directory %s", path); + return 0; +} + +/* Create a self-destructing temporary file */ +static char *tempfile(const char *path, void *contents, unsigned int len) +{ + int *fd; + char *tmppath = talloc_asprintf(path, "%s.tmp", path); + + fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640); + if (!fd) + return NULL; + talloc_set_destructor(tmppath, destroy_path); + if (!write_all(*fd, contents, len)) + return NULL; + + return tmppath; +} + +/* We assume rename() doesn't fail on moves in same dir. */ +static void commit_tempfile(const char *path) +{ + char realname[strlen(path) + 1]; + unsigned int len = strrchr(path, '.') - path; + + memcpy(realname, path, len); + realname[len] = '\0'; + if (rename(path, realname) != 0) + corrupt(NULL, "Committing %s", realname); + talloc_set_destructor(path, NULL); +} + +static bool set_perms(struct transaction *transaction, + const char *node, + struct xs_permissions *perms, unsigned int num) +{ + unsigned int len; + char *permpath, *strings; + + strings = perms_to_strings(node, perms, num, &len); + if (!strings) + return false; + + /* Create then move. 
*/ + permpath = tempfile(node_permfile(transaction, node), strings, len); + if (!permpath) + return false; + + commit_tempfile(permpath); + return true; +} + +static char *get_parent(const char *node) +{ + char *slash = strrchr(node + 1, '/'); + if (!slash) + return talloc_strdup(node, "/"); + return talloc_asprintf(node, "%.*s", slash - node, node); +} + +static enum xs_perm_type perm_for_id(domid_t id, + struct xs_permissions *perms, + unsigned int num) +{ + unsigned int i; + + /* Owners and tools get it all... */ + if (!id || perms[0].id == id) + return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_CREATE|XS_PERM_OWNER; + + for (i = 1; i < num; i++) + if (perms[i].id == id) + return perms[i].perms; + + return perms[0].perms; +} + +/* We have a weird permissions system. You can allow someone into a + * specific node without allowing it in the parents. If it's going to + * fail, however, we don't want the errno to indicate any information + * about the node. */ +static int check_with_parents(struct connection *conn, const char *node, + int errnum) +{ + struct xs_permissions *perms; + unsigned int num; + + /* We always tell them about memory failures. */ + if (errnum == ENOMEM) + return errnum; + + do { + node = get_parent(node); + perms = get_perms(conn->transaction, node, &num); + if (perms) + break; + } while (!streq(node, "/")); + + /* No permission at root? We're in trouble. 
*/ + if (!perms) + corrupt(conn, "No permissions file at root"); + + if (!(perm_for_id(conn->id, perms, num) & XS_PERM_READ)) + return EACCES; + + return errnum; +} + +bool check_node_perms(struct connection *conn, const char *node, + enum xs_perm_type perm) +{ + struct xs_permissions *perms; + unsigned int num; + + if (!node) { + errno = EINVAL; + return false; + } + + if (!node || !is_valid_nodename(node)) { + errno = EINVAL; + return false; + } + + if (!conn->write && (perm & XS_PERM_WRITE)) { + errno = EROFS; + return false; + } + + perms = get_perms(conn->transaction, node, &num); + /* No permissions. If we want to create it and + * it doesn't exist, check parent directory. */ + if (!perms && errno == ENOENT && (perm & XS_PERM_CREATE)) { + char *parent = get_parent(node); + if (!parent) + return false; + + perms = get_perms(conn->transaction, parent, &num); + } + if (!perms) { + errno = check_with_parents(conn, node, errno); + return false; + } + + if (perm_for_id(conn->id, perms, num) & perm) + return true; + + errno = check_with_parents(conn, node, EACCES); + return false; +} + +static bool send_directory(struct connection *conn, const char *node) +{ + char *path, *reply = talloc_strdup(node, ""); + unsigned int reply_len = 0; + DIR *dir; + struct dirent *dirent; + + if (!check_node_perms(conn, node, XS_PERM_READ)) + return send_error(conn, errno); + + path = node_dir(conn->transaction, node); + dir = opendir(path); + if (!dir) + return send_error(conn, errno); + + while ((dirent = readdir(dir)) != NULL) { + int len = strlen(dirent->d_name) + 1; + + if (!valid_chars(dirent->d_name)) + continue; + + reply = talloc_realloc(path, reply, char, reply_len + len); + strcpy(reply + reply_len, dirent->d_name); + reply_len += len; + } + closedir(dir); + + return send_reply(conn, XS_DIRECTORY, reply, reply_len); +} + +static bool do_read(struct connection *conn, const char *node) +{ + char *value; + unsigned int size; + int *fd; + + if (!check_node_perms(conn, node, 
XS_PERM_READ)) + return send_error(conn, errno); + + fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0); + if (!fd) { + /* Data file doesn't exist? We call that a directory */ + if (errno == ENOENT) + errno = EISDIR; + return send_error(conn, errno); + } + + value = read_all(fd, &size); + if (!value) + return send_error(conn, errno); + + return send_reply(conn, XS_READ, value, size); +} + +/* Create a new directory. Optionally put data in it (if data != NULL) */ +static bool new_directory(struct connection *conn, + const char *node, void *data, unsigned int datalen) +{ + struct xs_permissions perms; + char *permstr; + unsigned int len; + int *fd; + char *dir = node_dir(conn->transaction, node); + + if (mkdir(dir, 0750) != 0) + return false; + + /* Set destructor so we clean up if neccesary. */ + talloc_set_destructor(dir, destroy_path); + + /* Default permisisons: we own it, noone else has permission. */ + perms.id = conn->id; + perms.perms = XS_PERM_NONE; + + permstr = perms_to_strings(dir, &perms, 1, &len); + fd = talloc_open(node_permfile(conn->transaction, node), + O_WRONLY|O_CREAT|O_EXCL, 0640); + if (!fd || !write_all(*fd, permstr, len)) + return false; + + if (data) { + char *datapath = node_datafile(conn->transaction, node); + + fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640); + if (!fd || !write_all(*fd, data, datalen)) + return false; + } + + /* Finished! */ + talloc_set_destructor(dir, NULL); + return true; +} + +/* path, flags, data... */ +static bool do_write(struct connection *conn, struct buffered_data *in) +{ + unsigned int offset, datalen; + char *vec[2]; + char *node, *tmppath; + enum xs_perm_type mode; + struct stat st; + + /* Extra "strings" can be created by binary data. 
*/ + if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) + return send_error(conn, EINVAL); + + node = vec[0]; + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + offset = strlen(vec[0]) + strlen(vec[1]) + 2; + datalen = in->used - offset; + + if (streq(vec[1], XS_WRITE_NONE)) + mode = XS_PERM_WRITE; + else if (streq(vec[1], XS_WRITE_CREATE)) + mode = XS_PERM_WRITE|XS_PERM_CREATE; + else if (streq(vec[1], XS_WRITE_CREATE_EXCL)) + mode = XS_PERM_WRITE|XS_PERM_CREATE; + else + return send_error(conn, EINVAL); + + if (!check_node_perms(conn, node, mode)) + return send_error(conn, errno); + + if (lstat(node_dir(conn->transaction, node), &st) != 0) { + /* Does not exist... */ + if (errno != ENOENT) + return send_error(conn, errno); + + /* Not going to create it? */ + if (!(mode & XS_PERM_CREATE)) + return send_error(conn, ENOENT); + + if (!new_directory(conn, node, in->buffer + offset, datalen)) + return send_error(conn, errno); + } else { + /* Exists... 
*/ + if (streq(vec[1], XS_WRITE_CREATE_EXCL)) + return send_error(conn, EEXIST); + + tmppath = tempfile(node_datafile(conn->transaction, node), + in->buffer + offset, datalen); + if (!tmppath) + return send_error(conn, errno); + + commit_tempfile(tmppath); + } + + add_change_node(conn->transaction, node); + send_ack(conn, XS_WRITE); + fire_watches(conn->transaction, node); + return false; +} + +static bool do_mkdir(struct connection *conn, const char *node) +{ + if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) + return send_error(conn, errno); + + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + if (!new_directory(conn, node, NULL, 0)) + return send_error(conn, errno); + + add_change_node(conn->transaction, node); + send_ack(conn, XS_MKDIR); + fire_watches(conn->transaction, node); + return false; +} + +static bool do_rm(struct connection *conn, const char *node) +{ + char *tmppath, *path; + + if (!check_node_perms(conn, node, XS_PERM_WRITE)) + return send_error(conn, errno); + + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + if (streq(node, "/")) + return send_error(conn, EINVAL); + + /* We move the directory to temporary name, destructor cleans up. 
*/ + path = node_dir(conn->transaction, node); + tmppath = talloc_asprintf(node, "%s.tmp", path); + talloc_set_destructor(tmppath, destroy_path); + + if (rename(path, tmppath) != 0) + return send_error(conn, errno); + + add_change_node(conn->transaction, node); + send_ack(conn, XS_RM); + fire_watches(conn->transaction, node); + return false; +} + +static bool do_get_perms(struct connection *conn, const char *node) +{ + struct xs_permissions *perms; + char *strings; + unsigned int len, num; + + if (!check_node_perms(conn, node, XS_PERM_READ)) + return send_error(conn, errno); + + perms = get_perms(conn->transaction, node, &num); + if (!perms) + return send_error(conn, errno); + + strings = perms_to_strings(node, perms, num, &len); + if (!strings) + return send_error(conn, errno); + + return send_reply(conn, XS_GET_PERMS, strings, len); +} + +static bool do_set_perms(struct connection *conn, struct buffered_data *in) +{ + unsigned int num; + char *node; + struct xs_permissions *perms; + + num = count_strings(in->buffer, in->used); + if (num < 2) + return send_error(conn, EINVAL); + + /* First arg is node name. */ + node = in->buffer; + in->buffer += strlen(in->buffer) + 1; + num--; + + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + /* We must own node to do this (tools can do this too). */ + if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) + return send_error(conn, errno); + + perms = talloc_array(node, struct xs_permissions, num); + if (!strings_to_perms(perms, num, in->buffer)) + return send_error(conn, errno); + + if (!set_perms(conn->transaction, node, perms, num)) + return send_error(conn, errno); + add_change_node(conn->transaction, node); + send_ack(conn, XS_SET_PERMS); + fire_watches(conn->transaction, node); + return false; +} + +/* Process "in" for conn: "in" will vanish after this conversation, so + * we can talloc off it for temporary variables. 
May free "conn". + * Returns true if can't complete due to block. + */ +static bool process_message(struct connection *conn, struct buffered_data *in) +{ + switch (in->hdr.msg.type) { + case XS_DIRECTORY: + return send_directory(conn, onearg(in)); + + case XS_READ: + return do_read(conn, onearg(in)); + + case XS_WRITE: + return do_write(conn, in); + + case XS_MKDIR: + return do_mkdir(conn, onearg(in)); + + case XS_RM: + return do_rm(conn, onearg(in)); + + case XS_GET_PERMS: + return do_get_perms(conn, onearg(in)); + + case XS_SET_PERMS: + return do_set_perms(conn, in); + + case XS_SHUTDOWN: + send_ack(conn, XS_SHUTDOWN); + /* Everything hangs off auto-free context, freed at exit. */ + exit(0); + +#ifdef TESTING + case XS_DEBUG: { + /* For testing, we allow them to set id. */ + if (streq(in->buffer, "setid")) { + conn->id = atoi(in->buffer + get_string(in, 0)); + send_ack(conn, XS_DEBUG); + } else if (streq(in->buffer, "failtest")) { + if (get_string(in, 0) < in->used) + srandom(atoi(in->buffer + get_string(in, 0))); + send_ack(conn, XS_DEBUG); + failtest = true; + } + return false; + } +#endif /* TESTING */ + + case XS_WATCH: + return do_watch(conn, in); + + case XS_WATCH_ACK: + return do_watch_ack(conn); + + case XS_UNWATCH: + return do_unwatch(conn, onearg(in)); + + case XS_TRANSACTION_START: + return do_transaction_start(conn, onearg(in)); + + case XS_TRANSACTION_END: + return do_transaction_end(conn, onearg(in)); + + case XS_INTRODUCE: + return do_introduce(conn, in); + + case XS_RELEASE: + return do_release(conn, onearg(in)); + + case XS_GETDOMAINPATH: + return do_get_domain_path(conn, onearg(in)); + + case XS_WATCH_EVENT: + default: + eprintf("Client unknown operation %i", in->hdr.msg.type); + send_error(conn, ENOSYS); + return false; + } +} + +static int out_of_mem(void *data) +{ + longjmp(*(jmp_buf *)data, 1); +} + +static void consider_message(struct connection *conn) +{ + struct buffered_data *in = NULL; + enum xsd_sockmsg_type type = 
conn->in->hdr.msg.type; + jmp_buf talloc_fail; + + /* For simplicity, we kill the connection on OOM. */ + talloc_set_fail_handler(out_of_mem, &talloc_fail); + if (setjmp(talloc_fail)) { + talloc_free(conn); + goto end; + } + + if (verbose) + xprintf("Got message %i len %i from %p\n", + type, conn->in->hdr.msg.len, conn); + + /* We might get a command while waiting for an ack: this means + * the other end discarded it: we will re-transmit. */ + if (type != XS_WATCH_ACK) + reset_watch_event(conn); + + /* Careful: process_message may free connection. We detach + * "in" beforehand and allocate the new buffer to avoid + * touching conn after process_message. + */ + in = talloc_steal(talloc_autofree_context(), conn->in); + conn->in = new_buffer(conn); + if (process_message(conn, in)) { + /* Blocked by transaction: queue for re-xmit. */ + talloc_free(conn->in); + conn->in = in; + in = NULL; + } + +end: + talloc_free(in); + talloc_set_fail_handler(NULL, NULL); + if (talloc_total_blocks(NULL) + != talloc_total_blocks(talloc_autofree_context()) + 1) + talloc_report_full(NULL, stderr); +} + +/* Errors in reading or allocating here mean we get out of sync, so we + * drop the whole client connection. */ +void handle_input(struct connection *conn) +{ + int bytes; + struct buffered_data *in; + + assert(!conn->blocked); + in = conn->in; + + /* Not finished header yet? 
*/ + if (in->inhdr) { + bytes = conn->read(conn, in->hdr.raw + in->used, + sizeof(in->hdr) - in->used); + if (bytes <= 0) + goto bad_client; + in->used += bytes; + if (in->used != sizeof(in->hdr)) + return; + + if (in->hdr.msg.len > PATH_MAX) { + syslog(LOG_DAEMON, "Client tried to feed us %i", + in->hdr.msg.len); + goto bad_client; + } + + in->buffer = talloc_array(in, char, in->hdr.msg.len); + if (!in->buffer) + goto bad_client; + in->used = 0; + in->inhdr = false; + return; + } + + bytes = conn->read(conn, in->buffer + in->used, + in->hdr.msg.len - in->used); + if (bytes < 0) + goto bad_client; + + in->used += bytes; + if (in->used != in->hdr.msg.len) + return; + + consider_message(conn); + return; + +bad_client: + /* Kill it. */ + talloc_free(conn); +} + +void handle_output(struct connection *conn) +{ + if (!write_message(conn)) + talloc_free(conn); +} + +/* If a transaction has ended, see if we can unblock any connections. */ +static void unblock_connections(void) +{ + struct connection *i, *tmp; + + list_for_each_entry_safe(i, tmp, &connections, list) { + if (!i->blocked) + continue; + + if (!transaction_covering_node(i->blocked)) { + talloc_free(i->blocked); + i->blocked = NULL; + consider_message(i); + } + } + + /* To balance bias, move first entry to end. 
*/ + if (!list_empty(&connections)) { + i = list_top(&connections, struct connection, list); + list_del(&i->list); + list_add_tail(&i->list, &connections); + } +} + +struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) +{ + struct connection *new; + jmp_buf talloc_fail; + + new = talloc(talloc_autofree_context(), struct connection); + if (!new) + return NULL; + + new->blocked = false; + new->out = new->waiting_reply = NULL; + new->event = NULL; + new->fd = -1; + new->id = 0; + new->domain = NULL; + new->transaction = NULL; + new->write = write; + new->read = read; + + talloc_set_fail_handler(out_of_mem, &talloc_fail); + if (setjmp(talloc_fail)) { + talloc_free(new); + return NULL; + } + new->in = new_buffer(new); + talloc_set_fail_handler(NULL, NULL); + + list_add_tail(&new->list, &connections); + talloc_set_destructor(new, destroy_conn); + return new; +} + +static int writefd(struct connection *conn, const void *data, unsigned int len) +{ + return write(conn->fd, data, len); +} + +static int readfd(struct connection *conn, void *data, unsigned int len) +{ + return read(conn->fd, data, len); +} + +static void accept_connection(int sock, bool canwrite) +{ + int fd; + struct connection *conn; + + fd = accept(sock, NULL, NULL); + if (fd < 0) + return; + + conn = new_connection(canwrite ? writefd : NULL, readfd); + if (conn) + conn->fd = fd; + else + close(fd); +} + +/* Calc timespan from now to absolute time. 
*/ +static void time_relative_to_now(struct timeval *tv) +{ + struct timeval now; + + gettimeofday(&now, NULL); + if (timercmp(&now, tv, >)) + timerclear(tv); + else { + tv->tv_sec -= now.tv_sec; + if (now.tv_usec > tv->tv_usec) { + tv->tv_sec--; + tv->tv_usec += 1000000; + } + tv->tv_usec -= now.tv_usec; + } +} + +static struct option options[] = { { "no-fork", 0, NULL, 'N' }, + { "verbose", 0, NULL, 'V' }, + { "output-pid", 0, NULL, 'P' }, + { NULL, 0, NULL, 0 } }; + +int main(int argc, char *argv[]) +{ + int opt, *sock, *ro_sock, event_fd, max, tmpout; + struct sockaddr_un addr; + fd_set inset, outset; + bool dofork = true; + bool outputpid = false; + + while ((opt = getopt_long(argc, argv, "DV", options, NULL)) != -1) { + switch (opt) { + case 'N': + dofork = false; + break; + case 'V': + verbose = true; + break; + case 'P': + outputpid = true; + break; + } + } + if (optind != argc) + barf("%s: No arguments desired", argv[0]); + + talloc_enable_leak_report_full(); + + /* Create sockets for them to listen to. */ + sock = talloc(talloc_autofree_context(), int); + *sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (*sock < 0) + barf_perror("Could not create socket"); + ro_sock = talloc(talloc_autofree_context(), int); + *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (*ro_sock < 0) + barf_perror("Could not create socket"); + talloc_set_destructor(sock, destroy_fd); + talloc_set_destructor(ro_sock, destroy_fd); + + /* Don't kill us with SIGPIPE. */ + signal(SIGPIPE, SIG_IGN); + + /* FIXME: Be more sophisticated, don't mug running daemon. 
*/ + unlink(xs_daemon_socket()); + unlink(xs_daemon_socket_ro()); + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, xs_daemon_socket()); + if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0) + barf_perror("Could not bind socket to %s", xs_daemon_socket()); + strcpy(addr.sun_path, xs_daemon_socket_ro()); + if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0) + barf_perror("Could not bind socket to %s", + xs_daemon_socket_ro()); + if (chmod(xs_daemon_socket(), 0600) != 0 + || chmod(xs_daemon_socket_ro(), 0660) != 0) + barf_perror("Could not chmod sockets"); + + if (listen(*sock, 1) != 0 + || listen(*ro_sock, 1) != 0) + barf_perror("Could not listen on sockets"); + + /* If we're the first, create .perms file for root. */ + if (mkdir(xs_daemon_store(), 0750) == 0) { + struct xs_permissions perms; + char *root = talloc_strdup(talloc_autofree_context(), "/"); + + perms.id = 0; + perms.perms = XS_PERM_READ; + if (!set_perms(NULL, root, &perms, 1)) + barf_perror("Could not create permissions in root"); + talloc_free(root); + mkdir(xs_daemon_transactions(), 0750); + } else if (errno != EEXIST) + barf_perror("Could not create root %s", xs_daemon_store()); + + /* Listen to hypervisor. */ + event_fd = domain_init(); + + /* Debugging: daemonize() closes standard fds, so dup here. */ + tmpout = dup(STDOUT_FILENO); + if (dofork) { + openlog("xenstored", 0, LOG_DAEMON); + daemonize(); + } + + if (outputpid) { + char buffer[20]; + sprintf(buffer, "%i\n", getpid()); + write(tmpout, buffer, strlen(buffer)); + } + close(tmpout); + +#ifdef TESTING + signal(SIGUSR1, stop_failtest); +#endif + + /* Get ready to listen to the tools. */ + max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd); + + /* Main loop. 
*/ + for (;;) { + struct connection *i; + struct timeval *tvp = NULL, tv; + + timerclear(&tv); + shortest_transaction_timeout(&tv); + if (timerisset(&tv)) { + time_relative_to_now(&tv); + tvp = &tv; + } + + if (select(max+1, &inset, &outset, NULL, tvp) < 0) { + if (errno == EINTR) + continue; + barf_perror("Select failed"); + } + + if (FD_ISSET(*sock, &inset)) + accept_connection(*sock, true); + + if (FD_ISSET(*ro_sock, &inset)) + accept_connection(*ro_sock, false); + + if (FD_ISSET(event_fd, &inset)) + handle_event(event_fd); + + list_for_each_entry(i, &connections, list) { + if (i->domain) + continue; + + /* Operations can delete themselves or others + * (xs_release): list is not safe after input, + * so break. */ + if (FD_ISSET(i->fd, &inset)) { + handle_input(i); + break; + } + if (FD_ISSET(i->fd, &outset)) { + handle_output(i); + break; + } + } + + if (tvp) + check_transaction_timeout(); + + /* If transactions ended, we might be able to do more work. */ + unblock_connections(); + + max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd); + } +} diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h new file mode 100644 index 0000000000..fe6eec8f72 --- /dev/null +++ b/tools/xenstore/xenstored_core.h @@ -0,0 +1,123 @@ +/* + Internal interfaces for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef _XENSTORED_INTERNAL_H +#define _XENSTORED_INTERNAL_H +#include +#include +#include +#include "xs_lib.h" +#include "xenstored.h" +#include "list.h" + +struct buffered_data +{ + /* Are we still doing the header? */ + bool inhdr; + /* How far are we? */ + unsigned int used; + union { + struct xsd_sockmsg msg; + char raw[sizeof(struct xsd_sockmsg)]; + } hdr; + /* The actual data. */ + char *buffer; +}; + +struct connection; +typedef int connwritefn_t(struct connection *, const void *, unsigned int); +typedef int connreadfn_t(struct connection *, void *, unsigned int); + +struct connection +{ + struct list_head list; + + /* The file descriptor we came in on. */ + int fd; + + /* Who am I? 0 for socket connections. */ + domid_t id; + + /* Are we blocked waiting for a transaction to end? Contains node. */ + char *blocked; + + /* Our current event. If all used, we're waiting for ack. */ + struct watch_event *event; + + /* Buffered incoming data. */ + struct buffered_data *in; + + /* Buffered output data */ + struct buffered_data *out; + + /* If we had a watch fire outgoing when we needed to reply... */ + struct buffered_data *waiting_reply; + + /* My transaction, if any. */ + struct transaction *transaction; + + /* The domain I'm associated with, if any. */ + struct domain *domain; + + /* Methods for communicating over this connection: write can be NULL */ + connwritefn_t *write; + connreadfn_t *read; +}; + +/* Return length of string (including nul) at this offset. */ +unsigned int get_string(const struct buffered_data *data, + unsigned int offset); + +/* Break input into vectors, return the number, fill in up to num of them. 
*/ +unsigned int get_strings(struct buffered_data *data, + char *vec[], unsigned int num); + +/* Is child node a child or equal to parent node? */ +bool is_child(const char *child, const char *parent); + +/* Create a new buffer with lifetime of context. */ +struct buffered_data *new_buffer(void *ctx); + +bool send_reply(struct connection *conn, enum xsd_sockmsg_type type, + const void *data, unsigned int len); + +/* Some routines (write, mkdir, etc) just need a non-error return */ +bool send_ack(struct connection *conn, enum xsd_sockmsg_type type); + +/* Send an error: error is usually "errno". */ +bool send_error(struct connection *conn, int error); + +/* Check permissions on this node. */ +bool check_node_perms(struct connection *conn, const char *node, + enum xs_perm_type perm); + +/* Path to this node outside transaction. */ +char *node_dir_outside_transaction(const char *node); + +/* Fail due to excessive corruption, capitalist pigdogs! */ +void __attribute__((noreturn)) corrupt(struct connection *conn, + const char *fmt, ...); + +struct connection *new_connection(connwritefn_t *write, connreadfn_t *read); + +void handle_input(struct connection *conn); +void handle_output(struct connection *conn); + +/* Convenient talloc-style destructor for paths. */ +int destroy_path(void *path); +#endif /* _XENSTORED_INTERNAL_H */ diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c new file mode 100644 index 0000000000..bcc0a64967 --- /dev/null +++ b/tools/xenstore/xenstored_domain.c @@ -0,0 +1,387 @@ +/* + Domain communications for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG +#include "utils.h" +#include "talloc.h" +#include "xenstored_core.h" +#include "xenstored_domain.h" +#include "xenstored_test.h" + +static int *xc_handle; +static int eventchn_fd; +static unsigned int ringbuf_datasize; + +struct domain +{ + struct list_head list; + + /* The id of this domain */ + domid_t domid; + + /* Event channel port */ + u16 port; + + /* Domain path in store. */ + char *path; + + /* Shared page. */ + void *page; + + /* Input and output ringbuffer heads. */ + struct ringbuf_head *input, *output; + + /* The connection associated with this. */ + struct connection *conn; + +}; + +static LIST_HEAD(domains); + +void domain_set_conn(struct domain *domain, struct connection *conn) +{ + domain->conn = conn; +} + +struct ringbuf_head +{ + u32 write; /* Next place to write to */ + u32 read; /* Next place to read from */ + u8 flags; + char buf[0]; +} __attribute__((packed)); + +#define EVENTCHN_BIND _IO('E', 2) +#define EVENTCHN_UNBIND _IO('E', 3) + +/* FIXME: Mark connection as broken (close it?) when this happens. */ +static bool check_buffer(const struct ringbuf_head *h) +{ + return (h->write < ringbuf_datasize && h->read < ringbuf_datasize); +} + +/* We can't fill last byte: would look like empty buffer. 
*/ +static void *get_output_chunk(const struct ringbuf_head *h, + void *buf, u32 *len) +{ + u32 read_mark; + + if (h->read == 0) + read_mark = ringbuf_datasize - 1; + else + read_mark = h->read - 1; + + /* Here to the end of buffer, unless they haven't read some out. */ + *len = ringbuf_datasize - h->write; + if (read_mark >= h->write) + *len = read_mark - h->write; + return buf + h->write; +} + +static const void *get_input_chunk(const struct ringbuf_head *h, + const void *buf, u32 *len) +{ + /* Here to the end of buffer, unless they haven't written some. */ + *len = ringbuf_datasize - h->read; + if (h->write >= h->read) + *len = h->write - h->read; + return buf + h->read; +} + +static void update_output_chunk(struct ringbuf_head *h, u32 len) +{ + h->write += len; + if (h->write == ringbuf_datasize) + h->write = 0; +} + +static void update_input_chunk(struct ringbuf_head *h, u32 len) +{ + h->read += len; + if (h->read == ringbuf_datasize) + h->read = 0; +} + +static bool buffer_has_input(const struct ringbuf_head *h) +{ + u32 len; + + get_input_chunk(h, NULL, &len); + return (len != 0); +} + +static bool buffer_has_output_room(const struct ringbuf_head *h) +{ + u32 len; + + get_output_chunk(h, NULL, &len); + return (len != 0); +} + +static int writechn(struct connection *conn, const void *data, unsigned int len) +{ + u32 avail; + void *dest; + struct ringbuf_head h; + + /* Must read head once, and before anything else, and verified. */ + h = *conn->domain->output; + mb(); + if (!check_buffer(&h)) { + errno = EIO; + return -1; + } + + dest = get_output_chunk(&h, conn->domain->output->buf, &avail); + if (avail < len) + len = avail; + + memcpy(dest, data, len); + mb(); + update_output_chunk(conn->domain->output, len); + /* FIXME: Probably not neccessary. 
*/ + mb(); + xc_evtchn_send(*xc_handle, conn->domain->port); + return len; +} + +static int readchn(struct connection *conn, void *data, unsigned int len) +{ + u32 avail; + const void *src; + struct ringbuf_head h; + bool was_full; + + /* Must read head once, and before anything else, and verified. */ + h = *conn->domain->input; + mb(); + + if (!check_buffer(&h)) { + errno = EIO; + return -1; + } + + src = get_input_chunk(&h, conn->domain->input->buf, &avail); + if (avail < len) + len = avail; + + was_full = !buffer_has_output_room(&h); + memcpy(data, src, len); + mb(); + update_input_chunk(conn->domain->input, len); + /* FIXME: Probably not neccessary. */ + mb(); + + /* If it was full, tell them we've taken some. */ + if (was_full) + xc_evtchn_send(*xc_handle, conn->domain->port); + return len; +} + +static int destroy_domain(void *_domain) +{ + struct domain *domain = _domain; + + list_del(&domain->list); + + if (domain->port && + (ioctl(eventchn_fd, EVENTCHN_UNBIND, domain->port) != 0)) + eprintf("> Unbinding port %i failed!\n", domain->port); + + if(domain->page) + munmap(domain->page, getpagesize()); + + return 0; +} + +static struct domain *find_domain(u16 port) +{ + struct domain *i; + + list_for_each_entry(i, &domains, list) { + if (i->port == port) + return i; + } + return NULL; +} + +void handle_event(int event_fd) +{ + u16 port; + struct domain *domain; + + if (read(event_fd, &port, sizeof(port)) != sizeof(port)) + barf_perror("Failed to read from event fd"); + + /* We have to handle *all* the data available before we ack: + * careful that handle_input/handle_output can destroy conn. 
+ */ + while ((domain = find_domain(port)) != NULL) { + if (!domain->conn->blocked && buffer_has_input(domain->input)) + handle_input(domain->conn); + else if (domain->conn->out + && buffer_has_output_room(domain->output)) + handle_output(domain->conn); + else + break; + } + +#ifndef TESTING + if (write(event_fd, &port, sizeof(port)) != sizeof(port)) + barf_perror("Failed to write to event fd"); +#endif +} + +/* domid, mfn, evtchn, path */ +bool do_introduce(struct connection *conn, struct buffered_data *in) +{ + struct domain *domain; + char *vec[4]; + + if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) + return send_error(conn, EINVAL); + + /* Hang domain off "in" until we're finished. */ + domain = talloc(in, struct domain); + domain->domid = atoi(vec[0]); + domain->port = atoi(vec[2]); + domain->path = talloc_strdup(domain, vec[3]); + talloc_set_destructor(domain, destroy_domain); + if (!domain->port || !domain->domid) + return send_error(conn, EINVAL); + domain->page = xc_map_foreign_range(*xc_handle, domain->domid, + getpagesize(), + PROT_READ|PROT_WRITE, + atol(vec[1])); + if (!domain->page) + return send_error(conn, errno); + + /* One in each half of page. */ + domain->input = domain->page; + domain->output = domain->page + getpagesize()/2; + + /* Tell kernel we're interested in this event. 
*/ + if (ioctl(eventchn_fd, EVENTCHN_BIND, domain->port) != 0) + return send_error(conn, errno); + + domain->conn = new_connection(writechn, readchn); + domain->conn->domain = domain; + + talloc_steal(domain->conn, domain); + list_add(&domain->list, &domains); + + return send_ack(conn, XS_INTRODUCE); +} + +static struct domain *find_domain_by_domid(domid_t domid) +{ + struct domain *i; + + list_for_each_entry(i, &domains, list) { + if (i->domid == domid) + return i; + } + return NULL; +} + +/* domid */ +bool do_release(struct connection *conn, const char *domid_str) +{ + struct domain *domain; + domid_t domid; + + if (!domid_str) + return send_error(conn, EINVAL); + + domid = atoi(domid_str); + if (!domid) + return send_error(conn, EINVAL); + + domain = find_domain_by_domid(domid); + if (!domain) + return send_error(conn, ENOENT); + + if (!domain->conn) + return send_error(conn, EINVAL); + + talloc_free(domain->conn); + return send_ack(conn, XS_RELEASE); +} + +bool do_get_domain_path(struct connection *conn, const char *domid_str) +{ + struct domain *domain; + domid_t domid; + + if (!domid_str) + return send_error(conn, EINVAL); + + domid = atoi(domid_str); + if (domid == 0) + domain = conn->domain; + else + domain = find_domain_by_domid(domid); + + if (!domain) + return send_error(conn, ENOENT); + + return send_reply(conn, XS_GETDOMAINPATH, domain->path, + strlen(domain->path) + 1); +} + +static int close_xc_handle(void *_handle) +{ + xc_interface_close(*(int *)_handle); + return 0; +} + +/* Returns the event channel handle. */ +int domain_init(void) +{ + /* The size of the ringbuffer: half a page minus head structure. 
*/ + ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head); + + xc_handle = talloc(talloc_autofree_context(), int); + if (!xc_handle) + barf_perror("Failed to allocate domain handle"); + *xc_handle = xc_interface_open(); + if (*xc_handle < 0) + barf_perror("Failed to open connection to hypervisor"); + talloc_set_destructor(xc_handle, close_xc_handle); + +#ifdef TESTING + eventchn_fd = fake_open_eventchn(); +#else + eventchn_fd = open("/dev/xen/evtchn", O_RDWR); +#endif + if (eventchn_fd < 0) + barf_perror("Failed to open connection to hypervisor"); + return eventchn_fd; +} diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h new file mode 100644 index 0000000000..20e85a54b5 --- /dev/null +++ b/tools/xenstore/xenstored_domain.h @@ -0,0 +1,38 @@ +/* + Domain communications for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef _XENSTORED_DOMAIN_H +#define _XENSTORED_DOMAIN_H + +void handle_event(int event_fd); + +/* domid, mfn, eventchn, path */ +bool do_introduce(struct connection *conn, struct buffered_data *in); + +/* domid */ +bool do_release(struct connection *conn, const char *domid_str); + +/* domid */ +bool do_get_domain_path(struct connection *conn, const char *domid_str); + +/* Returns the event channel handle */ +int domain_init(void); + +void domain_set_conn(struct domain *domain, struct connection *conn); + +#endif /* _XENSTORED_DOMAIN_H */ diff --git a/tools/xenstore/xenstored_test.h b/tools/xenstore/xenstored_test.h new file mode 100644 index 0000000000..f173a5ca91 --- /dev/null +++ b/tools/xenstore/xenstored_test.h @@ -0,0 +1,37 @@ +/* + Testing replacements for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef _XENSTORED_TEST_H +#define _XENSTORED_TEST_H + +#ifdef TESTING +bool test_write_all(int fd, void *contents, unsigned int len); +#define write_all test_write_all + +int test_mkdir(const char *dir, int perms); +#define mkdir test_mkdir + +int fake_open_eventchn(void); +void fake_block_events(void); +void fake_ack_event(void); + +#define ioctl(a,b,c) 0 + +#endif + +#endif /* _XENSTORED_TEST_H */ diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c new file mode 100644 index 0000000000..ca37307f8c --- /dev/null +++ b/tools/xenstore/xenstored_transaction.c @@ -0,0 +1,284 @@ +/* + Transaction code for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "talloc.h" +#include "list.h" +#include "xenstored_transaction.h" +#include "xenstored_watch.h" +#include "xs_lib.h" +#include "utils.h" +#include "xenstored_test.h" + +struct changed_node +{ + /* The list within this transaction. 
*/ + struct list_head list; + + /* The name of the node. */ + char *node; +}; + +struct transaction +{ + /* Global list of transactions. */ + struct list_head list; + + /* My owner (conn->transaction == me). */ + struct connection *conn; + + /* Subtree this transaction covers */ + char *node; + + /* Base for this transaction. */ + char *divert; + + /* List of changed nodes. */ + struct list_head changes; + + /* Someone's waiting: time limit. */ + struct timeval timeout; + + /* We've timed out. */ + bool destined_to_fail; +}; +static LIST_HEAD(transactions); + +bool within_transaction(struct transaction *trans, const char *node) +{ + if (!trans) + return true; + return is_child(node, trans->node); +} + +/* You are on notice: this transaction is blocking someone. */ +static void start_transaction_timeout(struct transaction *trans) +{ + if (timerisset(&trans->timeout)) + return; + + /* One second timeout. */ + gettimeofday(&trans->timeout, NULL); + trans->timeout.tv_sec += 1; +} + +struct transaction *transaction_covering_node(const char *node) +{ + struct transaction *i; + + list_for_each_entry(i, &transactions, list) { + if (i->destined_to_fail) + continue; + if (is_child(i->node, node) || is_child(node, i->node)) + return i; + } + return NULL; +} + +bool transaction_block(struct connection *conn, const char *node) +{ + struct transaction *trans; + + /* Transactions don't overlap, so we can't be blocked by + * others if we're in one. */ + if (conn->transaction) + return false; + + trans = transaction_covering_node(node); + if (trans) { + start_transaction_timeout(trans); + conn->blocked = talloc_strdup(conn, node); + return true; + } + return false; +} + +/* Callers get a change node (which can fail) and only commit after they've + * finished. This way they don't have to unwind eg. a write. 
*/ +void add_change_node(struct transaction *trans, const char *node) +{ + struct changed_node *i; + + if (!trans) + return; + + list_for_each_entry(i, &trans->changes, list) + if (streq(i->node, node)) + return; + + i = talloc(trans, struct changed_node); + i->node = talloc_strdup(i, node); + INIT_LIST_HEAD(&i->list); + list_add_tail(&i->list, &trans->changes); +} + +char *node_dir_inside_transaction(struct transaction *trans, const char *node) +{ + return talloc_asprintf(node, "%s%s", trans->divert, + node + strlen(trans->node)); +} + +void shortest_transaction_timeout(struct timeval *tv) +{ + struct transaction *i; + + list_for_each_entry(i, &transactions, list) { + if (!timerisset(&i->timeout)) + continue; + + if (!timerisset(tv) || timercmp(&i->timeout, tv, <)) + *tv = i->timeout; + } +} + +void check_transaction_timeout(void) +{ + struct transaction *i; + struct timeval now; + + gettimeofday(&now, NULL); + + list_for_each_entry(i, &transactions, list) { + if (!timerisset(&i->timeout)) + continue; + + if (timercmp(&i->timeout, &now, <)) + i->destined_to_fail = true; + } +} + +/* FIXME: Eliminate all uses of this */ +static bool do_command(const char *cmd) +{ + int ret; + + ret = system(cmd); + if (ret == -1) + return false; + if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) { + errno = EIO; + return false; + } + return true; +} + +static int destroy_transaction(void *_transaction) +{ + struct transaction *trans = _transaction; + + list_del(&trans->list); + return destroy_path(trans->divert); +} + +bool do_transaction_start(struct connection *conn, const char *node) +{ + struct transaction *transaction; + char *dir, *cmd; + + if (conn->transaction) + return send_error(conn, EBUSY); + + if (!check_node_perms(conn, node, XS_PERM_READ)) + return send_error(conn, errno); + + if (transaction_block(conn, node)) + return true; + + dir = node_dir_outside_transaction(node); + + /* Attach transaction to node for autofree until it's complete */ + transaction = talloc(node, 
struct transaction); + transaction->node = talloc_strdup(transaction, node); + transaction->divert = talloc_asprintf(transaction, "%s/%p/", + xs_daemon_transactions(), + transaction); + cmd = talloc_asprintf(node, "cp -a %s %s", dir, transaction->divert); + if (!do_command(cmd)) + corrupt(conn, "Creating transaction %s", transaction->divert); + + talloc_steal(conn, transaction); + INIT_LIST_HEAD(&transaction->changes); + transaction->conn = conn; + timerclear(&transaction->timeout); + transaction->destined_to_fail = false; + list_add_tail(&transaction->list, &transactions); + conn->transaction = transaction; + talloc_set_destructor(transaction, destroy_transaction); + return send_ack(transaction->conn, XS_TRANSACTION_START); +} + +static bool commit_transaction(struct transaction *trans) +{ + char *tmp, *dir; + struct changed_node *i; + + /* Move: orig -> .old, repl -> orig. Cleanup deletes .old. */ + dir = node_dir_outside_transaction(trans->node); + tmp = talloc_asprintf(trans, "%s.old", dir); + + if (rename(dir, tmp) != 0) + return false; + if (rename(trans->divert, dir) != 0) + corrupt(trans->conn, "Failed rename %s to %s", + trans->divert, dir); + + trans->divert = tmp; + + /* Fire off the watches for everything that changed. 
*/ + list_for_each_entry(i, &trans->changes, list) + fire_watches(NULL, i->node); + return true; +} + +bool do_transaction_end(struct connection *conn, const char *arg) +{ + if (!arg || (!streq(arg, "T") && !streq(arg, "F"))) + return send_error(conn, EINVAL); + + if (!conn->transaction) + return send_error(conn, ENOENT); + + if (streq(arg, "T")) { + if (conn->transaction->destined_to_fail) { + send_error(conn, ETIMEDOUT); + goto failed; + } + if (!commit_transaction(conn->transaction)) { + send_error(conn, errno); + goto failed; + } + } + + talloc_free(conn->transaction); + conn->transaction = NULL; + return send_ack(conn, XS_TRANSACTION_END); + +failed: + talloc_free(conn->transaction); + conn->transaction = NULL; + return false; +} + diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h new file mode 100644 index 0000000000..a21bccad72 --- /dev/null +++ b/tools/xenstore/xenstored_transaction.h @@ -0,0 +1,50 @@ +/* + Transaction code for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef _XENSTORED_TRANSACTION_H +#define _XENSTORED_TRANSACTION_H +#include "xenstored_core.h" + +struct transaction; + +bool do_transaction_start(struct connection *conn, const char *node); +bool do_transaction_end(struct connection *conn, const char *arg); + +/* Is node covered by this transaction? Always true when trans is NULL. */ +bool within_transaction(struct transaction *trans, const char *node); + +/* If a write op on this node is blocked by another connection's transaction, + * mark conn, set up the transaction timeout and return true. + */ +bool transaction_block(struct connection *conn, const char *node); + +/* Return a transaction (not yet timed out) whose subtree overlaps this node, or NULL. */ +struct transaction *transaction_covering_node(const char *node); + +/* Return directory of node within transaction t. */ +char *node_dir_inside_transaction(struct transaction *t, const char *node); + +/* This node was changed: can fail and longjmp. */ +void add_change_node(struct transaction *trans, const char *node); + +/* Get shortest timeout: leave tv unset if none. */ +void shortest_transaction_timeout(struct timeval *tv); + +/* Mark every transaction whose timeout has expired as destined to fail. */ +void check_transaction_timeout(void); +#endif /* _XENSTORED_TRANSACTION_H */ diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c new file mode 100644 index 0000000000..2df83e1a54 --- /dev/null +++ b/tools/xenstore/xenstored_watch.c @@ -0,0 +1,279 @@ +/* + Watch code for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include "talloc.h" +#include "list.h" +#include "xenstored_watch.h" +#include "xs_lib.h" +#include "utils.h" +#include "xenstored_test.h" + +/* We create this if anyone is interested in "node", then we pass it from + * watch to watch as each connection acks it. + */ +struct watch_event +{ + /* Our entry in the current watch's event list (watch->events) */ + struct list_head list; + + /* Watch we are currently attached to. */ + struct watch *watch; + + struct buffered_data *data; /* The XS_WATCH_EVENT message; its payload is the changed node's name. */ +}; + +struct watch +{ + struct list_head list; /* Entry in the global "watches" list, kept in priority order. */ + unsigned int priority; + + /* Current outstanding events applying to this watch. */ + struct list_head events; + + char *node; /* Node being watched; the watch also applies to its children. */ + struct connection *conn; /* Connection that registered the watch. */ +}; +static LIST_HEAD(watches); + +static void reset_event(struct watch_event *event) +{ + event->data->inhdr = true; + event->data->used = 0; +} + +/* We received a non-ACK response: re-queue any watch we just sent. */ +void reset_watch_event(struct connection *conn) +{ + if (waiting_for_ack(conn)) + reset_event(conn->event); +} + +/* We're waiting if we have an event and we sent it all. */ +bool waiting_for_ack(struct connection *conn) +{ + if (!conn->event) + return false; + + if (conn->event->data->inhdr) + return false; + return conn->event->data->used == conn->event->data->hdr.msg.len; +} + +bool is_watch_event(struct connection *conn, struct buffered_data *out) +{ + return (conn->event && out == conn->event->data); +} + +/* Look through our watches: if any of them have an event, queue it.
*/ +void queue_next_event(struct connection *conn) +{ + struct watch *watch; + + /* We had a reply queued already? Send it. */ + if (conn->waiting_reply) { + conn->out = conn->waiting_reply; + conn->waiting_reply = NULL; + return; + } + + /* If we're waiting for ack, don't queue more. */ + if (waiting_for_ack(conn)) + return; + + /* Find a good event to send. */ + if (!conn->event) { + list_for_each_entry(watch, &watches, list) { + if (watch->conn != conn) + continue; + + conn->event = list_top(&watch->events, + struct watch_event, list); + if (conn->event) + break; + } + if (!conn->event) + return; + } + + conn->out = conn->event->data; +} + +/* Watch on DIR applies to DIR, DIR/FILE, but not DIRLONG. */ +static bool watch_applies(const struct watch *watch, const char *node) +{ + return is_child(node, watch->node); +} + +static struct watch *find_watch(const char *node) +{ + struct watch *watch; + + list_for_each_entry(watch, &watches, list) { + if (watch_applies(watch, node)) + return watch; + } + return NULL; +} + +static struct watch *find_next_watch(struct watch *watch, const char *node) +{ + list_for_each_entry_continue(watch, &watches, list) { + if (watch_applies(watch, node)) + return watch; + } + return NULL; +} + +/* FIXME: we fail to fire on out of memory. Should drop connections. */ +void fire_watches(struct transaction *trans, const char *node) +{ + struct watch *watch; + struct watch_event *event; + + /* During transactions, don't fire watches. */ + if (trans) + return; + + watch = find_watch(node); + if (!watch) + return; + + /* Create and fill in info about event. */ + event = talloc(talloc_autofree_context(), struct watch_event); + event->data = new_buffer(event); + event->data->hdr.msg.type = XS_WATCH_EVENT; + event->data->hdr.msg.len = strlen(node) + 1; + event->data->buffer = talloc_strdup(event->data, node); + + /* Tie event to this watch. 
*/ + event->watch = watch; + list_add(&event->list, &watch->events); + + /* If connection not doing anything, queue this. */ + if (!watch->conn->out) + queue_next_event(watch->conn); +} + +/* We're done with this event: see if anyone else wants it. */ +static void move_event_onwards(struct watch_event *event) +{ + list_del(&event->list); + reset_event(event); + + /* Remove from this watch, and find next watch to put this on. */ + event->watch = find_next_watch(event->watch, event->data->buffer); + if (!event->watch) { + talloc_free(event); + return; + } + + list_add(&event->list, &event->watch->events); + + /* If connection not doing anything, queue this. */ + if (!event->watch->conn->out) + queue_next_event(event->watch->conn); +} + +static int destroy_watch(void *_watch) +{ + struct watch *watch = _watch; + struct watch_event *event; + + /* Forget about sending out or waiting for acks for this watch. */ + if (watch->conn->event && watch->conn->event->watch == watch) + watch->conn->event = NULL; + + /* If we have pending events, pass them on to others. */ + while ((event = list_top(&watch->events, struct watch_event, list))) + move_event_onwards(event); + + /* Remove from global list. */ + list_del(&watch->list); + return 0; +} + +/* We keep watches in priority order. 
*/ +static void insert_watch(struct watch *watch) +{ + struct watch *i; + + list_for_each_entry(i, &watches, list) { + if (i->priority <= watch->priority) { + list_add_tail(&watch->list, &i->list); + return; + } + } + + list_add_tail(&watch->list, &watches); +} + +bool do_watch(struct connection *conn, struct buffered_data *in) +{ + struct watch *watch; + char *vec[2]; + + if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec)) + return send_error(conn, EINVAL); + + if (!check_node_perms(conn, vec[0], XS_PERM_READ)) + return send_error(conn, errno); + + watch = talloc(conn, struct watch); + watch->node = talloc_strdup(watch, vec[0]); + watch->conn = conn; + watch->priority = strtoul(vec[1], NULL, 0); + INIT_LIST_HEAD(&watch->events); + + insert_watch(watch); + talloc_set_destructor(watch, destroy_watch); + return send_ack(conn, XS_WATCH); +} + +bool do_watch_ack(struct connection *conn) +{ + struct watch_event *event; + + if (!waiting_for_ack(conn)) + return send_error(conn, ENOENT); + + /* Remove this watch event. */ + event = conn->event; + conn->event = NULL; + + move_event_onwards(event); + return send_ack(conn, XS_WATCH_ACK); +} + +bool do_unwatch(struct connection *conn, const char *node) +{ + struct watch *watch; + + list_for_each_entry(watch, &watches, list) { + if (watch->conn == conn + && streq(watch->node, node)) { + talloc_free(watch); + return send_ack(conn, XS_UNWATCH); + } + } + return send_error(conn, ENOENT); +} diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h new file mode 100644 index 0000000000..656ce4c36b --- /dev/null +++ b/tools/xenstore/xenstored_watch.h @@ -0,0 +1,42 @@ +/* + Watch code for Xen Store Daemon. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef _XENSTORED_WATCH_H +#define _XENSTORED_WATCH_H +#include "xenstored_core.h" + +bool do_watch(struct connection *conn, struct buffered_data *in); +bool do_watch_ack(struct connection *conn); +bool do_unwatch(struct connection *conn, const char *node); + +/* Is this a watch event message for this connection? */ +bool is_watch_event(struct connection *conn, struct buffered_data *out); + +/* Look through our watches: if any of them have an event, queue it. */ +void queue_next_event(struct connection *conn); + +/* Is this connection waiting for a watch acknowledgement? */ +bool waiting_for_ack(struct connection *conn); + +/* Reset event if we were sending one */ +void reset_watch_event(struct connection *conn); + +/* Fire all watches. */ +void fire_watches(struct transaction *trans, const char *node); + +#endif /* _XENSTORED_WATCH_H */ diff --git a/tools/xenstore/xs.c b/tools/xenstore/xs.c new file mode 100644 index 0000000000..d5058abfb3 --- /dev/null +++ b/tools/xenstore/xs.c @@ -0,0 +1,551 @@ +/* + Xen Store Daemon interface providing simple tree-like database. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xs.h" +#include "xenstored.h" +#include "xs_lib.h" +#include "utils.h" + +struct xs_handle +{ + int fd; +}; + +/* Get the socket from the store daemon handle. + */ +int xs_fileno(struct xs_handle *h) +{ + return h->fd; +} + +static struct xs_handle *get_socket(const char *connect_to) +{ + struct sockaddr_un addr; + int sock, saved_errno; + struct xs_handle *h = NULL; + + sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (sock < 0) + return NULL; + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, connect_to); + + if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) { + h = malloc(sizeof(*h)); + if (h) { + h->fd = sock; + return h; + } + } + + saved_errno = errno; + close(sock); + free(h); + errno = saved_errno; + return NULL; +} + +struct xs_handle *xs_daemon_open(void) +{ + return get_socket(xs_daemon_socket()); +} + +struct xs_handle *xs_daemon_open_readonly(void) +{ + return get_socket(xs_daemon_socket_ro()); +} + +void xs_daemon_close(struct xs_handle *h) +{ + if (h->fd >= 0) + close(h->fd); + free(h); +} + +static bool read_all(int fd, void *data, unsigned int len) +{ + while (len) { + int done; + + done = read(fd, data, len); + if (done < 0) { + if (errno == EINTR) + continue; + return false; + } + if (done == 0) { + /* It closed fd on us? EBADF is appropriate. 
*/ + errno = EBADF; + return false; + } + data += done; + len -= done; + } + + return true; +} + +#ifdef XSTEST +#define read_all read_all_choice +#define write_all write_all_choice +#endif + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) + if (i == ARRAY_SIZE(xsd_errors) - 1) + return EINVAL; + return xsd_errors[i].errnum; +} + +static void *read_reply(int fd, enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret; + int saved_errno; + + if (!read_all(fd, &msg, sizeof(msg))) + return NULL; + + ret = malloc(msg.len); + if (!ret) + return NULL; + + if (!read_all(fd, ret, msg.len)) { + saved_errno = errno; + free(ret); + errno = saved_errno; + return NULL; + } + + *type = msg.type; + if (len) + *len = msg.len; + return ret; +} + +/* Send message to xs, get malloc'ed reply. NULL and set errno on error. */ +static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type, + const struct iovec *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + int saved_errno; + unsigned int i; + struct sigaction ignorepipe, oldact; + + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + ignorepipe.sa_handler = SIG_IGN; + sigemptyset(&ignorepipe.sa_mask); + ignorepipe.sa_flags = 0; + sigaction(SIGPIPE, &ignorepipe, &oldact); + + if (!write_all(h->fd, &msg, sizeof(msg))) + goto fail; + + for (i = 0; i < num_vecs; i++) + if (!write_all(h->fd, iovec[i].iov_base, iovec[i].iov_len)) + goto fail; + + /* Watches can have fired before reply comes: daemon detects + * and re-transmits, so we can ignore this. 
*/ + do { + free(ret); + ret = read_reply(h->fd, &msg.type, len); + if (!ret) + goto fail; + } while (msg.type == XS_WATCH_EVENT); + + sigaction(SIGPIPE, &oldact, NULL); + if (msg.type == XS_ERROR) { + saved_errno = get_error(ret); + free(ret); + errno = saved_errno; + return NULL; + } + + assert(msg.type == type); + return ret; + +fail: + /* We're in a bad state, so close fd. */ + saved_errno = errno; + sigaction(SIGPIPE, &oldact, NULL); + close(h->fd); + h->fd = -1; + errno = saved_errno; + return NULL; +} + +/* free(), but don't change errno. */ +static void free_no_errno(void *p) +{ + int saved_errno = errno; + free(p); + errno = saved_errno; +} + +/* Simplified version of xs_talkv: single message. */ +static void *xs_single(struct xs_handle *h, enum xsd_sockmsg_type type, + const char *string, unsigned int *len) +{ + struct iovec iovec; + + iovec.iov_base = (void *)string; + iovec.iov_len = strlen(string) + 1; + return xs_talkv(h, type, &iovec, 1, len); +} + +static bool xs_bool(char *reply) +{ + if (!reply) + return false; + free(reply); + return true; +} + +char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num) +{ + char *strings, *p, **ret; + unsigned int len; + + strings = xs_single(h, XS_DIRECTORY, path, &len); + if (!strings) + return NULL; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. */ + ret = malloc(*num * sizeof(char *) + len); + if (!ret) { + free_no_errno(strings); + return NULL; + } + memcpy(&ret[*num], strings, len); + free_no_errno(strings); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + return ret; +} + +/* Get the value of a single file. + * Returns a malloced value: call free() on it after use. + * len indicates length in bytes. 
+ */ +void *xs_read(struct xs_handle *h, const char *path, unsigned int *len) +{ + return xs_single(h, XS_READ, path, len); +} + +/* Write the value of a single file. + * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL. + */ +bool xs_write(struct xs_handle *h, const char *path, + const void *data, unsigned int len, int createflags) +{ + const char *flags; + struct iovec iovec[3]; + + /* Format: Flags (as string), path, data. */ + if (createflags == 0) + flags = XS_WRITE_NONE; + else if (createflags == O_CREAT) + flags = XS_WRITE_CREATE; + else if (createflags == (O_CREAT|O_EXCL)) + flags = XS_WRITE_CREATE_EXCL; + else { + errno = EINVAL; + return false; + } + + iovec[0].iov_base = (void *)path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = (void *)flags; + iovec[1].iov_len = strlen(flags) + 1; + iovec[2].iov_base = (void *)data; + iovec[2].iov_len = len; + + return xs_bool(xs_talkv(h, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); +} + +/* Create a new directory. + * Returns false on failure. + */ +bool xs_mkdir(struct xs_handle *h, const char *path) +{ + return xs_bool(xs_single(h, XS_MKDIR, path, NULL)); +} + +/* Destroy a file or directory (directories must be empty). + * Returns false on failure. + */ +bool xs_rm(struct xs_handle *h, const char *path) +{ + return xs_bool(xs_single(h, XS_RM, path, NULL)); +} + +/* Get permissions of node (first element is owner). + * Returns malloced array, or NULL: call free() after use. + */ +struct xs_permissions *xs_get_permissions(struct xs_handle *h, + const char *path, + unsigned int *num) +{ + char *strings; + unsigned int len; + struct xs_permissions *ret; + + strings = xs_single(h, XS_GET_PERMS, path, &len); + if (!strings) + return NULL; + + /* Count the strings: each one perms then domid. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. 
*/ + ret = malloc(*num * sizeof(struct xs_permissions)); + if (!ret) { + free_no_errno(strings); + return NULL; + } + + if (!strings_to_perms(ret, *num, strings)) { + free_no_errno(ret); + ret = NULL; + } + + free(strings); + return ret; +} + +/* Set permissions of node (must be owner). + * Returns false on failure. + */ +bool xs_set_permissions(struct xs_handle *h, const char *path, + struct xs_permissions *perms, + unsigned int num_perms) +{ + unsigned int i; + struct iovec iov[1+num_perms]; + + iov[0].iov_base = (void *)path; + iov[0].iov_len = strlen(path) + 1; + + for (i = 0; i < num_perms; i++) { + char buffer[MAX_STRLEN(domid_t)+1]; + + if (!perm_to_string(&perms[i], buffer)) + goto unwind; + + iov[i+1].iov_base = strdup(buffer); + iov[i+1].iov_len = strlen(buffer) + 1; + if (!iov[i+1].iov_base) + goto unwind; + } + + if (!xs_bool(xs_talkv(h, XS_SET_PERMS, iov, 1+num_perms, NULL))) + goto unwind; + for (i = 0; i < num_perms; i++) + free(iov[i+1].iov_base); + return true; + +unwind: + num_perms = i; + for (i = 0; i < num_perms; i++) + free_no_errno(iov[i+1].iov_base); + return false; +} + +/* Watch a node for changes (poll on fd to detect, or call read_watch()). + * When the node (or any child) changes, fd will become readable. + * Priority indicates order if multiple watchers: higher is first. + * Returns false on failure. + */ +bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority) +{ + char prio[MAX_STRLEN(priority)]; + struct iovec iov[2]; + + sprintf(prio, "%u", priority); + iov[0].iov_base = (void *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = prio; + iov[1].iov_len = strlen(prio) + 1; + + return xs_bool(xs_talkv(h, XS_WATCH, iov, ARRAY_SIZE(iov), NULL)); +} + +/* Find out what node change was on (will block if nothing pending). + * Returns malloced path, or NULL: call free() after use. 
+ */ +char *xs_read_watch(struct xs_handle *h) +{ + struct xsd_sockmsg msg; + char *path; + + if (!read_all(h->fd, &msg, sizeof(msg))) + return NULL; + + assert(msg.type == XS_WATCH_EVENT); + path = malloc(msg.len); + if (!path) /* NOTE(review): the msg.len payload bytes stay unread on the socket */ + return NULL; + + if (!read_all(h->fd, path, msg.len)) { + free_no_errno(path); + return NULL; + } + return path; +} + +/* Acknowledge watch on node. Watches must be acknowledged before + * any other watches can be read. + * Returns false on failure. + */ +bool xs_acknowledge_watch(struct xs_handle *h) +{ + return xs_bool(xs_single(h, XS_WATCH_ACK, "OK", NULL)); +} + +/* Remove a watch on a node. + * Returns false on failure (no watch on that node). + */ +bool xs_unwatch(struct xs_handle *h, const char *path) +{ + return xs_bool(xs_single(h, XS_UNWATCH, path, NULL)); +} + +/* Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. + * Transaction only applies to the given subtree. + * You can only have one transaction at any time. + * Returns false on failure. + */ +bool xs_transaction_start(struct xs_handle *h, const char *subtree) +{ + return xs_bool(xs_single(h, XS_TRANSACTION_START, subtree, NULL)); +} + +/* End a transaction. + * If abort is true, transaction is discarded instead of committed. + * Returns false on failure; note the daemon refuses to commit a transaction + * it has timed out for blocking another client. + */ +bool xs_transaction_end(struct xs_handle *h, bool abort) +{ + char abortstr[2]; + + if (abort) + strcpy(abortstr, "F"); /* daemon discards the transaction */ + else + strcpy(abortstr, "T"); /* daemon commits the transaction */ + return xs_bool(xs_single(h, XS_TRANSACTION_END, abortstr, NULL)); +} + +/* Introduce a new domain. + * This tells the store daemon about a shared memory page and event channel + * associated with a domain: the domain uses these to communicate.
+ */ +bool xs_introduce_domain(struct xs_handle *h, + domid_t domid, + unsigned long mfn, + unsigned int eventchn, + const char *path) +{ + char domid_str[MAX_STRLEN(domid)]; + char mfn_str[MAX_STRLEN(mfn)]; + char eventchn_str[MAX_STRLEN(eventchn)]; + struct iovec iov[4]; + + sprintf(domid_str, "%u", domid); + sprintf(mfn_str, "%lu", mfn); + sprintf(eventchn_str, "%u", eventchn); + + iov[0].iov_base = domid_str; + iov[0].iov_len = strlen(domid_str) + 1; + iov[1].iov_base = mfn_str; + iov[1].iov_len = strlen(mfn_str) + 1; + iov[2].iov_base = eventchn_str; + iov[2].iov_len = strlen(eventchn_str) + 1; + iov[3].iov_base = (char *)path; + iov[3].iov_len = strlen(path) + 1; + + return xs_bool(xs_talkv(h, XS_INTRODUCE, iov, ARRAY_SIZE(iov), NULL)); +} + +bool xs_release_domain(struct xs_handle *h, + domid_t domid) +{ + char domid_str[MAX_STRLEN(domid)]; + + sprintf(domid_str, "%u", domid); + + return xs_bool(xs_single(h, XS_RELEASE, domid_str, NULL)); +} + +bool xs_shutdown(struct xs_handle *h) +{ + bool ret = xs_bool(xs_single(h, XS_SHUTDOWN, "", NULL)); + if (ret) { + char c; + /* Wait for it to actually shutdown. */ + read(h->fd, &c, 1); + } + return ret; +} + +/* Only useful for DEBUG versions */ +char *xs_debug_command(struct xs_handle *h, const char *cmd, + void *data, unsigned int len) +{ + struct iovec iov[2]; + + iov[0].iov_base = (void *)cmd; + iov[0].iov_len = strlen(cmd) + 1; + iov[1].iov_base = data; + iov[1].iov_len = len; + + return xs_talkv(h, XS_DEBUG, iov, ARRAY_SIZE(iov), NULL); +} diff --git a/tools/xenstore/xs.h b/tools/xenstore/xs.h new file mode 100644 index 0000000000..ff9481c3a6 --- /dev/null +++ b/tools/xenstore/xs.h @@ -0,0 +1,146 @@ +#ifndef _XS_H +#define _XS_H +/* + Xen Store Daemon providing simple tree-like database. 
+ Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* On failure, these routines set errno. */ +#include "xs_lib.h" + +struct xs_handle; + +/* Connect to the xs daemon. + * Returns a handle or NULL. + */ +struct xs_handle *xs_daemon_open(void); + +/* Connect to the xs daemon (readonly for non-root clients). + * Returns a handle or NULL. + */ +struct xs_handle *xs_daemon_open_readonly(void); + +/* Close the connection to the xs daemon. */ +void xs_daemon_close(struct xs_handle *); + +/* Get contents of a directory. + * Returns a malloced array: call free() on it after use. + * Num indicates size. + */ +char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num); + +/* Get the value of a single file. + * Returns a malloced value: call free() on it after use. + * len indicates length in bytes. + */ +void *xs_read(struct xs_handle *h, const char *path, unsigned int *len); + +/* Write the value of a single file. + * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL. + */ +bool xs_write(struct xs_handle *h, const char *path, const void *data, unsigned int len, + int createflags); + +/* Create a new directory. + * Returns false on failure. 
+ */ +bool xs_mkdir(struct xs_handle *h, const char *path); + +/* Destroy a file or directory (and children). + * Returns false on failure. NOTE(review): xs.c documents xs_rm as "directories must be empty" -- confirm which holds. + */ +bool xs_rm(struct xs_handle *h, const char *path); + +/* Get permissions of node (first element is owner, first perms is "other"). + * Returns malloced array, or NULL: call free() after use. + */ +struct xs_permissions *xs_get_permissions(struct xs_handle *h, + const char *path, + unsigned int *num); + +/* Set permissions of node (must be owner). + * Returns false on failure. + */ +bool xs_set_permissions(struct xs_handle *h, + const char *path, + struct xs_permissions *perms, + unsigned int num_perms); + +/* Watch a node for changes (poll on fd to detect, or call xs_read_watch()). + * When the node (or any child) changes, fd will become readable. + * Priority indicates order if multiple watchers: higher is first. + * Returns false on failure. + */ +bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority); + +/* Return the FD to poll on to see if a watch has fired. */ +int xs_fileno(struct xs_handle *h); + +/* Find out what node change was on (will block if nothing pending). + * Returns malloced path, or NULL: call free() after use. + */ +char *xs_read_watch(struct xs_handle *h); + +/* Acknowledge watch on node. Watches must be acknowledged before + * any other watches can be read. + * Returns false on failure. + */ +bool xs_acknowledge_watch(struct xs_handle *h); + +/* Remove a watch on a node. + * Returns false on failure (no watch on that node). + */ +bool xs_unwatch(struct xs_handle *h, const char *path); + +/* Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. + * Transaction only applies to the given subtree. + * You can only have one transaction at any time. + * Returns false on failure. + */ +bool xs_transaction_start(struct xs_handle *h, const char *subtree); + +/* End a transaction.
+ * If abort is true, transaction is discarded instead of committed. + * Returns false on failure; note the daemon refuses to commit a transaction + * it has timed out for blocking another client. + */ +bool xs_transaction_end(struct xs_handle *h, bool abort); + +/* Introduce a new domain. + * This tells the store daemon about a shared memory page, event channel + * and store path associated with a domain: the domain uses these to communicate. + */ +bool xs_introduce_domain(struct xs_handle *h, + domid_t domid, + unsigned long mfn, + unsigned int eventchn, + const char *path); + +/* Release a domain. + * Tells the store daemon to release the memory page to the domain. + */ +bool xs_release_domain(struct xs_handle *h, domid_t domid); + +/* Only useful for DEBUG versions */ +char *xs_debug_command(struct xs_handle *h, const char *cmd, + void *data, unsigned int len); + +/* Shut down the daemon. */ +bool xs_shutdown(struct xs_handle *h); + +#endif /* _XS_H */ diff --git a/tools/xenstore/xs_lib.c b/tools/xenstore/xs_lib.c new file mode 100644 index 0000000000..8630eaffce --- /dev/null +++ b/tools/xenstore/xs_lib.c @@ -0,0 +1,141 @@ +#include "xs_lib.h" +#include +#include +#include +#include +#include + +/* Common routines for the Xen store daemon and client library. */ + +static const char *xs_daemon_rootdir(void) +{ + char *s = getenv("XENSTORED_ROOTDIR"); + return (s ? s : "/var/lib/xenstored"); +} + +static const char *xs_daemon_rundir(void) +{ + char *s = getenv("XENSTORED_RUNDIR"); + return (s ?
s : "/var/run/xenstored"); +} + +const char *xs_daemon_socket(void) +{ + static char buf[PATH_MAX]; + sprintf(buf, "%s/socket", xs_daemon_rundir()); /* NOTE(review): unbounded; an over-long XENSTORED_RUNDIR overflows buf */ + return buf; +} + +const char *xs_daemon_socket_ro(void) +{ + static char buf[PATH_MAX]; + sprintf(buf, "%s/socket_ro", xs_daemon_rundir()); /* NOTE(review): unbounded sprintf into buf */ + return buf; +} + +const char *xs_daemon_store(void) +{ + static char buf[PATH_MAX]; + sprintf(buf, "%s/store", xs_daemon_rootdir()); /* NOTE(review): unbounded; an over-long XENSTORED_ROOTDIR overflows buf */ + return buf; +} + +const char *xs_daemon_transactions(void) +{ + static char buf[PATH_MAX]; + sprintf(buf, "%s/transactions", xs_daemon_rootdir()); /* NOTE(review): unbounded sprintf into buf */ + return buf; +} + +/* Simple routines for writing to sockets, etc. */ +bool write_all(int fd, const void *data, unsigned int len) +{ + while (len) { + int done; + + done = write(fd, data, len); + if (done < 0 && errno == EINTR) + continue; + if (done <= 0) + return false; + data += done; /* arithmetic on void *: relies on the GNU C extension */ + len -= done; + } + + return true; +} + +/* Convert num NUL-terminated strings (permission letter, then domain id) to permissions. False (errno EINVAL) if a problem. */ +bool strings_to_perms(struct xs_permissions *perms, unsigned int num, + const char *strings) +{ + const char *p; + char *end; + unsigned int i; + + for (p = strings, i = 0; i < num; i++) { + /* "r", "w", "b" for both, or "n" for none. */ + switch (*p) { + case 'r': + perms[i].perms = XS_PERM_READ; + break; + case 'w': + perms[i].perms = XS_PERM_WRITE; + break; + case 'b': + perms[i].perms = XS_PERM_READ|XS_PERM_WRITE; + break; + case 'n': + perms[i].perms = XS_PERM_NONE; + break; + default: + errno = EINVAL; + return false; + } + p++; + perms[i].id = strtol(p, &end, 0); + if (*end || !*p) { + errno = EINVAL; + return false; + } + p = end + 1; + } + return true; +} + +/* Convert permissions to a string (up to len MAX_STRLEN(domid_t)+1).
/*
 * Given a buffer and its length, count how many strings it holds.
 *
 * Each string is a run of bytes ending in a NUL terminator.  The scan never
 * looks beyond strings + len: if the final run has no terminator inside the
 * buffer it is still counted as one string and the scan stops there, instead
 * of reading past the end looking for a NUL.
 */
unsigned int count_strings(const char *strings, unsigned int len)
{
	const char *p = strings;
	const char *end = strings + len;
	unsigned int num = 0;

	while (p < end) {
		const char *nul = memchr(p, '\0', (size_t)(end - p));

		num++;
		if (!nul)
			break;	/* unterminated tail: nothing more to scan */
		p = nul + 1;
	}

	return num;
}
/*
 * Buffer size sufficient to print any value of type/expression x in decimal.
 *
 * Every (started) group of 10 bits needs at most 3 digits; add one byte for a
 * sign and one for the nul terminator.  The bit count is rounded UP to a
 * multiple of 10 so that a 32-bit value gets 4 groups: rounding down gave 11
 * bytes, one short of "-2147483648" plus its terminator.
 */
#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + 9) / 10 * 3 + 2)
/*
 * Turn the regular file at dirname into a directory of the same name whose
 * ".DATA" entry holds the file's former contents.  A sibling "<dirname>.perms"
 * file, if there is one, is moved inside as ".perms".
 *
 * Failure of any of the first three steps is reported through barf_perror();
 * the final rename of the perms file is best-effort and its result ignored.
 */
static void convert_to_dir(const char *dirname)
{
	char *parked, *datafile, *perms_outside, *perms_inside;

	/* Move the file out of the way under a temporary name. */
	parked = talloc_asprintf(dirname, "%s.tmp", dirname);
	if (rename(dirname, parked))
		barf_perror("Failed to rename %s to %s", dirname, parked);

	/* A directory takes over the original name. */
	if (mkdir(dirname, 0700))
		barf_perror("Failed to mkdir %s", dirname);

	/* The parked file becomes the directory's data entry. */
	datafile = talloc_asprintf(dirname, "%s/.DATA", dirname);
	if (rename(parked, datafile))
		barf_perror("Failed to rename into %s", dirname);

	/* If perms exists, move it in. */
	perms_outside = talloc_asprintf(dirname, "%s.perms", dirname);
	perms_inside = talloc_asprintf(dirname, "%s/.perms", dirname);
	rename(perms_outside, perms_inside);
}
/*
 * Files can be used as dirs, too.  Convert them when they are.
 *
 * Looks at the parent component of filename (everything before the last
 * '/'); if that parent currently exists as a regular file it is converted
 * into a directory with convert_to_dir().  A filename without any '/' has no
 * parent component and is left alone.
 */
static void maybe_convert_to_directory(const char *filename)
{
	struct stat st;
	const char *slash = strrchr(filename, '/');
	char *dirname;

	if (!slash)
		return;

	/* The '*' precision of "%.*s" consumes an int; the pointer difference
	 * is a ptrdiff_t, so convert it explicitly. */
	dirname = talloc_asprintf(filename, "%.*s",
				  (int)(slash - filename), filename);
	if (lstat(dirname, &st) == 0 && S_ISREG(st.st_mode))
		convert_to_dir(dirname);
}
*/ + ret = malloc(sizeof(char *) * *num + len); + p = (char *)&ret[*num]; + i = 0; + while ((dirent = readdir(dir)) != NULL) { + if (strchr(dirent->d_name, '.')) + continue; + ret[i] = p; + strcpy(p, dirent->d_name); + p += strlen(p) + 1; + i++; + } + closedir(dir); + + return ret; +} + +static char *filename_to_data(const char *filename) +{ + struct stat st; + + if (lstat(filename, &st) == 0 && S_ISDIR(st.st_mode)) + return talloc_asprintf(filename, "%s/.DATA", filename); + return (char *)filename; +} + +static void *file_read(struct file_ops_info *info, + const char *path, unsigned int *len) +{ + void *ret; + char *filename = filename_to_data(path_to_name(info, path)); + unsigned long size; + + ret = grab_file(filename, &size); + /* Directory exists, .DATA doesn't. */ + if (!ret && errno == ENOENT && strends(filename, ".DATA")) + errno = EISDIR; + *len = size; + return ret; +} + +static struct xs_permissions *file_get_perms(struct file_ops_info *info, + const char *path, + unsigned int *num) +{ + void *perms; + struct xs_permissions *ret; + char *filename = path_to_name(info, path); + char *permfile; + unsigned long size; + struct stat st; + + /* No permfile: we didn't bother, return defaults. */ + if (lstat(filename, &st) != 0) + return NULL; + + if (S_ISDIR(st.st_mode)) + permfile = talloc_asprintf(path, "%s/.perms", filename); + else + permfile = talloc_asprintf(path, "%s.perms", filename); + + perms = grab_file(permfile, &size); + if (!perms) { + ret = new(struct xs_permissions); + ret[0].id = 0; + /* Default for root is readable. 
*/ + if (streq(path, "/")) + ret[0].perms = XS_PERM_READ; + else + ret[0].perms = XS_PERM_NONE; + *num = 1; + release_file(perms, size); + return ret; + } + *num = count_strings(perms, size); + + ret = new_array(struct xs_permissions, *num); + if (!strings_to_perms(ret, *num, perms)) + barf("Reading permissions from %s", permfile); + release_file(perms, size); + return ret; +} + +static bool file_set_perms(struct file_ops_info *info, + const char *path, + struct xs_permissions *perms, + unsigned int num) +{ + unsigned int i; + char *filename = path_to_name(info, path); + char *permfile; + int fd; + struct stat st; + + if (num < 1) { + errno = EINVAL; + return false; + } + + if (!write_ok(info, path)) + return false; + + /* Check non-perm file exists/ */ + if (lstat(filename, &st) != 0) + return false; + + if (S_ISDIR(st.st_mode)) + permfile = talloc_asprintf(path, "%s/.perms", filename); + else + permfile = talloc_asprintf(path, "%s.perms", filename); + + fd = open(permfile, O_WRONLY|O_CREAT|O_TRUNC, 0600); + if (fd < 0) + return false; + + for (i = 0; i < num; i++) { + char buffer[100]; + + if (!perm_to_string(&perms[i], buffer)) { + int saved_errno = errno; + close(fd); + errno = saved_errno; + return false; + } + if (write(fd, buffer, strlen(buffer) + 1) + != (int)strlen(buffer) + 1) + barf_perror("Failed to write perm"); + } + close(fd); + return true; +} + +static bool file_write(struct file_ops_info *info, + const char *path, const void *data, + unsigned int len, int createflags) +{ + char *filename = filename_to_data(path_to_name(info, path)); + int fd; + + /* Kernel isn't strict, but library is. */ + if (createflags & ~(O_CREAT|O_EXCL)) { + errno = EINVAL; + return false; + } + + if (!write_ok(info, path)) + return false; + + /* We regard it as existing if dir exists. 
/*
 * Run cmd through system() and treat anything but a clean zero exit as
 * fatal.
 *
 * errno is only meaningful when system() itself fails (returns -1), so only
 * that case is reported with barf_perror().  A command that ran but was
 * killed or exited non-zero is reported with barf(), which avoids appending
 * an unrelated, stale errno description to the message.
 */
static void do_command(const char *cmd)
{
	int status = system(cmd);

	if (status == -1)
		barf_perror("Failed '%s': %i", cmd, status);
	else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		barf("Failed '%s': %i", cmd, status);
}
talloc_free(cmd); + + info->transact_base = talloc_asprintf(NULL, "%s.transact", info->base); + info->transact = talloc_strdup(NULL, subtree); + return true; +} + +static bool file_transaction_end(struct file_ops_info *info, bool abort) +{ + char *old, *cmd; + + if (!info->transact) { + errno = ENOENT; + return false; + } + + if (abort) { + cmd = talloc_asprintf(NULL, "rm -r %s", info->transact_base); + do_command(cmd); + goto success; + } + + old = talloc_asprintf(NULL, "rm -rf %s", info->base); + do_command(old); + talloc_free(old); + + cmd = talloc_asprintf(NULL, "mv %s %s", + info->transact_base, info->base); + do_command(cmd); + +success: + talloc_free(cmd); + talloc_free(info->transact); + talloc_free(info->transact_base); + info->transact = NULL; + info->transact_base = NULL; + return true; +} + +static struct file_ops_info *file_handle(const char *dir) +{ + struct file_ops_info *info = talloc(NULL, struct file_ops_info); + + info->base = dir; + info->transact_base = NULL; + info->transact = NULL; + return info; +} + +static void file_close(struct file_ops_info *handle) +{ + talloc_free(handle); +} + +static struct xs_handle *xs_handle(const char *dir __attribute__((unused))) +{ + struct xs_handle *h; + + h = xs_daemon_open(); + if (!h) + barf_perror("Connecting to xs daemon"); + return h; +} + +static void xs_close(struct xs_handle *handle) +{ + xs_daemon_close(handle); +} + +struct ops file_ops = { + .name = "FILE", + .dir = (void *)file_directory, + .read = (void *)file_read, + .write = (void *)file_write, + .mkdir = (void *)file_mkdir, + .rm = (void *)file_rm, + .get_perms = (void *)file_get_perms, + .set_perms = (void *)file_set_perms, + .transaction_start = (void *)file_transaction_start, + .transaction_end = (void *)file_transaction_end, + .handle = (void *)file_handle, + .close = (void *)file_close, +}; + +struct ops xs_ops = { + .name = "XS", + .dir = (void *)xs_directory, + .read = (void *)xs_read, + .write = (void *)xs_write, + .mkdir = (void 
/* qsort() comparator: a and b each point at a string pointer; the strings
 * they refer to are ordered with strcmp(). */
static int strptrcmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}

/* Sort the num entries of dir in place into strcmp() order. */
static void sort_dir(char **dir, unsigned int num)
{
	qsort(dir, num, sizeof(*dir), strptrcmp);
}
/*
 * Mixing step of the Jenkins hash.
 *
 * NOTE: Arguments are modified: a, b and c must be lvalues, and all three
 * are updated in place.  The body is wrapped in do { } while (0) so that
 * "__jhash_mix(a, b, c);" is exactly one statement and can be used as the
 * unbraced body of an if/else.
 */
#define __jhash_mix(a, b, c) \
do { \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>13); \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<8); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>13); \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>12);  \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<16); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>5); \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>3);  \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<10); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>15); \
} while (0)

/* The golden ratio: an arbitrary value */
#define JHASH_GOLDEN_RATIO	0x9e3779b9
+ */ +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c, len; + const u8 *k = key; + + len = length; + a = b = JHASH_GOLDEN_RATIO; + c = initval; + + while (len >= 12) { + a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24)); + b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24)); + c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24)); + + __jhash_mix(a,b,c); + + k += 12; + len -= 12; + } + + c += length; + switch (len) { + case 11: c += ((u32)k[10]<<24); + case 10: c += ((u32)k[9]<<16); + case 9 : c += ((u32)k[8]<<8); + case 8 : b += ((u32)k[7]<<24); + case 7 : b += ((u32)k[6]<<16); + case 6 : b += ((u32)k[5]<<8); + case 5 : b += k[4]; + case 4 : a += ((u32)k[3]<<24); + case 3 : a += ((u32)k[2]<<16); + case 2 : a += ((u32)k[1]<<8); + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + +/* A special optimized version that handles 1 or more of u32s. + * The length parameter here is the number of u32s in the key. + */ +static inline u32 jhash2(u32 *k, u32 length, u32 initval) +{ + u32 a, b, c, len; + + a = b = JHASH_GOLDEN_RATIO; + c = initval; + len = length; + + while (len >= 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + k += 3; len -= 3; + } + + c += length * 4; + + switch (len) { + case 2 : b += k[1]; + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + + +/* A special ultra-optimized versions that knows they are hashing exactly + * 3, 2 or 1 word(s). + * + * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally + * done at the end is not done here. 
+ */ +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += initval; + + __jhash_mix(a, b, c); + + return c; +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return jhash_3words(a, b, 0, initval); +} + +static inline u32 jhash_1word(u32 a, u32 initval) +{ + return jhash_3words(a, 0, 0, initval); +} + +static unsigned int get_randomness(int *state) +{ + return jhash_1word((*state)++, *state * 1103515243); +} + +static char *random_path(int *state) +{ + unsigned int i; + char *ret = NULL; + + if (get_randomness(state) % 20 == 0) + return talloc_strdup(NULL, "/"); + + for (i = 0; i < 1 || (get_randomness(state) % 2); i++) { + ret = talloc_asprintf_append(ret, "/%i", + get_randomness(state) % 15); + } + return ret; +} + +static char *bool_to_errstring(bool result) +{ + if (result) + return talloc_strdup(NULL, "OK"); + + /* Real daemon can never return this. */ + if (errno == ENOTDIR) + errno = ENOENT; + return talloc_asprintf(NULL, "FAILED:%s", strerror(errno)); +} + +static char *linearize_dir(char **dir, unsigned int *num) +{ + char *result = NULL; + unsigned int i; + + if (!dir) + return bool_to_errstring(false); + + if (!*num) { + free(dir); + return talloc_strdup(NULL, ""); + } + + sort_dir(dir, *num); + for (i = 0; i < *num; i++) + result = talloc_asprintf_append(result, "%s\n", dir[i]); + free(dir); + return result; +} + +static char *linearize_read(char *read, unsigned int *size) +{ + char *ret; + + if (!read) + return bool_to_errstring(false); + + ret = talloc_asprintf(NULL, "%i:%.*s", *size, *size, read); + free(read); + return ret; +} + +static char *linearize_perms(struct xs_permissions *perms, unsigned int *size) +{ + char *ret = NULL; + unsigned int i; + + if (!perms) + return bool_to_errstring(false); + + for (i = 0; i < *size; i++) + ret = talloc_asprintf_append(ret, "(%u %u)", + perms[i].id, perms[i].perms); + + free(perms); + return ret; +} + +static int 
random_flags(int *state) +{ + switch (get_randomness(state) % 4) { + case 0: + return 0; + case 1: + return O_CREAT; + case 2: + return O_CREAT|O_EXCL; + default: + return get_randomness(state); + } +} + +/* Do the next operation, return the results. */ +static char *do_next_op(struct ops *ops, void *h, int state, bool verbose) +{ + char *name; + unsigned int num; + char *ret; + + if (verbose) + printf("State %i: ", state); + + name = random_path(&state); + switch (get_randomness(&state) % 9) { + case 0: + if (verbose) + printf("DIR %s\n", name); + ret = linearize_dir(ops->dir(h, name, &num), &num); + break; + case 1: + if (verbose) + printf("READ %s\n", name); + ret = linearize_read(ops->read(h, name, &num), &num); + break; + case 2: { + int flags = random_flags(&state); + char *contents = talloc_asprintf(NULL, "%i", + get_randomness(&state)); + unsigned int len = get_randomness(&state)%(strlen(contents)+1); + if (verbose) + printf("WRITE %s %s %.*s\n", name, + flags == O_CREAT ? "O_CREAT" + : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL" + : flags == 0 ? "0" : "CRAPFLAGS", + len, contents); + ret = bool_to_errstring(ops->write(h, name, contents, len, + flags)); + talloc_steal(ret, contents); + break; + } + case 3: + if (verbose) + printf("MKDIR %s\n", name); + ret = bool_to_errstring(ops->mkdir(h, name)); + break; + case 4: + if (verbose) + printf("RM %s\n", name); + ret = bool_to_errstring(ops->rm(h, name)); + break; + case 5: + if (verbose) + printf("GETPERMS %s\n", name); + ret = linearize_perms(ops->get_perms(h, name, &num), + &num); + break; + case 6: { + unsigned int i, num = get_randomness(&state)%8; + struct xs_permissions perms[num]; + + if (verbose) + printf("SETPERMS %s: ", name); + for (i = 0; i < num; i++) { + perms[i].id = get_randomness(&state)%8; + perms[i].perms = get_randomness(&state)%4; + if (verbose) + printf("%i%c ", perms[i].id, + perms[i].perms == XS_PERM_WRITE ? 'W' + : perms[i].perms == XS_PERM_READ ? 
'R' + : perms[i].perms == + (XS_PERM_READ|XS_PERM_WRITE) ? 'B' + : 'N'); + } + if (verbose) + printf("\n"); + ret = bool_to_errstring(ops->set_perms(h, name, perms, + num)); + break; + } + case 7: { + if (verbose) + printf("START %s\n", name); + ret = bool_to_errstring(ops->transaction_start(h, name)); + if (streq(ret, "OK")) { + talloc_free(ret); + ret = talloc_asprintf(NULL, "OK:START-TRANSACT:%s", + name); + } + + break; + } + case 8: { + bool abort = (get_randomness(&state) % 2); + + if (verbose) + printf("STOP %s\n", abort ? "ABORT" : "COMMIT"); + ret = bool_to_errstring(ops->transaction_end(h, abort)); + if (streq(ret, "OK")) { + talloc_free(ret); + ret = talloc_strdup(NULL, "OK:STOP-TRANSACT"); + } + break; + } + default: + barf("Impossible randomness"); + } + + talloc_steal(ret, name); + return ret; +} + +static int daemon_pid; + +static void cleanup_xs_ops(void) +{ + char *cmd; + if (daemon_pid) { + struct xs_handle *h; + h = xs_daemon_open(); + if (h) { + if (xs_shutdown(h)) { + waitpid(daemon_pid, NULL, 0); + daemon_pid = 0; + } + xs_daemon_close(h); + } + if (daemon_pid) { + kill(daemon_pid, SIGTERM); + waitpid(daemon_pid, NULL, 0); + } + } + + cmd = talloc_asprintf(NULL, "rm -rf testsuite/tmp/*"); + do_command(cmd); + talloc_free(cmd); +} + +static void cleanup_file_ops(const char *dir) +{ + char *cmd; + + cmd = talloc_asprintf(NULL, "rm -rf %s %s.transact", dir, dir); + do_command(cmd); + talloc_free(cmd); +} + +static void cleanup(const char *dir) +{ + cleanup_xs_ops(); + cleanup_file_ops(dir); +} + +static void setup_file_ops(const char *dir) +{ + if (mkdir(dir, 0700) != 0) + barf_perror("Creating directory %s", dir); +} + +static void setup_xs_ops(void) +{ + int fds[2]; + + /* Start daemon. */ + pipe(fds); + if ((daemon_pid = fork())) { + /* Child writes PID when its ready: we wait for that. 
*/ + char buffer[20]; + close(fds[1]); + if (read(fds[0], buffer, sizeof(buffer)) < 0) + barf("Failed to summon daemon"); + close(fds[0]); + } else { + dup2(fds[1], STDOUT_FILENO); + close(fds[0]); +#if 0 + execlp("valgrind", "valgrind", "xenstored_test", "--output-pid", + "--no-fork", NULL); +#else + execlp("./xenstored_test", "xenstored_test", "--output-pid", + "--no-fork", NULL); +#endif + exit(1); + } +} + +static void setup(const char *dir) +{ + setup_file_ops(dir); + setup_xs_ops(); +}; + +struct simple_data +{ + unsigned int seed; + bool print_progress; + bool fast; + struct ops *ops; + const char *dir; +}; + +/* Just a random test. Don't care about results, just that it doesn't + * go boom. */ +static unsigned int try_simple(const bool *trymap, + unsigned int number, + bool verbose, + void *_data) +{ + unsigned int i, print; + void *h; + char *snapshot = NULL; + struct simple_data *data = _data; + + if (data->ops == &xs_ops) { + cleanup_xs_ops(); + setup_xs_ops(); + } else { + cleanup_file_ops(data->dir); + setup_file_ops(data->dir); + } + h = data->ops->handle(data->dir); + + print = number / 76; + if (!print) + print = 1; + + for (i = 0; i < number; i++) { + char *ret; + + if (data->print_progress) { + if (i % print == 0) { + printf("."); + fflush(stdout); + } + } + + if (trymap && !trymap[i]) + continue; + + ret = do_next_op(data->ops, h, i + data->seed, verbose); + if (verbose) + printf("-> %.*s\n", strchr(ret, '\n') - ret, ret); + if (streq(ret, "FAILED:Bad file descriptor")) + goto out; + if (kill(daemon_pid, 0) != 0) + goto out; + + if (!data->fast) { + if (strstarts(ret, "OK:START-TRANSACT:")) { + void *pre = data->ops->handle(data->dir); + + snapshot = dump(data->ops, pre); + if (!snapshot) + goto out; + data->ops->close(pre); + } else if (streq(ret, "OK:STOP-TRANSACT")) { + talloc_free(snapshot); + snapshot = NULL; + } + } + + talloc_free(ret); + + if (snapshot) { + void *pre = data->ops->handle(data->dir); + char *contents; + + contents = 
/*
 * Binary elimination: try eliminating all of them, then reduce.
 *
 * map[i] says whether operation i is still part of the failing sequence.
 * The try_num entries starting at try_start are switched off in a scratch
 * copy and the sequence is re-run through try().  If it still stops at
 * "number - 1" (the same failure), those entries are switched off in map for
 * good.  Otherwise, unless only one entry was involved, each half of the
 * range is attempted separately.
 */
static void reduce(bool *map,
		   unsigned int number,
		   unsigned int try_start, unsigned int try_num,
		   unsigned int (*try)(const bool *map,
				       unsigned int number,
				       bool verbose,
				       void *),
		   void *data)
{
	bool candidate[number];
	unsigned int i, first_half;

	if (!try_num)
		return;

	/* Scratch map: the current map with the range under test disabled. */
	for (i = 0; i < number; i++) {
		bool in_range = i >= try_start && i - try_start < try_num;

		candidate[i] = in_range ? false : map[i];
	}

	/* We want the *same* failure: must fail at "number-1". */
	if (try(candidate, number, false, data) == number - 1) {
		for (i = 0; i < try_num; i++)
			map[try_start + i] = false;
		return;
	}

	/* A single entry that cannot be dropped: nothing left to split. */
	if (try_num == 1)
		return;

	/* Try each half... */
	first_half = try_num / 2;
	reduce(map, number, try_start, first_half, try, data);
	reduce(map, number, try_start + first_half, try_num - first_half,
	       try, data);
}
*/ +static void simple_test(const char *dir, + unsigned int iters, unsigned int seed, + bool fast, bool verbose) +{ + struct simple_data data; + unsigned int try; + + data.seed = seed; + data.print_progress = !verbose; + data.fast = fast; + data.ops = &xs_ops; + data.dir = dir; + + try = try_simple(NULL, iters, verbose, &data); + if (try == iters) { + cleanup_xs_ops(); + printf("Succeeded\n"); + exit(0); + } + printf("Failed on iteration %u\n", try + 1); + data.print_progress = false; + reduce_problem(try + 1, try_simple, &data); +} + +static bool ops_equal(struct ops *a, void *ah, + struct ops *b, void *bh, + const char *node, + struct ops **fail) +{ + char **dira = NULL, **dirb = NULL; + char *dataa = NULL, *datab = NULL; + unsigned int i, numa, numb, lena, lenb; + struct xs_permissions *permsa = NULL, *permsb = NULL; + unsigned int numpermsa, numpermsb; + char *nodename; + bool ret = false; + + /* FILE backend expects talloc'ed pointer. */ + nodename = talloc_strdup(NULL, node); + permsa = a->get_perms(ah, nodename, &numpermsa); + if (!permsa) { + *fail = a; + goto out; + } + permsb = b->get_perms(bh, nodename, &numpermsb); + if (!permsb) { + *fail = b; + goto out; + } + if (numpermsa != numpermsb) + goto out; + for (i = 0; i < numpermsa; i++) { + if (permsa[i].perms != permsb[i].perms) + goto out; + if (permsa[i].id != permsb[i].id) + goto out; + } + + /* Non-pure-directory nodes contain data. */ + dataa = a->read(ah, nodename, &lena); + if (!dataa && errno != EISDIR) { + *fail = a; + goto out; + } + datab = b->read(bh, nodename, &lenb); + if (!datab && errno != EISDIR) { + *fail = b; + goto out; + } + + if (dataa) { + if (!datab) + goto out; + if (lena != lenb) + goto out; + + if (memcmp(dataa, datab, lena) != 0) + goto out; + } else + if (datab) + goto out; + + /* Everything is a directory. 
*/
+	dira = a->dir(ah, nodename, &numa);
+	if (!dira) {
+		*fail = a;
+		goto out;
+	}
+	dirb = b->dir(bh, nodename, &numb);
+	if (!dirb) {
+		*fail = b;
+		goto out;
+	}
+	if (numa != numb)
+		goto out;
+	sort_dir(dira, numa);
+	sort_dir(dirb, numb);
+	for (i = 0; i < numa; i++) {
+		char subnode[strlen(node) + 1 + strlen(dira[i]) + 1];
+
+		if (!streq(dira[i], dirb[i]))
+			goto out;
+
+		strcpy(subnode, node);
+		if (!streq(node, "/"))
+			strcat(subnode, "/");
+		strcat(subnode, dira[i]);
+		if (!ops_equal(a, ah, b, bh, subnode, fail))
+			goto out;
+	}
+
+	ret = true;
+out:
+	free(permsa);
+	free(permsb);
+	free(dataa);
+	free(datab);
+	free(dira);
+	free(dirb);
+	talloc_free(nodename);
+	return ret;
+}
+
+/* Parameters for try_diff(); handed to it as the opaque _data pointer. */
+struct diff_data
+{
+	/* Added to the op index to select which operation is performed. */
+	unsigned int seed;
+	/* Print a '.' every so often while running. */
+	bool print_progress;
+	/* Skip the tree comparison after each op; compare once at the end. */
+	bool fast;
+	/* Directory handed to cleanup()/setup() and both backends. */
+	const char *dir;
+};
+
+/* Length of the part of @s before the first @c, or of all of @s if @c
+ * does not occur.  Returned as int because that is what the '*'
+ * precision of "%.*s" requires. */
+static int diff_prefix_len(const char *s, int c)
+{
+	const char *end = strchr(s, c);
+
+	if (!end)
+		return (int)strlen(s);
+	return (int)(end - s);
+}
+
+/* Differential: try both file and xs backend, watch for differences.
+ *
+ * Runs ops 0..number-1 (op i is skipped when trymap is non-NULL and
+ * trymap[i] is false) against both backends and compares the result
+ * strings.  Unless data->fast is set, the two trees are also compared
+ * after every op, and while a transaction is open the transaction's
+ * node is compared through a fresh pair of handles as well.
+ *
+ * Returns the index of the op at which the backends diverged, or
+ * @number if they never did.
+ */
+static unsigned int try_diff(const bool *trymap,
+			     unsigned int number,
+			     bool verbose,
+			     void *_data)
+{
+	void *fileh, *xsh;
+	char *transact = NULL;
+	struct ops *fail;
+	struct diff_data *data = _data;
+	unsigned int i, print;
+
+	cleanup(data->dir);
+	setup(data->dir);
+
+	fileh = file_handle(data->dir);
+	xsh = xs_handle(data->dir);
+
+	/* Aim for roughly one line (76 columns) of progress dots. */
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		char *file, *xs;
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+		if (trymap && !trymap[i])
+			continue;
+
+		if (verbose)
+			printf("FILE: ");
+
+		file = do_next_op(&file_ops, fileh, i+data->seed, verbose);
+		if (verbose)
+			printf("-> %.*s\n", diff_prefix_len(file, '/'), file);
+
+		if (verbose)
+			printf("XS: ");
+		xs = do_next_op(&xs_ops, xsh, i+data->seed, verbose);
+		if (verbose)
+			printf("-> %.*s\n", diff_prefix_len(xs, '/'), xs);
+
+		if (!streq(file, xs)) {
+			/* Don't leak the results: reduce_problem() calls
+			 * us again and again. */
+			talloc_free(file);
+			talloc_free(xs);
+			goto out;
+		}
+
+		/* Remember which node the open transaction covers. */
+		if (strstarts(file, "OK:START-TRANSACT:"))
+			transact = talloc_strdup(NULL,
+						 file +
+						 strlen("OK:START-TRANSACT:"));
+		else if (streq(file, "OK:STOP-TRANSACT")) {
+			talloc_free(transact);
+			transact = NULL;
+		}
+
+		talloc_free(file);
+		talloc_free(xs);
+
+		if (data->fast)
+			continue;
+
+		fail = NULL;
+		if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail)) {
+			if (fail)
+				barf("%s failed during test\n", fail->name);
+			if (verbose)
+				printf("Trees differ:\nXS:%s\nFILE%s\n",
+				       dump(&xs_ops, xsh),
+				       dump(&file_ops, fileh));
+			goto out;
+		}
+
+		if (transact) {
+			/* Compare again through handles that are not
+			 * inside the transaction. */
+			void *fileh_pre = file_handle(data->dir);
+			void *xsh_pre = xs_handle(data->dir);
+
+			fail = NULL;
+			if (!ops_equal(&xs_ops, xsh_pre, &file_ops, fileh_pre,
+				       transact, &fail)) {
+				if (fail)
+					barf("%s failed during transact\n",
+					     fail->name);
+
+				xs_daemon_close(xsh_pre);
+				talloc_free(fileh_pre);
+				goto out;
+			}
+			xs_daemon_close(xsh_pre);
+			talloc_free(fileh_pre);
+		}
+	}
+	if (data->print_progress)
+		printf("\n");
+
+	fail = NULL;
+	if (data->fast)
+		if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail))
+			barf("Final result not the same: try without --fast");
+out:
+	if (transact)
+		talloc_free(transact);
+	file_ops.close(fileh);
+	xs_ops.close(xsh);
+	return i;
+}
+
+/* Differential random test: compare results against file backend.
+ *
+ * Exits with status 0 if all @iters ops agree; otherwise reports the
+ * failing iteration and hands over to reduce_problem().
+ */
+static void diff_test(const char *dir,
+		      unsigned int iters, unsigned int seed, bool fast,
+		      bool verbose)
+{
+	struct diff_data data;
+	unsigned int try;
+
+	data.seed = seed;
+	data.print_progress = !verbose;
+	data.fast = fast;
+	data.dir = dir;
+
+	try = try_diff(NULL, iters, verbose, &data);
+	if (try == iters) {
+		cleanup_xs_ops();
+		printf("Succeeded\n");
+		exit(0);
+	}
+	printf("Failed on iteration %u\n", try + 1);
+	data.print_progress = false;
+	reduce_problem(try + 1, try_diff, &data);
+}
+
+/* Parameters for try_fail(); handed to it as the opaque _data pointer. */
+struct fail_data
+{
+	/* Added to the op index to select which operation is performed. */
+	unsigned int seed;
+	/* Print a '.' every so often while running. */
+	bool print_progress;
+	/* Directory handed to cleanup()/setup() and both backends. */
+	const char *dir;
+};
+
+/* Try xs with inserted failures: every op should either succeed or fail.
*/
+/* Each op is run against the daemon with failure injection enabled
+ * (the "failtest" debug command); SIGUSR1 then switches injection off
+ * so the daemon's tree can be compared with the file backend, which
+ * only has the op applied when the daemon reported success.
+ *
+ * Op i is skipped when trymap is non-NULL and trymap[i] is false.
+ * Returns the index of the op at which the run stopped, or @number if
+ * every op was consistent.
+ */
+static unsigned int try_fail(const bool *trymap,
+			     unsigned int number,
+			     bool verbose,
+			     void *_data)
+{
+	unsigned int i, print, tried = 0, aborted = 0;
+	struct fail_data *data = _data;
+	struct xs_handle *tmpxsh;
+	struct file_ops_info *tmpfileh;
+	void *fileh, *xsh;
+	struct ops *fail;
+	char seed[20];
+
+	/* Make sure failures off to shut down. */
+	if (daemon_pid)
+		kill(daemon_pid, SIGUSR1);
+	cleanup(data->dir);
+	setup(data->dir);
+
+	fileh = file_handle(data->dir);
+	xsh = xs_handle(data->dir);
+
+	sprintf(seed, "%i", data->seed);
+	free(xs_debug_command(xsh, "failtest", seed, strlen(seed)+1));
+
+	/* Aim for roughly one line (76 columns) of progress dots. */
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		unsigned int limit, failed;
+		char *ret;
+
+		/* A few times we fail due to other end OOM. */
+		limit = 0;
+		while (!xsh) {
+			xsh = xs_handle(data->dir);
+			if (!xsh && errno == ECONNREFUSED) {
+				if (verbose)
+					printf("Daemon refused connection\n");
+				goto out;
+			}
+			if (!xsh && limit++ == 5) {
+				printf("Daemon failed conn 5 times\n");
+				goto out;
+			}
+		}
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+		if (trymap && !trymap[i])
+			continue;
+
+		if (verbose)
+			printf("(%i) ", i);
+		ret = do_next_op(&xs_ops, xsh, i + data->seed, verbose);
+		/* failed: 0 = op succeeded, 1 = op failed cleanly,
+		 * 2 = connection was lost, outcome unknown. */
+		if (streq(ret, "FAILED:Connection reset by peer")
+		    || streq(ret, "FAILED:Bad file descriptor")
+		    || streq(ret, "FAILED:Broken pipe")) {
+			xs_close(xsh);
+			xsh = NULL;
+			failed = 2;
+		} else if (strstarts(ret, "OK"))
+			failed = 0;
+		else
+			failed = 1;
+
+		/* NOTE(review): this counts ops whose connection survived,
+		 * yet the summary below prints tried - aborted as "not
+		 * aborted"; the sense looks inverted -- confirm. */
+		tried++;
+		if (xsh)
+			aborted++;
+
+		if (verbose) {
+			/* "%.*s" needs an int precision, and ret need
+			 * not contain a newline at all. */
+			const char *nl = strchr(ret, '\n');
+
+			printf("-> %.*s\n",
+			       nl ? (int)(nl - ret) : (int)strlen(ret), ret);
+		}
+
+		talloc_free(ret);
+
+		/* Turn off failures using signal. */
+		if (kill(daemon_pid, SIGUSR1) != 0) {
+			if (verbose)
+				printf("Failed to signal daemon\n");
+			goto out;
+		}
+
+		if (failed == 0) {
+			/* Succeeded? Do same thing to file backend
+			 * to compare */
+		try_applying:
+			ret = do_next_op(&file_ops, fileh, i + data->seed,
+					 false);
+			if (!strstarts(ret, "OK")) {
+				/* NOTE(review): every other diagnostic here
+				 * is printed when verbose is set; this one
+				 * is printed when it is not -- confirm. */
+				if (!verbose)
+					printf("File op failed on %i\n",
+					       i + data->seed);
+				talloc_free(ret);
+				goto out;
+			}
+			talloc_free(ret);
+		}
+
+		tmpxsh = xs_handle(data->dir);
+		if (!tmpxsh) {
+			if (verbose)
+				printf("Failed to open signalled daemon\n");
+			goto out;
+		}
+		tmpfileh = file_handle(data->dir);
+
+		fail = NULL;
+		if (!ops_equal(&xs_ops, tmpxsh, &file_ops, tmpfileh, "/",
+			       &fail)) {
+			xs_close(tmpxsh);
+			file_close(tmpfileh);
+			if (fail) {
+				if (verbose)
+					printf("%s failed\n", fail->name);
+				goto out;
+			}
+			/* Maybe op succeeded: try comparing after local op? */
+			if (failed == 2) {
+				failed = 0;
+				if (verbose)
+					printf("(Looks like it succeeded)\n");
+				goto try_applying;
+			}
+			if (verbose)
+				printf("Two backends not equal\n");
+			goto out;
+		}
+
+		/* If we lost the xs handle, that ended the transaction */
+		if (!xsh)
+			file_transaction_end(fileh, true);
+
+		/* Turn failures back on. */
+		free(xs_debug_command(tmpxsh, "failtest", NULL, 0));
+		xs_close(tmpxsh);
+		file_close(tmpfileh);
+	}
+
+	printf("Total %u of %u not aborted\n", tried - aborted, tried);
+out:
+	if (xsh)
+		xs_close(xsh);
+	/* reduce_problem() calls us repeatedly: don't leak the file
+	 * backend handle either. */
+	file_close(fileh);
+	return i;
+}
+
+/* Failure-injection random test.
+ *
+ * Exits with status 0 if all @iters ops were consistent; otherwise
+ * reports the failing iteration and hands over to reduce_problem().
+ * @fast is accepted only so the signature matches the other tests.
+ */
+static void fail_test(const char *dir,
+		      unsigned int iters, unsigned int seed,
+		      bool fast __attribute__((unused)), bool verbose)
+{
+	struct fail_data data;
+	unsigned int try;
+
+	data.seed = seed;
+	data.print_progress = !verbose;
+	data.dir = dir;
+
+	try = try_fail(NULL, iters, verbose, &data);
+	if (try == iters) {
+		cleanup_xs_ops();
+		printf("Succeeded\n");
+		exit(0);
+	}
+	printf("Failed on iteration %u\n", try + 1);
+	fflush(stdout);
+	data.print_progress = false;
+	reduce_problem(try + 1, try_fail, &data);
+}
+
+/* Options are only recognised in the order shown in the usage message;
+ * they must be followed by exactly three positional arguments.
+ * Exits with status 2 if the selected test returns. */
+int main(int argc, char *argv[])
+{
+	bool verbose = false;
+	bool simple = false;
+	bool fast = false;
+	bool fail = false;
+
+	if (argv[1] && streq(argv[1], "--fail")) {
+		fail = true;
+		argv++;
+		argc--;
+	}
+
+	if (argv[1] && streq(argv[1], "--simple")) {
+		simple = true;
+		argv++;
+		argc--;
+	}
+
+	if (argv[1] && streq(argv[1], "--fast")) {
+		fast = true;
+		argv++;
+		argc--;
+	}
+
+	if (argv[1] && streq(argv[1], "--verbose")) {
+		verbose = true;
+		argv++;
+		argc--;
+	}
+
+	if (argc != 4)
+		barf("Usage: xs_random [--fail|--simple] [--fast] [--verbose] "
+		     "<directory> <iterations> <seed>");
+
+	talloc_enable_null_tracking();
+
+	if (fail)
+		fail_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+	else if (simple)
+		simple_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+	else
+		diff_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+	exit(2);
+}
diff --git a/tools/xenstore/xs_stress.c b/tools/xenstore/xs_stress.c
new file mode 100644
index 0000000000..9c480b1553
--- /dev/null
+++ b/tools/xenstore/xs_stress.c
@@ -0,0 +1,207 @@
+/* Stress test for Xen Store: multiple people hammering transactions */
+#include "xs.h"
+#include "utils.h"
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define 
NUM_HANDLES 2
+#define DIR_FANOUT 3
+#define DIR_DEPTH 3
+
+/* How often to print progress */
+static int print;
+
+/* Layout looks like /<dir>/<dir>/<dir>/count.
+ *
+ * Child body: performs @cycles committed transactions, each of which
+ * locks a randomly chosen ancestor directory and increments one
+ * randomly chosen count file.  One transaction in ten is aborted
+ * instead and does not count towards @cycles.  @childnum seeds the
+ * random sequence and picks the progress character.
+ */
+static void work(unsigned int cycles, unsigned int childnum)
+{
+	unsigned int i;
+	struct xs_handle *handles[NUM_HANDLES];
+	char id;
+
+	if (childnum < 10)
+		id = '0' + childnum;
+	else
+		id = 'A' + childnum - 10;
+
+	for (i = 0; i < NUM_HANDLES; i++) {
+		handles[i] = xs_daemon_open();
+		if (!handles[i])
+			barf_perror("Opening handle %i", i);
+	}
+
+	srandom(childnum);
+	for (i = 0; i < cycles; i++) {
+		unsigned int lockdepth, j, len;
+		char file[100] = "", lockdir[100];
+		char *contents, tmp[100];
+		struct xs_handle *h = handles[random() % NUM_HANDLES];
+
+		/* Build a random path; the prefix reached at depth
+		 * lockdepth is the directory the transaction locks. */
+		lockdepth = random() % DIR_DEPTH;
+		for (j = 0; j < DIR_DEPTH; j++) {
+			if (j == lockdepth)
+				strcpy(lockdir, file);
+			sprintf(file + strlen(file), "/%li",
+				random()%DIR_FANOUT);
+		}
+		if (streq(lockdir, ""))
+			strcpy(lockdir, "/");
+
+		if (!xs_transaction_start(h, lockdir))
+			barf_perror("%i: starting transaction %i on %s",
+				    childnum, i, lockdir);
+
+		sprintf(file + strlen(file), "/count");
+		contents = xs_read(h, file, &len);
+		if (!contents)
+			barf_perror("%i: can't read %s iter %i",
+				    childnum, file, i);
+		sprintf(tmp, "%i", atoi(contents) + 1);
+		/* xs_read() hands us a malloc'd buffer: release it. */
+		free(contents);
+		if (!xs_write(h, file, tmp, strlen(tmp)+1, 0))
+			barf_perror("%i: can't write %s iter %i",
+				    childnum, file, i);
+
+		/* Abandon 1 in 10 */
+		if (random() % 10 == 0) {
+			if (!xs_transaction_end(h, true))
+				barf_perror("%i: can't abort transact %s",
+					    childnum, lockdir);
+			/* Aborted: redo this cycle.  i is unsigned, so at
+			 * i == 0 this wraps and the loop's i++ wraps it
+			 * back to 0. */
+			i--;
+		} else {
+			if (!xs_transaction_end(h, false))
+				barf_perror("%i: can't commit transact %s",
+					    childnum, lockdir);
+
+			/* Offset when we print . so kids don't all
+			 * print at once. */
+			if ((i + print/(childnum+1)) % print == 0)
+				write(STDOUT_FILENO, &id, 1);
+		}
+	}
+}
+
+/* Create DIR_FANOUT subdirectories under @base, recursively for @togo
+ * levels; each leaf directory gets a "count" node holding "0". */
+static void create_dirs(struct xs_handle *h, const char *base, int togo)
+{
+	unsigned int i;
+	char filename[100];
+
+	if (togo == 0) {
+		sprintf(filename, "%s/count", base);
+		if (!xs_write(h, filename, "0", 2, O_EXCL|O_CREAT))
+			barf_perror("Writing to %s", filename);
+		return;
+	}
+
+	for (i = 0; i < DIR_FANOUT; i++) {
+		sprintf(filename, "%s/%i", base, i);
+		if (!xs_mkdir(h, filename))
+			barf_perror("xs_mkdir %s", filename);
+		create_dirs(h, filename, togo-1);
+	}
+}
+
+/* Sum of every "count" node found @togo levels below @base. */
+static unsigned int add_count(struct xs_handle *h, const char *base, int togo)
+{
+	unsigned int i, count;
+	char filename[100];
+
+	if (togo == 0) {
+		char *answer;
+		unsigned int len;
+
+		sprintf(filename, "%s/count", base);
+		answer = xs_read(h, filename, &len);
+		if (!answer)
+			barf_perror("Reading %s", filename);
+		count = atoi(answer);
+		free(answer);
+		return count;
+	}
+
+	count = 0;
+	for (i = 0; i < DIR_FANOUT; i++) {
+		sprintf(filename, "%s/%i", base, i);
+		count += add_count(h, filename, togo-1);
+	}
+	return count;
+}
+
+/* Build the directory tree with all counts at zero. */
+static void setup(void)
+{
+	struct xs_handle *h;
+
+	/* Do setup. */
+	h = xs_daemon_open();
+	if (!h)
+		barf_perror("Contacting daemon");
+	create_dirs(h, "", DIR_DEPTH);
+	xs_daemon_close(h);
+}
+
+/* Total of all count nodes in the tree. */
+static unsigned int tally_counts(void)
+{
+	struct xs_handle *h;
+	unsigned int ret;
+
+	h = xs_daemon_open();
+	if (!h)
+		barf_perror("Contacting daemon");
+
+	ret = add_count(h, "", DIR_DEPTH);
+	xs_daemon_close(h);
+	return ret;
+}
+
+/* Fork ten children that each run an equal share of the requested
+ * iterations, then check that the counts in the store add up to the
+ * number of transactions that were committed. */
+int main(int argc, char *argv[])
+{
+	unsigned int i, per_kid, expected;
+	bool failed = false;
+	int kids[10];
+
+	if (argc != 2)
+		barf("Usage: xs_stress <iterations>");
+
+	printf("Setting up directories...\n");
+	setup();
+
+	print = atoi(argv[1]) / 76;
+	if (!print)
+		print = 1;
+
+	/* The share per child is rounded down, so the total that will
+	 * really be committed is a multiple of the number of children. */
+	per_kid = atoi(argv[1]) / ARRAY_SIZE(kids);
+	expected = per_kid * ARRAY_SIZE(kids);
+
+	printf("Running %i children...\n", (int)ARRAY_SIZE(kids));
+	/* Flush before forking, or every child would write out its own
+	 * copy of whatever is still buffered when it exits. */
+	fflush(stdout);
+	for (i = 0; i < ARRAY_SIZE(kids); i++) {
+		kids[i] = fork();
+		if (kids[i] == -1)
+			barf_perror("fork");
+		if (kids[i] == 0) {
+			work(per_kid, i);
+			exit(0);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(kids); i++) {
+		int status;
+		if (waitpid(kids[i], &status, 0) == -1)
+			barf_perror("waitpid");
+		if (!WIFEXITED(status))
+			barf("Kid %i died via signal %i\n",
+			     i, WTERMSIG(status));
+		if (WEXITSTATUS(status) != 0) {
+			printf("Child %i exited %i\n", i, WEXITSTATUS(status));
+			failed = true;
+		}
+	}
+	if (failed)
+		exit(1);
+
+	printf("\nCounting results...\n");
+	i = tally_counts();
+	if (i != expected)
+		barf("Total counts %u not %u", i, expected);
+	printf("Success!\n");
+	exit(0);
+}
diff --git a/tools/xenstore/xs_test.c b/tools/xenstore/xs_test.c
new file mode 100644
index 0000000000..f1e66cbe28
--- /dev/null
+++ b/tools/xenstore/xs_test.c
@@ -0,0 +1,647 @@
+/*
+    Xen Store Daemon Test tool
+    Copyright (C) 2005 Rusty Russell IBM Corporation
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "utils.h"
+#include "xs_lib.h"
+
+#define XSTEST
+
+/* Connections to the daemon, indexed by the handle number that may
+ * prefix a command line. */
+static struct xs_handle *handles[10] = { NULL };
+
+/* Header of one ring buffer; the data bytes follow it directly. */
+struct ringbuf_head
+{
+	uint32_t write; /* Next place to write to */
+	uint32_t read; /* Next place to read from */
+	uint8_t flags;
+	char buf[0];
+} __attribute__((packed));
+
+/* Ring we write requests into / read replies from. */
+static struct ringbuf_head *out, *in;
+/* Number of data bytes in each ring. */
+static unsigned int ringbuf_datasize;
+/* Process signalled with SIGUSR2 after each ring access. */
+static int daemon_pid;
+
+/* FIXME: Mark connection as broken (close it?) when this happens. */
+/* True if both indices lie inside the ring's data area. */
+static bool check_buffer(const struct ringbuf_head *h)
+{
+	return (h->write < ringbuf_datasize && h->read < ringbuf_datasize);
+}
+
+/* We can't fill last byte: would look like empty buffer. */
+/* Returns where the next bytes go in @buf and stores in *len how many
+ * contiguous bytes may be written there. */
+static void *get_output_chunk(const struct ringbuf_head *h,
+			      void *buf, uint32_t *len)
+{
+	uint32_t read_mark;
+
+	/* The slot just before the read index must stay unused. */
+	if (h->read == 0)
+		read_mark = ringbuf_datasize - 1;
+	else
+		read_mark = h->read - 1;
+
+	/* Here to the end of buffer, unless they haven't read some out. */
+	*len = ringbuf_datasize - h->write;
+	if (read_mark >= h->write)
+		*len = read_mark - h->write;
+	return buf + h->write;
+}
+
+/* Returns where the next unread bytes are in @buf and stores in *len
+ * how many contiguous bytes can be read from there. */
+static const void *get_input_chunk(const struct ringbuf_head *h,
+				   const void *buf, uint32_t *len)
+{
+	/* Here to the end of buffer, unless they haven't written some. */
+	*len = ringbuf_datasize - h->read;
+	if (h->write >= h->read)
+		*len = h->write - h->read;
+	return buf + h->read;
+}
+
+/* Advance the write index by @len, wrapping at the end of the ring. */
+static void update_output_chunk(struct ringbuf_head *h, uint32_t len)
+{
+	h->write += len;
+	if (h->write == ringbuf_datasize)
+		h->write = 0;
+}
+
+/* Advance the read index by @len, wrapping at the end of the ring. */
+static void update_input_chunk(struct ringbuf_head *h, uint32_t len)
+{
+	h->read += len;
+	if (h->read == ringbuf_datasize)
+		h->read = 0;
+}
+
+/* FIXME: We spin, and we're sloppy. */
+/* Copy @len bytes from the "in" ring into @data, looping until all of
+ * them have arrived, then signal the daemon.  @fd is ignored. */
+static bool read_all_shmem(int fd __attribute__((unused)),
+			   void *data, unsigned int len)
+{
+	unsigned int avail;
+
+	if (!check_buffer(in))
+		barf("Corrupt buffer");
+
+	while (len) {
+		const void *src = get_input_chunk(in, in->buf, &avail);
+		if (avail > len)
+			avail = len;
+		memcpy(data, src, avail);
+		data += avail;
+		len -= avail;
+		update_input_chunk(in, avail);
+	}
+
+	/* Tell other end we read something. */
+	kill(daemon_pid, SIGUSR2);
+	return true;
+}
+
+/* Copy @len bytes from @data into the "out" ring, looping until all of
+ * them fit, then signal the daemon.  @fd is ignored. */
+static bool write_all_shmem(int fd __attribute__((unused)),
+			    const void *data, unsigned int len)
+{
+	uint32_t avail;
+
+	if (!check_buffer(out))
+		barf("Corrupt buffer");
+
+	while (len) {
+		void *dst = get_output_chunk(out, out->buf, &avail);
+		if (avail > len)
+			avail = len;
+		memcpy(dst, data, avail);
+		data += avail;
+		len -= avail;
+		update_output_chunk(out, avail);
+	}
+
+	/* Tell other end we wrote something. */
+	kill(daemon_pid, SIGUSR2);
+	return true;
+}
+
+static bool read_all(int fd, void *data, unsigned int len);
+/* An fd of -2 marks a handle that talks through the shared-memory
+ * rings; any other fd is read normally. */
+static bool read_all_choice(int fd, void *data, unsigned int len)
+{
+	if (fd == -2)
+		return read_all_shmem(fd, data, len);
+	return read_all(fd, data, len);
+}
+
+/* Write counterpart of read_all_choice(). */
+static bool write_all_choice(int fd, const void *data, unsigned int len)
+{
+	if (fd == -2)
+		return write_all_shmem(fd, data, len);
+	return write_all(fd, data, len);
+}
+
+/* We want access to internal functions.
*/
+#include "xs.c"
+
+/* Print the command summary and exit. */
+static void __attribute__((noreturn)) usage(void)
+{
+	barf("Usage:\n"
+	     " xs_test [--readonly] [--notimeout]\n"
+	     "Reads commands from stdin, one per line:\n"
+	     " dir <path>\n"
+	     " read <path>\n"
+	     " write <path> <flags> <value>...\n"
+	     " setid <id>\n"
+	     " mkdir <path>\n"
+	     " rm <path>\n"
+	     " getperm <path>\n"
+	     " setperm <path> <id> <flags> ...\n"
+	     " shutdown\n"
+	     " watch <path> <prio>\n"
+	     " waitwatch\n"
+	     " ackwatch\n"
+	     " unwatch <path>\n"
+	     " close\n"
+	     " start <node>\n"
+	     " abort\n"
+	     " introduce <domid> <mfn> <eventchn> <path>\n"
+	     " release <domid>\n"
+	     " commit\n"
+	     " sleep <seconds>\n"
+	     " dump\n");
+}
+
+/* Return a copy of whitespace-separated word number @num (from 0) of
+ * @line; barfs if the line has too few words.  The copy is owned by
+ * this function and is replaced the next time the same @num is asked
+ * for, so callers must not free it. */
+static char *arg(char *line, unsigned int num)
+{
+	static char *args[10];
+	unsigned int i, len = 0;
+
+	for (i = 0; i <= num; i++) {
+		line += len;
+		line += strspn(line, " \t\n");
+		len = strcspn(line, " \t\n");
+		if (!len)
+			barf("Can't get arg %u", num);
+	}
+
+	free(args[num]);
+	args[num] = malloc(len + 1);
+	if (!args[num])
+		barf_perror("Allocating arg %u", num);
+	memcpy(args[num], line, len);
+	args[num][len] = '\0';
+	return args[num];
+}
+
+/* Command being executed, for error and timeout messages. */
+static char *command;
+/* Handle the current command runs on, for the timeout message. */
+static int alarm_handle;
+
+/* Report errno for the current command (prefixed with the handle
+ * number when that is not 0) and exit. */
+static void __attribute__((noreturn)) failed(int handle)
+{
+	if (handle)
+		barf_perror("%i: %s", handle, command);
+	barf_perror("%s", command);
+}
+
+/* SIGALRM handler: the current command took too long. */
+static void alarmed(int sig __attribute__((unused)))
+{
+	if (alarm_handle) {
+		char handlename[10];
+		sprintf(handlename, "%u:", alarm_handle);
+		write(STDOUT_FILENO, handlename, strlen(handlename));
+	}
+	write(STDOUT_FILENO, command, strlen(command));
+	write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n"));
+	exit(1);
+}
+
+/* dir: print the entries of @path, one per line. */
+static void do_dir(unsigned int handle, char *path)
+{
+	char **entries;
+	unsigned int i, num;
+
+	entries = xs_directory(handles[handle], path, &num);
+	if (!entries)
+		failed(handle);
+
+	for (i = 0; i < num; i++)
+		if (handle)
+			printf("%i:%s\n", handle, entries[i]);
+		else
+			printf("%s\n", entries[i]);
+	free(entries);
+}
+
+/* read: print the value stored at @path. */
+static void do_read(unsigned int handle, char *path)
+{
+	char *value;
+	unsigned int len;
+
+	value = xs_read(handles[handle], path, &len);
+	if (!value)
+		failed(handle);
+
+	if (handle)
+		printf("%i:%.*s\n", handle, len, value);
+	else
+		printf("%.*s\n", len, value);
+	free(value);
+}
+
+/* write: store @data (including its terminating NUL) at @path.
+ * @flags is "none", "create" or "excl"; "crap" passes a deliberately
+ * meaningless flag value. */
+static void do_write(unsigned int handle, char *path, char *flags, char *data)
+{
+	int f;
+
+	if (streq(flags, "none"))
+		f = 0;
+	else if (streq(flags, "create"))
+		f = O_CREAT;
+	else if (streq(flags, "excl"))
+		f = O_CREAT | O_EXCL;
+	else if (streq(flags, "crap"))
+		f = 100;
+	else
+		barf("write flags 'none', 'create' or 'excl' only");
+
+	if (!xs_write(handles[handle], path, data, strlen(data)+1, f))
+		failed(handle);
+}
+
+/* setid: send the "setid" debug command with @id as its argument. */
+static void do_setid(unsigned int handle, char *id)
+{
+	if (!xs_bool(xs_debug_command(handles[handle], "setid", id,
+				      strlen(id)+1)))
+		failed(handle);
+}
+
+/* mkdir: create directory @path. */
+static void do_mkdir(unsigned int handle, char *path)
+{
+	if (!xs_mkdir(handles[handle], path))
+		failed(handle);
+}
+
+/* rm: remove @path. */
+static void do_rm(unsigned int handle, char *path)
+{
+	if (!xs_rm(handles[handle], path))
+		failed(handle);
+}
+
+/* getperm: print "<id> <permission>" for each permission on @path. */
+static void do_getperm(unsigned int handle, char *path)
+{
+	unsigned int i, num;
+	struct xs_permissions *perms;
+
+	perms = xs_get_permissions(handles[handle], path, &num);
+	if (!perms)
+		failed(handle);
+
+	for (i = 0; i < num; i++) {
+		char *permstring;
+
+		switch (perms[i].perms) {
+		case XS_PERM_NONE:
+			permstring = "NONE";
+			break;
+		case XS_PERM_WRITE:
+			permstring = "WRITE";
+			break;
+		case XS_PERM_READ:
+			permstring = "READ";
+			break;
+		case XS_PERM_READ|XS_PERM_WRITE:
+			permstring = "READ/WRITE";
+			break;
+		default:
+			barf("bad perm value %i", perms[i].perms);
+		}
+
+		if (handle)
+			printf("%i:%i %s\n", handle, perms[i].id, permstring);
+		else
+			printf("%i %s\n", perms[i].id, permstring);
+	}
+	free(perms);
+}
+
+/* setperm: set the permissions of @path from the "<id> <flags>" pairs
+ * that follow the path on @line.  @line is split in place. */
+static void do_setperm(unsigned int handle, char *path, char *line)
+{
+	unsigned int i;
+	struct xs_permissions perms[100];
+
+	/* Skip the command word and the path. */
+	strtok(line, " \t\n");
+	strtok(NULL, " \t\n");
+	for (i = 0; ; i++) {
+		char *arg = strtok(NULL, " \t\n");
+		if (!arg)
+			break;
+		if (i == ARRAY_SIZE(perms))
+			barf("too many permissions (max %u)",
+			     (unsigned int)ARRAY_SIZE(perms));
+		perms[i].id = atoi(arg);
+		arg = strtok(NULL, " \t\n");
+		if (!arg)
+			break;
+		if (streq(arg, "WRITE"))
+			perms[i].perms = XS_PERM_WRITE;
+		else if (streq(arg, "READ"))
+			perms[i].perms = XS_PERM_READ;
+		else if (streq(arg, "READ/WRITE"))
+			perms[i].perms = XS_PERM_READ|XS_PERM_WRITE;
+		else if (streq(arg, "NONE"))
+			perms[i].perms = XS_PERM_NONE;
+		else
+			barf("bad flags %s\n", arg);
+	}
+
+	if (!xs_set_permissions(handles[handle], path, perms, i))
+		failed(handle);
+}
+
+/* shutdown: ask the daemon to shut down. */
+static void do_shutdown(unsigned int handle)
+{
+	if (!xs_shutdown(handles[handle]))
+		failed(handle);
+}
+
+/* watch: register a watch on @node with priority @pri. */
+static void do_watch(unsigned int handle, const char *node, const char *pri)
+{
+	if (!xs_watch(handles[handle], node, atoi(pri)))
+		failed(handle);
+}
+
+/* waitwatch: read the next watch event and print its node. */
+static void do_waitwatch(unsigned int handle)
+{
+	char *node;
+
+	node = xs_read_watch(handles[handle]);
+	if (!node)
+		failed(handle);
+
+	if (handle)
+		printf("%i:%s\n", handle, node);
+	else
+		printf("%s\n", node);
+	free(node);
+}
+
+/* ackwatch: acknowledge the watch event last read. */
+static void do_ackwatch(unsigned int handle)
+{
+	if (!xs_acknowledge_watch(handles[handle]))
+		failed(handle);
+}
+
+/* unwatch: remove the watch on @node. */
+static void do_unwatch(unsigned int handle, const char *node)
+{
+	if (!xs_unwatch(handles[handle], node))
+		failed(handle);
+}
+
+/* start: begin a transaction on @node. */
+static void do_start(unsigned int handle, const char *node)
+{
+	if (!xs_transaction_start(handles[handle], node))
+		failed(handle);
+}
+
+/* commit/abort: end the current transaction. */
+static void do_end(unsigned int handle, bool abort)
+{
+	if (!xs_transaction_end(handles[handle], abort))
+		failed(handle);
+}
+
+/* introduce: map the shared page in /tmp/xcmap, create a new handle
+ * that talks through it (fd -2) in the first free handle slot, and
+ * introduce the domain to the daemon via @handle. */
+static void do_introduce(unsigned int handle,
+			 const char *domid,
+			 const char *mfn,
+			 const char *eventchn,
+			 const char *path)
+{
+	unsigned int i;
+	int fd;
+
+	/* We poll, so ignore signal */
+	signal(SIGUSR2, SIG_IGN);
+	for (i = 0; i < ARRAY_SIZE(handles); i++)
+		if (!handles[i])
+			break;
+	if (i == ARRAY_SIZE(handles))
+		barf("No free handle for introduced domain");
+
+	fd = open("/tmp/xcmap", O_RDWR);
+	if (fd < 0)
+		barf_perror("Failed to open /tmp/xcmap");
+	/* Set in and out pointers. */
+	out = mmap(NULL, getpagesize(), PROT_WRITE|PROT_READ, MAP_SHARED,fd,0);
+	if (out == MAP_FAILED)
+		barf_perror("Failed to map /tmp/xcmap page");
+	in = (void *)out + getpagesize() / 2;
+	close(fd);
+
+	/* Tell them the event channel and our PID. */
+	*(int *)((void *)out + 32) = getpid();
+	*(u16 *)((void *)out + 36) = atoi(eventchn);
+
+	/* Create new handle. */
+	handles[i] = new(struct xs_handle);
+	handles[i]->fd = -2;
+
+	if (!xs_introduce_domain(handles[handle], atoi(domid),
+				 atol(mfn), atoi(eventchn), path))
+		failed(handle);
+	printf("handle is %i\n", i);
+
+	/* Read in daemon pid. */
+	daemon_pid = *(int *)((void *)out + 32);
+}
+
+/* release: release domain @domid. */
+static void do_release(unsigned int handle, const char *domid)
+{
+	if (!xs_release_domain(handles[handle], atoi(domid)))
+		failed(handle);
+}
+
+/* qsort() comparator for an array of strings. */
+static int strptrcmp(const void *a, const void *b)
+{
+	return strcmp(*(char **)a, *(char **)b);
+}
+
+/* Sort @num directory entries into strcmp() order. */
+static void sort_dir(char **dir, unsigned int num)
+{
+	qsort(dir, num, sizeof(char *), strptrcmp);
+}
+
+/* Print the @numdirs children of @node (sorted in place) with their
+ * permissions and contents, recursing into each; @depth is the
+ * indentation. */
+static void dump_dir(unsigned int handle,
+		     const char *node,
+		     char **dir,
+		     unsigned int numdirs,
+		     unsigned int depth)
+{
+	unsigned int i;
+	char spacing[depth+1];
+
+	memset(spacing, ' ', depth);
+	spacing[depth] = '\0';
+
+	sort_dir(dir, numdirs);
+
+	for (i = 0; i < numdirs; i++) {
+		struct xs_permissions *perms;
+		unsigned int j, numperms;
+		unsigned int len;
+		char *contents;
+		unsigned int subnum;
+		char **subdirs;
+		char subnode[strlen(node) + 1 + strlen(dir[i]) + 1];
+
+		sprintf(subnode, "%s/%s", node, dir[i]);
+
+		perms = xs_get_permissions(handles[handle], subnode,&numperms);
+		if (!perms)
+			failed(handle);
+
+		printf("%s%s: ", spacing, dir[i]);
+		for (j = 0; j < numperms; j++) {
+			char buffer[100];
+			if (!perm_to_string(&perms[j], buffer))
+				barf("perm to string");
+			printf("%s ", buffer);
+		}
+		free(perms);
+		printf("\n");
+
+		/* Even directories can have contents. */
+		contents = xs_read(handles[handle], subnode, &len);
+		if (!contents) {
+			if (errno != EISDIR)
+				failed(handle);
+		} else {
+			printf(" %s(%.*s)\n", spacing, len, contents);
+			free(contents);
+		}
+
+		/* Every node is a directory. */
+		subdirs = xs_directory(handles[handle], subnode, &subnum);
+		if (!subdirs)
+			failed(handle);
+		dump_dir(handle, subnode, subdirs, subnum, depth+1);
+		free(subdirs);
+	}
+}
+
+/* dump: print the whole store, starting at the root. */
+static void dump(int handle)
+{
+	char **subdirs;
+	unsigned int subnum;
+
+	subdirs = xs_directory(handles[handle], "/", &subnum);
+	if (!subdirs)
+		failed(handle);
+
+	dump_dir(handle, "", subdirs, subnum, 0);
+	free(subdirs);
+}
+
+/* Read commands from stdin, one per line, and run them.  A line may
+ * start with a handle number, which selects the connection to use
+ * (opened on first use); without one, handle 0 is used.  Unless
+ * --notimeout is given, a command that takes longer than 5 seconds
+ * aborts the program. */
+int main(int argc, char *argv[])
+{
+	char line[1024];
+	bool readonly = false, timeout = true;
+	int handle;
+
+	if (argc > 1 && streq(argv[1], "--readonly")) {
+		readonly = true;
+		argc--;
+		argv++;
+	}
+
+	if (argc > 1 && streq(argv[1], "--notimeout")) {
+		timeout = false;
+		argc--;
+		argv++;
+	}
+
+	if (argc != 1)
+		usage();
+
+	/* The size of the ringbuffer: half a page minus head structure. */
+	ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
+
+	signal(SIGALRM, alarmed);
+	while (fgets(line, sizeof(line), stdin)) {
+		char *endp;
+
+		if (strspn(line, " \n") == strlen(line))
+			continue;
+		if (strstarts(line, "#"))
+			continue;
+
+		/* Strip a leading handle number and the character
+		 * that follows it. */
+		handle = strtoul(line, &endp, 10);
+		if (endp != line)
+			memmove(line, endp+1, strlen(endp));
+		else
+			handle = 0;
+		if ((unsigned int)handle >= ARRAY_SIZE(handles))
+			barf("Invalid handle %i", handle);
+		alarm_handle = handle;
+
+		if (!handles[handle]) {
+			if (readonly)
+				handles[handle] = xs_daemon_open_readonly();
+			else
+				handles[handle] = xs_daemon_open();
+			if (!handles[handle])
+				barf_perror("Opening connection to daemon");
+		}
+		command = arg(line, 0);
+
+		if (timeout)
+			alarm(5);
+		if (streq(command, "dir"))
+			do_dir(handle, arg(line, 1));
+		else if (streq(command, "read"))
+			do_read(handle, arg(line, 1));
+		else if (streq(command, "write"))
+			do_write(handle,
+				 arg(line, 1), arg(line, 2), arg(line, 3));
+		else if (streq(command, "setid"))
+			do_setid(handle, arg(line, 1));
+		else if (streq(command, "mkdir"))
+			do_mkdir(handle, arg(line, 1));
+		else if (streq(command, "rm"))
+			do_rm(handle, arg(line, 1));
+		else if (streq(command, "getperm"))
+			do_getperm(handle, arg(line, 1));
+		else if (streq(command, "setperm"))
+			do_setperm(handle, arg(line, 1), line);
+		else if (streq(command, "shutdown"))
+			do_shutdown(handle);
+		else if (streq(command, "watch"))
+			do_watch(handle, arg(line, 1), arg(line, 2));
+		else if (streq(command, "waitwatch"))
+			do_waitwatch(handle);
+		else if (streq(command, "ackwatch"))
+			do_ackwatch(handle);
+		else if (streq(command, "unwatch"))
+			do_unwatch(handle, arg(line, 1));
+		else if (streq(command, "close")) {
+			xs_daemon_close(handles[handle]);
+			handles[handle] = NULL;
+		} else if (streq(command, "start"))
+			do_start(handle, arg(line, 1));
+		else if (streq(command, "commit"))
+			do_end(handle, false);
+		else if (streq(command, "abort"))
+			do_end(handle, true);
+		else if (streq(command, "introduce"))
+			do_introduce(handle, arg(line, 1), arg(line, 2),
+				     arg(line, 3), arg(line, 4));
+		else if (streq(command, "release"))
+			do_release(handle, arg(line, 1));
+		else if (streq(command, "dump"))
+			dump(handle);
+		else if (streq(command, "sleep"))
+			sleep(atoi(arg(line, 1)));
+		else
+			barf("Unknown command %s", command);
+		fflush(stdout);
+		alarm(0);
+	}
+	return 0;
+}
-- 
cgit v1.2.3