diff options
Diffstat (limited to 'tools/xenstore/xenstored_core.c')
-rw-r--r-- | tools/xenstore/xenstored_core.c | 1354 |
1 files changed, 1354 insertions, 0 deletions
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c new file mode 100644 index 0000000000..9d15848463 --- /dev/null +++ b/tools/xenstore/xenstored_core.c @@ -0,0 +1,1354 @@ +/* + Simple prototype Xen Store Daemon providing simple tree-like database. + Copyright (C) 2005 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <sys/un.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <syslog.h> +#include <string.h> +#include <errno.h> +#include <dirent.h> +#include <getopt.h> +#include <signal.h> +#include <assert.h> +#include <setjmp.h> + +//#define DEBUG +#include "utils.h" +#include "list.h" +#include "talloc.h" +#include "xs_lib.h" +#include "xenstored.h" +#include "xenstored_core.h" +#include "xenstored_watch.h" +#include "xenstored_transaction.h" +#include "xenstored_domain.h" + +static bool verbose; +static LIST_HEAD(connections); + +#ifdef TESTING +static bool failtest = false; + +/* We override talloc's malloc. */ +void *test_malloc(size_t size) +{ + /* 1 in 20 means only about 50% of connections establish. */ + if (failtest && (random() % 32) == 0) + return NULL; + return malloc(size); +} + +static void stop_failtest(int signum __attribute__((unused))) +{ + failtest = false; +} + +/* Need these before we #define away write_all/mkdir in testing.h */ +bool test_write_all(int fd, void *contents, unsigned int len); +bool test_write_all(int fd, void *contents, unsigned int len) +{ + if (failtest && (random() % 8) == 0) { + if (len) + len = random() % len; + write(fd, contents, len); + errno = ENOSPC; + return false; + } + return write_all(fd, contents, len); +} + +int test_mkdir(const char *dir, int perms); +int test_mkdir(const char *dir, int perms) +{ + if (failtest && (random() % 8) == 0) { + errno = ENOSPC; + return -1; + } + return mkdir(dir, perms); +} +#endif /* TESTING */ + +#include "xenstored_test.h" + +/* FIXME: Ideally, this should never be called. Some can be eliminated. */ +/* Something is horribly wrong: shutdown immediately. */ +void __attribute__((noreturn)) corrupt(struct connection *conn, + const char *fmt, ...) +{ + va_list arglist; + char *str; + int saved_errno = errno; + + va_start(arglist, fmt); + str = talloc_vasprintf(NULL, fmt, arglist); + va_end(arglist); + + eprintf("xenstored corruption: connection id %i: err %s: %s", + conn ? (int)conn->id : -1, strerror(saved_errno), str); +#ifdef TESTING + /* Allow them to attach debugger. */ + sleep(30); +#endif + syslog(LOG_DAEMON, + "xenstored corruption: connection id %i: err %s: %s", + conn ? (int)conn->id : -1, strerror(saved_errno), str); + _exit(2); +} + +static bool write_message(struct connection *conn) +{ + int ret; + struct buffered_data *out = conn->out; + + if (out->inhdr) { + if (verbose) + xprintf("Writing msg %i out to %p\n", + out->hdr.msg.type, conn); + ret = conn->write(conn, out->hdr.raw + out->used, + sizeof(out->hdr) - out->used); + if (ret < 0) + return false; + + out->used += ret; + if (out->used < sizeof(out->hdr)) + return true; + + out->inhdr = false; + out->used = 0; + + /* Second write might block if non-zero. */ + if (out->hdr.msg.len) + return true; + } + + if (verbose) + xprintf("Writing data len %i out to %p\n", + out->hdr.msg.len, conn); + ret = conn->write(conn, out->buffer + out->used, + out->hdr.msg.len - out->used); + + if (ret < 0) + return false; + + out->used += ret; + if (out->used != out->hdr.msg.len) + return true; + + conn->out = NULL; + + /* If this was an event, we wait for ack, otherwise we're done. */ + if (!is_watch_event(conn, out)) + talloc_free(out); + + queue_next_event(conn); + return true; +} + +static int destroy_conn(void *_conn) +{ + struct connection *conn = _conn; + + /* Flush outgoing if possible, but don't block. */ + if (!conn->domain) { + fd_set set; + struct timeval none; + + FD_ZERO(&set); + FD_SET(conn->fd, &set); + none.tv_sec = none.tv_usec = 0; + + while (conn->out + && select(conn->fd+1, NULL, &set, NULL, &none) == 1) + if (!write_message(conn)) + break; + close(conn->fd); + } + list_del(&conn->list); + return 0; +} + +static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock, + int event_fd) +{ + struct connection *i; + int max; + + FD_ZERO(inset); + FD_ZERO(outset); + FD_SET(sock, inset); + max = sock; + FD_SET(ro_sock, inset); + if (ro_sock > max) + max = ro_sock; + FD_SET(event_fd, inset); + if (event_fd > max) + max = event_fd; + list_for_each_entry(i, &connections, list) { + if (i->domain) + continue; + if (!i->blocked) + FD_SET(i->fd, inset); + if (i->out) + FD_SET(i->fd, outset); + if (i->fd > max) + max = i->fd; + } + return max; +} + +/* Read everything from a talloc_open'ed fd. */ +static void *read_all(int *fd, unsigned int *size) +{ + unsigned int max = 4; + int ret; + void *buffer = talloc_size(fd, max); + + *size = 0; + while ((ret = read(*fd, buffer + *size, max - *size)) > 0) { + *size += ret; + if (*size == max) + buffer = talloc_realloc_size(fd, buffer, max *= 2); + } + if (ret < 0) + return NULL; + return buffer; +} + +static int destroy_fd(void *_fd) +{ + int *fd = _fd; + close(*fd); + return 0; +} + +/* Return a pointer to an fd, self-closing and attached to this pathname. */ +static int *talloc_open(const char *pathname, int flags, int mode) +{ + int *fd; + + fd = talloc(pathname, int); + *fd = open(pathname, flags, mode); + if (*fd < 0) { + int saved_errno = errno; + talloc_free(fd); + errno = saved_errno; + return NULL; + } + talloc_set_destructor(fd, destroy_fd); + return fd; +} + +/* Is child a subnode of parent, or equal? */ +bool is_child(const char *child, const char *parent) +{ + unsigned int len = strlen(parent); + + /* / should really be "" for this algorithm to work, but that's a + * usability nightmare. */ + if (streq(parent, "/")) + return true; + + if (strncmp(child, parent, len) != 0) + return false; + + return child[len] == '/' || child[len] == '\0'; +} + +/* Answer never ends in /. */ +char *node_dir_outside_transaction(const char *node) +{ + if (streq(node, "/")) + return talloc_strdup(node, xs_daemon_store()); + return talloc_asprintf(node, "%s%s", xs_daemon_store(), node); +} + +static char *node_dir(struct transaction *trans, const char *node) +{ + if (!trans || !within_transaction(trans, node)) + return node_dir_outside_transaction(node); + return node_dir_inside_transaction(trans, node); +} + +static char *node_datafile(struct transaction *trans, const char *node) +{ + return talloc_asprintf(node, "%s/.data", node_dir(trans, node)); +} + +static char *node_permfile(struct transaction *trans, const char *node) +{ + return talloc_asprintf(node, "%s/.perms", node_dir(trans, node)); +} + +struct buffered_data *new_buffer(void *ctx) +{ + struct buffered_data *data; + + data = talloc(ctx, struct buffered_data); + data->inhdr = true; + data->used = 0; + data->buffer = NULL; + + return data; +} + +/* Return length of string (including nul) at this offset. */ +unsigned int get_string(const struct buffered_data *data, unsigned int offset) +{ + const char *nul; + + if (offset >= data->used) + return 0; + + nul = memchr(data->buffer + offset, 0, data->used - offset); + if (!nul) + return 0; + + return nul - (data->buffer + offset) + 1; +} + +/* Break input into vectors, return the number, fill in up to num of them. */ +unsigned int get_strings(struct buffered_data *data, + char *vec[], unsigned int num) +{ + unsigned int off, i, len; + + off = i = 0; + while ((len = get_string(data, off)) != 0) { + if (i < num) + vec[i] = data->buffer + off; + i++; + off += len; + } + return i; +} + +/* Returns "false", meaning "connection is not blocked". */ +bool send_reply(struct connection *conn, enum xsd_sockmsg_type type, + const void *data, unsigned int len) +{ + struct buffered_data *bdata; + + /* When data gets freed, we want list entry is destroyed (so + * list entry is a child). */ + bdata = new_buffer(conn); + bdata->buffer = talloc_array(bdata, char, len); + + bdata->hdr.msg.type = type; + bdata->hdr.msg.len = len; + memcpy(bdata->buffer, data, len); + + /* There might be an event going out now. Queue behind it. */ + if (conn->out) { + assert(conn->out->hdr.msg.type == XS_WATCH_EVENT); + assert(!conn->waiting_reply); + conn->waiting_reply = bdata; + } else + conn->out = bdata; + return false; +} + +/* Some routines (write, mkdir, etc) just need a non-error return */ +bool send_ack(struct connection *conn, enum xsd_sockmsg_type type) +{ + return send_reply(conn, type, "OK", sizeof("OK")); +} + +bool send_error(struct connection *conn, int error) +{ + unsigned int i; + + for (i = 0; error != xsd_errors[i].errnum; i++) + if (i == ARRAY_SIZE(xsd_errors) - 1) + corrupt(conn, "Unknown error %i (%s)", error, + strerror(error)); + + return send_reply(conn, XS_ERROR, xsd_errors[i].errstring, + strlen(xsd_errors[i].errstring) + 1); +} + +static bool valid_chars(const char *node) +{ + /* Nodes can have lots of crap. */ + return (strspn(node, + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-/_@") == strlen(node)); +} + +static bool is_valid_nodename(const char *node) +{ + /* Must start in /. */ + if (!strstarts(node, "/")) + return false; + + /* Cannot end in / (unless it's just "/"). */ + if (strends(node, "/") && !streq(node, "/")) + return false; + + /* No double //. */ + if (strstr(node, "//")) + return false; + + return valid_chars(node); +} + +/* We expect one arg in the input: return NULL otherwise. */ +static const char *onearg(struct buffered_data *in) +{ + if (get_string(in, 0) != in->used) + return NULL; + return in->buffer; +} + +/* If it fails, returns NULL and sets errno. */ +static struct xs_permissions *get_perms(struct transaction *transaction, + const char *node, unsigned int *num) +{ + unsigned int size; + char *strings; + struct xs_permissions *ret; + int *fd; + + fd = talloc_open(node_permfile(transaction, node), O_RDONLY, 0); + if (!fd) + return NULL; + strings = read_all(fd, &size); + if (!strings) + return NULL; + + *num = count_strings(strings, size); + ret = talloc_array(node, struct xs_permissions, *num); + if (!strings_to_perms(ret, *num, strings)) + corrupt(NULL, "Permissions corrupt for %s", node); + + return ret; +} + +static char *perms_to_strings(const char *node, + struct xs_permissions *perms, unsigned int num, + unsigned int *len) +{ + unsigned int i; + char *strings = NULL; + char buffer[MAX_STRLEN(domid_t) + 1]; + + for (*len = 0, i = 0; i < num; i++) { + if (!perm_to_string(&perms[i], buffer)) + return NULL; + + strings = talloc_realloc(node, strings, char, + *len + strlen(buffer) + 1); + strcpy(strings + *len, buffer); + *len += strlen(buffer) + 1; + } + return strings; +} + +/* Destroy this, and its children, and its children's children. */ +int destroy_path(void *path) +{ + DIR *dir; + struct dirent *dirent; + + dir = opendir(path); + if (!dir) { + if (unlink(path) == 0 || errno == ENOENT) + return 0; + corrupt(NULL, "Destroying path %s", path); + } + + while ((dirent = readdir(dir)) != NULL) { + char fullpath[strlen(path) + 1 + strlen(dirent->d_name) + 1]; + sprintf(fullpath, "%s/%s", (char *)path, dirent->d_name); + if (!streq(dirent->d_name,".") && !streq(dirent->d_name,"..")) + destroy_path(fullpath); + } + closedir(dir); + if (rmdir(path) != 0) + corrupt(NULL, "Destroying directory %s", path); + return 0; +} + +/* Create a self-destructing temporary file */ +static char *tempfile(const char *path, void *contents, unsigned int len) +{ + int *fd; + char *tmppath = talloc_asprintf(path, "%s.tmp", path); + + fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640); + if (!fd) + return NULL; + talloc_set_destructor(tmppath, destroy_path); + if (!write_all(*fd, contents, len)) + return NULL; + + return tmppath; +} + +/* We assume rename() doesn't fail on moves in same dir. */ +static void commit_tempfile(const char *path) +{ + char realname[strlen(path) + 1]; + unsigned int len = strrchr(path, '.') - path; + + memcpy(realname, path, len); + realname[len] = '\0'; + if (rename(path, realname) != 0) + corrupt(NULL, "Committing %s", realname); + talloc_set_destructor(path, NULL); +} + +static bool set_perms(struct transaction *transaction, + const char *node, + struct xs_permissions *perms, unsigned int num) +{ + unsigned int len; + char *permpath, *strings; + + strings = perms_to_strings(node, perms, num, &len); + if (!strings) + return false; + + /* Create then move. */ + permpath = tempfile(node_permfile(transaction, node), strings, len); + if (!permpath) + return false; + + commit_tempfile(permpath); + return true; +} + +static char *get_parent(const char *node) +{ + char *slash = strrchr(node + 1, '/'); + if (!slash) + return talloc_strdup(node, "/"); + return talloc_asprintf(node, "%.*s", slash - node, node); +} + +static enum xs_perm_type perm_for_id(domid_t id, + struct xs_permissions *perms, + unsigned int num) +{ + unsigned int i; + + /* Owners and tools get it all... */ + if (!id || perms[0].id == id) + return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_CREATE|XS_PERM_OWNER; + + for (i = 1; i < num; i++) + if (perms[i].id == id) + return perms[i].perms; + + return perms[0].perms; +} + +/* We have a weird permissions system. You can allow someone into a + * specific node without allowing it in the parents. If it's going to + * fail, however, we don't want the errno to indicate any information + * about the node. */ +static int check_with_parents(struct connection *conn, const char *node, + int errnum) +{ + struct xs_permissions *perms; + unsigned int num; + + /* We always tell them about memory failures. */ + if (errnum == ENOMEM) + return errnum; + + do { + node = get_parent(node); + perms = get_perms(conn->transaction, node, &num); + if (perms) + break; + } while (!streq(node, "/")); + + /* No permission at root? We're in trouble. */ + if (!perms) + corrupt(conn, "No permissions file at root"); + + if (!(perm_for_id(conn->id, perms, num) & XS_PERM_READ)) + return EACCES; + + return errnum; +} + +bool check_node_perms(struct connection *conn, const char *node, + enum xs_perm_type perm) +{ + struct xs_permissions *perms; + unsigned int num; + + if (!node) { + errno = EINVAL; + return false; + } + + if (!node || !is_valid_nodename(node)) { + errno = EINVAL; + return false; + } + + if (!conn->write && (perm & XS_PERM_WRITE)) { + errno = EROFS; + return false; + } + + perms = get_perms(conn->transaction, node, &num); + /* No permissions. If we want to create it and + * it doesn't exist, check parent directory. */ + if (!perms && errno == ENOENT && (perm & XS_PERM_CREATE)) { + char *parent = get_parent(node); + if (!parent) + return false; + + perms = get_perms(conn->transaction, parent, &num); + } + if (!perms) { + errno = check_with_parents(conn, node, errno); + return false; + } + + if (perm_for_id(conn->id, perms, num) & perm) + return true; + + errno = check_with_parents(conn, node, EACCES); + return false; +} + +static bool send_directory(struct connection *conn, const char *node) +{ + char *path, *reply = talloc_strdup(node, ""); + unsigned int reply_len = 0; + DIR *dir; + struct dirent *dirent; + + if (!check_node_perms(conn, node, XS_PERM_READ)) + return send_error(conn, errno); + + path = node_dir(conn->transaction, node); + dir = opendir(path); + if (!dir) + return send_error(conn, errno); + + while ((dirent = readdir(dir)) != NULL) { + int len = strlen(dirent->d_name) + 1; + + if (!valid_chars(dirent->d_name)) + continue; + + reply = talloc_realloc(path, reply, char, reply_len + len); + strcpy(reply + reply_len, dirent->d_name); + reply_len += len; + } + closedir(dir); + + return send_reply(conn, XS_DIRECTORY, reply, reply_len); +} + +static bool do_read(struct connection *conn, const char *node) +{ + char *value; + unsigned int size; + int *fd; + + if (!check_node_perms(conn, node, XS_PERM_READ)) + return send_error(conn, errno); + + fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0); + if (!fd) { + /* Data file doesn't exist? We call that a directory */ + if (errno == ENOENT) + errno = EISDIR; + return send_error(conn, errno); + } + + value = read_all(fd, &size); + if (!value) + return send_error(conn, errno); + + return send_reply(conn, XS_READ, value, size); +} + +/* Create a new directory. Optionally put data in it (if data != NULL) */ +static bool new_directory(struct connection *conn, + const char *node, void *data, unsigned int datalen) +{ + struct xs_permissions perms; + char *permstr; + unsigned int len; + int *fd; + char *dir = node_dir(conn->transaction, node); + + if (mkdir(dir, 0750) != 0) + return false; + + /* Set destructor so we clean up if neccesary. */ + talloc_set_destructor(dir, destroy_path); + + /* Default permisisons: we own it, noone else has permission. */ + perms.id = conn->id; + perms.perms = XS_PERM_NONE; + + permstr = perms_to_strings(dir, &perms, 1, &len); + fd = talloc_open(node_permfile(conn->transaction, node), + O_WRONLY|O_CREAT|O_EXCL, 0640); + if (!fd || !write_all(*fd, permstr, len)) + return false; + + if (data) { + char *datapath = node_datafile(conn->transaction, node); + + fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640); + if (!fd || !write_all(*fd, data, datalen)) + return false; + } + + /* Finished! */ + talloc_set_destructor(dir, NULL); + return true; +} + +/* path, flags, data... */ +static bool do_write(struct connection *conn, struct buffered_data *in) +{ + unsigned int offset, datalen; + char *vec[2]; + char *node, *tmppath; + enum xs_perm_type mode; + struct stat st; + + /* Extra "strings" can be created by binary data. */ + if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) + return send_error(conn, EINVAL); + + node = vec[0]; + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + offset = strlen(vec[0]) + strlen(vec[1]) + 2; + datalen = in->used - offset; + + if (streq(vec[1], XS_WRITE_NONE)) + mode = XS_PERM_WRITE; + else if (streq(vec[1], XS_WRITE_CREATE)) + mode = XS_PERM_WRITE|XS_PERM_CREATE; + else if (streq(vec[1], XS_WRITE_CREATE_EXCL)) + mode = XS_PERM_WRITE|XS_PERM_CREATE; + else + return send_error(conn, EINVAL); + + if (!check_node_perms(conn, node, mode)) + return send_error(conn, errno); + + if (lstat(node_dir(conn->transaction, node), &st) != 0) { + /* Does not exist... */ + if (errno != ENOENT) + return send_error(conn, errno); + + /* Not going to create it? */ + if (!(mode & XS_PERM_CREATE)) + return send_error(conn, ENOENT); + + if (!new_directory(conn, node, in->buffer + offset, datalen)) + return send_error(conn, errno); + } else { + /* Exists... */ + if (streq(vec[1], XS_WRITE_CREATE_EXCL)) + return send_error(conn, EEXIST); + + tmppath = tempfile(node_datafile(conn->transaction, node), + in->buffer + offset, datalen); + if (!tmppath) + return send_error(conn, errno); + + commit_tempfile(tmppath); + } + + add_change_node(conn->transaction, node); + send_ack(conn, XS_WRITE); + fire_watches(conn->transaction, node); + return false; +} + +static bool do_mkdir(struct connection *conn, const char *node) +{ + if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) + return send_error(conn, errno); + + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + if (!new_directory(conn, node, NULL, 0)) + return send_error(conn, errno); + + add_change_node(conn->transaction, node); + send_ack(conn, XS_MKDIR); + fire_watches(conn->transaction, node); + return false; +} + +static bool do_rm(struct connection *conn, const char *node) +{ + char *tmppath, *path; + + if (!check_node_perms(conn, node, XS_PERM_WRITE)) + return send_error(conn, errno); + + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + if (streq(node, "/")) + return send_error(conn, EINVAL); + + /* We move the directory to temporary name, destructor cleans up. */ + path = node_dir(conn->transaction, node); + tmppath = talloc_asprintf(node, "%s.tmp", path); + talloc_set_destructor(tmppath, destroy_path); + + if (rename(path, tmppath) != 0) + return send_error(conn, errno); + + add_change_node(conn->transaction, node); + send_ack(conn, XS_RM); + fire_watches(conn->transaction, node); + return false; +} + +static bool do_get_perms(struct connection *conn, const char *node) +{ + struct xs_permissions *perms; + char *strings; + unsigned int len, num; + + if (!check_node_perms(conn, node, XS_PERM_READ)) + return send_error(conn, errno); + + perms = get_perms(conn->transaction, node, &num); + if (!perms) + return send_error(conn, errno); + + strings = perms_to_strings(node, perms, num, &len); + if (!strings) + return send_error(conn, errno); + + return send_reply(conn, XS_GET_PERMS, strings, len); +} + +static bool do_set_perms(struct connection *conn, struct buffered_data *in) +{ + unsigned int num; + char *node; + struct xs_permissions *perms; + + num = count_strings(in->buffer, in->used); + if (num < 2) + return send_error(conn, EINVAL); + + /* First arg is node name. */ + node = in->buffer; + in->buffer += strlen(in->buffer) + 1; + num--; + + if (!within_transaction(conn->transaction, node)) + return send_error(conn, EROFS); + + if (transaction_block(conn, node)) + return true; + + /* We must own node to do this (tools can do this too). */ + if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) + return send_error(conn, errno); + + perms = talloc_array(node, struct xs_permissions, num); + if (!strings_to_perms(perms, num, in->buffer)) + return send_error(conn, errno); + + if (!set_perms(conn->transaction, node, perms, num)) + return send_error(conn, errno); + add_change_node(conn->transaction, node); + send_ack(conn, XS_SET_PERMS); + fire_watches(conn->transaction, node); + return false; +} + +/* Process "in" for conn: "in" will vanish after this conversation, so + * we can talloc off it for temporary variables. May free "conn". + * Returns true if can't complete due to block. + */ +static bool process_message(struct connection *conn, struct buffered_data *in) +{ + switch (in->hdr.msg.type) { + case XS_DIRECTORY: + return send_directory(conn, onearg(in)); + + case XS_READ: + return do_read(conn, onearg(in)); + + case XS_WRITE: + return do_write(conn, in); + + case XS_MKDIR: + return do_mkdir(conn, onearg(in)); + + case XS_RM: + return do_rm(conn, onearg(in)); + + case XS_GET_PERMS: + return do_get_perms(conn, onearg(in)); + + case XS_SET_PERMS: + return do_set_perms(conn, in); + + case XS_SHUTDOWN: + send_ack(conn, XS_SHUTDOWN); + /* Everything hangs off auto-free context, freed at exit. */ + exit(0); + +#ifdef TESTING + case XS_DEBUG: { + /* For testing, we allow them to set id. */ + if (streq(in->buffer, "setid")) { + conn->id = atoi(in->buffer + get_string(in, 0)); + send_ack(conn, XS_DEBUG); + } else if (streq(in->buffer, "failtest")) { + if (get_string(in, 0) < in->used) + srandom(atoi(in->buffer + get_string(in, 0))); + send_ack(conn, XS_DEBUG); + failtest = true; + } + return false; + } +#endif /* TESTING */ + + case XS_WATCH: + return do_watch(conn, in); + + case XS_WATCH_ACK: + return do_watch_ack(conn); + + case XS_UNWATCH: + return do_unwatch(conn, onearg(in)); + + case XS_TRANSACTION_START: + return do_transaction_start(conn, onearg(in)); + + case XS_TRANSACTION_END: + return do_transaction_end(conn, onearg(in)); + + case XS_INTRODUCE: + return do_introduce(conn, in); + + case XS_RELEASE: + return do_release(conn, onearg(in)); + + case XS_GETDOMAINPATH: + return do_get_domain_path(conn, onearg(in)); + + case XS_WATCH_EVENT: + default: + eprintf("Client unknown operation %i", in->hdr.msg.type); + send_error(conn, ENOSYS); + return false; + } +} + +static int out_of_mem(void *data) +{ + longjmp(*(jmp_buf *)data, 1); +} + +static void consider_message(struct connection *conn) +{ + struct buffered_data *in = NULL; + enum xsd_sockmsg_type type = conn->in->hdr.msg.type; + jmp_buf talloc_fail; + + /* For simplicity, we kill the connection on OOM. */ + talloc_set_fail_handler(out_of_mem, &talloc_fail); + if (setjmp(talloc_fail)) { + talloc_free(conn); + goto end; + } + + if (verbose) + xprintf("Got message %i len %i from %p\n", + type, conn->in->hdr.msg.len, conn); + + /* We might get a command while waiting for an ack: this means + * the other end discarded it: we will re-transmit. */ + if (type != XS_WATCH_ACK) + reset_watch_event(conn); + + /* Careful: process_message may free connection. We detach + * "in" beforehand and allocate the new buffer to avoid + * touching conn after process_message. + */ + in = talloc_steal(talloc_autofree_context(), conn->in); + conn->in = new_buffer(conn); + if (process_message(conn, in)) { + /* Blocked by transaction: queue for re-xmit. */ + talloc_free(conn->in); + conn->in = in; + in = NULL; + } + +end: + talloc_free(in); + talloc_set_fail_handler(NULL, NULL); + if (talloc_total_blocks(NULL) + != talloc_total_blocks(talloc_autofree_context()) + 1) + talloc_report_full(NULL, stderr); +} + +/* Errors in reading or allocating here mean we get out of sync, so we + * drop the whole client connection. */ +void handle_input(struct connection *conn) +{ + int bytes; + struct buffered_data *in; + + assert(!conn->blocked); + in = conn->in; + + /* Not finished header yet? */ + if (in->inhdr) { + bytes = conn->read(conn, in->hdr.raw + in->used, + sizeof(in->hdr) - in->used); + if (bytes <= 0) + goto bad_client; + in->used += bytes; + if (in->used != sizeof(in->hdr)) + return; + + if (in->hdr.msg.len > PATH_MAX) { + syslog(LOG_DAEMON, "Client tried to feed us %i", + in->hdr.msg.len); + goto bad_client; + } + + in->buffer = talloc_array(in, char, in->hdr.msg.len); + if (!in->buffer) + goto bad_client; + in->used = 0; + in->inhdr = false; + return; + } + + bytes = conn->read(conn, in->buffer + in->used, + in->hdr.msg.len - in->used); + if (bytes < 0) + goto bad_client; + + in->used += bytes; + if (in->used != in->hdr.msg.len) + return; + + consider_message(conn); + return; + +bad_client: + /* Kill it. */ + talloc_free(conn); +} + +void handle_output(struct connection *conn) +{ + if (!write_message(conn)) + talloc_free(conn); +} + +/* If a transaction has ended, see if we can unblock any connections. */ +static void unblock_connections(void) +{ + struct connection *i, *tmp; + + list_for_each_entry_safe(i, tmp, &connections, list) { + if (!i->blocked) + continue; + + if (!transaction_covering_node(i->blocked)) { + talloc_free(i->blocked); + i->blocked = NULL; + consider_message(i); + } + } + + /* To balance bias, move first entry to end. */ + if (!list_empty(&connections)) { + i = list_top(&connections, struct connection, list); + list_del(&i->list); + list_add_tail(&i->list, &connections); + } +} + +struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) +{ + struct connection *new; + jmp_buf talloc_fail; + + new = talloc(talloc_autofree_context(), struct connection); + if (!new) + return NULL; + + new->blocked = false; + new->out = new->waiting_reply = NULL; + new->event = NULL; + new->fd = -1; + new->id = 0; + new->domain = NULL; + new->transaction = NULL; + new->write = write; + new->read = read; + + talloc_set_fail_handler(out_of_mem, &talloc_fail); + if (setjmp(talloc_fail)) { + talloc_free(new); + return NULL; + } + new->in = new_buffer(new); + talloc_set_fail_handler(NULL, NULL); + + list_add_tail(&new->list, &connections); + talloc_set_destructor(new, destroy_conn); + return new; +} + +static int writefd(struct connection *conn, const void *data, unsigned int len) +{ + return write(conn->fd, data, len); +} + +static int readfd(struct connection *conn, void *data, unsigned int len) +{ + return read(conn->fd, data, len); +} + +static void accept_connection(int sock, bool canwrite) +{ + int fd; + struct connection *conn; + + fd = accept(sock, NULL, NULL); + if (fd < 0) + return; + + conn = new_connection(canwrite ? writefd : NULL, readfd); + if (conn) + conn->fd = fd; + else + close(fd); +} + +/* Calc timespan from now to absolute time. */ +static void time_relative_to_now(struct timeval *tv) +{ + struct timeval now; + + gettimeofday(&now, NULL); + if (timercmp(&now, tv, >)) + timerclear(tv); + else { + tv->tv_sec -= now.tv_sec; + if (now.tv_usec > tv->tv_usec) { + tv->tv_sec--; + tv->tv_usec += 1000000; + } + tv->tv_usec -= now.tv_usec; + } +} + +static struct option options[] = { { "no-fork", 0, NULL, 'N' }, + { "verbose", 0, NULL, 'V' }, + { "output-pid", 0, NULL, 'P' }, + { NULL, 0, NULL, 0 } }; + +int main(int argc, char *argv[]) +{ + int opt, *sock, *ro_sock, event_fd, max, tmpout; + struct sockaddr_un addr; + fd_set inset, outset; + bool dofork = true; + bool outputpid = false; + + while ((opt = getopt_long(argc, argv, "DV", options, NULL)) != -1) { + switch (opt) { + case 'N': + dofork = false; + break; + case 'V': + verbose = true; + break; + case 'P': + outputpid = true; + break; + } + } + if (optind != argc) + barf("%s: No arguments desired", argv[0]); + + talloc_enable_leak_report_full(); + + /* Create sockets for them to listen to. */ + sock = talloc(talloc_autofree_context(), int); + *sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (*sock < 0) + barf_perror("Could not create socket"); + ro_sock = talloc(talloc_autofree_context(), int); + *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (*ro_sock < 0) + barf_perror("Could not create socket"); + talloc_set_destructor(sock, destroy_fd); + talloc_set_destructor(ro_sock, destroy_fd); + + /* Don't kill us with SIGPIPE. */ + signal(SIGPIPE, SIG_IGN); + + /* FIXME: Be more sophisticated, don't mug running daemon. */ + unlink(xs_daemon_socket()); + unlink(xs_daemon_socket_ro()); + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, xs_daemon_socket()); + if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0) + barf_perror("Could not bind socket to %s", xs_daemon_socket()); + strcpy(addr.sun_path, xs_daemon_socket_ro()); + if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0) + barf_perror("Could not bind socket to %s", + xs_daemon_socket_ro()); + if (chmod(xs_daemon_socket(), 0600) != 0 + || chmod(xs_daemon_socket_ro(), 0660) != 0) + barf_perror("Could not chmod sockets"); + + if (listen(*sock, 1) != 0 + || listen(*ro_sock, 1) != 0) + barf_perror("Could not listen on sockets"); + + /* If we're the first, create .perms file for root. */ + if (mkdir(xs_daemon_store(), 0750) == 0) { + struct xs_permissions perms; + char *root = talloc_strdup(talloc_autofree_context(), "/"); + + perms.id = 0; + perms.perms = XS_PERM_READ; + if (!set_perms(NULL, root, &perms, 1)) + barf_perror("Could not create permissions in root"); + talloc_free(root); + mkdir(xs_daemon_transactions(), 0750); + } else if (errno != EEXIST) + barf_perror("Could not create root %s", xs_daemon_store()); + + /* Listen to hypervisor. */ + event_fd = domain_init(); + + /* Debugging: daemonize() closes standard fds, so dup here. */ + tmpout = dup(STDOUT_FILENO); + if (dofork) { + openlog("xenstored", 0, LOG_DAEMON); + daemonize(); + } + + if (outputpid) { + char buffer[20]; + sprintf(buffer, "%i\n", getpid()); + write(tmpout, buffer, strlen(buffer)); + } + close(tmpout); + +#ifdef TESTING + signal(SIGUSR1, stop_failtest); +#endif + + /* Get ready to listen to the tools. */ + max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd); + + /* Main loop. */ + for (;;) { + struct connection *i; + struct timeval *tvp = NULL, tv; + + timerclear(&tv); + shortest_transaction_timeout(&tv); + if (timerisset(&tv)) { + time_relative_to_now(&tv); + tvp = &tv; + } + + if (select(max+1, &inset, &outset, NULL, tvp) < 0) { + if (errno == EINTR) + continue; + barf_perror("Select failed"); + } + + if (FD_ISSET(*sock, &inset)) + accept_connection(*sock, true); + + if (FD_ISSET(*ro_sock, &inset)) + accept_connection(*ro_sock, false); + + if (FD_ISSET(event_fd, &inset)) + handle_event(event_fd); + + list_for_each_entry(i, &connections, list) { + if (i->domain) + continue; + + /* Operations can delete themselves or others + * (xs_release): list is not safe after input, + * so break. */ + if (FD_ISSET(i->fd, &inset)) { + handle_input(i); + break; + } + if (FD_ISSET(i->fd, &outset)) { + handle_output(i); + break; + } + } + + if (tvp) + check_transaction_timeout(); + + /* If transactions ended, we might be able to do more work. */ + unblock_connections(); + + max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd); + } +} |