aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2010-06-04 11:35:23 +0100
committerKeir Fraser <keir.fraser@citrix.com>2010-06-04 11:35:23 +0100
commit78c8506e77b393c00001fce9e8b21eda22ed3007 (patch)
tree389ca22b5b3f4cb280364e264beca2974ce4b924
parent77622d85cf3fd61e96b71750d75af58b60eddfc4 (diff)
downloadxen-78c8506e77b393c00001fce9e8b21eda22ed3007.tar.gz
xen-78c8506e77b393c00001fce9e8b21eda22ed3007.tar.bz2
xen-78c8506e77b393c00001fce9e8b21eda22ed3007.zip
Watchdog timers for domains
Each domain is allowed to set, reset and disable its timers; when any timer runs out the domain is killed.
Patch from Christian Limpach <Christian.Limpach@citrix.com>
Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
-rw-r--r--.hgignore1
-rw-r--r--tools/libxc/xc_domain.c28
-rw-r--r--tools/libxc/xenctrl.h4
-rw-r--r--tools/misc/Makefile8
-rw-r--r--tools/misc/xen-watchdog59
-rw-r--r--tools/misc/xenwatchdogd.c96
-rw-r--r--xen/common/domain.c11
-rw-r--r--xen/common/keyhandler.c5
-rw-r--r--xen/common/schedule.c85
-rw-r--r--xen/common/shutdown.c9
-rw-r--r--xen/include/public/sched.h17
-rw-r--r--xen/include/xen/sched.h11
12 files changed, 328 insertions, 6 deletions
diff --git a/.hgignore b/.hgignore
index 2bb852ee6d..34c076b21d 100644
--- a/.hgignore
+++ b/.hgignore
@@ -237,6 +237,7 @@
^tools/xcutils/xc_restore$
^tools/xcutils/xc_save$
^tools/xcutils/readnotes$
+^tools/misc/xenwatchdogd$
^tools/xenfb/sdlfb$
^tools/xenfb/vncfb$
^tools/xenmon/xentrace_setmask$
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index ae323fe854..e89b738c75 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -366,6 +366,34 @@ int xc_vcpu_getcontext(xc_interface *xch,
return rc;
}
+/*
+ * Issue a SCHEDOP_watchdog hypercall.
+ *
+ * id and timeout are handed to Xen unchanged inside a sched_watchdog_t.
+ * Returns whatever the hypercall returns, or -1 if the argument structure
+ * could not be locked in memory.
+ */
+int xc_watchdog(xc_interface *xch,
+                uint32_t id,
+                uint32_t timeout)
+{
+    int rc;
+    sched_watchdog_t wd;
+    DECLARE_HYPERCALL;
+
+    wd.id = id;
+    wd.timeout = timeout;
+
+    hypercall.op = __HYPERVISOR_sched_op;
+    hypercall.arg[0] = (unsigned long)SCHEDOP_watchdog;
+    hypercall.arg[1] = (unsigned long)&wd;
+
+    /* The argument must stay resident while Xen reads it. */
+    if ( lock_pages(&wd, sizeof(wd)) != 0 )
+    {
+        PERROR("Could not lock memory for Xen hypercall");
+        return -1;
+    }
+
+    rc = do_xen_hypercall(xch, &hypercall);
+    unlock_pages(&wd, sizeof(wd));
+
+    return rc;
+}
+
int xc_shadow_control(xc_interface *xch,
uint32_t domid,
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 4bcf29d979..705df1d2ff 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -332,6 +332,10 @@ int xc_domain_shutdown(xc_interface *xch,
uint32_t domid,
int reason);
+int xc_watchdog(xc_interface *xch,
+ uint32_t id,
+ uint32_t timeout);
+
int xc_vcpu_setaffinity(xc_interface *xch,
uint32_t domid,
int vcpu,
diff --git a/tools/misc/Makefile b/tools/misc/Makefile
index 01d4f4fbb9..72ebcd6b57 100644
--- a/tools/misc/Makefile
+++ b/tools/misc/Makefile
@@ -10,7 +10,7 @@ CFLAGS += $(INCLUDES)
HDRS = $(wildcard *.h)
-TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool
+TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd
TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx
TARGETS := $(TARGETS-y)
@@ -22,7 +22,7 @@ INSTALL_BIN-y := xencons
INSTALL_BIN-$(CONFIG_X86) += xen-detect
INSTALL_BIN := $(INSTALL_BIN-y)
-INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool
+INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd
INSTALL_SBIN-$(CONFIG_X86) += xen-hvmctx
INSTALL_SBIN := $(INSTALL_SBIN-y)
@@ -37,8 +37,10 @@ build: $(TARGETS)
install: build
$(INSTALL_DIR) $(DESTDIR)$(BINDIR)
$(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
+ $(INSTALL_DIR) $(DESTDIR)$(CONFIG_DIR)/init.d
$(INSTALL_PYTHON_PROG) $(INSTALL_BIN) $(DESTDIR)$(BINDIR)
$(INSTALL_PYTHON_PROG) $(INSTALL_SBIN) $(DESTDIR)$(SBINDIR)
+ $(INSTALL_PROG) xen-watchdog $(DESTDIR)$(CONFIG_DIR)/init.d
set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d install-recurse; done
.PHONY: clean
@@ -49,7 +51,7 @@ clean:
%.o: %.c $(HDRS) Makefile
$(CC) -c $(CFLAGS) -o $@ $<
-xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool: %: %.o Makefile
+xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool xenwatchdogd: %: %.o Makefile
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore)
gtraceview: %: %.o Makefile
diff --git a/tools/misc/xen-watchdog b/tools/misc/xen-watchdog
new file mode 100644
index 0000000000..417b451922
--- /dev/null
+++ b/tools/misc/xen-watchdog
@@ -0,0 +1,59 @@
+#! /bin/bash
+#
+# xen-watchdog
+#
+# chkconfig: 2345 21 79
+# description: Run domain watchdog daemon
+#
+
+# Source function library.
+. /etc/init.d/functions
+
+start() {
+ local r
+ base="watchdogd"
+ echo -n $"Starting domain watchdog daemon: "
+
+ /usr/sbin/xenwatchdogd 30 15
+ r=$?
+ [ "$r" -eq 0 ] && success $"$base startup" || failure $"$base startup"
+ echo
+
+ return $r
+}
+
+stop() {
+ local r
+ base="watchdogd"
+ echo -n $"Stopping domain watchdog daemon: "
+
+ killall -USR1 watchdogd 2>/dev/null
+ r=$?
+ [ "$r" -eq 0 ] && success $"$base stop" || failure $"$base stop"
+ echo
+
+ return $r
+}
+
+case "$1" in
+ start)
+ start
+ ;;
+ stop)
+ stop
+ ;;
+ restart)
+ stop
+ start
+ ;;
+ status)
+ ;;
+ condrestart)
+ stop
+ start
+ ;;
+ *)
+ echo $"Usage: $0 {start|stop|status|restart|condrestart}"
+ exit 1
+esac
+
diff --git a/tools/misc/xenwatchdogd.c b/tools/misc/xenwatchdogd.c
new file mode 100644
index 0000000000..aa96834e5f
--- /dev/null
+++ b/tools/misc/xenwatchdogd.c
@@ -0,0 +1,96 @@
+
+#include <err.h>
+#include <limits.h>
+#include "xenctrl.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+
+xc_interface *h;
+int id = 0;
+
+/*
+ * Detach into the background: the parent exits, and the child clears its
+ * umask, starts a new session, moves to "/" and points stdin, stdout and
+ * stderr at /dev/null.  Exits through err() if fork, setsid or chdir fails.
+ */
+void daemonize(void)
+{
+    pid_t child = fork();
+
+    if (child == -1)
+        err(1, "fork");
+    if (child != 0)
+        exit(0);    /* parent is done; the child carries on below */
+
+    umask(0);
+    if (setsid() < 0)
+        err(1, "setsid");
+    if (chdir("/") < 0)
+        err(1, "chdir /");
+
+    /* The results of these redirections are not checked. */
+    freopen("/dev/null", "r", stdin);
+    freopen("/dev/null", "w", stdout);
+    freopen("/dev/null", "w", stderr);
+}
+
+/*
+ * Signal handler: if a watchdog has been set up (id != 0), re-arm it with a
+ * timeout of 300 before exiting, so the timer stays armed after the daemon
+ * is gone.  Always exits with status 0.
+ */
+void catch_exit(int sig)
+{
+    if (id == 0)
+        exit(0);
+
+    xc_watchdog(h, id, 300);
+    exit(0);
+}
+
+/*
+ * Signal handler: if a watchdog has been set up (id != 0), destroy it by
+ * passing a timeout of 0, then exit.  Always exits with status 0.
+ */
+void catch_usr1(int sig)
+{
+    if (id == 0)
+        exit(0);
+
+    xc_watchdog(h, id, 0);
+    exit(0);
+}
+
+/*
+ * Parse arg as a number of seconds in the range 0..INT_MAX.  The whole
+ * string must be consumed.  On malformed or out-of-range input, exit with a
+ * diagnostic that names the argument via `what`.
+ */
+static int parse_seconds(const char *arg, const char *what)
+{
+    char *end;
+    unsigned long val = strtoul(arg, &end, 0);
+
+    /*
+     * end == arg: no digits at all; *end != '\0': trailing junk.
+     * Overflow makes strtoul() return ULONG_MAX, which the range check
+     * rejects as well.
+     */
+    if (end == arg || *end != '\0' || val > INT_MAX)
+        errx(1, "invalid %s: '%s'", what, arg);
+
+    return (int)val;
+}
+
+/*
+ * xenwatchdogd <timeout> [<sleep>]
+ *
+ * Sets up a domain watchdog with the given timeout (seconds) and then pokes
+ * it every <sleep> seconds (default: timeout / 2) until killed.
+ * SIGHUP/SIGINT/SIGQUIT/SIGTERM are routed to catch_exit and SIGUSR1 to
+ * catch_usr1.  Never returns normally.
+ */
+int main(int argc, char **argv)
+{
+    int t, s;
+
+    if (argc < 2 || argc > 3)
+        errx(1, "usage: %s <timeout> [<sleep>]", argv[0]);
+
+    /*
+     * Validate the arguments before daemonize(): it redirects stderr to
+     * /dev/null, so anything reported afterwards is invisible to the user.
+     */
+    t = parse_seconds(argv[1], "timeout");
+    if (t == 0)
+        errx(1, "timeout must be at least 1 second");
+
+    s = t / 2;
+    if (argc == 3)
+        s = parse_seconds(argv[2], "sleep");
+
+    /* Poking no more often than the timer expires guarantees it fires. */
+    if (s >= t)
+        errx(1, "sleep (%d) must be shorter than timeout (%d)", s, t);
+
+    daemonize();
+
+    h = xc_interface_open(NULL, NULL, 0);
+    if (h == NULL)
+        err(1, "xc_interface_open");
+
+    if (signal(SIGHUP, &catch_exit) == SIG_ERR)
+        err(1, "signal");
+    if (signal(SIGINT, &catch_exit) == SIG_ERR)
+        err(1, "signal");
+    if (signal(SIGQUIT, &catch_exit) == SIG_ERR)
+        err(1, "signal");
+    if (signal(SIGTERM, &catch_exit) == SIG_ERR)
+        err(1, "signal");
+    if (signal(SIGUSR1, &catch_usr1) == SIG_ERR)
+        err(1, "signal");
+
+    /* id 0 asks for a new watchdog; the positive return value names it. */
+    id = xc_watchdog(h, 0, t);
+    if (id <= 0)
+        err(1, "xc_watchdog setup");
+
+    for (;;) {
+        sleep(s);
+        if (xc_watchdog(h, id, t) != 0)
+            err(1, "xc_watchdog");
+    }
+}
diff --git a/xen/common/domain.c b/xen/common/domain.c
index c7928f94d6..91e87179e1 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -209,8 +209,8 @@ struct domain *domain_create(
domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
{
struct domain *d, **pd;
- enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
- INIT_gnttab = 1u<<3, INIT_arch = 1u<<4 };
+ enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
+ INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
int init_status = 0;
int poolid = CPUPOOLID_NONE;
@@ -225,6 +225,9 @@ struct domain *domain_create(
goto fail;
init_status |= INIT_xsm;
+ watchdog_domain_init(d);
+ init_status |= INIT_watchdog;
+
atomic_set(&d->refcnt, 1);
spin_lock_init_prof(d, domain_lock);
spin_lock_init_prof(d, page_alloc_lock);
@@ -327,6 +330,8 @@ struct domain *domain_create(
}
if ( init_status & INIT_rangeset )
rangeset_domain_destroy(d);
+ if ( init_status & INIT_watchdog )
+ watchdog_domain_destroy(d);
if ( init_status & INIT_xsm )
xsm_free_security_domain(d);
xfree(d->pirq_mask);
@@ -604,6 +609,8 @@ static void complete_domain_destroy(struct rcu_head *head)
arch_domain_destroy(d);
+ watchdog_domain_destroy(d);
+
rangeset_domain_destroy(d);
cpupool_rm_domain(d);
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index b4ceb627fc..9133a04a8f 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -241,6 +241,7 @@ static void dump_domains(unsigned char key)
for_each_domain ( d )
{
+ unsigned int i;
printk("General information for domain %u:\n", d->domain_id);
cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
@@ -254,6 +255,10 @@ static void dump_domains(unsigned char key)
d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
d->handle[12], d->handle[13], d->handle[14], d->handle[15],
d->vm_assist);
+ for (i = 0 ; i < NR_DOMAIN_WATCHDOG_TIMERS; i++)
+ if ( test_bit(i, &d->watchdog_inuse_map) )
+ printk(" watchdog %d expires in %d seconds\n",
+ i, (u32)((d->watchdog_timer[i].expires - NOW()) >> 30));
arch_dump_domain_info(d);
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 655dbd034c..c4c88e3db1 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -632,6 +632,78 @@ static long do_yield(void)
return 0;
}
+/*
+ * Timer callback for a domain watchdog.  data is the struct domain that owns
+ * the timer; unless that domain is already shutting down or dying, log the
+ * expiry and shut it down with reason SHUTDOWN_watchdog.
+ */
+static void domain_watchdog_timeout(void *data)
+{
+    struct domain *d = data;
+
+    if ( !d->is_shutting_down && !d->is_dying )
+    {
+        printk("Watchdog timer fired for domain %u\n", d->domain_id);
+        domain_shutdown(d, SHUTDOWN_watchdog);
+    }
+}
+
+/*
+ * Set up, poke or destroy one of d's watchdog timers.
+ *
+ * id == 0: claim the first free timer and arm it for `timeout` seconds.
+ *          Returns the new watchdog id (slot index + 1), or -EEXIST if
+ *          every slot is in use.
+ * id != 0: id names an existing watchdog (slot id - 1).  timeout == 0
+ *          stops the timer and frees the slot; otherwise the timer is
+ *          re-armed for `timeout` seconds.  Returns 0, or -EEXIST if the
+ *          slot is not in use.
+ *
+ * Returns -EINVAL if id is above NR_DOMAIN_WATCHDOG_TIMERS.
+ */
+static long domain_watchdog(struct domain *d, uint32_t id, uint32_t timeout)
+{
+    long rc = 0;
+    uint32_t slot;
+
+    if ( id > NR_DOMAIN_WATCHDOG_TIMERS )
+        return -EINVAL;
+
+    spin_lock(&d->watchdog_lock);
+
+    if ( id == 0 )
+    {
+        /* Assume failure until a free slot has been claimed. */
+        rc = -EEXIST;
+        for ( slot = 0; slot < NR_DOMAIN_WATCHDOG_TIMERS; slot++ )
+        {
+            if ( !test_and_set_bit(slot, &d->watchdog_inuse_map) )
+            {
+                set_timer(&d->watchdog_timer[slot],
+                          NOW() + SECONDS(timeout));
+                rc = slot + 1;
+                break;
+            }
+        }
+    }
+    else
+    {
+        /* Ids handed out to the guest are 1-based. */
+        slot = id - 1;
+
+        if ( !test_bit(slot, &d->watchdog_inuse_map) )
+            rc = -EEXIST;
+        else if ( timeout != 0 )
+            set_timer(&d->watchdog_timer[slot], NOW() + SECONDS(timeout));
+        else
+        {
+            stop_timer(&d->watchdog_timer[slot]);
+            clear_bit(slot, &d->watchdog_inuse_map);
+        }
+    }
+
+    spin_unlock(&d->watchdog_lock);
+    return rc;
+}
+
+/*
+ * Prepare d's watchdog state: no timer in use, lock initialised, and every
+ * timer slot bound to domain_watchdog_timeout with d as its argument.
+ */
+void watchdog_domain_init(struct domain *d)
+{
+    unsigned int slot;
+
+    d->watchdog_inuse_map = 0;
+    spin_lock_init(&d->watchdog_lock);
+
+    for ( slot = 0; slot < NR_DOMAIN_WATCHDOG_TIMERS; slot++ )
+    {
+        init_timer(&d->watchdog_timer[slot], domain_watchdog_timeout, d, 0);
+    }
+}
+
+/* Kill every watchdog timer belonging to d, whether or not it is in use. */
+void watchdog_domain_destroy(struct domain *d)
+{
+    struct timer *t;
+    struct timer *end = d->watchdog_timer + NR_DOMAIN_WATCHDOG_TIMERS;
+
+    for ( t = d->watchdog_timer; t < end; t++ )
+        kill_timer(t);
+}
+
long do_sched_op_compat(int cmd, unsigned long arg)
{
long ret = 0;
@@ -773,6 +845,19 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE(void) arg)
break;
}
+ case SCHEDOP_watchdog:
+ {
+ struct sched_watchdog sched_watchdog;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&sched_watchdog, arg, 1) )
+ break;
+
+ ret = domain_watchdog(
+ current->domain, sched_watchdog.id, sched_watchdog.timeout);
+ break;
+ }
+
default:
ret = -ENOSYS;
}
diff --git a/xen/common/shutdown.c b/xen/common/shutdown.c
index a8af94b130..65d59b5ca0 100644
--- a/xen/common/shutdown.c
+++ b/xen/common/shutdown.c
@@ -5,6 +5,7 @@
#include <xen/domain.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
+#include <xen/console.h>
#include <asm/debugger.h>
#include <public/sched.h>
@@ -53,6 +54,14 @@ void dom0_shutdown(u8 reason)
break; /* not reached */
}
+ case SHUTDOWN_watchdog:
+ {
+ printk("Domain 0 shutdown: watchdog rebooting machine.\n");
+ kexec_crash();
+ machine_restart(0);
+ break; /* not reached */
+ }
+
default:
{
printk("Domain 0 shutdown (unknown reason %u): ", reason);
diff --git a/xen/include/public/sched.h b/xen/include/public/sched.h
index e498c3c501..7f87420acf 100644
--- a/xen/include/public/sched.h
+++ b/xen/include/public/sched.h
@@ -106,6 +106,22 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
#define SCHEDOP_shutdown_code 5
/*
+ * Setup, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog structure.
+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
+ * after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog 6
+struct sched_watchdog {
+ uint32_t id; /* watchdog ID: 0 to set up a new timer, else an ID returned by an earlier setup */
+ uint32_t timeout; /* timeout in seconds; with id != 0, a value of 0 destroys the timer */
+};
+typedef struct sched_watchdog sched_watchdog_t;
+DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
+
+/*
* Reason codes for SCHEDOP_shutdown. These may be interpreted by control
* software to determine the appropriate action. For the most part, Xen does
* not care about the shutdown code.
@@ -114,6 +130,7 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */
#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
+#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */
#endif /* __XEN_PUBLIC_SCHED_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 8785555e91..6e4a89854a 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -191,7 +191,7 @@ struct mem_event_domain
/* event channel port (vcpu0 only) */
int xen_port;
};
-
+
struct domain
{
domid_t domain_id;
@@ -295,6 +295,12 @@ struct domain
struct xenoprof *xenoprof;
int32_t time_offset_seconds;
+ /* Domain watchdog. */
+#define NR_DOMAIN_WATCHDOG_TIMERS 2
+ spinlock_t watchdog_lock;
+ uint32_t watchdog_inuse_map;
+ struct timer watchdog_timer[NR_DOMAIN_WATCHDOG_TIMERS];
+
struct rcu_head rcu;
/*
@@ -598,6 +604,9 @@ uint64_t get_cpu_idle_time(unsigned int cpu);
cpu_online(cpu) && \
!per_cpu(tasklet_work_to_do, cpu))
+void watchdog_domain_init(struct domain *d);
+void watchdog_domain_destroy(struct domain *d);
+
#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))