aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Jackson <ian.jackson@eu.citrix.com>2012-08-03 09:54:08 +0100
committerIan Jackson <ian.jackson@eu.citrix.com>2012-08-03 09:54:08 +0100
commit7253e0fd1aeb3ae7d4714bcc1d86b846b3331995 (patch)
tree943ea082fe03a56387831bd15b6a36e1ffa70b10
parent2dc6e6ef994ca63c7f9fb49981c1b044a69f2ac1 (diff)
downloadxen-7253e0fd1aeb3ae7d4714bcc1d86b846b3331995.tar.gz
xen-7253e0fd1aeb3ae7d4714bcc1d86b846b3331995.tar.bz2
xen-7253e0fd1aeb3ae7d4714bcc1d86b846b3331995.zip
libxl: react correctly to bootloader pty master POLLHUP
Receive POLLHUP on the bootloader master pty is not an error. Hopefully it means that the bootloader has exited and therefore the pty slave side has no process group any more. (At least NetBSD indicates POLLHUP on the master in this case.) So send the bootloader SIGTERM; if it has already exited then this has no effect (except that on some versions of NetBSD it erroneously returns ESRCH and we print a harmless warning) and we will then collect the bootloader's exit status and be satisfied. However, we remember that we have done this so that if we got POLLHUP for some other reason than that the bootloader exited we report something resembling a useful message. In order to implement this we need to provide a way for users of datacopier to handle POLLHUP rather than treating it as fatal. We rename bootloader_abort to bootloader_stop since it now no longer only applies to error situations. Signed-off-by: Roger Pau Monne <roger.pau@citrix.com> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> Acked-by: Ian Campbell <ian.campbell@citrix.com> Committed-by: Ian Campbell <ian.campbell@citrix.com>
-rw-r--r--tools/libxl/libxl_aoutils.c23
-rw-r--r--tools/libxl/libxl_bootloader.c39
-rw-r--r--tools/libxl/libxl_internal.h7
3 files changed, 57 insertions, 12 deletions
diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
index 99972a23b0..983a60afc7 100644
--- a/tools/libxl/libxl_aoutils.c
+++ b/tools/libxl/libxl_aoutils.c
@@ -97,11 +97,31 @@ void libxl__datacopier_prefixdata(libxl__egc *egc, libxl__datacopier_state *dc,
LIBXL_TAILQ_INSERT_TAIL(&dc->bufs, buf, entry);
}
+static int datacopier_pollhup_handled(libxl__egc *egc,
+ libxl__datacopier_state *dc,
+ short revents, int onwrite)
+{
+ STATE_AO_GC(dc->ao);
+
+ if (dc->callback_pollhup && (revents & POLLHUP)) {
+ LOG(DEBUG, "received POLLHUP on %s during copy of %s",
+ onwrite ? dc->writewhat : dc->readwhat,
+ dc->copywhat);
+ libxl__datacopier_kill(dc);
+ dc->callback_pollhup(egc, dc, onwrite, -1);
+ return 1;
+ }
+ return 0;
+}
+
static void datacopier_readable(libxl__egc *egc, libxl__ev_fd *ev,
int fd, short events, short revents) {
libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, toread);
STATE_AO_GC(dc->ao);
+ if (datacopier_pollhup_handled(egc, dc, revents, 0))
+ return;
+
if (revents & ~POLLIN) {
LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)"
" on %s during copy of %s", revents, dc->readwhat, dc->copywhat);
@@ -163,6 +183,9 @@ static void datacopier_writable(libxl__egc *egc, libxl__ev_fd *ev,
libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, towrite);
STATE_AO_GC(dc->ao);
+ if (datacopier_pollhup_handled(egc, dc, revents, 1))
+ return;
+
if (revents & ~POLLOUT) {
LOG(ERROR, "unexpected poll event 0x%x (should be POLLOUT)"
" on %s during copy of %s", revents, dc->writewhat, dc->copywhat);
diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
index ef5a91ba4d..bfc1b5616a 100644
--- a/tools/libxl/libxl_bootloader.c
+++ b/tools/libxl/libxl_bootloader.c
@@ -215,6 +215,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
libxl__domaindeathcheck_init(&bl->deathcheck);
bl->keystrokes.ao = bl->ao; libxl__datacopier_init(&bl->keystrokes);
bl->display.ao = bl->ao; libxl__datacopier_init(&bl->display);
+ bl->got_pollhup = 0;
}
static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
@@ -275,7 +276,7 @@ static void bootloader_local_detached_cb(libxl__egc *egc,
}
/* might be called at any time, provided it's init'd */
-static void bootloader_abort(libxl__egc *egc,
+static void bootloader_stop(libxl__egc *egc,
libxl__bootloader_state *bl, int rc)
{
STATE_AO_GC(bl->ao);
@@ -285,8 +286,8 @@ static void bootloader_abort(libxl__egc *egc,
libxl__datacopier_kill(&bl->display);
if (libxl__ev_child_inuse(&bl->child)) {
r = kill(bl->child.pid, SIGTERM);
- if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
- (unsigned long)bl->child.pid);
+ if (r) LOGE(WARN, "%sfailed to kill bootloader [%lu]",
+ rc ? "after failure, " : "", (unsigned long)bl->child.pid);
}
bl->rc = rc;
}
@@ -508,7 +509,10 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op)
bl->keystrokes.maxsz = BOOTLOADER_BUF_OUT;
bl->keystrokes.copywhat =
GCSPRINTF("bootloader input for domain %"PRIu32, bl->domid);
- bl->keystrokes.callback = bootloader_keystrokes_copyfail;
+ bl->keystrokes.callback = bootloader_keystrokes_copyfail;
+ bl->keystrokes.callback_pollhup = bootloader_keystrokes_copyfail;
+ /* pollhup gets called with errnoval==-1 which is not otherwise
+ * possible since errnos are nonnegative, so it's unambiguous */
rc = libxl__datacopier_start(&bl->keystrokes);
if (rc) goto out;
@@ -516,7 +520,8 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op)
bl->display.maxsz = BOOTLOADER_BUF_IN;
bl->display.copywhat =
GCSPRINTF("bootloader output for domain %"PRIu32, bl->domid);
- bl->display.callback = bootloader_display_copyfail;
+ bl->display.callback = bootloader_display_copyfail;
+ bl->display.callback_pollhup = bootloader_display_copyfail;
rc = libxl__datacopier_start(&bl->display);
if (rc) goto out;
@@ -562,30 +567,42 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op)
/* perhaps one of these will be called, but perhaps not */
static void bootloader_copyfail(libxl__egc *egc, const char *which,
- libxl__bootloader_state *bl, int onwrite, int errnoval)
+ libxl__bootloader_state *bl, int ondisplay, int onwrite, int errnoval)
{
STATE_AO_GC(bl->ao);
+ int rc = ERROR_FAIL;
+
+ if (errnoval==-1) {
+ /* POLLHUP */
+ if (!!ondisplay != !!onwrite) {
+ rc = 0;
+ bl->got_pollhup = 1;
+ } else {
+ LOG(ERROR, "unexpected POLLHUP on %s", which);
+ }
+ }
if (!onwrite && !errnoval)
LOG(ERROR, "unexpected eof copying %s", which);
- bootloader_abort(egc, bl, ERROR_FAIL);
+
+ bootloader_stop(egc, bl, rc);
}
static void bootloader_keystrokes_copyfail(libxl__egc *egc,
libxl__datacopier_state *dc, int onwrite, int errnoval)
{
libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, keystrokes);
- bootloader_copyfail(egc, "bootloader input", bl, onwrite, errnoval);
+ bootloader_copyfail(egc, "bootloader input", bl, 0, onwrite, errnoval);
}
static void bootloader_display_copyfail(libxl__egc *egc,
libxl__datacopier_state *dc, int onwrite, int errnoval)
{
libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display);
- bootloader_copyfail(egc, "bootloader output", bl, onwrite, errnoval);
+ bootloader_copyfail(egc, "bootloader output", bl, 1, onwrite, errnoval);
}
static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck *dc)
{
libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
- bootloader_abort(egc, bl, ERROR_FAIL);
+ bootloader_stop(egc, bl, ERROR_FAIL);
}
static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
@@ -599,6 +616,8 @@ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
libxl__datacopier_kill(&bl->display);
if (status) {
+ if (bl->got_pollhup && WIFSIGNALED(status) && WTERMSIG(status)==SIGTERM)
+ LOG(ERROR, "got POLLHUP, sent SIGTERM");
LOG(ERROR, "bootloader failed - consult logfile %s", bl->logfile);
libxl_report_child_exitstatus(CTX, XTL_ERROR, "bootloader",
pid, status);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 58004b334a..2d6c71a815 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2076,7 +2076,9 @@ typedef struct libxl__datacopier_buf libxl__datacopier_buf;
* errnoval==0 means we got eof and all data was written
* errnoval!=0 means we had a read error, logged
* onwrite==-1 means some other internal failure, errnoval not valid, logged
- * in all cases copier is killed before calling this callback */
+ * If we get POLLHUP, we call callback_pollhup(..., onwrite, -1);
+ * or if callback_pollhup==0 this is an internal failure, as above.
+ * In all cases copier is killed before calling this callback */
typedef void libxl__datacopier_callback(libxl__egc *egc,
libxl__datacopier_state *dc, int onwrite, int errnoval);
@@ -2095,6 +2097,7 @@ struct libxl__datacopier_state {
const char *copywhat, *readwhat, *writewhat; /* for error msgs */
FILE *log; /* gets a copy of everything */
libxl__datacopier_callback *callback;
+ libxl__datacopier_callback *callback_pollhup;
/* remaining fields are private to datacopier */
libxl__ev_fd toread, towrite;
ssize_t used;
@@ -2279,7 +2282,7 @@ struct libxl__bootloader_state {
int nargs, argsspace;
const char **args;
libxl__datacopier_state keystrokes, display;
- int rc;
+ int rc, got_pollhup;
};
_hidden void libxl__bootloader_init(libxl__bootloader_state *bl);