about | summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: Tim Deegan <Tim.Deegan@xensource.com> 2007-07-24 14:52:16 +0100
committer: Tim Deegan <Tim.Deegan@xensource.com> 2007-07-24 14:52:16 +0100
commit: f6c12fc6b61c7de43a9eec56fba1fc4ac8acbaa0 (patch)
tree: 6f3e8248a72c055520c6ab5b86cfc18d29b1e678 /tools
parent: ab3a30feef59156b71338a449fa71340a1077c86 (diff)
download: xen-f6c12fc6b61c7de43a9eec56fba1fc4ac8acbaa0.tar.gz
xen-f6c12fc6b61c7de43a9eec56fba1fc4ac8acbaa0.tar.bz2
xen-f6c12fc6b61c7de43a9eec56fba1fc4ac8acbaa0.zip
[HVM] Control qemu's state-save via xenstore, instead of SIGUSR1
This lets us verify that qemu has indeed stopped processing before we start saving guest memory.
Also allow qemu to continue processing after the save has happened, instead of exiting immediately.

Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/ioemu/hw/cirrus_vga.c 2
-rw-r--r-- tools/ioemu/target-i386-dm/helper2.c 40
-rw-r--r-- tools/ioemu/vl.c 33
-rw-r--r-- tools/ioemu/vl.h 1
-rw-r--r-- tools/ioemu/xenstore.c 57
-rw-r--r-- tools/python/xen/xend/XendCheckpoint.py 6
-rw-r--r-- tools/python/xen/xend/XendDomainInfo.py 5
-rw-r--r-- tools/python/xen/xend/image.py 42
8 files changed, 134 insertions, 52 deletions
diff --git a/tools/ioemu/hw/cirrus_vga.c b/tools/ioemu/hw/cirrus_vga.c
index 96f7cbb007..fb2f3ae556 100644
--- a/tools/ioemu/hw/cirrus_vga.c
+++ b/tools/ioemu/hw/cirrus_vga.c
@@ -3096,8 +3096,6 @@ static void cirrus_vga_save(QEMUFile *f, void *opaque)
qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE);
- if (vga_acc)
- cirrus_stop_acc(s);
}
static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/tools/ioemu/target-i386-dm/helper2.c b/tools/ioemu/target-i386-dm/helper2.c
index 971ff257db..7b4dcca687 100644
--- a/tools/ioemu/target-i386-dm/helper2.c
+++ b/tools/ioemu/target-i386-dm/helper2.c
@@ -618,6 +618,7 @@ int main_loop(void)
CPUState *env = cpu_single_env;
int evtchn_fd = xc_evtchn_fd(xce_handle);
char qemu_file[PATH_MAX];
+ fd_set fds;
buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io,
cpu_single_env);
@@ -625,19 +626,34 @@ int main_loop(void)
qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env);
- while (!(vm_running && suspend_requested))
- /* Wait up to 10 msec. */
- main_loop_wait(10);
-
- fprintf(logfile, "device model received suspend signal!\n");
-
- /* Pull all outstanding ioreqs through the system */
- handle_buffered_io(env);
- main_loop_wait(1); /* For the select() on events */
+ xenstore_record_dm_state("running");
+ while (1) {
+ while (!(vm_running && suspend_requested))
+ /* Wait up to 10 msec. */
+ main_loop_wait(10);
+
+ xenstore_record_dm_state("paused");
+ fprintf(logfile, "device model saving state\n");
+
+ /* Pull all outstanding ioreqs through the system */
+ handle_buffered_io(env);
+ main_loop_wait(1); /* For the select() on events */
+
+ /* Save the device state */
+ snprintf(qemu_file, sizeof(qemu_file),
+ "/var/lib/xen/qemu-save.%d", domid);
+ do_savevm(qemu_file);
+
+ /* Wait to be allowed to continue */
+ while (suspend_requested) {
+ FD_ZERO(&fds);
+ FD_SET(xenstore_fd(), &fds);
+ if (select(xenstore_fd() + 1, &fds, NULL, NULL, NULL) > 0)
+ xenstore_process_event(NULL);
+ }
- /* Save the device state */
- snprintf(qemu_file, sizeof(qemu_file), "/var/lib/xen/qemu-save.%d", domid);
- do_savevm(qemu_file);
+ xenstore_record_dm_state("running");
+ }
return 0;
}
diff --git a/tools/ioemu/vl.c b/tools/ioemu/vl.c
index 41035ddec6..b1c3cca009 100644
--- a/tools/ioemu/vl.c
+++ b/tools/ioemu/vl.c
@@ -6856,15 +6856,6 @@ int set_mm_mapping(int xc_handle, uint32_t domid,
return 0;
}
-void suspend(int sig)
-{
- fprintf(logfile, "suspend sig handler called with requested=%d!\n",
- suspend_requested);
- if (sig != SIGUSR1)
- fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig);
- suspend_requested = 1;
-}
-
#if defined(MAPCACHE)
#if defined(__i386__)
@@ -7057,6 +7048,7 @@ int main(int argc, char **argv)
xen_pfn_t *page_array;
extern void *buffered_pio_page;
#endif
+ sigset_t set;
char qemu_dm_logfilename[128];
@@ -7982,24 +7974,11 @@ int main(int argc, char **argv)
close(fd);
}
- /* register signal for the suspend request when save */
- {
- struct sigaction act;
- sigset_t set;
- act.sa_handler = suspend;
- act.sa_flags = SA_RESTART;
- sigemptyset(&act.sa_mask);
-
- sigaction(SIGUSR1, &act, NULL);
-
- /* control panel mask some signals when spawn qemu, need unmask here*/
- sigemptyset(&set);
- sigaddset(&set, SIGUSR1);
- sigaddset(&set, SIGTERM);
- if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
- fprintf(stderr, "unblock signal fail, possible issue for HVM save!\n");
-
- }
+ /* Unblock SIGTERM, which may have been blocked by the caller */
+ sigemptyset(&set);
+ sigaddset(&set, SIGTERM);
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
+ fprintf(stderr, "Failed to unblock SIGTERM\n");
main_loop();
quit_timers();
diff --git a/tools/ioemu/vl.h b/tools/ioemu/vl.h
index 55ee261dc9..80b88a50a4 100644
--- a/tools/ioemu/vl.h
+++ b/tools/ioemu/vl.h
@@ -1456,6 +1456,7 @@ void readline_start(const char *prompt, int is_password,
void xenstore_parse_domain_config(int domid);
int xenstore_fd(void);
void xenstore_process_event(void *opaque);
+void xenstore_record_dm_state(char *state);
void xenstore_check_new_media_present(int timeout);
void xenstore_write_vncport(int vnc_display);
int xenstore_read_vncpasswd(int domid);
diff --git a/tools/ioemu/xenstore.c b/tools/ioemu/xenstore.c
index ddadcb6a0f..37c535e7b5 100644
--- a/tools/ioemu/xenstore.c
+++ b/tools/ioemu/xenstore.c
@@ -186,6 +186,12 @@ void xenstore_parse_domain_config(int domid)
fprintf(logfile, "Watching %s\n", buf);
}
+ /* Set a watch for suspend requests from the migration tools */
+ if (pasprintf(&buf,
+ "/local/domain/0/device-model/%u/command", domid) != -1) {
+ xs_watch(xsh, buf, "dm-command");
+ fprintf(logfile, "Watching %s\n", buf);
+ }
out:
free(type);
@@ -310,6 +316,52 @@ void xenstore_process_logdirty_event(void)
}
+/* Accept state change commands from the control tools */
+static void xenstore_process_dm_command_event(void)
+{
+ char *path = NULL, *command = NULL;
+ unsigned int len;
+ extern int suspend_requested;
+
+ if (pasprintf(&path,
+ "/local/domain/0/device-model/%u/command", domid) == -1) {
+ fprintf(logfile, "out of memory reading dm command\n");
+ goto out;
+ }
+ command = xs_read(xsh, XBT_NULL, path, &len);
+ if (!command)
+ goto out;
+
+ if (!strncmp(command, "save", len)) {
+ fprintf(logfile, "dm-command: pause and save state\n");
+ suspend_requested = 1;
+ } else if (!strncmp(command, "continue", len)) {
+ fprintf(logfile, "dm-command: continue after state save\n");
+ suspend_requested = 0;
+ } else {
+ fprintf(logfile, "dm-command: unknown command\"%*s\"\n", len, command);
+ }
+
+ out:
+ free(path);
+ free(command);
+}
+
+void xenstore_record_dm_state(char *state)
+{
+ char *path = NULL;
+
+ if (pasprintf(&path,
+ "/local/domain/0/device-model/%u/state", domid) == -1) {
+ fprintf(logfile, "out of memory recording dm state\n");
+ goto out;
+ }
+ if (!xs_write(xsh, XBT_NULL, path, state, strlen(state)))
+ fprintf(logfile, "error recording dm state\n");
+
+ out:
+ free(path);
+}
void xenstore_process_event(void *opaque)
{
@@ -325,6 +377,11 @@ void xenstore_process_event(void *opaque)
goto out;
}
+ if (!strcmp(vec[XS_WATCH_TOKEN], "dm-command")) {
+ xenstore_process_dm_command_event();
+ goto out;
+ }
+
if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
strlen(vec[XS_WATCH_TOKEN]) != 3)
goto out;
diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py
index 1aa7d7f69d..47b4cfba13 100644
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -98,10 +98,6 @@ def save(fd, dominfo, network, live, dst, checkpoint=False):
log.info("Domain %d suspended.", dominfo.getDomid())
dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
domain_name)
- #send signal to device model for save
- if hvm:
- log.info("release_devices for hvm domain")
- dominfo._releaseDevices(True)
tochild.write("done\n")
tochild.flush()
log.debug('Written done')
@@ -139,7 +135,7 @@ def save(fd, dominfo, network, live, dst, checkpoint=False):
except Exception, exn:
log.exception("Save failed on domain %s (%s).", domain_name,
dominfo.getDomid())
-
+
dominfo.resumeDomain()
log.debug("XendCheckpoint.save: resumeDomain")
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index eb5c57556c..3ce18bc96f 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1122,6 +1122,8 @@ class XendDomainInfo:
self._clearRestart()
if reason == 'suspend':
+ if self._stateGet() != DOM_STATE_SUSPENDED:
+ self.image.saveDeviceModel()
self._stateSet(DOM_STATE_SUSPENDED)
# Don't destroy the domain. XendCheckpoint will do
# this once it has finished. However, stop watching
@@ -1764,6 +1766,9 @@ class XendDomainInfo:
ResumeDomain(self.domid)
except:
log.exception("XendDomainInfo.resume: xc.domain_resume failed on domain %s." % (str(self.domid)))
+ if self.is_hvm():
+ self.image.resumeDeviceModel()
+
#
# Channels for xenstore and console
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
index 28f100bedc..bd5e6b2cf4 100644
--- a/tools/python/xen/xend/image.py
+++ b/tools/python/xen/xend/image.py
@@ -20,6 +20,7 @@
import os, string
import re
import math
+import time
import signal
import xen.lowlevel.xc
@@ -27,6 +28,7 @@ from xen.xend.XendConstants import REVERSE_DOMAIN_SHUTDOWN_REASONS
from xen.xend.XendError import VmError, XendError, HVMRequired
from xen.xend.XendLogging import log
from xen.xend.XendOptions import instance as xenopts
+from xen.xend.xenstore.xstransact import xstransact
from xen.xend.xenstore.xswatch import xswatch
from xen.xend import arch
@@ -175,6 +177,14 @@ class ImageHandler:
"""Create device model for the domain (define in subclass if needed)."""
pass
+ def saveDeviceModel(self):
+ """Save device model for the domain (define in subclass if needed)."""
+ pass
+
+ def resumeDeviceModel(self):
+ """Unpause device model for the domain (define in subclass if needed)."""
+ pass
+
def destroy(self):
"""Extra cleanup on domain destroy (define in subclass if needed)."""
pass
@@ -443,17 +453,34 @@ class HVMImageHandler(ImageHandler):
self.vm.storeDom("image/device-model-pid", self.pid)
log.info("device model pid: %d", self.pid)
+ def saveDeviceModel(self):
+ # Signal the device model to pause itself and save its state
+ xstransact.Store("/local/domain/0/device-model/%i"
+ % self.vm.getDomid(), ('command', 'save'))
+ # Wait for confirmation. Could do this with a watch but we'd
+ # still end up spinning here waiting for the watch to fire.
+ state = ''
+ count = 0
+ while state != 'paused':
+ state = xstransact.Read("/local/domain/0/device-model/%i/state"
+ % self.vm.getDomid())
+ time.sleep(0.1)
+ count += 1
+ if count > 100:
+ raise VmError('Timed out waiting for device model to save')
+
+ def resumeDeviceModel(self):
+ # Signal the device model to resume activity after pausing to save.
+ xstransact.Store("/local/domain/0/device-model/%i"
+ % self.vm.getDomid(), ('command', 'continue'))
+
def recreate(self):
self.pid = self.vm.gatherDom(('image/device-model-pid', int))
def destroy(self, suspend = False):
- if self.pid:
+ if self.pid and not suspend:
try:
- sig = signal.SIGKILL
- if suspend:
- log.info("use sigusr1 to signal qemu %d", self.pid)
- sig = signal.SIGUSR1
- os.kill(self.pid, sig)
+ os.kill(self.pid, signal.SIGKILL)
except OSError, exn:
log.exception(exn)
try:
@@ -464,6 +491,8 @@ class HVMImageHandler(ImageHandler):
# but we can't wait for it because it's not our child.
pass
self.pid = None
+ state = xstransact.Remove("/local/domain/0/device-model/%i"
+ % self.vm.getDomid())
class IA64_HVM_ImageHandler(HVMImageHandler):
@@ -507,6 +536,7 @@ class X86_HVM_ImageHandler(HVMImageHandler):
return max(4 * (256 * self.vm.getVCpuCount() + 2 * (maxmem_kb / 1024)),
shadow_mem_kb)
+
class X86_Linux_ImageHandler(LinuxImageHandler):
def buildDomain(self):