aboutsummaryrefslogtreecommitdiffstats
path: root/stubdom/grub/kexec.c
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2008-06-18 09:36:47 +0100
committerKeir Fraser <keir.fraser@citrix.com>2008-06-18 09:36:47 +0100
commita2c7db64f561821fd528614e68c4d92718210126 (patch)
tree79c7e1c3ef49b5b4272fd12d36420e6fb8d23cd1 /stubdom/grub/kexec.c
parent7074b13cee246f09b3b0a2a6da139b2e047cf4a4 (diff)
downloadxen-a2c7db64f561821fd528614e68c4d92718210126.tar.gz
xen-a2c7db64f561821fd528614e68c4d92718210126.tar.bz2
xen-a2c7db64f561821fd528614e68c4d92718210126.zip
Add PV-GRUB
This fetches GRUB1 sources, applies the {graphical, print function, save default, and ext3_256byte} patches from debian, and applies a patch to make it work on x86_64 and port it to Mini-OS. By using libxc, PV-GRUB can then "kexec" the loaded kernel from inside the domain itself, hence permitting to avoid the security-concerned pygrub. Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
Diffstat (limited to 'stubdom/grub/kexec.c')
-rw-r--r--stubdom/grub/kexec.c324
1 files changed, 324 insertions, 0 deletions
diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c
new file mode 100644
index 0000000000..5400fe88cd
--- /dev/null
+++ b/stubdom/grub/kexec.c
@@ -0,0 +1,324 @@
+/*
+ * This supports booting another PV kernel from Mini-OS
+ *
+ * The idea is to setup it using libxc, answer to day0 memory allocation
+ * requests, and using a trampoline boot page to switch to the new page table.
+ *
+ * The procedure of the boot page is:
+ * - map itself at the target position (that may overwrite some C stuff, but we
+ * do not care any more)
+ * - jump there
+ * - switch to the target page table
+ * - unpin the old page table
+ * - jump to the new kernel
+ *
+ * Samuel Thibault <Samuel.Thibault@eu.citrix.com>, May 2008
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <xenctrl.h>
+#include <xc_dom.h>
+
+#include <kernel.h>
+#include <console.h>
+#include <os.h>
+#include <blkfront.h>
+#include <netfront.h>
+#include <fbfront.h>
+#include <shared.h>
+
+#include "mini-os.h"
+
+#if 0
+#define DEBUG(fmt, ...) printk(fmt, ## __VA_ARGS__)
+#else
+#define DEBUG(fmt, ...) (void)0
+#endif
+
+/* Assembly boot page from boot.S */
+extern void _boot_page;
+extern pgentry_t _boot_page_entry;
+extern unsigned long _boot_pdmfn;
+extern unsigned long _boot_stack, _boot_target, _boot_start_info, _boot_start;
+extern xen_pfn_t _boot_oldpdmfn;
+extern void _boot(void);
+
+static unsigned long *pages;
+static unsigned long *pages_mfns;
+static unsigned long allocated;
+
+int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
+ domid_t dom);
+
+/* We need mfn to appear as target_pfn, so exchange with the MFN there */
+static void do_exchange(struct xc_dom_image *dom, xen_pfn_t target_pfn, xen_pfn_t source_mfn)
+{
+ xen_pfn_t source_pfn;
+ xen_pfn_t target_mfn;
+
+ for (source_pfn = 0; source_pfn < start_info.nr_pages; source_pfn++)
+ if (dom->p2m_host[source_pfn] == source_mfn)
+ break;
+ ASSERT(source_pfn < start_info.nr_pages);
+
+ target_mfn = dom->p2m_host[target_pfn];
+
+ /* Put target MFN at source PFN */
+ dom->p2m_host[source_pfn] = target_mfn;
+
+ /* Put source MFN at target PFN */
+ dom->p2m_host[target_pfn] = source_mfn;
+}
+
+int kexec_allocate(struct xc_dom_image *dom, xen_vaddr_t up_to)
+{
+ unsigned long new_allocated = (up_to - dom->parms.virt_base) / PAGE_SIZE;
+ unsigned long i;
+
+ pages = realloc(pages, new_allocated * sizeof(*pages));
+ pages_mfns = realloc(pages_mfns, new_allocated * sizeof(*pages_mfns));
+ for (i = allocated; i < new_allocated; i++) {
+ /* Exchange old page of PFN i with a newly allocated page. */
+ xen_pfn_t old_mfn = dom->p2m_host[i];
+ xen_pfn_t new_pfn;
+ xen_pfn_t new_mfn;
+
+ pages[i] = alloc_page();
+ memset((void*) pages[i], 0, PAGE_SIZE);
+ new_pfn = PHYS_PFN(to_phys(pages[i]));
+ pages_mfns[i] = new_mfn = pfn_to_mfn(new_pfn);
+
+ /* Put old page at new PFN */
+ dom->p2m_host[new_pfn] = old_mfn;
+
+ /* Put new page at PFN i */
+ dom->p2m_host[i] = new_mfn;
+ }
+
+ allocated = new_allocated;
+
+ return 0;
+}
+
+void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline)
+{
+ struct xc_dom_image *dom;
+ int rc;
+ domid_t domid = DOMID_SELF;
+ xen_pfn_t pfn;
+ int xc_handle;
+ unsigned long i;
+ void *seg;
+ xen_pfn_t boot_page_mfn = virt_to_mfn(&_boot_page);
+ char features[] = "";
+ struct mmu_update *m2p_updates;
+ unsigned long nr_m2p_updates;
+
+ DEBUG("booting with cmdline %s\n", cmdline);
+ xc_handle = xc_interface_open();
+
+ dom = xc_dom_allocate(cmdline, features);
+ dom->allocate = kexec_allocate;
+
+ dom->kernel_blob = kernel;
+ dom->kernel_size = kernel_size;
+
+ dom->ramdisk_blob = module;
+ dom->ramdisk_size = module_size;
+
+ dom->flags = 0;
+ dom->console_evtchn = start_info.console.domU.evtchn;
+ dom->xenstore_evtchn = start_info.store_evtchn;
+
+ if ( (rc = xc_dom_boot_xen_init(dom, xc_handle, domid)) != 0 ) {
+ grub_printf("xc_dom_boot_xen_init returned %d\n", rc);
+ errnum = ERR_BOOT_FAILURE;
+ goto out;
+ }
+ if ( (rc = xc_dom_parse_image(dom)) != 0 ) {
+ grub_printf("xc_dom_parse_image returned %d\n", rc);
+ errnum = ERR_BOOT_FAILURE;
+ goto out;
+ }
+
+#ifdef __i386__
+ if (strcmp(dom->guest_type, "xen-3.0-x86_32p")) {
+ grub_printf("can only boot x86 32 PAE kernels, not %s\n", dom->guest_type);
+ errnum = ERR_EXEC_FORMAT;
+ goto out;
+ }
+#endif
+#ifdef __x86_64__
+ if (strcmp(dom->guest_type, "xen-3.0-x86_64")) {
+ grub_printf("can only boot x86 64 kernels, not %s\n", dom->guest_type);
+ errnum = ERR_EXEC_FORMAT;
+ goto out;
+ }
+#endif
+
+ /* equivalent of xc_dom_mem_init */
+ dom->arch_hooks = xc_dom_find_arch_hooks(dom->guest_type);
+ dom->total_pages = start_info.nr_pages;
+
+ /* equivalent of arch_setup_meminit */
+
+ /* setup initial p2m */
+ dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->total_pages);
+
+ /* Start with our current P2M */
+ for (i = 0; i < dom->total_pages; i++)
+ dom->p2m_host[i] = pfn_to_mfn(i);
+
+ if ( (rc = xc_dom_build_image(dom)) != 0 ) {
+ grub_printf("xc_dom_build_image returned %d\n", rc);
+ errnum = ERR_BOOT_FAILURE;
+ goto out;
+ }
+
+ /* copy hypercall page */
+ /* TODO: domctl instead, but requires privileges */
+ if (dom->parms.virt_hypercall != -1) {
+ pfn = PHYS_PFN(dom->parms.virt_hypercall - dom->parms.virt_base);
+ memcpy((void *) pages[pfn], hypercall_page, PAGE_SIZE);
+ }
+
+ /* Equivalent of xc_dom_boot_image */
+ dom->shared_info_mfn = PHYS_PFN(start_info.shared_info);
+
+ if (!xc_dom_compat_check(dom)) {
+ grub_printf("xc_dom_compat_check failed\n");
+ errnum = ERR_EXEC_FORMAT;
+ goto out;
+ }
+
+ /* Move current console, xenstore and boot MFNs to the allocated place */
+ do_exchange(dom, dom->console_pfn, start_info.console.domU.mfn);
+ do_exchange(dom, dom->xenstore_pfn, start_info.store_mfn);
+ DEBUG("virt base at %llx\n", dom->parms.virt_base);
+ DEBUG("bootstack_pfn %lx\n", dom->bootstack_pfn);
+ _boot_target = dom->parms.virt_base + PFN_PHYS(dom->bootstack_pfn);
+ DEBUG("_boot_target %lx\n", _boot_target);
+ do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base),
+ virt_to_mfn(&_boot_page));
+
+ /* Make sure the bootstrap page table does not RW-map any of our current
+ * page table frames */
+ kexec_allocate(dom, dom->virt_pgtab_end);
+
+ if ( (rc = xc_dom_update_guest_p2m(dom))) {
+ grub_printf("xc_dom_update_guest_p2m returned %d\n", rc);
+ errnum = ERR_BOOT_FAILURE;
+ goto out;
+ }
+
+ if ( dom->arch_hooks->setup_pgtables )
+ if ( (rc = dom->arch_hooks->setup_pgtables(dom))) {
+ grub_printf("setup_pgtables returned %d\n", rc);
+ errnum = ERR_BOOT_FAILURE;
+ goto out;
+ }
+
+ /* start info page */
+#undef start_info
+ if ( dom->arch_hooks->start_info )
+ dom->arch_hooks->start_info(dom);
+#define start_info (start_info_union.start_info)
+
+ xc_dom_log_memory_footprint(dom);
+
+ /* Unmap libxc's projection of the boot page table */
+ seg = xc_dom_seg_to_ptr(dom, &dom->pgtables_seg);
+ munmap(seg, dom->pgtables_seg.vend - dom->pgtables_seg.vstart);
+
+ /* Unmap day0 pages to avoid having a r/w mapping of the future page table */
+ for (pfn = 0; pfn < allocated; pfn++)
+ munmap((void*) pages[pfn], PAGE_SIZE);
+
+ /* Pin the boot page table base */
+ if ( (rc = pin_table(dom->guest_xc,
+#ifdef __i386__
+ MMUEXT_PIN_L3_TABLE,
+#endif
+#ifdef __x86_64__
+ MMUEXT_PIN_L4_TABLE,
+#endif
+ xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
+ dom->guest_domid)) != 0 ) {
+ grub_printf("pin_table(%lx) returned %d\n", xc_dom_p2m_host(dom,
+ dom->pgtables_seg.pfn), rc);
+ errnum = ERR_BOOT_FAILURE;
+ goto out_remap;
+ }
+
+ /* We populate the Mini-OS page table here so that boot.S can just call
+ * update_va_mapping to project itself there. */
+ need_pgt(_boot_target);
+ DEBUG("day0 pages %lx\n", allocated);
+ DEBUG("boot target page %lx\n", _boot_target);
+ DEBUG("boot page %p\n", &_boot_page);
+ DEBUG("boot page mfn %lx\n", boot_page_mfn);
+ _boot_page_entry = PFN_PHYS(boot_page_mfn) | L1_PROT;
+ DEBUG("boot page entry %llx\n", _boot_page_entry);
+ _boot_oldpdmfn = virt_to_mfn(start_info.pt_base);
+ DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn);
+ DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart);
+ _boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)];
+ DEBUG("boot pd mfn %lx\n", _boot_pdmfn);
+ _boot_stack = _boot_target + PAGE_SIZE;
+ DEBUG("boot stack %lx\n", _boot_stack);
+ _boot_start_info = dom->parms.virt_base + PFN_PHYS(dom->start_info_pfn);
+ DEBUG("boot start info %lx\n", _boot_start_info);
+ _boot_start = dom->parms.virt_entry;
+ DEBUG("boot start %lx\n", _boot_start);
+
+ /* Keep only useful entries */
+ for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++)
+ if (dom->p2m_host[pfn] != pfn_to_mfn(pfn))
+ nr_m2p_updates++;
+
+ m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates);
+ for (i = pfn = 0; pfn < start_info.nr_pages; pfn++)
+ if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) {
+ m2p_updates[i].ptr = PFN_PHYS(dom->p2m_host[pfn]) | MMU_MACHPHYS_UPDATE;
+ m2p_updates[i].val = pfn;
+ i++;
+ }
+
+ for (i = 0; i < blk_nb; i++)
+ shutdown_blkfront(blk_dev[i]);
+ if (net_dev)
+ shutdown_netfront(net_dev);
+ if (kbd_dev)
+ shutdown_kbdfront(kbd_dev);
+ stop_kernel();
+
+ /* Update M2P */
+ if ((rc = HYPERVISOR_mmu_update(m2p_updates, nr_m2p_updates, NULL, DOMID_SELF)) < 0) {
+ xprintk("Could not update M2P\n");
+ ASSERT(0);
+ }
+
+ xprintk("go!\n");
+
+ /* Jump to trampoline boot page */
+ _boot();
+
+ ASSERT(0);
+
+out_remap:
+ for (pfn = 0; pfn < allocated; pfn++)
+ do_map_frames(pages[pfn], &pages_mfns[pfn], 1, 0, 0, DOMID_SELF, 0, L1_PROT);
+out:
+ xc_dom_release(dom);
+ for (pfn = 0; pfn < allocated; pfn++)
+ free_page((void*)pages[pfn]);
+ free(pages);
+ free(pages_mfns);
+ pages = NULL;
+ pages_mfns = NULL;
+ allocated = 0;
+ xc_interface_close(xc_handle );
+}