diff options
author | Keir Fraser <keir.fraser@citrix.com> | 2008-06-18 09:36:47 +0100 |
---|---|---|
committer | Keir Fraser <keir.fraser@citrix.com> | 2008-06-18 09:36:47 +0100 |
commit | a2c7db64f561821fd528614e68c4d92718210126 (patch) | |
tree | 79c7e1c3ef49b5b4272fd12d36420e6fb8d23cd1 /stubdom/grub/kexec.c | |
parent | 7074b13cee246f09b3b0a2a6da139b2e047cf4a4 (diff) | |
download | xen-a2c7db64f561821fd528614e68c4d92718210126.tar.gz xen-a2c7db64f561821fd528614e68c4d92718210126.tar.bz2 xen-a2c7db64f561821fd528614e68c4d92718210126.zip |
Add PV-GRUB
This fetches GRUB1 sources, applies the {graphical, print function,
save default, and ext3_256byte} patches from debian, and applies a
patch to make it work on x86_64 and port it to Mini-OS. By using
libxc, PV-GRUB can then "kexec" the loaded kernel from inside the
domain itself, which makes it possible to avoid pygrub and its
security concerns.
Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
Diffstat (limited to 'stubdom/grub/kexec.c')
-rw-r--r-- | stubdom/grub/kexec.c | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c new file mode 100644 index 0000000000..5400fe88cd --- /dev/null +++ b/stubdom/grub/kexec.c @@ -0,0 +1,324 @@ +/* + * This supports booting another PV kernel from Mini-OS + * + * The idea is to setup it using libxc, answer to day0 memory allocation + * requests, and using a trampoline boot page to switch to the new page table. + * + * The procedure of the boot page is: + * - map itself at the target position (that may overwrite some C stuff, but we + * do not care any more) + * - jump there + * - switch to the target page table + * - unpin the old page table + * - jump to the new kernel + * + * Samuel Thibault <Samuel.Thibault@eu.citrix.com>, May 2008 + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include <xenctrl.h> +#include <xc_dom.h> + +#include <kernel.h> +#include <console.h> +#include <os.h> +#include <blkfront.h> +#include <netfront.h> +#include <fbfront.h> +#include <shared.h> + +#include "mini-os.h" + +#if 0 +#define DEBUG(fmt, ...) printk(fmt, ## __VA_ARGS__) +#else +#define DEBUG(fmt, ...) 
(void)0 +#endif + +/* Assembly boot page from boot.S */ +extern void _boot_page; +extern pgentry_t _boot_page_entry; +extern unsigned long _boot_pdmfn; +extern unsigned long _boot_stack, _boot_target, _boot_start_info, _boot_start; +extern xen_pfn_t _boot_oldpdmfn; +extern void _boot(void); + +static unsigned long *pages; +static unsigned long *pages_mfns; +static unsigned long allocated; + +int pin_table(int xc_handle, unsigned int type, unsigned long mfn, + domid_t dom); + +/* We need mfn to appear as target_pfn, so exchange with the MFN there */ +static void do_exchange(struct xc_dom_image *dom, xen_pfn_t target_pfn, xen_pfn_t source_mfn) +{ + xen_pfn_t source_pfn; + xen_pfn_t target_mfn; + + for (source_pfn = 0; source_pfn < start_info.nr_pages; source_pfn++) + if (dom->p2m_host[source_pfn] == source_mfn) + break; + ASSERT(source_pfn < start_info.nr_pages); + + target_mfn = dom->p2m_host[target_pfn]; + + /* Put target MFN at source PFN */ + dom->p2m_host[source_pfn] = target_mfn; + + /* Put source MFN at target PFN */ + dom->p2m_host[target_pfn] = source_mfn; +} + +int kexec_allocate(struct xc_dom_image *dom, xen_vaddr_t up_to) +{ + unsigned long new_allocated = (up_to - dom->parms.virt_base) / PAGE_SIZE; + unsigned long i; + + pages = realloc(pages, new_allocated * sizeof(*pages)); + pages_mfns = realloc(pages_mfns, new_allocated * sizeof(*pages_mfns)); + for (i = allocated; i < new_allocated; i++) { + /* Exchange old page of PFN i with a newly allocated page. 
*/ + xen_pfn_t old_mfn = dom->p2m_host[i]; + xen_pfn_t new_pfn; + xen_pfn_t new_mfn; + + pages[i] = alloc_page(); + memset((void*) pages[i], 0, PAGE_SIZE); + new_pfn = PHYS_PFN(to_phys(pages[i])); + pages_mfns[i] = new_mfn = pfn_to_mfn(new_pfn); + + /* Put old page at new PFN */ + dom->p2m_host[new_pfn] = old_mfn; + + /* Put new page at PFN i */ + dom->p2m_host[i] = new_mfn; + } + + allocated = new_allocated; + + return 0; +} + +void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline) +{ + struct xc_dom_image *dom; + int rc; + domid_t domid = DOMID_SELF; + xen_pfn_t pfn; + int xc_handle; + unsigned long i; + void *seg; + xen_pfn_t boot_page_mfn = virt_to_mfn(&_boot_page); + char features[] = ""; + struct mmu_update *m2p_updates; + unsigned long nr_m2p_updates; + + DEBUG("booting with cmdline %s\n", cmdline); + xc_handle = xc_interface_open(); + + dom = xc_dom_allocate(cmdline, features); + dom->allocate = kexec_allocate; + + dom->kernel_blob = kernel; + dom->kernel_size = kernel_size; + + dom->ramdisk_blob = module; + dom->ramdisk_size = module_size; + + dom->flags = 0; + dom->console_evtchn = start_info.console.domU.evtchn; + dom->xenstore_evtchn = start_info.store_evtchn; + + if ( (rc = xc_dom_boot_xen_init(dom, xc_handle, domid)) != 0 ) { + grub_printf("xc_dom_boot_xen_init returned %d\n", rc); + errnum = ERR_BOOT_FAILURE; + goto out; + } + if ( (rc = xc_dom_parse_image(dom)) != 0 ) { + grub_printf("xc_dom_parse_image returned %d\n", rc); + errnum = ERR_BOOT_FAILURE; + goto out; + } + +#ifdef __i386__ + if (strcmp(dom->guest_type, "xen-3.0-x86_32p")) { + grub_printf("can only boot x86 32 PAE kernels, not %s\n", dom->guest_type); + errnum = ERR_EXEC_FORMAT; + goto out; + } +#endif +#ifdef __x86_64__ + if (strcmp(dom->guest_type, "xen-3.0-x86_64")) { + grub_printf("can only boot x86 64 kernels, not %s\n", dom->guest_type); + errnum = ERR_EXEC_FORMAT; + goto out; + } +#endif + + /* equivalent of xc_dom_mem_init */ + dom->arch_hooks 
= xc_dom_find_arch_hooks(dom->guest_type); + dom->total_pages = start_info.nr_pages; + + /* equivalent of arch_setup_meminit */ + + /* setup initial p2m */ + dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->total_pages); + + /* Start with our current P2M */ + for (i = 0; i < dom->total_pages; i++) + dom->p2m_host[i] = pfn_to_mfn(i); + + if ( (rc = xc_dom_build_image(dom)) != 0 ) { + grub_printf("xc_dom_build_image returned %d\n", rc); + errnum = ERR_BOOT_FAILURE; + goto out; + } + + /* copy hypercall page */ + /* TODO: domctl instead, but requires privileges */ + if (dom->parms.virt_hypercall != -1) { + pfn = PHYS_PFN(dom->parms.virt_hypercall - dom->parms.virt_base); + memcpy((void *) pages[pfn], hypercall_page, PAGE_SIZE); + } + + /* Equivalent of xc_dom_boot_image */ + dom->shared_info_mfn = PHYS_PFN(start_info.shared_info); + + if (!xc_dom_compat_check(dom)) { + grub_printf("xc_dom_compat_check failed\n"); + errnum = ERR_EXEC_FORMAT; + goto out; + } + + /* Move current console, xenstore and boot MFNs to the allocated place */ + do_exchange(dom, dom->console_pfn, start_info.console.domU.mfn); + do_exchange(dom, dom->xenstore_pfn, start_info.store_mfn); + DEBUG("virt base at %llx\n", dom->parms.virt_base); + DEBUG("bootstack_pfn %lx\n", dom->bootstack_pfn); + _boot_target = dom->parms.virt_base + PFN_PHYS(dom->bootstack_pfn); + DEBUG("_boot_target %lx\n", _boot_target); + do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base), + virt_to_mfn(&_boot_page)); + + /* Make sure the bootstrap page table does not RW-map any of our current + * page table frames */ + kexec_allocate(dom, dom->virt_pgtab_end); + + if ( (rc = xc_dom_update_guest_p2m(dom))) { + grub_printf("xc_dom_update_guest_p2m returned %d\n", rc); + errnum = ERR_BOOT_FAILURE; + goto out; + } + + if ( dom->arch_hooks->setup_pgtables ) + if ( (rc = dom->arch_hooks->setup_pgtables(dom))) { + grub_printf("setup_pgtables returned %d\n", rc); + errnum = ERR_BOOT_FAILURE; + goto out; + } + + /* start 
info page */ +#undef start_info + if ( dom->arch_hooks->start_info ) + dom->arch_hooks->start_info(dom); +#define start_info (start_info_union.start_info) + + xc_dom_log_memory_footprint(dom); + + /* Unmap libxc's projection of the boot page table */ + seg = xc_dom_seg_to_ptr(dom, &dom->pgtables_seg); + munmap(seg, dom->pgtables_seg.vend - dom->pgtables_seg.vstart); + + /* Unmap day0 pages to avoid having a r/w mapping of the future page table */ + for (pfn = 0; pfn < allocated; pfn++) + munmap((void*) pages[pfn], PAGE_SIZE); + + /* Pin the boot page table base */ + if ( (rc = pin_table(dom->guest_xc, +#ifdef __i386__ + MMUEXT_PIN_L3_TABLE, +#endif +#ifdef __x86_64__ + MMUEXT_PIN_L4_TABLE, +#endif + xc_dom_p2m_host(dom, dom->pgtables_seg.pfn), + dom->guest_domid)) != 0 ) { + grub_printf("pin_table(%lx) returned %d\n", xc_dom_p2m_host(dom, + dom->pgtables_seg.pfn), rc); + errnum = ERR_BOOT_FAILURE; + goto out_remap; + } + + /* We populate the Mini-OS page table here so that boot.S can just call + * update_va_mapping to project itself there. 
*/ + need_pgt(_boot_target); + DEBUG("day0 pages %lx\n", allocated); + DEBUG("boot target page %lx\n", _boot_target); + DEBUG("boot page %p\n", &_boot_page); + DEBUG("boot page mfn %lx\n", boot_page_mfn); + _boot_page_entry = PFN_PHYS(boot_page_mfn) | L1_PROT; + DEBUG("boot page entry %llx\n", _boot_page_entry); + _boot_oldpdmfn = virt_to_mfn(start_info.pt_base); + DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn); + DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart); + _boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)]; + DEBUG("boot pd mfn %lx\n", _boot_pdmfn); + _boot_stack = _boot_target + PAGE_SIZE; + DEBUG("boot stack %lx\n", _boot_stack); + _boot_start_info = dom->parms.virt_base + PFN_PHYS(dom->start_info_pfn); + DEBUG("boot start info %lx\n", _boot_start_info); + _boot_start = dom->parms.virt_entry; + DEBUG("boot start %lx\n", _boot_start); + + /* Keep only useful entries */ + for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++) + if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) + nr_m2p_updates++; + + m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates); + for (i = pfn = 0; pfn < start_info.nr_pages; pfn++) + if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) { + m2p_updates[i].ptr = PFN_PHYS(dom->p2m_host[pfn]) | MMU_MACHPHYS_UPDATE; + m2p_updates[i].val = pfn; + i++; + } + + for (i = 0; i < blk_nb; i++) + shutdown_blkfront(blk_dev[i]); + if (net_dev) + shutdown_netfront(net_dev); + if (kbd_dev) + shutdown_kbdfront(kbd_dev); + stop_kernel(); + + /* Update M2P */ + if ((rc = HYPERVISOR_mmu_update(m2p_updates, nr_m2p_updates, NULL, DOMID_SELF)) < 0) { + xprintk("Could not update M2P\n"); + ASSERT(0); + } + + xprintk("go!\n"); + + /* Jump to trampoline boot page */ + _boot(); + + ASSERT(0); + +out_remap: + for (pfn = 0; pfn < allocated; pfn++) + do_map_frames(pages[pfn], &pages_mfns[pfn], 1, 0, 0, DOMID_SELF, 0, L1_PROT); +out: + xc_dom_release(dom); + for (pfn = 0; pfn < allocated; pfn++) + 
free_page((void*)pages[pfn]); + free(pages); + free(pages_mfns); + pages = NULL; + pages_mfns = NULL; + allocated = 0; + xc_interface_close(xc_handle ); +} |