aboutsummaryrefslogtreecommitdiffstats
path: root/stubdom/grub/kexec.c
blob: b21c91ae994976aae8829be97500cf460e5a4486 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
/*
 * This supports booting another PV kernel from Mini-OS
 *
 * The idea is to setup it using libxc, answer to day0 memory allocation
 * requests, and using a trampoline boot page to switch to the new page table.
 *
 * The procedure of the boot page is:
 * - map itself at the target position (that may overwrite some C stuff, but we
 *   do not care any more)
 * - jump there
 * - switch to the target page table
 * - unpin the old page table
 * - jump to the new kernel
 *
 * Samuel Thibault <Samuel.Thibault@eu.citrix.com>, May 2008
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>

#include <xenctrl.h>
#include <xc_dom.h>

#include <kernel.h>
#include <console.h>
#include <os.h>
#include <blkfront.h>
#include <netfront.h>
#include <fbfront.h>
#include <shared.h>

#include "mini-os.h"

#if 0
#define DEBUG(fmt, ...) printk(fmt, ## __VA_ARGS__)
#else
#define DEBUG(fmt, ...) (void)0
#endif

/* Assembly boot page from boot.S */
extern void _boot_page;
extern pgentry_t _boot_page_entry;
extern unsigned long _boot_pdmfn;
extern unsigned long _boot_stack, _boot_target, _boot_start_info, _boot_start;
extern xen_pfn_t _boot_oldpdmfn;
extern void _boot(void);

static unsigned long *pages;
static unsigned long *pages_mfns;
static xen_pfn_t *pages_moved2pfns;
static unsigned long allocated;

int pin_table(xc_interface *xc_handle, unsigned int type, unsigned long mfn,
              domid_t dom);

/* We need mfn to appear as target_pfn, so exchange with the MFN there */
static void do_exchange(struct xc_dom_image *dom, xen_pfn_t target_pfn, xen_pfn_t source_mfn)
{
    xen_pfn_t source_pfn;
    xen_pfn_t target_mfn;

    for (source_pfn = 0; source_pfn < start_info.nr_pages; source_pfn++)
        if (dom->p2m_host[source_pfn] == source_mfn)
            break;
    ASSERT(source_pfn < start_info.nr_pages);

    target_mfn = dom->p2m_host[target_pfn];

    /* Put target MFN at source PFN */
    dom->p2m_host[source_pfn] = target_mfn;

    /* Put source MFN at target PFN */
    dom->p2m_host[target_pfn] = source_mfn;
}

int kexec_allocate(struct xc_dom_image *dom, xen_vaddr_t up_to)
{
    unsigned long new_allocated = (up_to - dom->parms.virt_base) / PAGE_SIZE;
    unsigned long i;

    pages = realloc(pages, new_allocated * sizeof(*pages));
    pages_mfns = realloc(pages_mfns, new_allocated * sizeof(*pages_mfns));
    pages_moved2pfns = realloc(pages_moved2pfns, new_allocated * sizeof(*pages_moved2pfns));
    for (i = allocated; i < new_allocated; i++) {
        /* Exchange old page of PFN i with a newly allocated page.  */
        xen_pfn_t old_mfn = dom->p2m_host[i];
        xen_pfn_t new_pfn;
        xen_pfn_t new_mfn;

        pages[i] = alloc_page();
        memset((void*) pages[i], 0, PAGE_SIZE);
        new_pfn = PHYS_PFN(to_phys(pages[i]));
        pages_mfns[i] = new_mfn = pfn_to_mfn(new_pfn);

	/*
	 * If PFN of newly allocated page (new_pfn) is less then currently
	 * requested PFN (i) then look for relevant PFN/MFN pair. In this
	 * situation dom->p2m_host[new_pfn] no longer contains proper MFN
	 * because original page with new_pfn was moved earlier
	 * to different location.
	 */
	for (; new_pfn < i; new_pfn = pages_moved2pfns[new_pfn]);

	/* Store destination PFN of currently requested page. */
	pages_moved2pfns[i] = new_pfn;

        /* Put old page at new PFN */
        dom->p2m_host[new_pfn] = old_mfn;

        /* Put new page at PFN i */
        dom->p2m_host[i] = new_mfn;
    }

    allocated = new_allocated;

    return 0;
}

void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline, unsigned long flags)
{
    struct xc_dom_image *dom;
    int rc;
    domid_t domid = DOMID_SELF;
    xen_pfn_t pfn;
    xc_interface *xc_handle;
    unsigned long i;
    void *seg;
    xen_pfn_t boot_page_mfn = virt_to_mfn(&_boot_page);
    char features[] = "";
    struct mmu_update *m2p_updates;
    unsigned long nr_m2p_updates;

    DEBUG("booting with cmdline %s\n", cmdline);
    xc_handle = xc_interface_open(0,0,0);

    dom = xc_dom_allocate(xc_handle, cmdline, features);
    dom->allocate = kexec_allocate;

    /* We are using guest owned memory, therefore no limits. */
    xc_dom_kernel_max_size(dom, 0);
    xc_dom_ramdisk_max_size(dom, 0);

    dom->kernel_blob = kernel;
    dom->kernel_size = kernel_size;

    dom->ramdisk_blob = module;
    dom->ramdisk_size = module_size;

    dom->flags = flags;
    dom->console_evtchn = start_info.console.domU.evtchn;
    dom->xenstore_evtchn = start_info.store_evtchn;

    if ( (rc = xc_dom_boot_xen_init(dom, xc_handle, domid)) != 0 ) {
        grub_printf("xc_dom_boot_xen_init returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }
    if ( (rc = xc_dom_parse_image(dom)) != 0 ) {
        grub_printf("xc_dom_parse_image returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }

#ifdef __i386__
    if (strcmp(dom->guest_type, "xen-3.0-x86_32p")) {
        grub_printf("can only boot x86 32 PAE kernels, not %s\n", dom->guest_type);
        errnum = ERR_EXEC_FORMAT;
        goto out;
    }
#endif
#ifdef __x86_64__
    if (strcmp(dom->guest_type, "xen-3.0-x86_64")) {
        grub_printf("can only boot x86 64 kernels, not %s\n", dom->guest_type);
        errnum = ERR_EXEC_FORMAT;
        goto out;
    }
#endif

    /* equivalent of xc_dom_mem_init */
    dom->arch_hooks = xc_dom_find_arch_hooks(xc_handle, dom->guest_type);
    dom->total_pages = start_info.nr_pages;

    /* equivalent of arch_setup_meminit */

    /* setup initial p2m */
    dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->total_pages);

    /* Start with our current P2M */
    for (i = 0; i < dom->total_pages; i++)
        dom->p2m_host[i] = pfn_to_mfn(i);

    if ( (rc = xc_dom_build_image(dom)) != 0 ) {
        grub_printf("xc_dom_build_image returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }

    /* copy hypercall page */
    /* TODO: domctl instead, but requires privileges */
    if (dom->parms.virt_hypercall != -1) {
        pfn = PHYS_PFN(dom->parms.virt_hypercall - dom->parms.virt_base);
        memcpy((void *) pages[pfn], hypercall_page, PAGE_SIZE);
    }

    /* Equivalent of xc_dom_boot_image */
    dom->shared_info_mfn = PHYS_PFN(start_info.shared_info);

    if (!xc_dom_compat_check(dom)) {
        grub_printf("xc_dom_compat_check failed\n");
        errnum = ERR_EXEC_FORMAT;
        goto out;
    }

    /* Move current console, xenstore and boot MFNs to the allocated place */
    do_exchange(dom, dom->console_pfn, start_info.console.domU.mfn);
    do_exchange(dom, dom->xenstore_pfn, start_info.store_mfn);
    DEBUG("virt base at %llx\n", dom->parms.virt_base);
    DEBUG("bootstack_pfn %lx\n", dom->bootstack_pfn);
    _boot_target = dom->parms.virt_base + PFN_PHYS(dom->bootstack_pfn);
    DEBUG("_boot_target %lx\n", _boot_target);
    do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base),
            virt_to_mfn(&_boot_page));

    /* Make sure the bootstrap page table does not RW-map any of our current
     * page table frames */
    kexec_allocate(dom, dom->virt_pgtab_end);

    if ( (rc = xc_dom_update_guest_p2m(dom))) {
        grub_printf("xc_dom_update_guest_p2m returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }

    if ( dom->arch_hooks->setup_pgtables )
        if ( (rc = dom->arch_hooks->setup_pgtables(dom))) {
            grub_printf("setup_pgtables returned %d\n", rc);
            errnum = ERR_BOOT_FAILURE;
            goto out;
        }

    /* start info page */
#undef start_info
    if ( dom->arch_hooks->start_info )
        dom->arch_hooks->start_info(dom);
#define start_info (start_info_union.start_info)

    xc_dom_log_memory_footprint(dom);

    /* Unmap libxc's projection of the boot page table */
    seg = xc_dom_seg_to_ptr(dom, &dom->pgtables_seg);
    munmap(seg, dom->pgtables_seg.vend - dom->pgtables_seg.vstart);

    /* Unmap day0 pages to avoid having a r/w mapping of the future page table */
    for (pfn = 0; pfn < allocated; pfn++)
        munmap((void*) pages[pfn], PAGE_SIZE);

    /* Pin the boot page table base */
    if ( (rc = pin_table(dom->xch,
#ifdef __i386__
                MMUEXT_PIN_L3_TABLE,
#endif
#ifdef __x86_64__
                MMUEXT_PIN_L4_TABLE,
#endif
                xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
                dom->guest_domid)) != 0 ) {
        grub_printf("pin_table(%lx) returned %d\n", xc_dom_p2m_host(dom,
                    dom->pgtables_seg.pfn), rc);
        errnum = ERR_BOOT_FAILURE;
        goto out_remap;
    }

    /* We populate the Mini-OS page table here so that boot.S can just call
     * update_va_mapping to project itself there.  */
    need_pgt(_boot_target);
    DEBUG("day0 pages %lx\n", allocated);
    DEBUG("boot target page %lx\n", _boot_target);
    DEBUG("boot page %p\n", &_boot_page);
    DEBUG("boot page mfn %lx\n", boot_page_mfn);
    _boot_page_entry = PFN_PHYS(boot_page_mfn) | L1_PROT;
    DEBUG("boot page entry %llx\n", _boot_page_entry);
    _boot_oldpdmfn = virt_to_mfn(start_info.pt_base);
    DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn);
    DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart);
    _boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)];
    DEBUG("boot pd mfn %lx\n", _boot_pdmfn);
    _boot_stack = _boot_target + PAGE_SIZE;
    DEBUG("boot stack %lx\n", _boot_stack);
    _boot_start_info = dom->parms.virt_base + PFN_PHYS(dom->start_info_pfn);
    DEBUG("boot start info %lx\n", _boot_start_info);
    _boot_start = dom->parms.virt_entry;
    DEBUG("boot start %lx\n", _boot_start);

    /* Keep only useful entries */
    for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++)
        if (dom->p2m_host[pfn] != pfn_to_mfn(pfn))
            nr_m2p_updates++;

    m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates);
    for (i = pfn = 0; pfn < start_info.nr_pages; pfn++)
        if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) {
            m2p_updates[i].ptr = PFN_PHYS(dom->p2m_host[pfn]) | MMU_MACHPHYS_UPDATE;
            m2p_updates[i].val = pfn;
            i++;
        }

    for (i = 0; i < blk_nb; i++)
        shutdown_blkfront(blk_dev[i]);
    if (net_dev)
        shutdown_netfront(net_dev);
    if (kbd_dev)
        shutdown_kbdfront(kbd_dev);
    stop_kernel();

    /* Update M2P */
    if ((rc = HYPERVISOR_mmu_update(m2p_updates, nr_m2p_updates, NULL, DOMID_SELF)) < 0) {
        xprintk("Could not update M2P\n");
        ASSERT(0);
    }

    xprintk("go!\n");

    /* Jump to trampoline boot page */
    _boot();

    ASSERT(0);

out_remap:
    for (pfn = 0; pfn < allocated; pfn++)
        do_map_frames(pages[pfn], &pages_mfns[pfn], 1, 0, 0, DOMID_SELF, 0, L1_PROT);
out:
    xc_dom_release(dom);
    for (pfn = 0; pfn < allocated; pfn++)
        free_page((void*)pages[pfn]);
    free(pages);
    free(pages_mfns);
    pages = NULL;
    pages_mfns = NULL;
    allocated = 0;
    xc_interface_close(xc_handle );
}