aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2008-02-01 11:14:53 +0000
committerKeir Fraser <keir.fraser@citrix.com>2008-02-01 11:14:53 +0000
commitbd03fbb90e6bfcbed2f71a4ef815840f86e66136 (patch)
treea8abcddf6e290d4fbc55e5f7113b6fe156d3606d /tools
parent2c16ce588c31d0f015dea3e60c3463715240cbc6 (diff)
downloadxen-bd03fbb90e6bfcbed2f71a4ef815840f86e66136.tar.gz
xen-bd03fbb90e6bfcbed2f71a4ef815840f86e66136.tar.bz2
xen-bd03fbb90e6bfcbed2f71a4ef815840f86e66136.zip
Direct Linux boot: Support booting relocatable Linux kernels.
This patch introduces the basic infrastructure for direct kernel boot in the ioemu copy of QEMU. The current #ifdef-disabled code is actually obsolete with respect to upstream QEMU code, so it is removed entirely. In its place I have imported the latest upstream QEMU code. The QEMU code assumes that the guest RAM is directly mapped into the QEMU process, so some changes were necessary. Instead of strcpy/memcpy'ing the args and kernel header into guest RAM, cpu_physical_memory_rw is used. Instead of fread()'ing the initrd and kernel into guest RAM, a helper function, fread2guest, is used, which reads into a small buffer and then uses cpu_physical_memory_rw. NB in reading the following, Documentation/i386/boot.txt is a useful reference for what's going on. Next, instead of loading the kernel at 0x100000, this code loads it at 0x200000. This is far enough away that there's no risk of it overlapping with the HVM firmware image. If the Linux kernel boot protocol is 0x205 or later, and the flag at offset 0x234 in the kernel header is 1, then the guest kernel was built with CONFIG_RELOCATABLE=y. In this scenario we merely need to tell the kernel what address it has been relocated to by writing 0x200000 into the kernel header at offset 0x214. When switching from real mode into protected mode the kernel will immediately start executing at 0x200000 and be happy with life. This should work for 2.6.20 or later on i386, and 2.6.22 or later on x86_64. This has been verified with Fedora 7 and Fedora 8 bare metal kernels on i386 and x86_64 from the $TREE/images/pxeboot of the install trees. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/ioemu/hw/pc.c354
1 files changed, 266 insertions, 88 deletions
diff --git a/tools/ioemu/hw/pc.c b/tools/ioemu/hw/pc.c
index 8c17c25477..473387dc62 100644
--- a/tools/ioemu/hw/pc.c
+++ b/tools/ioemu/hw/pc.c
@@ -31,10 +31,8 @@
#define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
#define LINUX_BOOT_FILENAME "linux_boot.bin"
-#define KERNEL_LOAD_ADDR 0x00100000
-#define INITRD_LOAD_ADDR 0x00600000
-#define KERNEL_PARAMS_ADDR 0x00090000
-#define KERNEL_CMDLINE_ADDR 0x00099000
+/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables. */
+#define ACPI_DATA_SIZE 0x10000
static fdctrl_t *floppy_controller;
static RTCState *rtc_state;
@@ -363,36 +361,271 @@ void bochs_bios_init(void)
register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL);
}
+/* Generate an initial boot sector which sets state and jump to
+ a specified vector.
+
+ gpr:  values for the eight 32-bit general registers; gpr[i] is loaded
+       by the MOV whose opcode is 0xb8+i.
+ segs: values for the six segment registers; segs[1] (CS) is not loaded
+       with a MOV but is used as the segment of the final far jump.
+ ip:   offset part of the far jump target.
+
+ The 512-byte sector is built in a local buffer and installed on
+ bs_table[0] with bdrv_set_boot_sector().  The process exits if
+ bs_table[0] is NULL. */
+static void generate_bootsect(uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
+{
+ uint8_t bootsect[512], *p;
+ int i;
+
+ if (bs_table[0] == NULL) {
+ fprintf(stderr, "A disk image must be given for 'hda' when booting "
+ "a Linux kernel\n");
+ exit(1);
+ }
+
+ memset(bootsect, 0, sizeof(bootsect));
+
+ /* Copy the MSDOS partition table if possible.  Best effort: the result
+ of the read is not checked, and the code emitted below overwrites
+ the start of whatever was read. */
+ bdrv_read(bs_table[0], 0, bootsect, 1);
+
+ /* Make sure we have a partition signature */
+ bootsect[510] = 0x55;
+ bootsect[511] = 0xaa;
+
+ /* Actual code, emitted byte by byte from the start of the sector */
+ p = bootsect;
+ *p++ = 0xfa; /* CLI */
+ *p++ = 0xfc; /* CLD */
+
+ for (i = 0; i < 6; i++) {
+ if (i == 1) /* Skip CS */
+ continue;
+
+ *p++ = 0xb8; /* MOV AX,imm16 */
+ *p++ = segs[i]; /* low byte of the immediate */
+ *p++ = segs[i] >> 8; /* high byte of the immediate */
+ *p++ = 0x8e; /* MOV <seg>,AX */
+ *p++ = 0xc0 + (i << 3); /* operand byte: segment register number i in bits 3-5 */
+ }
-int load_kernel(const char *filename, uint8_t *addr,
- uint8_t *real_addr)
+ for (i = 0; i < 8; i++) {
+ *p++ = 0x66; /* 32-bit operand size */
+ *p++ = 0xb8 + i; /* MOV <reg>,imm32 */
+ *p++ = gpr[i]; /* immediate stored least significant byte first */
+ *p++ = gpr[i] >> 8;
+ *p++ = gpr[i] >> 16;
+ *p++ = gpr[i] >> 24;
+ }
+
+ *p++ = 0xea; /* JMP FAR */
+ *p++ = ip; /* IP */
+ *p++ = ip >> 8;
+ *p++ = segs[1]; /* CS */
+ *p++ = segs[1] >> 8;
+
+ bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
+}
+
+
+static long get_file_size(FILE *f)
{
- int fd, size;
- int setup_sects;
-
- fd = open(filename, O_RDONLY | O_BINARY);
- if (fd < 0)
- return -1;
-
- /* load 16 bit code */
- if (read(fd, real_addr, 512) != 512)
- goto fail;
- setup_sects = real_addr[0x1F1];
- if (!setup_sects)
- setup_sects = 4;
- if (read(fd, real_addr + 512, setup_sects * 512) !=
- setup_sects * 512)
- goto fail;
-
- /* load 32 bit code */
- size = read(fd, addr, 16 * 1024 * 1024);
- if (size < 0)
- goto fail;
- close(fd);
+ long where, size;
+
+ /* Return the size of stream f: the offset that ftell() reports after
+ seeking to the end of the stream.  The stream position that was
+ current on entry is restored before returning.  The results of the
+ fseek() calls are not checked, and a failing ftell() is returned to
+ the caller as-is.
+ XXX: on Unix systems, using fstat() probably makes more sense */
+
+ where = ftell(f); /* remember the caller's position */
+ fseek(f, 0, SEEK_END);
+ size = ftell(f); /* offset of end-of-file */
+ fseek(f, where, SEEK_SET); /* put the position back */
+
return size;
- fail:
- close(fd);
- return -1;
+}
+
+/* Copy nbytes from the current position of stream f into guest physical
+ * memory starting at dst_addr.  The data is staged through a 4096-byte
+ * local buffer and handed to cpu_physical_memory_rw() one chunk at a time.
+ *
+ * Returns 0 once everything has been copied, or -1 as soon as fread()
+ * delivers fewer bytes than requested; chunks written before the failure
+ * are left in place.
+ */
+static int fread2guest(target_phys_addr_t dst_addr, size_t nbytes, FILE *f)
+{
+    uint8_t chunk[4096];
+    size_t done;
+
+    for (done = 0; done < nbytes; ) {
+        size_t len = nbytes - done;
+
+        if (len > sizeof(chunk))
+            len = sizeof(chunk);
+
+        if (fread(chunk, 1, len, f) != len)
+            return -1;
+
+        cpu_physical_memory_rw(dst_addr+done, chunk, len, 1);
+        done += len;
+    }
+
+    return 0;
+}
+
+/* Load a Linux kernel, and optionally an initrd, into guest memory and
+ * arrange for the guest to boot it directly.
+ *
+ * kernel_filename: kernel image to load.
+ * initrd_filename: initial ram disk to load, or NULL for none.
+ * kernel_cmdline:  kernel command line; only the first 4095 bytes are
+ *                  passed on.  The caller's string is not modified.
+ *
+ * The first 1024 bytes of the image are read into a local copy of the
+ * kernel header, which is patched (command line location, loader type,
+ * heap, initrd location and size, relocation address) and then written to
+ * guest memory, followed by the remaining real-mode setup code and the
+ * protected-mode code.  Finally generate_bootsect() is called with the
+ * segment registers and stack pointer the real-mode code should start
+ * with.
+ *
+ * Any failure is reported on stderr and terminates the process with
+ * exit(1); the function does not return an error.
+ */
+static void load_linux(const char *kernel_filename,
+                       const char *initrd_filename,
+                       const char *kernel_cmdline)
+{
+    uint16_t protocol;
+    uint32_t gpr[8];
+    uint16_t seg[6];
+    uint16_t real_seg;
+    int setup_size, kernel_size, initrd_size, cmdline_size;
+    uint32_t initrd_max;
+    uint8_t header[1024];
+    uint8_t cmdline_buf[4096];
+    target_phys_addr_t real_addr, reloc_prot_addr, prot_addr, cmdline_addr, initrd_addr;
+    size_t ncmdline;
+    FILE *f, *fi;
+
+    /* Work on a private, NUL-terminated copy holding at most 4095 bytes of
+     * the command line, so the caller's const string is never written to. */
+    ncmdline = strlen(kernel_cmdline);
+    if (ncmdline > sizeof(cmdline_buf) - 1)
+        ncmdline = sizeof(cmdline_buf) - 1;
+    memcpy(cmdline_buf, kernel_cmdline, ncmdline);
+    cmdline_buf[ncmdline] = '\0';
+
+    /* Align to 16 bytes as a paranoia measure.  Derived from the truncated
+     * length so an over-long command line cannot push cmdline_addr down
+     * further than the bytes that are actually written. */
+    cmdline_size = (int)((ncmdline + 16) & ~(size_t)15);
+
+    /* load the kernel header; a size of zero or a failed size query is
+     * treated the same as an unreadable file */
+    f = fopen(kernel_filename, "rb");
+    if (!f || (kernel_size = (int)get_file_size(f)) <= 0 ||
+        fread(header, 1, 1024, f) != 1024) {
+        fprintf(stderr, "qemu: could not load kernel '%s'\n",
+                kernel_filename);
+        exit(1);
+    }
+
+    /* kernel protocol version: only present when the header carries the
+     * magic value 0x53726448 at offset 0x202 */
+    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
+    if (ldl_p(header+0x202) == 0x53726448)
+        protocol = lduw_p(header+0x206);
+    else
+        protocol = 0;
+    fprintf(stderr, "header protocol: %x\n", protocol);
+    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
+        /* Low kernel */
+        real_addr = 0x90000;
+        cmdline_addr = 0x9a000 - cmdline_size;
+        prot_addr = 0x10000;
+        reloc_prot_addr = prot_addr;
+    } else if (protocol < 0x202) {
+        /* High but ancient kernel */
+        real_addr = 0x90000;
+        cmdline_addr = 0x9a000 - cmdline_size;
+        prot_addr = 0x100000;
+        reloc_prot_addr = 0x200000;
+    } else {
+        /* High and recent kernel */
+        real_addr = 0x10000;
+        cmdline_addr = 0x20000;
+        prot_addr = 0x100000;
+        reloc_prot_addr = 0x200000;
+    }
+
+    /* NOTE(review): %#zx assumes target_phys_addr_t has the width of
+     * size_t -- confirm against the type's definition. */
+    fprintf(stderr,
+            "qemu: real_addr = %#zx\n"
+            "qemu: cmdline_addr = %#zx\n"
+            "qemu: prot_addr = %#zx\n",
+            real_addr,
+            cmdline_addr,
+            prot_addr);
+
+    /* highest address for loading the initrd */
+    if (protocol >= 0x203)
+        initrd_max = ldl_p(header+0x22c);
+    else
+        initrd_max = 0x37ffffff;
+
+    /* keep the initrd below the area reserved for the ACPI tables */
+    if (initrd_max >= ram_size-ACPI_DATA_SIZE)
+        initrd_max = ram_size-ACPI_DATA_SIZE-1;
+
+
+    /* kernel command line, including its terminating NUL */
+    fprintf(stderr, "qemu: kernel_cmdline: %#zx ('%s')\n", ncmdline, (char *)cmdline_buf);
+    cpu_physical_memory_rw(cmdline_addr, cmdline_buf, ncmdline+1, 1);
+
+    if (protocol >= 0x202) {
+        /* absolute address of the command line */
+        stl_p(header+0x228, cmdline_addr);
+    } else {
+        /* older protocols: magic plus offset relative to real_addr */
+        stw_p(header+0x20, 0xA33F);
+        stw_p(header+0x22, cmdline_addr-real_addr);
+    }
+
+    /* loader type */
+    /* High nybble = B reserved for Qemu; low nybble is revision number.
+       If this code is substantially changed, you may want to consider
+       incrementing the revision. */
+    if (protocol >= 0x200)
+        header[0x210] = 0xB0;
+
+    /* heap */
+    if (protocol >= 0x201) {
+        header[0x211] |= 0x80; /* CAN_USE_HEAP */
+        stw_p(header+0x224, cmdline_addr-real_addr-0x200);
+    }
+
+    /* load initrd */
+    if (initrd_filename) {
+        if (protocol < 0x200) {
+            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
+            exit(1);
+        }
+
+        fi = fopen(initrd_filename, "rb");
+        if (!fi) {
+            fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+                    initrd_filename);
+            exit(1);
+        }
+
+        /* Reject a failed size query or an image that does not fit below
+         * initrd_max; either would make the address computation wrap. */
+        initrd_size = (int)get_file_size(fi);
+        if (initrd_size < 0 || (uint32_t)initrd_size >= initrd_max) {
+            fprintf(stderr, "qemu: initial ram disk '%s' is unreadable or "
+                    "too large\n", initrd_filename);
+            exit(1);
+        }
+
+        /* place the image as high as allowed, on a 4096-byte boundary */
+        initrd_addr = ((initrd_max-initrd_size) & ~4095);
+
+        fprintf(stderr, "qemu: loading initrd (%#x bytes) at %#zx\n",
+                initrd_size, initrd_addr);
+
+        if (fread2guest(initrd_addr, initrd_size, fi) < 0) {
+            fprintf(stderr, "qemu: read error on initial ram disk '%s'\n",
+                    initrd_filename);
+            exit(1);
+        }
+        fclose(fi);
+
+        stl_p(header+0x218, initrd_addr);
+        stl_p(header+0x21c, initrd_size);
+    }
+
+
+    /* number of 512-byte setup sectors; 0 means 4 */
+    setup_size = header[0x1f1];
+    if (setup_size == 0)
+        setup_size = 4;
+
+    /* one extra sector is counted in addition to the setup sectors */
+    setup_size = (setup_size+1)*512;
+    kernel_size -= setup_size; /* Size of protected-mode code */
+
+    /* Urgh, Xen's HVM firmware lives at 0x100000, but that's also the
+     * address Linux wants to start life at prior to relocatable support
+     */
+    if (prot_addr != reloc_prot_addr) {
+        if (protocol >= 0x205 && (header[0x234] & 1)) {
+            /* Relocatable automatically */
+            stl_p(header+0x214, reloc_prot_addr);
+            fprintf(stderr, "qemu: kernel is relocatable\n");
+        } else {
+            fprintf(stderr, "qemu: unable to load non-relocatable kernel\n");
+            exit(1);
+        }
+    }
+
+
+    fprintf(stderr, "qemu: loading kernel real mode (%#x bytes) at %#zx\n",
+            setup_size-1024, real_addr);
+    fprintf(stderr, "qemu: loading kernel protected mode (%#x bytes) at %#zx\n",
+            kernel_size, reloc_prot_addr);
+
+    /* store the finalized header and load the rest of the kernel */
+    cpu_physical_memory_rw(real_addr, header, 1024, 1);
+    if (fread2guest(real_addr+1024, setup_size-1024, f) < 0 ||
+        fread2guest(reloc_prot_addr, kernel_size, f) < 0) {
+        fprintf(stderr, "qemu: read error on kernel '%s'\n",
+                kernel_filename);
+        exit(1);
+    }
+    fclose(f);
+
+    /* generate bootsector to set up the initial register state; every
+     * segment register except CS gets real_seg */
+    real_seg = (real_addr) >> 4;
+    seg[0] = seg[2] = seg[3] = seg[4] = seg[5] = real_seg;
+    seg[1] = real_seg+0x20; /* CS */
+    memset(gpr, 0, sizeof gpr);
+    gpr[4] = cmdline_addr-real_addr-16; /* SP (-16 is paranoia) */
+
+    generate_bootsect(gpr, seg, 0);
}
static void main_cpu_reset(void *opaque)
@@ -577,63 +810,8 @@ static void pc_init1(uint64_t ram_size, int vga_ram_size, char *boot_device,
bochs_bios_init();
-#ifndef CONFIG_DM
- if (linux_boot) {
- uint8_t bootsect[512];
- uint8_t old_bootsect[512];
-
- if (bs_table[0] == NULL) {
- fprintf(stderr, "A disk image must be given for 'hda' when booting a Linux kernel\n");
- exit(1);
- }
- snprintf(buf, sizeof(buf), "%s/%s", bios_dir, LINUX_BOOT_FILENAME);
- ret = load_image(buf, bootsect);
- if (ret != sizeof(bootsect)) {
- fprintf(stderr, "qemu: could not load linux boot sector '%s'\n",
- buf);
- exit(1);
- }
-
- if (bdrv_read(bs_table[0], 0, old_bootsect, 1) >= 0) {
- /* copy the MSDOS partition table */
- memcpy(bootsect + 0x1be, old_bootsect + 0x1be, 0x40);
- }
-
- bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
-
- /* now we can load the kernel */
- ret = load_kernel(kernel_filename,
- phys_ram_base + KERNEL_LOAD_ADDR,
- phys_ram_base + KERNEL_PARAMS_ADDR);
- if (ret < 0) {
- fprintf(stderr, "qemu: could not load kernel '%s'\n",
- kernel_filename);
- exit(1);
- }
-
- /* load initrd */
- initrd_size = 0;
- if (initrd_filename) {
- initrd_size = load_image(initrd_filename, phys_ram_base + INITRD_LOAD_ADDR);
- if (initrd_size < 0) {
- fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
- initrd_filename);
- exit(1);
- }
- }
- if (initrd_size > 0) {
- stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x218, INITRD_LOAD_ADDR);
- stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x21c, initrd_size);
- }
- pstrcpy(phys_ram_base + KERNEL_CMDLINE_ADDR, 4096,
- kernel_cmdline);
- stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x20, 0xA33F);
- stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x22,
- KERNEL_CMDLINE_ADDR - KERNEL_PARAMS_ADDR);
- /* loader type */
- stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x210, 0x01);
- }
-#endif /* !CONFIG_DM */
+ if (linux_boot)
+ load_linux(kernel_filename, initrd_filename, kernel_cmdline);
if (pci_enabled) {
pci_bus = i440fx_init(&i440fx_state);