aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEmmanuel Ackaouy <ack@xensource.com>2007-01-25 22:16:52 +0000
committerEmmanuel Ackaouy <ack@xensource.com>2007-01-25 22:16:52 +0000
commitb4feb14b3dd705b59ce7a2d66fe0879d54e408e8 (patch)
treed2786344b21060b576e5d9bc484652a1e14b16de
parenta62512128bdc7632982ddc0b3285703e5ab66c98 (diff)
downloadxen-b4feb14b3dd705b59ce7a2d66fe0879d54e408e8.tar.gz
xen-b4feb14b3dd705b59ce7a2d66fe0879d54e408e8.tar.bz2
xen-b4feb14b3dd705b59ce7a2d66fe0879d54e408e8.zip
libxc domain builder rewrite, core bits.
Signed-off-by: Gerd Hoffmann <kraxel@suse.de> --- tools/libxc/Makefile | 14 tools/libxc/xc_dom.h | 261 +++++++++++++ tools/libxc/xc_dom_binloader.c | 294 +++++++++++++++ tools/libxc/xc_dom_boot.c | 515 +++++++++++++++++++++++++++ tools/libxc/xc_dom_core.c | 773 +++++++++++++++++++++++++++++++++++++++++ tools/libxc/xc_dom_elfloader.c | 283 +++++++++++++++ tools/libxc/xc_dom_ia64.c | 118 ++++++ tools/libxc/xc_dom_powerpc64.c | 100 +++++ tools/libxc/xc_dom_x86.c | 559 +++++++++++++++++++++++++++++ 9 files changed, 2917 insertions(+)
-rw-r--r--tools/libxc/Makefile14
-rw-r--r--tools/libxc/xc_dom.h261
-rw-r--r--tools/libxc/xc_dom_binloader.c294
-rw-r--r--tools/libxc/xc_dom_boot.c515
-rw-r--r--tools/libxc/xc_dom_core.c773
-rw-r--r--tools/libxc/xc_dom_elfloader.c283
-rw-r--r--tools/libxc/xc_dom_ia64.c118
-rw-r--r--tools/libxc/xc_dom_powerpc64.c100
-rw-r--r--tools/libxc/xc_dom_x86.c559
9 files changed, 2917 insertions, 0 deletions
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index adebaf5b20..69d83e63bd 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -45,6 +45,20 @@ $(LIBELF_SRCS) libelf-private.h:
# add libelf bits to libxc
GUEST_SRCS-y += $(LIBELF_SRCS)
+# new domain builder
+GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c
+GUEST_SRCS-y += xc_dom_elfloader.c
+GUEST_SRCS-y += xc_dom_binloader.c
+
+ifeq ($(CONFIG_POWERPC),y)
+# big endian boxes
+GUEST_SRCS-y += xc_dom_powerpc64.c
+else
+# little endian boxes
+GUEST_SRCS-y += xc_dom_x86.c
+GUEST_SRCS-y += xc_dom_ia64.c
+endif
+
-include $(XEN_TARGET_ARCH)/Makefile
CFLAGS += -Werror -Wmissing-prototypes
diff --git a/tools/libxc/xc_dom.h b/tools/libxc/xc_dom.h
new file mode 100644
index 0000000000..b4f7727e79
--- /dev/null
+++ b/tools/libxc/xc_dom.h
@@ -0,0 +1,261 @@
+#include <xen/libelf.h>
+
+#define INVALID_P2M_ENTRY ((xen_pfn_t)-1)
+
+/* --- typedefs and structs ---------------------------------------- */
+
+typedef uint64_t xen_vaddr_t;
+typedef uint64_t xen_paddr_t;
+
+/* FIXME: temporary hack ... */
+#ifndef PRIpfn
+#define PRIpfn "lx"
+#endif
+
+/*
+ * One segment of the guest memory layout.  xc_dom_seg_to_ptr() treats
+ * vend - vstart as the segment size in bytes and pfn as the first guest
+ * page frame backing the segment.
+ */
+struct xc_dom_seg {
+ xen_vaddr_t vstart;
+ xen_vaddr_t vend;
+ xen_pfn_t pfn;
+};
+
+/*
+ * Node of the singly linked list anchored at xc_dom_image.memblocks
+ * (labelled "malloc memory pool" there).
+ * NOTE(review): presumably a node is either heap-backed (payload in
+ * memory[]) or mmap-backed (mmap_ptr/mmap_len) -- confirm against the
+ * allocator implementation.
+ */
+struct xc_dom_mem {
+ struct xc_dom_mem *next;
+ void *mmap_ptr;
+ size_t mmap_len;
+ unsigned char memory[0]; /* zero-length trailing array (GNU extension) */
+};
+
+/*
+ * Node of the singly linked list anchored at xc_dom_image.phys_pages
+ * (labelled "physical memory" there).
+ * NOTE(review): presumably one entry per mapped range of guest pages,
+ * with ptr being the local address of pages [first, first + count) --
+ * confirm against xc_dom_pfn_to_ptr().
+ */
+struct xc_dom_phys {
+ struct xc_dom_phys *next;
+ void *ptr;
+ xen_pfn_t first;
+ xen_pfn_t count;
+};
+
+/*
+ * State of one domain build.  Every xc_dom_* function declared in this
+ * header takes a pointer to this structure.
+ */
+struct xc_dom_image {
+ /* files */
+ void *kernel_blob;
+ size_t kernel_size;
+ void *ramdisk_blob;
+ size_t ramdisk_size;
+
+ /* arguments and parameters */
+ char *cmdline;
+ uint32_t f_requested[XENFEAT_NR_SUBMAPS];
+
+ /* info from (elf) kernel image */
+ struct elf_dom_parms parms;
+ char *guest_type;
+
+ /* memory layout */
+ struct xc_dom_seg kernel_seg;
+ struct xc_dom_seg ramdisk_seg;
+ struct xc_dom_seg p2m_seg;
+ struct xc_dom_seg pgtables_seg;
+ xen_pfn_t start_info_pfn;
+ xen_pfn_t console_pfn;
+ xen_pfn_t xenstore_pfn;
+ xen_pfn_t shared_info_pfn;
+ xen_pfn_t bootstack_pfn;
+ xen_vaddr_t virt_alloc_end;
+ xen_vaddr_t bsd_symtab_start;
+
+ /* initial page tables */
+ unsigned int pgtables;
+ unsigned int pg_l4;
+ unsigned int pg_l3;
+ unsigned int pg_l2;
+ unsigned int pg_l1;
+ unsigned int alloc_bootstack;
+ unsigned int extra_pages;
+ xen_vaddr_t virt_pgtab_end;
+
+ /* other state info */
+ uint32_t f_active[XENFEAT_NR_SUBMAPS];
+ xen_pfn_t *p2m_host; /* indexed by guest pfn, see xc_dom_p2m_host() */
+ void *p2m_guest;
+
+ /* physical memory */
+ xen_pfn_t total_pages;
+ struct xc_dom_phys *phys_pages;
+
+ /* malloc memory pool */
+ struct xc_dom_mem *memblocks;
+
+ /* memory footprint stats */
+ size_t alloc_malloc;
+ size_t alloc_mem_map;
+ size_t alloc_file_map;
+ size_t alloc_domU_map;
+
+ /* misc xen domain config stuff */
+ unsigned long flags;
+ unsigned int console_evtchn;
+ unsigned int xenstore_evtchn;
+ xen_pfn_t shared_info_mfn;
+
+ int guest_xc; /* set by xc_dom_boot_xen_init() */
+ domid_t guest_domid; /* set by xc_dom_boot_xen_init() */
+ int shadow_enabled; /* non-zero: xc_dom_p2m_host() is the identity */
+
+ int xen_version;
+ xen_capabilities_info_t xen_caps;
+
+ /* kernel loader */
+ struct xc_dom_loader *kernel_loader;
+ void *private_loader;
+
+ /* arch hooks */
+ struct xc_dom_arch *arch_hooks;
+};
+
+/* --- pluggable kernel loader ------------------------------------- */
+
+/*
+ * Description of one kernel image format, handed to
+ * xc_dom_register_loader().  The three hooks all receive the image being
+ * built and return an int; the binary loader in this patch returns 0 on
+ * success and -EINVAL on failure from each of them.
+ */
+struct xc_dom_loader {
+ char *name;
+ int (*probe) (struct xc_dom_image * dom);
+ int (*parser) (struct xc_dom_image * dom);
+ int (*loader) (struct xc_dom_image * dom);
+
+ /* NOTE(review): presumably list linkage owned by the registry -- confirm */
+ struct xc_dom_loader *next;
+};
+
+#define __init __attribute__ ((constructor))
+void xc_dom_register_loader(struct xc_dom_loader *loader);
+
+/* --- arch specific hooks ----------------------------------------- */
+
+/*
+ * Per-architecture hooks and constants, handed to
+ * xc_dom_register_arch_hooks().  page_shift feeds the XC_DOM_PAGE_SHIFT()
+ * and XC_DOM_PAGE_SIZE() macros below.
+ */
+struct xc_dom_arch {
+ /* pagetable setup */
+ int (*alloc_magic_pages) (struct xc_dom_image * dom);
+ int (*count_pgtables) (struct xc_dom_image * dom);
+ int (*setup_pgtables) (struct xc_dom_image * dom);
+
+ /* arch-specific data structs setup */
+ int (*start_info) (struct xc_dom_image * dom);
+ int (*shared_info) (struct xc_dom_image * dom, void *shared_info);
+ int (*vcpu) (struct xc_dom_image * dom, void *vcpu_ctxt);
+
+ char *guest_type;
+ int page_shift;
+ int sizeof_pfn;
+
+ /* NOTE(review): presumably list linkage owned by the registry -- confirm */
+ struct xc_dom_arch *next;
+};
+void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks);
+
+#define XC_DOM_PAGE_SHIFT(dom) ((dom)->arch_hooks->page_shift)
+#define XC_DOM_PAGE_SIZE(dom) (1 << (dom)->arch_hooks->page_shift)
+
+/* --- main functions ---------------------------------------------- */
+
+struct xc_dom_image *xc_dom_allocate(const char *cmdline, const char *features);
+void xc_dom_release_phys(struct xc_dom_image *dom);
+void xc_dom_release(struct xc_dom_image *dom);
+int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
+
+size_t xc_dom_check_gzip(void *blob, size_t ziplen);
+int xc_dom_do_gunzip(void *src, size_t srclen, void *dst, size_t dstlen);
+int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size);
+
+int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename);
+int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename);
+int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
+ size_t memsize);
+int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem,
+ size_t memsize);
+
+int xc_dom_parse_image(struct xc_dom_image *dom);
+int xc_dom_build_image(struct xc_dom_image *dom);
+int xc_dom_update_guest_p2m(struct xc_dom_image *dom);
+
+int xc_dom_boot_xen_init(struct xc_dom_image *dom, int xc, domid_t domid);
+int xc_dom_boot_mem_init(struct xc_dom_image *dom);
+void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
+ xen_pfn_t count);
+int xc_dom_boot_image(struct xc_dom_image *dom);
+int xc_dom_compat_check(struct xc_dom_image *dom);
+
+/* --- debugging bits ---------------------------------------------- */
+
+extern FILE *xc_dom_logfile;
+
+void xc_dom_loginit(void);
+int xc_dom_printf(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
+int xc_dom_panic_func(const char *file, int line, xc_error_code err,
+ const char *fmt, ...)
+ __attribute__ ((format(printf, 4, 5)));
+#define xc_dom_panic(err, fmt, args...) \
+ xc_dom_panic_func(__FILE__, __LINE__, err, fmt, ## args)
+#define xc_dom_trace(mark) \
+ xc_dom_printf("%s:%d: trace %s\n", __FILE__, __LINE__, mark)
+
+void xc_dom_log_memory_footprint(struct xc_dom_image *dom);
+
+/* --- simple memory pool ------------------------------------------ */
+
+void *xc_dom_malloc(struct xc_dom_image *dom, size_t size);
+void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size);
+void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
+ const char *filename, size_t * size);
+char *xc_dom_strdup(struct xc_dom_image *dom, const char *str);
+
+/* --- alloc memory pool ------------------------------------------- */
+
+int xc_dom_alloc_page(struct xc_dom_image *dom, char *name);
+int xc_dom_alloc_segment(struct xc_dom_image *dom,
+ struct xc_dom_seg *seg, char *name,
+ xen_vaddr_t start, xen_vaddr_t size);
+
+/* --- misc bits --------------------------------------------------- */
+
+void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t first,
+ xen_pfn_t count);
+void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn);
+void xc_dom_unmap_all(struct xc_dom_image *dom);
+
+/*
+ * Return a local pointer to the memory of a whole segment: the segment
+ * size is rounded up to full pages and that many pages, starting at
+ * seg->pfn, are requested from xc_dom_pfn_to_ptr().
+ */
+static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom,
+                                      struct xc_dom_seg *seg)
+{
+    const unsigned int pgsz = XC_DOM_PAGE_SIZE(dom);
+    const xen_vaddr_t bytes = seg->vend - seg->vstart;
+    const xen_pfn_t npages = (bytes + pgsz - 1) / pgsz;
+
+    return xc_dom_pfn_to_ptr(dom, seg->pfn, npages);
+}
+
+/*
+ * Translate a guest virtual address into a local pointer.  The address
+ * is taken relative to parms.virt_base, split into page number and
+ * in-page offset, and the page is looked up via xc_dom_pfn_to_ptr()
+ * (called with a count of 0).  Returns NULL if that lookup fails.
+ */
+static inline void *xc_dom_vaddr_to_ptr(struct xc_dom_image *dom,
+                                        xen_vaddr_t vaddr)
+{
+    const unsigned int pgsz = XC_DOM_PAGE_SIZE(dom);
+    const xen_vaddr_t rel = vaddr - dom->parms.virt_base;
+    const unsigned int in_page = rel % pgsz;
+    void *base = xc_dom_pfn_to_ptr(dom, rel / pgsz, 0);
+
+    return base ? base + in_page : NULL;
+}
+
+/* Report whether XENFEAT_auto_translated_physmap is set in dom->f_active. */
+static inline int xc_dom_feature_translated(struct xc_dom_image *dom)
+{
+    const int translated =
+        elf_xen_feature_get(XENFEAT_auto_translated_physmap, dom->f_active);
+
+    return translated;
+}
+
+/*
+ * Frame number to use from the host side for guest page pfn: the pfn
+ * itself when shadow mode is enabled, the p2m_host[] entry otherwise.
+ */
+static inline xen_pfn_t xc_dom_p2m_host(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    return dom->shadow_enabled ? pfn : dom->p2m_host[pfn];
+}
+
+/*
+ * Frame number the guest itself uses for page pfn: the pfn itself for
+ * auto-translated guests, the p2m_host[] entry otherwise.
+ */
+static inline xen_pfn_t xc_dom_p2m_guest(struct xc_dom_image *dom,
+                                         xen_pfn_t pfn)
+{
+    return xc_dom_feature_translated(dom) ? pfn : dom->p2m_host[pfn];
+}
+
+/* --- arch bits --------------------------------------------------- */
+
diff --git a/tools/libxc/xc_dom_binloader.c b/tools/libxc/xc_dom_binloader.c
new file mode 100644
index 0000000000..dfaef31f27
--- /dev/null
+++ b/tools/libxc/xc_dom_binloader.c
@@ -0,0 +1,294 @@
+/******************************************************************************
+ *
+ * Loads simple binary images. It's like a .COM file in MS-DOS. No headers are
+ * present. The only requirement is that it must have a xen_bin_image table
+ * somewhere in the first 8192 bytes, starting on a 32-bit aligned address.
+ * Those familiar with the multiboot specification should recognize this, it's
+ * (almost) the same as the multiboot header.
+ * The layout of the xen_bin_image table is:
+ *
+ * Offset Type Name Note
+ * 0 uint32_t magic required
+ * 4 uint32_t flags required
+ * 8 uint32_t checksum required
+ * 12 uint32_t header_addr required
+ * 16 uint32_t load_addr required
+ * 20 uint32_t load_end_addr required
+ * 24 uint32_t bss_end_addr required
+ * 28 uint32_t entry_addr required
+ *
+ * - magic
+ * Magic number identifying the table. For images to be loaded by Xen 3, the
+ * magic value is 0x336ec578 ("xEn3" with the 0x80 bit of the "E" set).
+ * - flags
+ * bit 0: indicates whether the image needs to be loaded on a page boundary
+ * bit 1: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ * that memory info should be passed to the image)
+ * bit 2: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ * that the bootloader should pass video mode info to the image)
+ * bit 16: reserved, must be 1 (the multiboot spec uses this bit to indicate
+ * that the values in the fields header_addr - entry_addr are
+ * valid)
+ * All other bits should be set to 0.
+ * - checksum
+ * When added to "magic" and "flags", the resulting value should be 0.
+ * - header_addr
+ * Contains the virtual address corresponding to the beginning of the
+ * table - the memory location at which the magic value is supposed to be
+ * loaded. This field serves to synchronize the mapping between OS image
+ * offsets and virtual memory addresses.
+ * - load_addr
+ * Contains the virtual address of the beginning of the text segment. The
+ * offset in the OS image file at which to start loading is defined by the
+ * offset at which the table was found, minus (header addr - load addr).
+ * load addr must be less than or equal to header addr.
+ * - load_end_addr
+ * Contains the virtual address of the end of the data segment.
+ * (load_end_addr - load_addr) specifies how much data to load. This implies
+ * that the text and data segments must be consecutive in the OS image. If
+ * this field is zero, the domain builder assumes that the text and data
+ * segments occupy the whole OS image file.
+ * - bss_end_addr
+ * Contains the virtual address of the end of the bss segment. The domain
+ * builder initializes this area to zero, and reserves the memory it occupies
+ * to avoid placing boot modules and other data relevant to the loaded image
+ * in that area. If this field is zero, the domain builder assumes that no bss
+ * segment is present.
+ * - entry_addr
+ * The virtual address at which to start execution of the loaded image.
+ *
+ * Some of the field descriptions were copied from "The Multiboot
+ * Specification", Copyright 1995, 96 Bryan Ford <baford@cs.utah.edu>,
+ * Erich Stefan Boleyn <erich@uruk.org> Copyright 1999, 2000, 2001, 2002
+ * Free Software Foundation, Inc.
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE_X86-1))&PAGE_MASK_X86)
+#define round_pgdown(_p) ((_p)&PAGE_MASK_X86)
+
+/*
+ * In-image header located by find_table(); layout and field meaning are
+ * described in the comment at the top of this file.
+ */
+struct xen_bin_image_table
+{
+ uint32_t magic; /* XEN_MULTIBOOT_MAGIC3 */
+ uint32_t flags; /* XEN_MULTIBOOT_FLAG_* bits */
+ uint32_t checksum; /* magic + flags + checksum == 0 */
+ uint32_t header_addr; /* virtual address of this table */
+ uint32_t load_addr; /* virtual address of start of text */
+ uint32_t load_end_addr; /* virtual end of data, 0: whole image */
+ uint32_t bss_end_addr; /* virtual end of bss, 0: no bss */
+ uint32_t entry_addr; /* virtual address of the entry point */
+};
+
+#define XEN_MULTIBOOT_MAGIC3 0x336ec578
+
+#define XEN_MULTIBOOT_FLAG_ALIGN4K 0x00000001
+#define XEN_MULTIBOOT_FLAG_NEEDMEMINFO 0x00000002
+#define XEN_MULTIBOOT_FLAG_NEEDVIDINFO 0x00000004
+#define XEN_MULTIBOOT_FLAG_ADDRSVALID 0x00010000
+#define XEN_MULTIBOOT_FLAG_PAE_SHIFT 14
+#define XEN_MULTIBOOT_FLAG_PAE_MASK (3 << XEN_MULTIBOOT_FLAG_PAE_SHIFT)
+
+/* Flags we test for */
+#define FLAGS_MASK ((~ 0) & (~ XEN_MULTIBOOT_FLAG_ALIGN4K) & \
+ (~ XEN_MULTIBOOT_FLAG_PAE_MASK))
+#define FLAGS_REQUIRED XEN_MULTIBOOT_FLAG_ADDRSVALID
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Search the kernel blob for a xen_bin_image_table.
+ *
+ * Every 32-bit aligned offset is probed, up to 8192 bytes into the image
+ * and never so far that the table would extend past the end of the blob.
+ * A candidate is accepted when its magic matches and magic + flags +
+ * checksum sum to zero.
+ *
+ * Returns a pointer into dom->kernel_blob, or NULL if no table is found.
+ */
+static struct xen_bin_image_table *find_table(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *table;
+    uint32_t *probe_ptr;
+    uint32_t *probe_end;
+
+    /* An image shorter than the table cannot contain one.  Bailing out
+     * here also keeps the probe_end computation below from producing a
+     * pointer in front of the blob. */
+    if (dom->kernel_size < sizeof(*table))
+        return NULL;
+
+    probe_ptr = dom->kernel_blob;
+    probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table);
+    if ((void*)probe_end > dom->kernel_blob + 8192)
+        probe_end = dom->kernel_blob + 8192;
+
+    for (table = NULL; probe_ptr < probe_end; probe_ptr++)
+    {
+        if (XEN_MULTIBOOT_MAGIC3 == *probe_ptr)
+        {
+            table = (struct xen_bin_image_table *) probe_ptr;
+            /* Checksum correct? */
+            if (0 == table->magic + table->flags + table->checksum)
+            {
+                return table;
+            }
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Probe hook: the image is accepted (0) when find_table() locates a
+ * valid table, rejected (-EINVAL) otherwise.
+ */
+static int xc_dom_probe_bin_kernel(struct xc_dom_image *dom)
+{
+    return find_table(dom) ? 0 : -EINVAL;
+}
+
+/*
+ * Parser hook: validate the xen_bin_image_table and derive the kernel
+ * segment, virt_base, entry point and guest type from it.
+ *
+ * Returns 0 on success.  Returns -EINVAL if no table is found, or (after
+ * reporting through xc_dom_panic) if the flags or any address field
+ * fails its sanity check.
+ */
+static int xc_dom_parse_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    size_t image_size = dom->kernel_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t pae_flags;
+
+    image_info = find_table(dom);
+    if (!image_info)
+        return -EINVAL;
+
+    xc_dom_printf("%s: multiboot header fields\n", __FUNCTION__);
+    xc_dom_printf(" flags: 0x%" PRIx32 "\n", image_info->flags);
+    xc_dom_printf(" header_addr: 0x%" PRIx32 "\n", image_info->header_addr);
+    xc_dom_printf(" load_addr: 0x%" PRIx32 "\n", image_info->load_addr);
+    xc_dom_printf(" load_end_addr: 0x%" PRIx32 "\n", image_info->load_end_addr);
+    xc_dom_printf(" bss_end_addr: 0x%" PRIx32 "\n", image_info->bss_end_addr);
+    xc_dom_printf(" entry_addr: 0x%" PRIx32 "\n", image_info->entry_addr);
+
+    /* Check the flags */
+    if ( FLAGS_REQUIRED != (image_info->flags & FLAGS_MASK) )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL,
+                     "%s: xen_bin_image_table flags required "
+                     "0x%08" PRIx32 " found 0x%08" PRIx32 "\n",
+                     __FUNCTION__, FLAGS_REQUIRED, image_info->flags & FLAGS_MASK);
+        return -EINVAL;
+    }
+
+    /* Sanity check on the addresses: the table must not sit below
+     * load_addr, and the part of the image in front of the table must be
+     * large enough to reach back to load_addr. */
+    if ( image_info->header_addr < image_info->load_addr ||
+         ((char *) image_info - image) <
+         (image_info->header_addr - image_info->load_addr) )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid header_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Zero in load_end_addr / bss_end_addr selects the defaults "whole
+     * image" / "no bss". */
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    xc_dom_printf("%s: calculated addresses\n", __FUNCTION__);
+    xc_dom_printf(" start_addr: 0x%" PRIx32 "\n", start_addr);
+    xc_dom_printf(" load_end_addr: 0x%" PRIx32 "\n", load_end_addr);
+    xc_dom_printf(" bss_end_addr: 0x%" PRIx32 "\n", bss_end_addr);
+
+    if ( start_addr + image_size < load_end_addr )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid load_end_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* The loader copies load_end_addr - load_addr bytes; an end in front
+     * of the start would make that unsigned difference wrap around. */
+    if ( load_end_addr < image_info->load_addr )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid load_end_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ( bss_end_addr < load_end_addr)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid bss_end_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    dom->kernel_seg.vstart = image_info->load_addr;
+    dom->kernel_seg.vend = bss_end_addr;
+    dom->parms.virt_base = start_addr;
+    dom->parms.virt_entry = image_info->entry_addr;
+
+    /* The two PAE flag bits select the guest type; all four values are
+     * handled below. */
+    pae_flags = image_info->flags & XEN_MULTIBOOT_FLAG_PAE_MASK;
+    switch (pae_flags >> XEN_MULTIBOOT_FLAG_PAE_SHIFT) {
+    case 0:
+        dom->guest_type = "xen-3.0-x86_32";
+        break;
+    case 1:
+        dom->guest_type = "xen-3.0-x86_32p";
+        break;
+    case 2:
+        dom->guest_type = "xen-3.0-x86_64";
+        break;
+    case 3:
+        /* Kernel detects PAE at runtime. So try to figure out whether
+         * xen supports PAE and advertise a PAE-capable kernel in case
+         * it does. */
+        dom->guest_type = "xen-3.0-x86_32";
+        if (strstr(dom->xen_caps, "xen-3.0-x86_32p")) {
+            xc_dom_printf("%s: PAE fixup\n", __FUNCTION__);
+            dom->guest_type = "xen-3.0-x86_32p";
+            dom->parms.pae = 2;
+        }
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Loader hook: copy the text/data part of the image to the start of the
+ * kernel segment and zero the bss that follows it.
+ *
+ * The addresses are recomputed from the table exactly as in the parser.
+ * Returns 0 on success, -EINVAL if the table is missing, if the image
+ * does not contain as many bytes as the table claims, or if the kernel
+ * segment cannot be mapped.
+ */
+static int xc_dom_load_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    char *dest;
+    size_t image_size = dom->kernel_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t skip, text_size, bss_size;
+
+    image_info = find_table(dom);
+    if (!image_info)
+        return -EINVAL;
+
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    /* It's possible that we need to skip the first part of the image */
+    skip = image_info->load_addr - start_addr;
+    text_size = load_end_addr - image_info->load_addr;
+    bss_size = bss_end_addr - load_end_addr;
+
+    xc_dom_printf("%s: calculated sizes\n", __FUNCTION__);
+    xc_dom_printf(" skip: 0x%" PRIx32 "\n", skip);
+    xc_dom_printf(" text_size: 0x%" PRIx32 "\n", text_size);
+    xc_dom_printf(" bss_size: 0x%" PRIx32 "\n", bss_size);
+
+    /* Never read past the end of the blob.  This also catches a
+     * text_size that wrapped around because load_end_addr lies in front
+     * of load_addr. */
+    if (image_size < skip || image_size - skip < text_size)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL,
+                     "%s: image is too small for declared text size\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart);
+    if (!dest)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: can't map kernel segment\n", __FUNCTION__);
+        return -EINVAL;
+    }
+    memcpy(dest, image + skip, text_size);
+    memset(dest + text_size, 0, bss_size);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Loader description for the headerless binary image format above. */
+static struct xc_dom_loader bin_loader = {
+ .name = "multiboot-binary",
+ .probe = xc_dom_probe_bin_kernel,
+ .parser = xc_dom_parse_bin_kernel,
+ .loader = xc_dom_load_bin_kernel,
+};
+
+/*
+ * __init expands to __attribute__((constructor)) (see xc_dom.h), so the
+ * loader is registered without an explicit call from the library.
+ */
+static void __init register_loader(void)
+{
+ xc_dom_register_loader(&bin_loader);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * End:
+ */
diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
new file mode 100644
index 0000000000..36d6ef3e4d
--- /dev/null
+++ b/tools/libxc/xc_dom_boot.c
@@ -0,0 +1,515 @@
+/*
+ * Xen domain builder -- xen booter.
+ *
+ * This is the code which actually boots a fresh
+ * prepared domain image as xen guest domain.
+ *
+ * ==> this is the only domain builder code piece
+ * where xen hypercalls are allowed <==
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+#include <xen/hvm/params.h>
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Ask xen to initialise the guest's hypercall page.
+ *
+ * Nothing is done (and 0 returned) when parms.virt_hypercall is -1.
+ * Otherwise the page's pfn is derived from its offset to virt_base and
+ * passed, translated by xc_dom_p2m_guest(), to XEN_DOMCTL_hypercall_init.
+ * Returns the do_domctl() result.
+ */
+static int setup_hypercall_page(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    xen_pfn_t hc_pfn;
+    int err;
+
+    if (dom->parms.virt_hypercall == -1)
+        return 0;
+
+    hc_pfn = (dom->parms.virt_hypercall - dom->parms.virt_base)
+        >> XC_DOM_PAGE_SHIFT(dom);
+    xc_dom_printf("%s: vaddr=0x%" PRIx64 " pfn=0x%" PRIpfn "\n", __FUNCTION__,
+                  dom->parms.virt_hypercall, hc_pfn);
+
+    domctl.cmd = XEN_DOMCTL_hypercall_init;
+    domctl.domain = dom->guest_domid;
+    domctl.u.hypercall_init.gmfn = xc_dom_p2m_guest(dom, hc_pfn);
+
+    err = do_domctl(dom->guest_xc, &domctl);
+    if (err == 0)
+        return 0;
+
+    xc_dom_panic(XC_INTERNAL_ERROR, "%s: HYPERCALL_INIT failed (rc=%d)\n",
+                 __FUNCTION__, err);
+    return err;
+}
+
+/*
+ * Hand the prepared context for vcpu 0 to xen via
+ * XEN_DOMCTL_setvcpucontext.  Returns the do_domctl() result; a failure
+ * is additionally reported through xc_dom_panic.
+ */
+static int launch_vm(int xc, domid_t domid, void *ctxt)
+{
+    DECLARE_DOMCTL;
+    int err;
+
+    xc_dom_printf("%s: called, ctxt=%p\n", __FUNCTION__, ctxt);
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    domctl.cmd = XEN_DOMCTL_setvcpucontext;
+    domctl.u.vcpucontext.vcpu = 0;
+    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, ctxt);
+
+    err = do_domctl(xc, &domctl);
+    if (err == 0)
+        return 0;
+
+    xc_dom_panic(XC_INTERNAL_ERROR,
+                 "%s: SETVCPUCONTEXT failed (rc=%d)\n", __FUNCTION__, err);
+    return err;
+}
+
+/*
+ * Zero one guest page through xc_clear_domain_page().  A pfn of 0 is
+ * skipped and reported as success.  Returns the result of
+ * xc_clear_domain_page().
+ */
+static int clear_page(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    xen_pfn_t target;
+    int err;
+
+    if (pfn == 0)
+        return 0;
+
+    target = xc_dom_p2m_host(dom, pfn);
+    xc_dom_printf("%s: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "\n",
+                  __FUNCTION__, pfn, target);
+
+    err = xc_clear_domain_page(dom->guest_xc, dom->guest_domid, target);
+    if (err == 0)
+        return 0;
+
+    xc_dom_panic(XC_INTERNAL_ERROR,
+                 "%s: xc_clear_domain_page failed (pfn 0x%" PRIpfn
+                 ", rc=%d)\n", __FUNCTION__, pfn, err);
+    return err;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: x86 bits */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+
+/*
+ * Switch the domain between compat and native mode according to the
+ * guest type.  Guest types without a table entry need no domctl and
+ * yield 0.  A failing domctl is only logged as a warning; its result is
+ * still returned.  Without XEN_DOMCTL_set_compat in the headers the
+ * function merely logs that fact and returns 0.
+ */
+static int x86_compat(int xc, domid_t domid, char *guest_type)
+{
+#ifndef XEN_DOMCTL_set_compat
+    xc_dom_printf("%s: compiled without compat/native switching\n", __FUNCTION__);
+    return 0;
+#else
+    static const struct {
+        char *guest;
+        unsigned long cmd;
+    } switch_cmds[] = {
+        { "xen-3.0-x86_32p", XEN_DOMCTL_set_compat },
+        { "xen-3.0-x86_64", XEN_DOMCTL_set_native },
+    };
+    DECLARE_DOMCTL;
+    size_t idx;
+    int err;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+
+    /* cmd stays 0 (from the memset) unless a table entry matches */
+    for (idx = 0; idx < sizeof(switch_cmds)/sizeof(switch_cmds[0]); idx++)
+    {
+        if (strcmp(switch_cmds[idx].guest, guest_type) == 0)
+            domctl.cmd = switch_cmds[idx].cmd;
+    }
+    if (domctl.cmd == 0)
+        return 0;
+
+    xc_dom_printf("%s: guest %s, cmd %d\n", __FUNCTION__,
+                  guest_type, domctl.cmd);
+    err = do_domctl(xc, &domctl);
+    if (err != 0)
+        xc_dom_printf("%s: warning: failed (rc=%d)\n",
+                      __FUNCTION__, err);
+    return err;
+#endif /* XEN_DOMCTL_set_compat */
+}
+
+
+/*
+ * Enable shadow mode (refcount + translate) for the domain.  Returns the
+ * xc_shadow_control() result; a failure is reported via xc_dom_panic.
+ */
+static int x86_shadow(int xc, domid_t domid)
+{
+    const int mode = XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT |
+        XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE;
+    int err;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    err = xc_shadow_control(xc, domid,
+                            XEN_DOMCTL_SHADOW_OP_ENABLE,
+                            NULL, 0, NULL, mode, NULL);
+    if (err == 0)
+    {
+        xc_dom_printf("%s: shadow enabled (mode=0x%x)\n", __FUNCTION__, mode);
+        return 0;
+    }
+
+    xc_dom_panic(XC_INTERNAL_ERROR,
+                 "%s: SHADOW_OP_ENABLE (mode=0x%x) failed (rc=%d)\n",
+                 __FUNCTION__, mode, err);
+    return err;
+}
+
+/*
+ * x86 setup done before guest memory is populated: select compat/native
+ * mode (result deliberately ignored) and, for auto-translated guests,
+ * mark shadow mode as enabled and turn it on.  Returns 0, or the
+ * x86_shadow() result for auto-translated guests.
+ */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    x86_compat(dom->guest_xc, dom->guest_domid, dom->guest_type);
+
+    if (!xc_dom_feature_translated(dom))
+        return 0;
+
+    dom->shadow_enabled = 1;
+    return x86_shadow(dom->guest_xc, dom->guest_domid);
+}
+
+/*
+ * x86 setup done at the end of xc_dom_boot_image().
+ *
+ * Non-translated guests get their top-level page table pinned.
+ * Auto-translated guests get the shared info frame and the grant table
+ * frames added to their physmap.  In both cases the shared info page is
+ * then mapped locally and handed to the arch shared_info hook.
+ *
+ * Returns 0 on success, the failing call's rc after xc_dom_panic, or -1
+ * if the shared info page cannot be mapped.
+ */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+ /* guest type -> pin command for the top-level page table */
+ static const struct {
+ char *guest;
+ unsigned long pgd_type;
+ } types[] = {
+ { "xen-3.0-x86_32", MMUEXT_PIN_L2_TABLE},
+ { "xen-3.0-x86_32p", MMUEXT_PIN_L3_TABLE},
+ { "xen-3.0-x86_64", MMUEXT_PIN_L4_TABLE},
+ };
+ /* NOTE(review): stays 0 when guest_type matches no table entry */
+ unsigned long pgd_type = 0;
+ shared_info_t *shared_info;
+ xen_pfn_t shinfo;
+ int i, rc;
+
+ for (i = 0; i < sizeof(types) / sizeof(types[0]); i++)
+ if (0 == strcmp(types[i].guest, dom->guest_type))
+ pgd_type = types[i].pgd_type;
+
+ if (!xc_dom_feature_translated(dom))
+ {
+ /* paravirtualized guest */
+ /* the local mapping of the first page table page is dropped
+ * before the table is pinned */
+ xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
+ rc = pin_table(dom->guest_xc, pgd_type,
+ xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
+ dom->guest_domid);
+ if (0 != rc)
+ {
+ xc_dom_panic(XC_INTERNAL_ERROR,
+ "%s: pin_table failed (pfn 0x%" PRIpfn ", rc=%d)\n",
+ __FUNCTION__, dom->pgtables_seg.pfn, rc);
+ return rc;
+ }
+ /* shared info is mapped below by its machine frame number */
+ shinfo = dom->shared_info_mfn;
+ }
+ else
+ {
+ /* paravirtualized guest with auto-translation */
+ struct xen_add_to_physmap xatp;
+ int i; /* NOTE(review): shadows the function-level i */
+
+ /* Map shared info frame into guest physmap. */
+ xatp.domid = dom->guest_domid;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.idx = 0;
+ xatp.gpfn = dom->shared_info_pfn;
+ rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
+ if (rc != 0)
+ {
+ xc_dom_panic(XC_INTERNAL_ERROR, "%s: mapping shared_info failed "
+ "(pfn=0x%" PRIpfn ", rc=%d)\n",
+ __FUNCTION__, xatp.gpfn, rc);
+ return rc;
+ }
+
+ /* Map grant table frames into guest physmap. */
+ /* Frames are placed right behind the guest's memory, at
+ * total_pages + i.  The loop has no upper bound: it ends at the
+ * first failure, which counts as the regular end of the grant
+ * table if at least one frame was mapped and errno is EINVAL. */
+ for (i = 0;; i++)
+ {
+ xatp.domid = dom->guest_domid;
+ xatp.space = XENMAPSPACE_grant_table;
+ xatp.idx = i;
+ xatp.gpfn = dom->total_pages + i;
+ rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
+ if (rc != 0)
+ {
+ if (i > 0 && errno == EINVAL)
+ {
+ xc_dom_printf("%s: %d grant tables mapped\n", __FUNCTION__,
+ i);
+ break;
+ }
+ xc_dom_panic(XC_INTERNAL_ERROR,
+ "%s: mapping grant tables failed " "(pfn=0x%"
+ PRIpfn ", rc=%d)\n", __FUNCTION__, xatp.gpfn, rc);
+ return rc;
+ }
+ }
+ /* shared info is mapped below by its guest pfn */
+ shinfo = dom->shared_info_pfn;
+ }
+
+ /* setup shared_info page */
+ xc_dom_printf("%s: shared_info: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "\n",
+ __FUNCTION__, dom->shared_info_pfn, dom->shared_info_mfn);
+ shared_info = xc_map_foreign_range(dom->guest_xc, dom->guest_domid,
+ PAGE_SIZE_X86,
+ PROT_READ | PROT_WRITE,
+ shinfo);
+ if (NULL == shared_info)
+ return -1;
+ /* NOTE(review): the hook's return value is not checked */
+ dom->arch_hooks->shared_info(dom, shared_info);
+ munmap(shared_info, PAGE_SIZE_X86);
+
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: ia64 */
+
+#elif defined(__ia64__)
+
+/*
+ * ia64 setup done before guest memory is populated: issue
+ * XEN_DOMCTL_arch_setup with the address just behind the start_info
+ * structure as bp and the guest memory size as maxmem.  Returns the
+ * do_domctl() result.
+ */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+
+    xc_dom_printf("%s: setup firmware\n", __FUNCTION__);
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = dom->guest_domid;
+    domctl.cmd = XEN_DOMCTL_arch_setup;
+    domctl.u.arch_setup.flags = 0;
+    domctl.u.arch_setup.maxmem = dom->total_pages << PAGE_SHIFT;
+    domctl.u.arch_setup.bp = (dom->start_info_pfn << PAGE_SHIFT)
+        + sizeof(start_info_t);
+
+    return do_domctl(dom->guest_xc, &domctl);
+}
+
+/* ia64 needs no late setup: log that fact and report success. */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+ xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: powerpc */
+
+#elif defined(__powerpc64__)
+
+/* powerpc64 needs no early setup: log that fact and report success. */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+ xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+ return 0;
+}
+
+/*
+ * powerpc64 late setup.  Device tree setup is still a TODO (the call is
+ * compiled out), so apart from looking up the start_info page this only
+ * logs a message.  Always returns 0.
+ */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    start_info_t *si =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+
+    xc_dom_printf("%s: TODO: setup devtree\n", __FUNCTION__);
+
+#if 0
+    load_devtree(dom->guest_xc,
+                 dom->guest_domid,
+                 dom->p2m_host,
+                 devtree, // FIXME
+                 devtree_addr, // FIXME
+                 dom->ramdisk_seg.vstart,
+                 dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart,
+                 si,
+                 dom->start_info_pfn << PAGE_SHIFT);
+#endif
+    /* si is only consumed by the disabled load_devtree() call above;
+     * keep -Werror builds free of an unused-variable warning. */
+    (void)si;
+
+    /* There is nothing that can fail yet.  The original returned an
+     * undeclared "rc" here, which does not compile. */
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: other */
+
+#else
+
+/* Fallback for other architectures: log and report success. */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+ xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+ return 0;
+}
+
+/* Fallback for other architectures: log and report success. */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+ xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+ return 0;
+}
+
+#endif /* arch stuff */
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Check that the hypervisor can run the guest type of this image.
+ *
+ * dom->xen_caps is treated as a space-separated list; every entry is
+ * logged and compared against dom->guest_type.  Returns the number of
+ * matching entries, i.e. 0 (after xc_dom_panic) if the guest type is not
+ * supported.
+ */
+int xc_dom_compat_check(struct xc_dom_image *dom)
+{
+    xen_capabilities_info_t caps_copy;
+    char *tok, *save;
+    int hits = 0;
+
+    /* strtok_r() modifies its input, so tokenize a private copy */
+    strcpy(caps_copy, dom->xen_caps);
+
+    tok = strtok_r(caps_copy, " ", &save);
+    while (tok != NULL)
+    {
+        const int is_match = (strcmp(dom->guest_type, tok) == 0);
+
+        xc_dom_printf("%s: supported guest type: %s%s\n", __FUNCTION__,
+                      tok, is_match ? " <= matches" : "");
+        if (is_match)
+            hits++;
+        tok = strtok_r(NULL, " ", &save);
+    }
+
+    if (hits == 0)
+        xc_dom_panic(XC_INVALID_KERNEL,
+                     "%s: guest type %s not supported by xen kernel, sorry\n",
+                     __FUNCTION__, dom->guest_type);
+
+    return hits;
+}
+
+/*
+ * Bind the image to a xen control handle and domain id, and cache the
+ * hypervisor's version and capability string in the image.
+ *
+ * Returns 0 on success, -1 (after xc_dom_panic) if the capabilities
+ * cannot be queried.
+ */
+int xc_dom_boot_xen_init(struct xc_dom_image *dom, int xc, domid_t domid)
+{
+    dom->guest_xc = xc;
+    dom->guest_domid = domid;
+
+    dom->xen_version = xc_version(dom->guest_xc, XENVER_version, NULL);
+    if (xc_version(xc, XENVER_capabilities, &dom->xen_caps) < 0) {
+        xc_dom_panic(XC_INTERNAL_ERROR, "can't get xen capabilities\n");
+        return -1;
+    }
+    /* XENVER_version packs major:minor as 16:16 bits, so the minor
+     * number needs the full 16-bit mask. */
+    xc_dom_printf("%s: ver %d.%d, caps %s\n", __FUNCTION__,
+                  dom->xen_version >> 16, dom->xen_version & 0xffff,
+                  dom->xen_caps);
+    return 0;
+}
+
+/*
+ * Run the early arch setup, then populate the guest physmap with
+ * dom->total_pages pages recorded in dom->p2m_host.
+ *
+ * Returns 0 on success, otherwise the result of the failing step (an
+ * allocation failure is also reported through xc_dom_panic).
+ */
+int xc_dom_boot_mem_init(struct xc_dom_image *dom)
+{
+    long err;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    err = arch_setup_early(dom);
+    if (err != 0)
+        return err;
+
+    /* allocate guest memory */
+    err = xc_domain_memory_populate_physmap(dom->guest_xc, dom->guest_domid,
+                                            dom->total_pages, 0, 0,
+                                            dom->p2m_host);
+    if (err == 0)
+        return 0;
+
+    xc_dom_panic(XC_OUT_OF_MEMORY,
+                 "%s: can't allocate low memory for domain\n",
+                 __FUNCTION__);
+    return err;
+}
+
+/*
+ * Map count guest pages, starting at guest page pfn, into the address
+ * space of the caller.
+ *
+ * A window of count pages is mmap'ed on the xen control handle and each
+ * page of it is pointed at the frame xc_dom_p2m_host() yields for the
+ * corresponding guest page.
+ *
+ * Returns the start of the window, or NULL (after xc_dom_panic) on
+ * failure.  The entries array comes from xc_dom_malloc() and is not
+ * freed here.
+ */
+void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
+                           xen_pfn_t count)
+{
+    const int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    const size_t map_len = (size_t) count << page_shift;
+    privcmd_mmap_entry_t *entries;
+    void *ptr;
+    xen_pfn_t i;
+    int rc;
+
+    entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t));
+    if (NULL == entries)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [malloc]\n", __FUNCTION__, pfn, count);
+        return NULL;
+    }
+
+    ptr = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
+               MAP_SHARED, dom->guest_xc, 0);
+    if (MAP_FAILED == ptr)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [mmap]\n", __FUNCTION__, pfn, count);
+        return NULL;
+    }
+
+    for (i = 0; i < count; i++)
+    {
+        /* widen before shifting: an int index overflows once the byte
+         * offset no longer fits into an int */
+        entries[i].va = (uintptr_t) ptr + ((uintptr_t) i << page_shift);
+        entries[i].mfn = xc_dom_p2m_host(dom, pfn + i);
+        entries[i].npages = 1;
+    }
+
+    rc = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid, entries, count);
+    if (rc < 0)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [xenctl, rc=%d]\n", __FUNCTION__, pfn, count, rc);
+        /* the caller only sees NULL, so the window must be released here */
+        munmap(ptr, map_len);
+        return NULL;
+    }
+    return ptr;
+}
+
+/*
+ * Final stage of the domain build: finish the guest's memory setup and
+ * load the initial vcpu context.
+ *
+ * Steps, in order: query the domain info (for the shared info frame),
+ * check guest type compatibility, update the guest p2m, set up page
+ * tables, clear the console and xenstore pages, fill in start_info, set
+ * up the hypercall page, run the late arch setup, build the vcpu context
+ * and pass it to xen.
+ *
+ * Returns 0 on success, non-zero on the first failing step.
+ */
+int xc_dom_boot_image(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    void *ctxt;
+    int rc;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* collect some info */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = dom->guest_domid;
+    rc = do_domctl(dom->guest_xc, &domctl);
+    if (0 != rc)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: getdomaininfo failed (rc=%d)\n", __FUNCTION__, rc);
+        return rc;
+    }
+    if (domctl.domain != dom->guest_domid)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: Huh? domid mismatch (%d != %d)\n", __FUNCTION__,
+                     domctl.domain, dom->guest_domid);
+        return -1;
+    }
+    dom->shared_info_mfn = domctl.u.getdomaininfo.shared_info_frame;
+
+    /* sanity checks */
+    if (!xc_dom_compat_check(dom))
+        return -1;
+
+    /* initial mm setup */
+    if (0 != (rc = xc_dom_update_guest_p2m(dom)))
+        return rc;
+    if (dom->arch_hooks->setup_pgtables)
+        if (0 != (rc = dom->arch_hooks->setup_pgtables(dom)))
+            return rc;
+
+    if (0 != (rc = clear_page(dom, dom->console_pfn)))
+        return rc;
+    if (0 != (rc = clear_page(dom, dom->xenstore_pfn)))
+        return rc;
+
+    /* start info page */
+    if (dom->arch_hooks->start_info)
+        dom->arch_hooks->start_info(dom);
+
+    /* hypercall page */
+    if (0 != (rc = setup_hypercall_page(dom)))
+        return rc;
+    xc_dom_log_memory_footprint(dom);
+
+    /* misc x86 stuff */
+    if (0 != (rc = arch_setup_late(dom)))
+        return rc;
+
+    /* let the vm run */
+    ctxt = xc_dom_malloc(dom, PAGE_SIZE * 2 /* FIXME */ );
+    if (NULL == ctxt)
+    {
+        /* memset() below must not be handed a NULL pointer */
+        xc_dom_panic(XC_OUT_OF_MEMORY,
+                     "%s: can't allocate vcpu context\n", __FUNCTION__);
+        return -1;
+    }
+    memset(ctxt, 0, PAGE_SIZE * 2);
+    if (0 != (rc = dom->arch_hooks->vcpu(dom, ctxt)))
+        return rc;
+    xc_dom_unmap_all(dom);
+    rc = launch_vm(dom->guest_xc, dom->guest_domid, ctxt);
+
+    return rc;
+}
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
new file mode 100644
index 0000000000..8d329ae74f
--- /dev/null
+++ b/tools/libxc/xc_dom_core.c
@@ -0,0 +1,773 @@
+/*
+ * Xen domain builder -- core bits.
+ *
+ * The core code goes here:
+ * - allocate and release domain structs.
+ * - memory management functions.
+ * - misc helper functions.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+/* debugging */
+
+FILE *xc_dom_logfile = NULL; /* builder log stream; while NULL, xc_dom_printf() returns without writing and panics go to stderr */
+
+/*
+ * Open the domain builder logfile in append mode and make it unbuffered.
+ *
+ * Does nothing when the log is already open.  When the file can't be
+ * opened, xc_dom_logfile stays NULL and logging simply remains disabled.
+ */
+void xc_dom_loginit(void)
+{
+    if (xc_dom_logfile)
+        return;
+    xc_dom_logfile = fopen("/var/log/xen/domain-builder-ng.log", "a");
+    if (!xc_dom_logfile)
+        return; /* setvbuf() on a NULL stream would crash */
+    setvbuf(xc_dom_logfile, NULL, _IONBF, 0);
+    xc_dom_printf("### ----- xc domain builder logfile opened -----\n");
+}
+
+/*
+ * printf-style logging into the builder logfile.
+ *
+ * Messages are formatted into a 1024 byte buffer; longer messages are
+ * truncated.  Returns 0 when logging is disabled or nothing was
+ * written, otherwise the fwrite() item count (1 on success).
+ */
+int xc_dom_printf(const char *fmt, ...)
+{
+    va_list args;
+    char buf[1024];
+    int rc;
+
+    if (!xc_dom_logfile)
+        return 0;
+
+    va_start(args, fmt);
+    rc = vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+    if (rc <= 0)
+        return 0; /* format error or empty message */
+    /* vsnprintf reports the untruncated length, buf holds less than that */
+    if ((size_t)rc >= sizeof(buf))
+        rc = sizeof(buf) - 1;
+    rc = fwrite(buf, rc, 1, xc_dom_logfile);
+
+    return rc;
+}
+
+/*
+ * Report a fatal builder error.
+ *
+ * The formatted message is recorded via xc_set_error() and also printed,
+ * prefixed with "file:line: panic: ", to the logfile when one is open
+ * and to stderr otherwise.  Returns the fprintf() result.
+ */
+int xc_dom_panic_func(const char *file, int line, xc_error_code err,
+                      const char *fmt, ...)
+{
+    char text[XC_MAX_ERROR_MSG_LEN];
+    char where[256];
+    va_list ap;
+    FILE *out = xc_dom_logfile ? xc_dom_logfile : stderr;
+
+    snprintf(where, sizeof(where), "%s:%d: panic: ", file, line);
+
+    va_start(ap, fmt);
+    vsnprintf(text, sizeof(text), fmt, ap);
+    va_end(ap);
+
+    xc_set_error(err, "%s", text);
+    return fprintf(out, "%s%s", where, text);
+}
+
+/*
+ * Log a memory size with a human readable unit: MB above 32 MB,
+ * kB above 32 kB, plain bytes otherwise.
+ */
+static void print_mem(const char *name, size_t mem)
+{
+    /* size_t is unsigned, so it takes %zu rather than %zd */
+    if (mem > 32 * 1024 * 1024)
+        xc_dom_printf("%-24s : %zu MB\n", name, mem / (1024 * 1024));
+    else if (mem > 32 * 1024)
+        xc_dom_printf("%-24s : %zu kB\n", name, mem / 1024);
+    else
+        xc_dom_printf("%-24s : %zu bytes\n", name, mem);
+}
+
+void xc_dom_log_memory_footprint(struct xc_dom_image *dom) /* Log the four memory accounting counters kept in dom. */
+{
+ xc_dom_printf("domain builder memory footprint\n");
+ xc_dom_printf(" allocated\n");
+ print_mem(" malloc", dom->alloc_malloc); /* pool block headers plus malloc'ed payload */
+ print_mem(" anon mmap", dom->alloc_mem_map); /* anonymous mmap()s */
+ xc_dom_printf(" mapped\n");
+ print_mem(" file mmap", dom->alloc_file_map); /* files mapped by xc_dom_malloc_filemap() */
+ print_mem(" domU mmap", dom->alloc_domU_map); /* guest pages mapped by xc_dom_pfn_to_ptr() */
+}
+
+/* ------------------------------------------------------------------------ */
+/* simple memory pool */
+
+/*
+ * Allocate size bytes of zeroed memory from the per-domain pool.
+ *
+ * The block is linked into dom->memblocks, so it is released together
+ * with everything else by xc_dom_free_all().  Allocations above 100 kB
+ * are logged.  Returns NULL when malloc() fails.
+ */
+void *xc_dom_malloc(struct xc_dom_image *dom, size_t size)
+{
+    const size_t total = sizeof(struct xc_dom_mem) + size;
+    struct xc_dom_mem *node = malloc(total);
+
+    if (NULL == node)
+        return NULL;
+    memset(node, 0, total);
+
+    /* push onto the pool list and account for header + payload */
+    node->next = dom->memblocks;
+    dom->memblocks = node;
+    dom->alloc_malloc += total;
+
+    if (size > 100 * 1024)
+        print_mem(__FUNCTION__, size);
+    return node->memory;
+}
+
+/*
+ * Allocate size bytes of anonymous, private, read/write mmap()ed memory
+ * and track it in the per-domain pool.
+ *
+ * Only the bookkeeping header is malloc()ed; the payload is unmapped by
+ * xc_dom_free_all().  Allocations above 100 kB are logged.  Returns NULL
+ * when either malloc() or mmap() fails.
+ */
+void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size)
+{
+    struct xc_dom_mem *node = malloc(sizeof(*node));
+
+    if (NULL == node)
+        return NULL;
+    memset(node, 0, sizeof(*node));
+
+    node->mmap_len = size;
+    node->mmap_ptr = mmap(NULL, node->mmap_len,
+                          PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (MAP_FAILED == node->mmap_ptr)
+    {
+        free(node);
+        return NULL;
+    }
+
+    /* push onto the pool list, then update the accounting */
+    node->next = dom->memblocks;
+    dom->memblocks = node;
+    dom->alloc_malloc += sizeof(*node);
+    dom->alloc_mem_map += node->mmap_len;
+
+    if (size > 100 * 1024)
+        print_mem(__FUNCTION__, size);
+    return node->mmap_ptr;
+}
+
+/*
+ * Map a whole file read-only (shared) and track the mapping in the
+ * per-domain pool.
+ *
+ * On success the file size is stored in *size and a pointer to the
+ * mapping is returned; the mapping is dropped by xc_dom_free_all().
+ * Returns NULL when the file can't be opened, sized or mapped, or when
+ * the bookkeeping header can't be allocated.
+ */
+void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
+                            const char *filename, size_t * size)
+{
+    struct xc_dom_mem *block = NULL;
+    off_t len;
+    int fd = -1;
+
+    fd = open(filename, O_RDONLY);
+    if (-1 == fd)
+        goto err;
+
+    /* the end offset is the file size; -1 means lseek failed */
+    len = lseek(fd, 0, SEEK_END);
+    if (len < 0)
+        goto err;
+    *size = len;
+
+    block = malloc(sizeof(*block));
+    if (NULL == block)
+        goto err;
+    memset(block, 0, sizeof(*block));
+    block->mmap_len = *size;
+    block->mmap_ptr = mmap(NULL, block->mmap_len, PROT_READ, MAP_SHARED, fd, 0);
+    if (MAP_FAILED == block->mmap_ptr)
+        goto err;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block);
+    dom->alloc_file_map += block->mmap_len;
+    close(fd); /* the mapping stays valid after the descriptor is closed */
+    if (*size > 100 * 1024)
+        print_mem(__FUNCTION__, *size);
+    return block->mmap_ptr;
+
+ err:
+    if (-1 != fd)
+        close(fd);
+    free(block); /* free(NULL) is a no-op */
+    return NULL;
+}
+
+/*
+ * Release every block of the per-domain pool: unmap the payload of
+ * mmap()-backed blocks and free all bookkeeping headers.
+ */
+static void xc_dom_free_all(struct xc_dom_image *dom)
+{
+    struct xc_dom_mem *cur, *following;
+
+    for (cur = dom->memblocks; NULL != cur; cur = following)
+    {
+        following = cur->next; /* grab the link before cur goes away */
+        if (cur->mmap_ptr)
+            munmap(cur->mmap_ptr, cur->mmap_len);
+        free(cur);
+    }
+    dom->memblocks = NULL;
+}
+
+/*
+ * Duplicate a string into pool memory (released with the domain).
+ * Returns NULL when the pool allocation fails.
+ */
+char *xc_dom_strdup(struct xc_dom_image *dom, const char *str)
+{
+    const size_t bytes = strlen(str) + 1; /* include the terminating NUL */
+    char *copy = xc_dom_malloc(dom, bytes);
+
+    if (NULL != copy)
+        memcpy(copy, str, bytes);
+    return copy;
+}
+
+/* ------------------------------------------------------------------------ */
+/* read files, copy memory blocks, with transparent gunzip */
+
+/*
+ * Check whether a blob looks gzip compressed.
+ *
+ * Returns 0 when the blob isn't gzipped (or the sizes look implausible),
+ * otherwise the size of the buffer needed to unzip it: the uncompressed
+ * length stored in the last four bytes (little endian) plus 16 bytes of
+ * slack.
+ */
+size_t xc_dom_check_gzip(void *blob, size_t ziplen)
+{
+    const unsigned char *bytes = blob;
+    const unsigned char *gzlen;
+    size_t unziplen;
+
+    /*
+     * A gzip member has a 10 byte header and an 8 byte trailer, so
+     * anything shorter can't be gzip.  This also keeps the trailer
+     * access below inside the blob.
+     */
+    if (ziplen < 18)
+        return 0;
+
+    if (0x1f != bytes[0] || 0x8b != bytes[1])
+        /* not gzipped */
+        return 0;
+
+    /* widen before shifting: shifting an int into the sign bit is undefined */
+    gzlen = bytes + ziplen - 4;
+    unziplen = (size_t)gzlen[3] << 24 | (size_t)gzlen[2] << 16 |
+               (size_t)gzlen[1] << 8 | (size_t)gzlen[0];
+    if (unziplen < ziplen || unziplen > ziplen * 8)
+    {
+        xc_dom_printf
+            ("%s: size (zip %zd, unzip %zd) looks insane, skip gunzip\n",
+             __FUNCTION__, ziplen, unziplen);
+        return 0;
+    }
+
+    return unziplen + 16;
+}
+
+/*
+ * Inflate a gzip blob from src into dst in one go.
+ *
+ * dst must be large enough for the whole uncompressed stream.
+ * Returns 0 on success, -1 (after recording a panic) on failure.
+ */
+int xc_dom_do_gunzip(void *src, size_t srclen, void *dst, size_t dstlen)
+{
+    z_stream zStream;
+    int rc;
+
+    memset(&zStream, 0, sizeof(zStream));
+    zStream.next_in = src;
+    zStream.avail_in = srclen;
+    zStream.next_out = dst;
+    zStream.avail_out = dstlen;
+    rc = inflateInit2(&zStream, (MAX_WBITS + 32)); /* +32 means "handle gzip" */
+    if (rc != Z_OK)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: inflateInit2 failed (rc=%d)\n", __FUNCTION__, rc);
+        return -1;
+    }
+    rc = inflate(&zStream, Z_FINISH);
+    /* release zlib's internal state whether or not inflate succeeded */
+    inflateEnd(&zStream);
+    if (rc != Z_STREAM_END)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: inflate failed (rc=%d)\n", __FUNCTION__, rc);
+        return -1;
+    }
+
+    xc_dom_printf("%s: unzip ok, 0x%zx -> 0x%zx\n",
+                  __FUNCTION__, srclen, dstlen);
+    return 0;
+}
+
+int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size) /* Gunzip *blob in place if it looks gzipped; returns 0 on success or when left untouched, -1 on failure. */
+{
+ void *unzip;
+ size_t unziplen;
+
+ unziplen = xc_dom_check_gzip(*blob, *size);
+ if (0 == unziplen)
+ return 0; /* not gzipped: leave *blob and *size as they are */
+
+ unzip = xc_dom_malloc(dom, unziplen); /* pool memory, released with the domain */
+ if (NULL == unzip)
+ return -1;
+
+ if (-1 == xc_dom_do_gunzip(*blob, *size, unzip, unziplen))
+ return -1;
+
+ *blob = unzip; /* hand the unzipped copy back to the caller */
+ *size = unziplen; /* buffer size as computed by xc_dom_check_gzip() */
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* domain memory */
+
+void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn, /* Return a pointer to the memory backing guest page pfn, or NULL on failure. */
+ xen_pfn_t count) /* count > 0: use or create a block of that many pages; count == 0: look up an existing block only */
+{
+ struct xc_dom_phys *phys;
+ unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+ char *mode = "unset";
+
+ if (pfn > dom->total_pages) /* NOTE(review): pfn == total_pages passes this check -- confirm that is intended */
+ {
+ xc_dom_printf("%s: pfn out of range (0x%" PRIpfn " > 0x%" PRIpfn ")\n",
+ __FUNCTION__, pfn, dom->total_pages);
+ return NULL;
+ }
+
+ /* already allocated? */
+ for (phys = dom->phys_pages; NULL != phys; phys = phys->next)
+ {
+ if (pfn >= phys->first + phys->count)
+ continue; /* request starts behind this block */
+ if (count)
+ {
+ /* size given: must be completely within the already allocated block */
+ if (pfn + count <= phys->first)
+ continue; /* request ends before this block */
+ if (pfn < phys->first || pfn + count > phys->first + phys->count)
+ {
+ xc_dom_printf("%s: request overlaps allocated block"
+ " (req 0x%" PRIpfn "+0x%" PRIpfn ","
+ " blk 0x%" PRIpfn "+0x%" PRIpfn ")\n",
+ __FUNCTION__, pfn, count, phys->first,
+ phys->count);
+ return NULL;
+ }
+ }
+ else
+ {
+ /* no size given: block must be allocated already,
+ just hand out a pointer to it */
+ if (pfn < phys->first)
+ continue;
+ }
+ return phys->ptr + ((pfn - phys->first) << page_shift); /* byte offset of pfn inside the block */
+ }
+
+ /* allocating is allowed with size specified only */
+ if (0 == count)
+ {
+ xc_dom_printf("%s: no block found, no size given,"
+ " can't malloc (pfn 0x%" PRIpfn ")\n", __FUNCTION__, pfn);
+ return NULL;
+ }
+
+ /* not found, no overlap => allocate */
+ phys = xc_dom_malloc(dom, sizeof(*phys)); /* bookkeeping lives in the pool, so error returns below don't leak it */
+ if (NULL == phys)
+ return NULL;
+ memset(phys, 0, sizeof(*phys));
+ phys->first = pfn;
+ phys->count = count;
+
+ if (dom->guest_domid) /* a non-zero domid means the pages are mapped from the guest */
+ {
+ mode = "domU mapping";
+ phys->ptr = xc_dom_boot_domU_map(dom, phys->first, phys->count);
+ if (NULL == phys->ptr)
+ return NULL;
+ dom->alloc_domU_map += phys->count << page_shift;
+ }
+ else /* domid 0: back the pages with anonymous memory instead */
+ {
+ mode = "anonymous memory";
+ phys->ptr = mmap(NULL, phys->count << page_shift,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (MAP_FAILED == phys->ptr)
+ {
+ xc_dom_panic(XC_OUT_OF_MEMORY,
+ "%s: oom: can't allocate 0x%" PRIpfn " pages\n",
+ __FUNCTION__, count);
+ return NULL;
+ }
+ dom->alloc_mem_map += phys->count << page_shift;
+ }
+
+#if 1
+ xc_dom_printf("%s: %s: pfn 0x%" PRIpfn "+0x%" PRIpfn " at %p\n",
+ __FUNCTION__, mode, phys->first, phys->count, phys->ptr);
+#endif
+ phys->next = dom->phys_pages; /* link in only after the mapping succeeded */
+ dom->phys_pages = phys;
+ return phys->ptr;
+}
+
+int xc_dom_alloc_segment(struct xc_dom_image *dom, /* Reserve a page-aligned virtual range for seg, map it and zero it; returns 0 or -1. */
+ struct xc_dom_seg *seg, char *name, /* name is only used for logging */
+ xen_vaddr_t start, xen_vaddr_t size) /* start == 0 means "place at the current allocation end" */
+{
+ unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+ xen_pfn_t pages = (size + page_size - 1) / page_size; /* round size up to whole pages */
+ void *ptr;
+
+ if (0 == start)
+ start = dom->virt_alloc_end;
+
+ if (start & (page_size - 1))
+ {
+ xc_dom_panic(XC_INTERNAL_ERROR,
+ "%s: segment start isn't page aligned (0x%" PRIx64 ")\n",
+ __FUNCTION__, start);
+ return -1;
+ }
+ if (start < dom->virt_alloc_end) /* segments may only be placed at or above the allocation end */
+ {
+ xc_dom_panic(XC_INTERNAL_ERROR,
+ "%s: segment start too low (0x%" PRIx64 " < 0x%" PRIx64
+ ")\n", __FUNCTION__, start, dom->virt_alloc_end);
+ return -1;
+ }
+
+ seg->vstart = start;
+ seg->vend = start + pages * page_size;
+ seg->pfn = (seg->vstart - dom->parms.virt_base) / page_size; /* pfn is relative to the kernel's virt_base */
+ dom->virt_alloc_end = seg->vend; /* the next allocation starts behind this segment */
+
+ xc_dom_printf("%-20s: %-12s : 0x%" PRIx64 " -> 0x%" PRIx64
+ " (pfn 0x%" PRIpfn " + 0x%" PRIpfn " pages)\n",
+ __FUNCTION__, name, seg->vstart, seg->vend, seg->pfn, pages);
+
+ /* map and clear pages */
+ ptr = xc_dom_seg_to_ptr(dom, seg);
+ if (NULL == ptr)
+ return -1; /* NOTE(review): virt_alloc_end has already been advanced at this point */
+ memset(ptr, 0, pages * page_size);
+
+ return 0;
+}
+
+int xc_dom_alloc_page(struct xc_dom_image *dom, char *name) /* Reserve one page at the allocation end and return its pfn; the page is neither mapped nor cleared here. */
+{
+ unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+ xen_vaddr_t start;
+ xen_pfn_t pfn;
+
+ start = dom->virt_alloc_end;
+ dom->virt_alloc_end += page_size; /* bump the allocation end by one page */
+ pfn = (start - dom->parms.virt_base) / page_size; /* pfn is relative to the kernel's virt_base */
+
+ xc_dom_printf("%-20s: %-12s : 0x%" PRIx64 " (pfn 0x%" PRIpfn ")\n",
+ __FUNCTION__, name, start, pfn);
+ return pfn; /* NOTE(review): xen_pfn_t is narrowed to int by the return type */
+}
+
+/*
+ * Unmap the block containing pfn and unlink it from dom->phys_pages.
+ * A pfn that isn't covered by any block is logged and otherwise ignored.
+ */
+void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    struct xc_dom_phys **link, *hit;
+
+    /* walk the links rather than the nodes, so unlinking needs no "prev" */
+    for (link = &dom->phys_pages; NULL != (hit = *link); link = &hit->next)
+        if (hit->first <= pfn && pfn < hit->first + hit->count)
+            break;
+
+    if (NULL == hit)
+    {
+        xc_dom_printf("%s: Huh? no mapping with pfn 0x%" PRIpfn "\n",
+                      __FUNCTION__, pfn);
+        return;
+    }
+
+    munmap(hit->ptr, hit->count << page_shift);
+    *link = hit->next;
+}
+
+void xc_dom_unmap_all(struct xc_dom_image *dom) /* Drop every mapping tracked in dom->phys_pages. */
+{
+ while (dom->phys_pages)
+ xc_dom_unmap_one(dom, dom->phys_pages->first); /* unmap_one unlinks the head block, so the loop terminates */
+}
+
+/* ------------------------------------------------------------------------ */
+/* pluggable kernel loaders */
+
+static struct xc_dom_loader *first_loader = NULL; /* head of the registered kernel loader list */
+static struct xc_dom_arch *first_hook = NULL; /* head of the registered arch hook list */
+
+void xc_dom_register_loader(struct xc_dom_loader *loader) /* Add a kernel loader to the registry. */
+{
+ loader->next = first_loader; /* push at the head: loaders registered later are probed first */
+ first_loader = loader;
+}
+
+/*
+ * Probe the registered loaders in list order and return the first one
+ * whose probe() accepts the kernel image.  Records a panic and returns
+ * NULL when none does.
+ */
+static struct xc_dom_loader *xc_dom_find_loader(struct xc_dom_image *dom)
+{
+    struct xc_dom_loader *candidate;
+
+    for (candidate = first_loader; NULL != candidate;
+         candidate = candidate->next)
+    {
+        xc_dom_printf("%s: trying %s loader ... ", __FUNCTION__,
+                      candidate->name);
+        if (0 != candidate->probe(dom))
+        {
+            xc_dom_printf("failed\n");
+            continue;
+        }
+        xc_dom_printf("OK\n");
+        return candidate;
+    }
+
+    xc_dom_panic(XC_INVALID_KERNEL, "%s: no loader found\n", __FUNCTION__);
+    return NULL;
+}
+
+void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks) /* Add a set of architecture hooks to the registry. */
+{
+ hooks->next = first_hook; /* push at the head of the list */
+ first_hook = hooks;
+}
+
+/*
+ * Look up the arch hooks registered for guest_type (exact string match).
+ * Records a panic and returns NULL when there are none.
+ */
+static struct xc_dom_arch *xc_dom_find_arch_hooks(char *guest_type)
+{
+    struct xc_dom_arch *entry;
+
+    for (entry = first_hook; NULL != entry; entry = entry->next)
+        if (0 == strcmp(entry->guest_type, guest_type))
+            return entry;
+
+    xc_dom_panic(XC_INVALID_KERNEL,
+                 "%s: not found (type %s)\n", __FUNCTION__, guest_type);
+    return NULL;
+}
+
+/* ------------------------------------------------------------------------ */
+/* public interface */
+
+void xc_dom_release(struct xc_dom_image *dom) /* Tear down a domain image: unmap guest pages, free the pool, free dom itself. */
+{
+ xc_dom_printf("%s: called\n", __FUNCTION__);
+ if (dom->phys_pages)
+ xc_dom_unmap_all(dom); /* must run before the pool goes away: the phys bookkeeping lives in pool memory */
+ xc_dom_free_all(dom);
+ free(dom);
+}
+
+/*
+ * Allocate and initialize a domain image.
+ *
+ * cmdline (may be NULL) is copied into pool memory, features (may be
+ * NULL) is parsed into dom->f_requested.  The address parameters start
+ * out as UNSET_ADDR.  Returns NULL when out of memory; release the
+ * result with xc_dom_release().
+ */
+struct xc_dom_image *xc_dom_allocate(const char *cmdline, const char *features)
+{
+    struct xc_dom_image *dom;
+
+    /* both strings are optional, and NULL must not be passed to %s */
+    xc_dom_printf("%s: cmdline=\"%s\", features=\"%s\"\n",
+                  __FUNCTION__,
+                  cmdline ? cmdline : "(null)",
+                  features ? features : "(null)");
+    dom = malloc(sizeof(*dom));
+    if (!dom)
+        return NULL;
+
+    memset(dom, 0, sizeof(*dom));
+    if (cmdline)
+    {
+        dom->cmdline = xc_dom_strdup(dom, cmdline);
+        if (!dom->cmdline)
+            goto err; /* don't silently boot without the command line */
+    }
+    if (features)
+        elf_xen_parse_features(features, dom->f_requested, NULL);
+
+    dom->parms.virt_base = UNSET_ADDR;
+    dom->parms.virt_entry = UNSET_ADDR;
+    dom->parms.virt_hypercall = UNSET_ADDR;
+    dom->parms.virt_hv_start_low = UNSET_ADDR;
+    dom->parms.elf_paddr_offset = UNSET_ADDR;
+
+    dom->alloc_malloc += sizeof(*dom);
+    return dom;
+
+ err:
+    xc_dom_release(dom);
+    return NULL;
+}
+
+int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename) /* Map the kernel image from a file and gunzip it if needed; returns 0 or -1. */
+{
+ xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename);
+ dom->kernel_blob = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size);
+ if (NULL == dom->kernel_blob)
+ return -1;
+ return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size); /* may replace blob and size with the unzipped copy */
+}
+
+int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename) /* Map the ramdisk from a file; returns 0 or -1. */
+{
+ xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename);
+ dom->ramdisk_blob =
+ xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size);
+ if (NULL == dom->ramdisk_blob)
+ return -1;
+// return xc_dom_try_gunzip(dom, &dom->ramdisk_blob, &dom->ramdisk_size);
+ return 0; /* the blob is kept as-is here; xc_dom_build_image() gunzips it straight into the ramdisk segment */
+}
+
+int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem, size_t memsize) /* Use an in-memory kernel image, gunzipping it if needed; returns 0 or -1. */
+{
+ xc_dom_printf("%s: called\n", __FUNCTION__);
+ dom->kernel_blob = (void *)mem; /* the caller's buffer is referenced, not copied */
+ dom->kernel_size = memsize;
+ return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size); /* may replace blob and size with the unzipped copy */
+}
+
+int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem, /* Use an in-memory ramdisk; always returns 0. */
+ size_t memsize)
+{
+ xc_dom_printf("%s: called\n", __FUNCTION__);
+ dom->ramdisk_blob = (void *)mem; /* the caller's buffer is referenced, not copied */
+ dom->ramdisk_size = memsize;
+// return xc_dom_try_gunzip(dom, &dom->ramdisk_blob, &dom->ramdisk_size);
+ return 0; /* the blob is kept as-is here; xc_dom_build_image() gunzips it straight into the ramdisk segment */
+}
+
+/*
+ * Pick a loader for the kernel image, let it parse the image and work
+ * out the active feature set.
+ *
+ * The active features are the ones requested on the command line plus
+ * the ones the kernel requires; each of them must be supported by the
+ * kernel.  Returns 0 on success, -1 on failure.
+ */
+int xc_dom_parse_image(struct xc_dom_image *dom)
+{
+    int map;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* parse kernel image */
+    dom->kernel_loader = xc_dom_find_loader(dom);
+    if (NULL == dom->kernel_loader)
+        return -1;
+    if (0 != dom->kernel_loader->parser(dom))
+        return -1;
+    if (NULL == dom->guest_type)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: guest_type not set\n", __FUNCTION__);
+        return -1;
+    }
+
+    /* check features */
+    for (map = 0; map < XENFEAT_NR_SUBMAPS; map++)
+    {
+        /* cmd line requests and kernel requirements both become active */
+        dom->f_active[map] |= dom->f_requested[map];
+        dom->f_active[map] |= dom->parms.f_required[map];
+
+        /* any active bit outside the supported set is an error */
+        if (0 != (dom->f_active[map] & ~dom->parms.f_supported[map]))
+        {
+            xc_dom_panic(XC_INVALID_PARAM,
+                         "%s: unsupported feature requested\n", __FUNCTION__);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Set the guest memory size and build the initial host p2m map.
+ *
+ * Looks up the arch hooks for dom->guest_type, converts mem_mb into a
+ * page count and fills dom->p2m_host with an identity mapping.
+ * Returns 0 on success, -1 on failure.
+ */
+int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb)
+{
+    unsigned int page_shift;
+    xen_pfn_t nr_pages, pfn;
+
+    dom->arch_hooks = xc_dom_find_arch_hooks(dom->guest_type);
+    if (NULL == dom->arch_hooks)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR, "%s: arch hooks not set\n",
+                     __FUNCTION__);
+        return -1;
+    }
+
+    page_shift = XC_DOM_PAGE_SHIFT(dom);
+    /* widen first, so the shift isn't done in unsigned int */
+    nr_pages = (xen_pfn_t)mem_mb << (20 - page_shift);
+
+    xc_dom_printf("%s: mem %u MB, pages 0x%" PRIpfn " pages, %dk each\n",
+                  __FUNCTION__, mem_mb, nr_pages, 1 << (page_shift-10));
+    dom->total_pages = nr_pages;
+
+    xc_dom_printf("%s: 0x%" PRIpfn " pages\n",
+                  __FUNCTION__, dom->total_pages);
+
+    /* setup initial p2m */
+    dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->total_pages);
+    if (NULL == dom->p2m_host)
+    {
+        xc_dom_panic(XC_OUT_OF_MEMORY,
+                     "%s: oom: can't allocate p2m for 0x%" PRIpfn " pages\n",
+                     __FUNCTION__, dom->total_pages);
+        return -1;
+    }
+    for (pfn = 0; pfn < dom->total_pages; pfn++)
+        dom->p2m_host[pfn] = pfn;
+    return 0;
+}
+
+int xc_dom_update_guest_p2m(struct xc_dom_image *dom) /* Copy the host p2m into the guest's p2m using the guest's pfn width; returns 0 or -1. */
+{
+ uint32_t *p2m_32;
+ uint64_t *p2m_64;
+ xen_pfn_t i;
+
+ if (!dom->p2m_guest)
+ return 0; /* no guest p2m to fill in */
+
+ switch (dom->arch_hooks->sizeof_pfn) /* entry width of the guest's p2m, in bytes */
+ {
+ case 4:
+ xc_dom_printf("%s: dst 32bit, pages 0x%" PRIpfn " \n",
+ __FUNCTION__, dom->total_pages);
+ p2m_32 = dom->p2m_guest;
+ for (i = 0; i < dom->total_pages; i++)
+ if (INVALID_P2M_ENTRY != dom->p2m_host[i])
+ p2m_32[i] = dom->p2m_host[i];
+ else
+ p2m_32[i] = (uint32_t) - 1; /* invalid entries become all-ones in the guest's width */
+ break;
+ case 8:
+ xc_dom_printf("%s: dst 64bit, pages 0x%" PRIpfn " \n",
+ __FUNCTION__, dom->total_pages);
+ p2m_64 = dom->p2m_guest;
+ for (i = 0; i < dom->total_pages; i++)
+ if (INVALID_P2M_ENTRY != dom->p2m_host[i])
+ p2m_64[i] = dom->p2m_host[i];
+ else
+ p2m_64[i] = (uint64_t) - 1; /* invalid entries become all-ones in the guest's width */
+ break;
+ default:
+ xc_dom_panic(XC_INTERNAL_ERROR,
+ "sizeof_pfn is invalid (is %d, can be 4 or 8)",
+ dom->arch_hooks->sizeof_pfn);
+ return -1;
+ }
+ return 0;
+}
+
+int xc_dom_build_image(struct xc_dom_image *dom) /* Lay out and load the guest image: kernel, ramdisk, magic pages, page tables, boot stack; returns 0 or -1. */
+{
+ unsigned int page_size;
+
+ xc_dom_printf("%s: called\n", __FUNCTION__);
+
+ /* check for arch hooks */
+ if (NULL == dom->arch_hooks)
+ {
+ xc_dom_panic(XC_INTERNAL_ERROR, "%s: arch hooks not set\n",
+ __FUNCTION__);
+ goto err;
+ }
+ page_size = XC_DOM_PAGE_SIZE(dom);
+
+ /* load kernel */
+ if (0 != xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
+ dom->kernel_seg.vstart, /* vstart and vend as left in kernel_seg before this call */
+ dom->kernel_seg.vend -
+ dom->kernel_seg.vstart))
+ goto err;
+ if (0 != dom->kernel_loader->loader(dom))
+ goto err;
+
+ /* load ramdisk */
+ if (dom->ramdisk_blob)
+ {
+ size_t unziplen, ramdisklen;
+ void *ramdiskmap;
+
+ unziplen = xc_dom_check_gzip(dom->ramdisk_blob, dom->ramdisk_size); /* 0 when the ramdisk isn't gzipped */
+ ramdisklen = unziplen ? unziplen : dom->ramdisk_size;
+ if (0 != xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", 0,
+ ramdisklen))
+ goto err;
+ ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg); /* NOTE(review): result not NULL-checked; the segment was mapped by the call above */
+ if (unziplen)
+ {
+ if (-1 == xc_dom_do_gunzip(dom->ramdisk_blob, dom->ramdisk_size,
+ ramdiskmap, ramdisklen)) /* unzip straight into the segment */
+ goto err;
+ }
+ else
+ memcpy(ramdiskmap, dom->ramdisk_blob, dom->ramdisk_size);
+ }
+
+ /* allocate other pages */
+ if (0 != dom->arch_hooks->alloc_magic_pages(dom))
+ goto err;
+ if (dom->arch_hooks->count_pgtables) /* optional hook */
+ {
+ dom->arch_hooks->count_pgtables(dom);
+ if (dom->pgtables > 0)
+ if (0 !=
+ xc_dom_alloc_segment(dom, &dom->pgtables_seg, "page tables", 0,
+ dom->pgtables * page_size))
+ goto err;
+ }
+ if (dom->alloc_bootstack)
+ dom->bootstack_pfn = xc_dom_alloc_page(dom, "boot stack");
+ xc_dom_printf("%-20s: virt_alloc_end : 0x%" PRIx64 "\n",
+ __FUNCTION__, dom->virt_alloc_end);
+ xc_dom_printf("%-20s: virt_pgtab_end : 0x%" PRIx64 "\n",
+ __FUNCTION__, dom->virt_pgtab_end);
+ return 0;
+
+ err:
+ return -1;
+}
diff --git a/tools/libxc/xc_dom_elfloader.c b/tools/libxc/xc_dom_elfloader.c
new file mode 100644
index 0000000000..acb469ecec
--- /dev/null
+++ b/tools/libxc/xc_dom_elfloader.c
@@ -0,0 +1,283 @@
+/*
+ * Xen domain builder -- ELF bits.
+ *
+ * Parse and load ELF kernel images.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+#define XEN_VER "xen-3.0" /* NOTE(review): not referenced in this file; the guest type strings below spell the version out */
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Map the ELF machine type (plus the PAE mode for i386 kernels) to the
+ * guest type string used to look up the arch hooks.
+ */
+static char *xc_dom_guest_type(struct xc_dom_image *dom,
+                               struct elf_binary *elf)
+{
+    const uint64_t machine = elf_uval(elf, elf->ehdr, e_machine);
+
+    if (EM_X86_64 == machine)
+        return "xen-3.0-x86_64";
+    if (EM_IA_64 == machine)
+        return elf_msb(elf) ? "xen-3.0-ia64be" : "xen-3.0-ia64";
+    if (EM_PPC64 == machine)
+        return "xen-3.0-powerpc64";
+    if (EM_386 != machine)
+        return "xen-3.0-unknown";
+
+    /* i386: the flavour depends on what the kernel says about PAE */
+    if (3 /* PAEKERN_bimodal */ == dom->parms.pae)
+    {
+        /* bimodal kernels run PAE when the hypervisor offers it */
+        if (strstr(dom->xen_caps, "xen-3.0-x86_32p"))
+            return "xen-3.0-x86_32p";
+        return "xen-3.0-x86_32";
+    }
+    if (PAEKERN_extended_cr3 == dom->parms.pae ||
+        PAEKERN_yes == dom->parms.pae)
+        return "xen-3.0-x86_32p";
+
+    /* PAEKERN_no and anything else */
+    return "xen-3.0-x86_32";
+}
+
+/* ------------------------------------------------------------------------ */
+/* parse elf binary */
+
+static int check_elf_kernel(struct xc_dom_image *dom, int verbose) /* Returns 0 when a kernel blob is present and is an ELF binary, -EINVAL otherwise. */
+{
+ if (NULL == dom->kernel_blob)
+ {
+ if (verbose) /* a panic is only recorded in verbose mode; probing stays silent */
+ xc_dom_panic(XC_INTERNAL_ERROR, "%s: no kernel image loaded\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+
+ if (!elf_is_elfbinary(dom->kernel_blob))
+ {
+ if (verbose)
+ xc_dom_panic(XC_INVALID_KERNEL, "%s: kernel is not an ELF image\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int xc_dom_probe_elf_kernel(struct xc_dom_image *dom) /* Loader probe hook: 0 when the kernel blob is an ELF binary. */
+{
+ return check_elf_kernel(dom, 0); /* non-verbose: a failed probe must not record a panic */
+}
+
+/*
+ * Build the BSD-style symbol table blob that sits behind the kernel.
+ *
+ * The blob is an int size field, then a copy of the ELF header and the
+ * section headers, then the contents of the symtab/strtab sections.
+ * The copied section headers get their offsets rewritten to be
+ * relative to the copied ELF header.
+ *
+ * The function runs twice:
+ *   load == 0 (parse time): works on a scratch copy of the headers to
+ *       find out how much room is needed, and moves
+ *       dom->kernel_seg.vend accordingly.
+ *   load == 1 (load time): writes headers and section contents into
+ *       guest memory at dom->bsd_symtab_start.
+ *
+ * Returns 0 on success or when there is nothing to do, -1 when the
+ * destination can't be mapped or the header copy fails to parse.
+ */
+static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
+                                  struct elf_binary *elf, int load)
+{
+    struct elf_binary syms;
+    const elf_shdr *shdr, *shdr2;
+    xen_vaddr_t symtab, maxaddr;
+    char *hdr;
+    size_t size;
+    int h, count, type, i, tables = 0;
+
+    if (elf_swap(elf)) {
+        xc_dom_printf("%s: non-native byte order, bsd symtab not supported\n",
+                      __FUNCTION__);
+        return 0;
+    }
+
+    if (load) {
+        if (!dom->bsd_symtab_start)
+            return 0;
+        size = dom->kernel_seg.vend - dom->bsd_symtab_start;
+        hdr = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start);
+        if (hdr == NULL)
+            return -1;
+        *(int *)hdr = size - sizeof(int);
+    } else {
+        size = sizeof(int) + elf_size(elf, elf->ehdr) +
+            elf_shdr_count(elf) * elf_size(elf, shdr);
+        hdr = xc_dom_malloc(dom, size);
+        if (hdr == NULL)
+            return 0;
+        /*
+         * syms isn't initialized until elf_init() below, so round with
+         * the kernel's own handle; syms is built from a copy of that
+         * ELF header.
+         */
+        dom->bsd_symtab_start = elf_round_up(elf, dom->kernel_seg.vend);
+    }
+
+    /* copy the ELF header and the section headers behind the size field */
+    memcpy(hdr + sizeof(int),
+           elf->image,
+           elf_size(elf, elf->ehdr));
+    memcpy(hdr + sizeof(int) + elf_size(elf, elf->ehdr),
+           elf->image + elf_uval(elf, elf->ehdr, e_shoff),
+           elf_shdr_count(elf) * elf_size(elf, shdr));
+
+    /* the copy has no program headers and its section headers follow
+       the ELF header directly */
+    if (elf_64bit(elf)) {
+        Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(int));
+        ehdr->e_phoff = 0;
+        ehdr->e_phentsize = 0;
+        ehdr->e_phnum = 0;
+        ehdr->e_shoff = elf_size(elf, elf->ehdr);
+        ehdr->e_shstrndx = SHN_UNDEF;
+    } else {
+        Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(int));
+        ehdr->e_phoff = 0;
+        ehdr->e_phentsize = 0;
+        ehdr->e_phnum = 0;
+        ehdr->e_shoff = elf_size(elf, elf->ehdr);
+        ehdr->e_shstrndx = SHN_UNDEF;
+    }
+    if (0 != elf_init(&syms, hdr + sizeof(int), size - sizeof(int)))
+        return -1;
+    if (xc_dom_logfile)
+        elf_set_logfile(&syms, xc_dom_logfile, 1);
+
+    symtab = dom->bsd_symtab_start + sizeof(int);
+    maxaddr = elf_round_up(&syms, symtab + elf_size(&syms, syms.ehdr) +
+                           elf_shdr_count(&syms) * elf_size(&syms, shdr));
+
+    xc_dom_printf("%s/%s: bsd_symtab_start=%" PRIx64 ", kernel.end=0x%" PRIx64
+                  " -- symtab=0x%" PRIx64 ", maxaddr=0x%" PRIx64 "\n",
+                  __FUNCTION__, load ? "load" : "parse",
+                  dom->bsd_symtab_start, dom->kernel_seg.vend, symtab, maxaddr);
+
+    count = elf_shdr_count(&syms);
+    for (h = 0; h < count; h++)
+    {
+        shdr = elf_shdr_by_index(&syms, h);
+        type = elf_uval(&syms, shdr, sh_type);
+        if (type == SHT_STRTAB)
+        {
+            /* Look for a symtab @i linked to strtab @h. */
+            for (i = 0; i < count; i++) {
+                shdr2 = elf_shdr_by_index(&syms, i);
+                if ((elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB) &&
+                    (elf_uval(&syms, shdr2, sh_link) == h))
+                    break;
+            }
+            /* Skip strtab @h if we found no corresponding symtab @i. */
+            if (i == count)
+            {
+                if (elf_64bit(&syms))
+                    *(Elf64_Off*)(&shdr->e64.sh_offset) = 0;
+                else
+                    *(Elf32_Off*)(&shdr->e32.sh_offset) = 0;
+                continue;
+            }
+        }
+
+        if ((type == SHT_STRTAB) || (type == SHT_SYMTAB))
+        {
+            /* Mangled to be based on ELF header location. */
+            if (elf_64bit(&syms))
+                *(Elf64_Off*)(&shdr->e64.sh_offset) = maxaddr - symtab;
+            else
+                *(Elf32_Off*)(&shdr->e32.sh_offset) = maxaddr - symtab;
+            size = elf_uval(&syms, shdr, sh_size);
+            maxaddr = elf_round_up(&syms, maxaddr + size);
+            tables++;
+            xc_dom_printf("%s: h=%d %s, size=0x%zx, maxaddr=0x%" PRIx64 "\n",
+                          __FUNCTION__, h,
+                          type == SHT_SYMTAB ? "symtab" : "strtab",
+                          size, maxaddr);
+
+            if (load) {
+                shdr2 = elf_shdr_by_index(elf, h);
+                memcpy((void*)elf_section_start(&syms, shdr),
+                       elf_section_start(elf, shdr2),
+                       size);
+            }
+        }
+
+        /* Name is NULL.  sh_name is a 32bit word in both ELF classes,
+           so clear all of it. */
+        if (elf_64bit(&syms))
+            *(Elf64_Word*)(&shdr->e64.sh_name) = 0;
+        else
+            *(Elf32_Word*)(&shdr->e32.sh_name) = 0;
+    }
+
+    if (0 == tables)
+    {
+        xc_dom_printf("%s: no symbol table present\n", __FUNCTION__);
+        dom->bsd_symtab_start = 0;
+        return 0;
+    }
+    if (!load)
+        dom->kernel_seg.vend = maxaddr; /* make room for the blob behind the kernel */
+    return 0;
+}
+
+/*
+ * Loader parser hook for ELF kernels.
+ *
+ * Sets up the libelf handle (kept in dom->private_loader), reads the
+ * xen meta info into dom->parms, records the kernel segment and works
+ * out the guest type.  Returns 0 on success, non-zero on failure.
+ */
+static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *elf;
+    int rc;
+
+    rc = check_elf_kernel(dom, 1);
+    if (0 != rc)
+        return rc;
+
+    elf = xc_dom_malloc(dom, sizeof(*elf));
+    if (NULL == elf)
+    {
+        /* elf_init() must not be handed a NULL handle */
+        xc_dom_panic(XC_OUT_OF_MEMORY, "%s: oom: can't allocate ELF handle\n",
+                     __FUNCTION__);
+        return -ENOMEM;
+    }
+    dom->private_loader = elf;
+    rc = elf_init(elf, dom->kernel_blob, dom->kernel_size);
+    if (xc_dom_logfile)
+        elf_set_logfile(elf, xc_dom_logfile, 1);
+    if (0 != rc)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: corrupted ELF image\n",
+                     __FUNCTION__);
+        return rc;
+    }
+
+    /* Find the section-header strings table. */
+    if (NULL == elf->sec_strtab)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: ELF image has no shstrtab\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* parse binary and get xen meta info */
+    elf_parse_binary(elf);
+    if (0 != (rc = elf_xen_parse(elf, &dom->parms)))
+        return rc;
+
+    /* find kernel segment */
+    dom->kernel_seg.vstart = dom->parms.virt_kstart;
+    dom->kernel_seg.vend = dom->parms.virt_kend;
+
+    /* may move kernel_seg.vend to make room for the symbol table */
+    if (dom->parms.bsd_symtab)
+        xc_dom_load_elf_symtab(dom, elf, 0);
+
+    dom->guest_type = xc_dom_guest_type(dom, elf);
+    xc_dom_printf("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
+                  __FUNCTION__, dom->guest_type,
+                  dom->kernel_seg.vstart, dom->kernel_seg.vend);
+    return 0;
+}
+
+static int xc_dom_load_elf_kernel(struct xc_dom_image *dom) /* Loader hook: copy the ELF kernel (and the BSD symtab, if wanted) into the kernel segment; always returns 0. */
+{
+ struct elf_binary *elf = dom->private_loader; /* handle set up by xc_dom_parse_elf_kernel() */
+
+ elf->dest = xc_dom_seg_to_ptr(dom, &dom->kernel_seg); /* NOTE(review): result not NULL-checked before elf_load_binary() */
+ elf_load_binary(elf);
+ if (dom->parms.bsd_symtab)
+ xc_dom_load_elf_symtab(dom, elf, 1); /* load pass; return value is ignored */
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct xc_dom_loader elf_loader = { /* hook table for the generic ELF kernel loader */
+ .name = "ELF-generic",
+ .probe = xc_dom_probe_elf_kernel, /* silent "is this ELF?" check */
+ .parser = xc_dom_parse_elf_kernel, /* reads meta info, fills dom->parms and dom->guest_type */
+ .loader = xc_dom_load_elf_kernel, /* copies the image into the kernel segment */
+};
+
+static void __init register_loader(void) /* Registers the ELF loader; presumably __init makes this run at library load time -- TODO confirm the macro definition. */
+{
+ xc_dom_register_loader(&elf_loader);
+}
diff --git a/tools/libxc/xc_dom_ia64.c b/tools/libxc/xc_dom_ia64.c
new file mode 100644
index 0000000000..7214c3c299
--- /dev/null
+++ b/tools/libxc/xc_dom_ia64.c
@@ -0,0 +1,118 @@
+/*
+ * Xen domain builder -- ia64 bits.
+ *
+ * Most architecture-specific code for ia64 goes here.
+ * - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/foreign/ia64.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+static int alloc_magic_pages(struct xc_dom_image *dom) /* ia64: place the special pages in the last three guest pages; always returns 0. */
+{
+ /* allocate special pages */
+ dom->console_pfn = dom->total_pages -1;
+ dom->xenstore_pfn = dom->total_pages -2;
+ dom->start_info_pfn = dom->total_pages -3;
+ return 0;
+}
+
+/*
+ * Fill the ia64 start info page and the boot parameter block that
+ * directly follows it.  Returns 0 on success, -1 when the start info
+ * page can't be mapped.
+ */
+static int start_info_ia64(struct xc_dom_image *dom)
+{
+    start_info_ia64_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    struct xen_ia64_boot_param_ia64 *bp;
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    if (NULL == start_info)
+        return -1;
+    /* the boot parameters sit right behind the start info struct */
+    bp = (struct xen_ia64_boot_param_ia64 *)(start_info + 1);
+
+    /* guest_type is data, not a format string; also bound the copy */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+             dom->guest_type);
+    start_info->flags = dom->flags;
+    start_info->nr_pages = dom->total_pages;
+    start_info->store_mfn = dom->xenstore_pfn;
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = dom->console_pfn;
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+        start_info->mod_start = dom->ramdisk_seg.vstart;
+        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+        bp->initrd_start = start_info->mod_start;
+        bp->initrd_size = start_info->mod_len;
+    }
+    if (dom->cmdline)
+    {
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+        /* NOTE(review): offsetof() uses start_info_t, not start_info_ia64_t
+           -- confirm both have cmd_line at the same offset */
+        bp->command_line = (dom->start_info_pfn << PAGE_SHIFT_IA64)
+            + offsetof(start_info_t, cmd_line);
+    }
+    return 0;
+}
+
+static int shared_info_ia64(struct xc_dom_image *dom, void *ptr) /* Initialize the ia64 shared info page at ptr; always returns 0. */
+{
+ shared_info_ia64_t *shared_info = ptr;
+ int i;
+
+ xc_dom_printf("%s: called\n", __FUNCTION__);
+
+ memset(shared_info, 0, sizeof(*shared_info));
+ for (i = 0; i < MAX_VIRT_CPUS; i++)
+ shared_info->vcpu_info[i].evtchn_upcall_mask = 1; /* every vcpu starts with the upcall mask set */
+ shared_info->arch.start_info_pfn = dom->start_info_pfn;
+ return 0;
+}
+
+extern unsigned long xc_ia64_fpsr_default(void); /* defined outside this file; only called on ia64 hosts, see the #ifdef below */
+
+static int vcpu_ia64(struct xc_dom_image *dom, void *ptr) /* Fill the initial ia64 vcpu context at ptr; always returns 0. */
+{
+ vcpu_guest_context_ia64_t *ctxt = ptr;
+
+ xc_dom_printf("%s: called\n", __FUNCTION__);
+
+ /* clear everything */
+ memset(ctxt, 0, sizeof(*ctxt));
+
+ ctxt->flags = 0;
+ ctxt->user_regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
+ ctxt->user_regs.cr_iip = dom->parms.virt_entry; /* start executing at the kernel entry point */
+ ctxt->user_regs.cr_ifs = (uint64_t) 1 << 63;
+#ifdef __ia64__ /* FIXME */
+ ctxt->user_regs.ar_fpsr = xc_ia64_fpsr_default();
+#endif
+ ctxt->user_regs.r28 = (dom->start_info_pfn << PAGE_SHIFT_IA64)
+ + sizeof(start_info_ia64_t); /* address right behind the start info struct, where start_info_ia64() puts the boot parameters */
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct xc_dom_arch xc_dom_arch = { /* hook table for ia64 guests; hooks not listed here stay NULL */
+ .guest_type = "xen-3.0-ia64", /* matched against dom->guest_type by xc_dom_find_arch_hooks() */
+ .page_shift = PAGE_SHIFT_IA64,
+ .alloc_magic_pages = alloc_magic_pages,
+ .start_info = start_info_ia64,
+ .shared_info = shared_info_ia64,
+ .vcpu = vcpu_ia64,
+};
+
+static void __init register_arch_hooks(void) /* Registers the ia64 hooks; presumably __init makes this run at library load time -- TODO confirm the macro definition. */
+{
+ xc_dom_register_arch_hooks(&xc_dom_arch);
+}
diff --git a/tools/libxc/xc_dom_powerpc64.c b/tools/libxc/xc_dom_powerpc64.c
new file mode 100644
index 0000000000..605c3a5d3c
--- /dev/null
+++ b/tools/libxc/xc_dom_powerpc64.c
@@ -0,0 +1,100 @@
+/*
+ * Xen domain builder -- powerpc64 bits.
+ *
+ * Most architecture-specific code for powerpc64 goes here.
+ * - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+static int alloc_magic_pages(struct xc_dom_image *dom) /* powerpc64: carve the special pages off the top of dom->low_top; always returns 0. */
+{
+ /* allocate special pages */
+ dom->low_top--; /* shared_info */
+ dom->xenstore_pfn = --dom->low_top;
+ dom->console_pfn = --dom->low_top;
+ dom->start_info_pfn = --dom->low_top;
+ return 0;
+}
+
+/*
+ * Fill the powerpc64 start info page.  Returns 0 on success, -1 when
+ * the start info page can't be mapped.
+ */
+static int start_info(struct xc_dom_image *dom)
+{
+    start_info_t *si =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    if (NULL == si)
+        return -1;
+
+    snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0);
+
+    si->nr_pages = dom->total_pages;
+    si->shared_info = (dom->total_pages - 1) << PAGE_SHIFT;
+    si->store_mfn = dom->xenstore_pfn;
+    /* the event channel field is named xenstore_evtchn, as in the ia64 code */
+    si->store_evtchn = dom->xenstore_evtchn;
+    si->console.domU.mfn = dom->console_pfn;
+    si->console.domU.evtchn = dom->console_evtchn;
+    return 0;
+}
+
+/*
+ * Initialize the powerpc64 shared info page at ptr by zeroing it.
+ * Always returns 0.
+ */
+static int shared_info(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_t *shared_info = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shared_info, 0, sizeof(*shared_info));
+    return 0;
+}
+
+/*
+ * Fill the initial powerpc64 vcpu context at ptr.  Always returns 0.
+ *
+ * FIXME: still incomplete, see the note on gprs[3] and gprs[4] below.
+ */
+static int vcpu(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_t *ctxt = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* poison the registers; ctxt is a pointer, so size the member via -> */
+    memset(&ctxt->user_regs, 0x55, sizeof(ctxt->user_regs));
+    /* start at the kernel entry point from the parsed image */
+    ctxt->user_regs.pc = dom->parms.virt_entry;
+    ctxt->user_regs.msr = 0;
+    ctxt->user_regs.gprs[1] = 0; /* Linux uses its own stack */
+    /*
+     * FIXME: devtree_addr and kern_addr are not declared anywhere in
+     * this file.  They have to be derived from the domain image before
+     * this code can build.
+     */
+    ctxt->user_regs.gprs[3] = devtree_addr;
+    ctxt->user_regs.gprs[4] = kern_addr;
+    ctxt->user_regs.gprs[5] = 0;
+
+    /* There is a buggy kernel that does not zero the "local_paca", so
+     * we must make sure this register is 0 */
+    ctxt->user_regs.gprs[13] = 0;
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct xc_dom_arch xc_dom_arch = { /* hook table for powerpc64 guests; hooks not listed here stay NULL */
+ .guest_type = "xen-3.0-powerpc64", /* matched against dom->guest_type by xc_dom_find_arch_hooks() */
+ .page_shift = FIXME, /* NOTE(review): placeholder, FIXME is not defined in this file -- needs the real page shift */
+ .alloc_magic_pages = alloc_magic_pages,
+ .start_info = start_info,
+ .shared_info = shared_info,
+ .vcpu = vcpu,
+};
+
+static void __init register_arch_hooks(void) /* Registers the powerpc64 hooks; presumably __init makes this run at library load time -- TODO confirm the macro definition. */
+{
+ xc_dom_register_arch_hooks(&xc_dom_arch);
+}
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
new file mode 100644
index 0000000000..9e489c320d
--- /dev/null
+++ b/tools/libxc/xc_dom_x86.c
@@ -0,0 +1,559 @@
+/*
+ * Xen domain builder -- i386 and x86_64 bits.
+ *
+ * Most architecture-specific code for x86 goes here.
+ * - prepare page tables.
+ * - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/e820.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Mask with the low "bits" bits set; "bits" must be smaller than the
+ * width of xen_vaddr_t or the shift is undefined. */
+#define bits_to_mask(bits) (((xen_vaddr_t)1 << (bits))-1)
+/* Clear the mask bits: first address of the region containing addr. */
+#define round_down(addr, mask) ((addr) & ~(mask))
+/* Set the mask bits: LAST address of the region containing addr (an
+ * inclusive end, not the start of the next region). */
+#define round_up(addr, mask) ((addr) | (mask))
+
+/*
+ * Count the page tables of one level needed to map [start, end].
+ *
+ * @bits is the number of address bits spanned by a single table of that
+ * level.  0 means the level does not exist and yields 0 tables.  A value
+ * equal to the width of unsigned long (or too wide to shift at all) is
+ * treated as the top-level table, of which there is exactly one.
+ * Otherwise the range is widened to whole tables and the number of
+ * tables it touches is returned.
+ */
+static inline unsigned long
+nr_page_tables(xen_vaddr_t start, xen_vaddr_t end, unsigned long bits)
+{
+    xen_vaddr_t mask;
+    int tables;
+
+    if (0 == bits)
+        return 0; /* unused */
+
+    if (8 * sizeof(unsigned long) == bits ||
+        bits >= 8 * sizeof(xen_vaddr_t))
+    {
+        /* must be pgd, need one */
+        /* a shift by the full type width is undefined, so the mask is
+         * written out instead of going through bits_to_mask() */
+        mask = ~(xen_vaddr_t)0;
+        start = 0;
+        end = -1;
+        tables = 1;
+    }
+    else
+    {
+        mask = bits_to_mask(bits);
+        start = round_down(start, mask);
+        end = round_up(end, mask);
+        tables = ((end - start) >> bits) + 1;
+    }
+
+    /* bits is unsigned long, hence %lu */
+    xc_dom_printf("%s: 0x%016" PRIx64 "/%lu: 0x%016" PRIx64
+                  " -> 0x%016" PRIx64 ", %d table(s)\n",
+                  __FUNCTION__, mask, bits, start, end, tables);
+    return tables;
+}
+
+/*
+ * Work out how many page tables of each level are needed to map the
+ * guest from parms.virt_base up to the end of everything allocated so
+ * far plus the page tables themselves, one page if dom->alloc_bootstack
+ * is set, and dom->extra_pages.
+ *
+ * The page tables consume virtual space that must be mapped as well, so
+ * the estimate is repeated until the mapped range is big enough.  The
+ * per-level counts go to dom->pg_l1..pg_l4, their sum to dom->pgtables
+ * and the end of the mapped range to dom->virt_pgtab_end.
+ * Always returns 0.
+ */
+static int count_pgtables(struct xc_dom_image *dom, int pae,
+                          int l4_bits, int l3_bits, int l2_bits, int l1_bits)
+{
+    int reserved, needed;
+    xen_vaddr_t map_last;
+
+    /* pages wanted in addition to the page tables */
+    reserved = dom->alloc_bootstack ? 1 : 0;
+    reserved += dom->extra_pages;
+
+    needed = reserved;
+    do
+    {
+        /* last byte mapped once the range is widened to whole L1 tables */
+        map_last = round_up(dom->virt_alloc_end + needed * PAGE_SIZE_X86,
+                            bits_to_mask(l1_bits));
+
+        dom->pg_l4 = nr_page_tables(dom->parms.virt_base, map_last, l4_bits);
+        dom->pg_l3 = nr_page_tables(dom->parms.virt_base, map_last, l3_bits);
+        dom->pg_l2 = nr_page_tables(dom->parms.virt_base, map_last, l2_bits);
+        dom->pg_l1 = nr_page_tables(dom->parms.virt_base, map_last, l1_bits);
+
+        if (pae && map_last < 0xc0000000)
+        {
+            xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n", __FUNCTION__);
+            dom->pg_l2++;
+        }
+
+        dom->pgtables = dom->pg_l4 + dom->pg_l3 + dom->pg_l2 + dom->pg_l1;
+        needed = dom->pgtables + reserved;
+    }
+    while (dom->virt_alloc_end + needed * PAGE_SIZE_X86 > map_last + 1);
+
+    dom->virt_pgtab_end = map_last + 1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* i386 pagetables */
+
+/* Flag sets OR-ed into the entries written by the i386 and PAE
+ * setup_pgtables functions; undefined again after those functions. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
+
+/*
+ * Page table sizing for non-PAE i386 guests: no L4/L3 level (0 bits =
+ * unused), a 32-bit wide L2 table and L1 tables spanning
+ * L2_PAGETABLE_SHIFT_I386 address bits each.  Returns count_pgtables()'s
+ * result.
+ */
+static int count_pgtables_x86_32(struct xc_dom_image *dom)
+{
+ return count_pgtables(dom, 0, 0, 0, 32, L2_PAGETABLE_SHIFT_I386);
+}
+
+/*
+ * Page table sizing for PAE i386 guests: pae flag set, no L4 level, a
+ * 32-bit wide L3 table, L2 tables spanning L3_PAGETABLE_SHIFT_PAE bits and
+ * L1 tables spanning L2_PAGETABLE_SHIFT_PAE bits.  Returns
+ * count_pgtables()'s result.
+ */
+static int count_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+ return count_pgtables(dom, 1, 0, 32,
+ L3_PAGETABLE_SHIFT_PAE, L2_PAGETABLE_SHIFT_PAE);
+}
+
+/* Frame number -> byte address; the cast to xen_paddr_t happens before
+ * the shift so the shift is done in that type. */
+#define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86)
+
+/*
+ * Write the two-level (non-PAE) i386 page tables for the range
+ * [parms.virt_base, virt_pgtab_end).
+ *
+ * The L2 table is the first page of pgtables_seg and the L1 tables start
+ * pg_l2 pages after it.  Virtual page N above virt_base is mapped to guest
+ * pfn N, translated through xc_dom_p2m_guest().  Pages lying inside
+ * pgtables_seg are mapped without _PAGE_RW.  Always returns 0.
+ */
+static int setup_pgtables_x86_32(struct xc_dom_image *dom)
+{
+    xen_pfn_t pgd_pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t next_l1_pfn = dom->pgtables_seg.pfn + dom->pg_l2;
+    l2_pgentry_32_t *pgd = xc_dom_pfn_to_ptr(dom, pgd_pfn, 1);
+    l1_pgentry_32_t *pt = NULL;
+    xen_vaddr_t va;
+
+    for (va = dom->parms.virt_base; va < dom->virt_pgtab_end;
+         va += PAGE_SIZE_X86)
+    {
+        unsigned long dir_slot, slot;
+        xen_pfn_t pfn;
+
+        if (pt == NULL)
+        {
+            /* begin a fresh L1 table and hook it into the L2 table */
+            pt = xc_dom_pfn_to_ptr(dom, next_l1_pfn, 1);
+            dir_slot = l2_table_offset_i386(va);
+            pgd[dir_slot] =
+                pfn_to_paddr(xc_dom_p2m_guest(dom, next_l1_pfn)) | L2_PROT;
+            next_l1_pfn++;
+        }
+
+        /* map this page */
+        slot = l1_table_offset_i386(va);
+        pfn = (va - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+        pt[slot] = pfn_to_paddr(xc_dom_p2m_guest(dom, pfn)) | L1_PROT;
+
+        /* page tables are r/o */
+        if (va >= dom->pgtables_seg.vstart && va < dom->pgtables_seg.vend)
+            pt[slot] &= ~_PAGE_RW;
+
+        /* last slot used: the next page needs a new L1 table */
+        if (slot == L1_PAGETABLE_ENTRIES_I386 - 1)
+            pt = NULL;
+    }
+    return 0;
+}
+
+/*
+ * Write the three-level PAE page tables for the range
+ * [parms.virt_base, virt_pgtab_end).
+ *
+ * Layout inside pgtables_seg: the L3 table first, the L2 tables pg_l3
+ * pages after it, the L1 tables a further pg_l2 pages on.  Virtual page N
+ * above virt_base is mapped to guest pfn N, translated through
+ * xc_dom_p2m_guest().  Pages inside pgtables_seg are mapped without
+ * _PAGE_RW.  Always returns 0.
+ */
+static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+ xen_pfn_t l3pfn = dom->pgtables_seg.pfn;
+ xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l3;
+ xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l3 + dom->pg_l2;
+ l3_pgentry_64_t *l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+ /* NULL means "no table in progress at this level"; the loop then takes
+ * the next page of that level and links it into its parent */
+ l2_pgentry_64_t *l2tab = NULL;
+ l1_pgentry_64_t *l1tab = NULL;
+ unsigned long l3off, l2off, l1off;
+ xen_vaddr_t addr;
+ xen_pfn_t pgpfn;
+
+ for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+ addr += PAGE_SIZE_X86)
+ {
+ if (NULL == l2tab)
+ {
+ /* get L2 tab, make L3 entry */
+ l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+ l3off = l3_table_offset_pae(addr);
+ l3tab[l3off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+ l2pfn++;
+ }
+
+ if (NULL == l1tab)
+ {
+ /* get L1 tab, make L2 entry */
+ l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+ l2off = l2_table_offset_pae(addr);
+ l2tab[l2off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+ /* last L2 slot just filled: the next L1 table needs a new L2 table */
+ if (L2_PAGETABLE_ENTRIES_PAE - 1 == l2off)
+ l2tab = NULL;
+ l1pfn++;
+ }
+
+ /* make L1 entry */
+ l1off = l1_table_offset_pae(addr);
+ /* page index relative to virt_base is used as the guest pfn */
+ pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+ l1tab[l1off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+ if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+ l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
+ /* last L1 slot used: the next page starts a new L1 table */
+ if (L1_PAGETABLE_ENTRIES_PAE - 1 == l1off)
+ l1tab = NULL;
+ }
+
+ /* Mapping ends at or below 0xc0000000: point L3 slot 3 at the next L2
+ * page not consumed by the loop.  count_pgtables() adds one L2 page to
+ * pg_l2 under the matching condition. */
+ if (dom->virt_pgtab_end <= 0xc0000000)
+ {
+ xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n", __FUNCTION__);
+ l3tab[3] = pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+ }
+ return 0;
+}
+
+#undef L1_PROT
+#undef L2_PROT
+#undef L3_PROT
+
+/* ------------------------------------------------------------------------ */
+/* x86_64 pagetables */
+
+/*
+ * Page table sizing for x86_64 guests: four levels, each level's tables
+ * spanning the shift of the level above it; the top-level table spans
+ * L4_PAGETABLE_SHIFT_X86_64 + 9 bits.  Returns count_pgtables()'s result.
+ */
+static int count_pgtables_x86_64(struct xc_dom_image *dom)
+{
+ return count_pgtables(dom, 0,
+ L4_PAGETABLE_SHIFT_X86_64 + 9,
+ L4_PAGETABLE_SHIFT_X86_64,
+ L3_PAGETABLE_SHIFT_X86_64, L2_PAGETABLE_SHIFT_X86_64);
+}
+
+/* Flag sets OR-ed into the entries written by setup_pgtables_x86_64();
+ * undefined again after that function. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+/*
+ * Write the four-level x86_64 page tables for the range
+ * [parms.virt_base, virt_pgtab_end).
+ *
+ * Layout inside pgtables_seg: the L4 table first, followed by the L3, L2
+ * and L1 tables, each group starting after the pg_l4/pg_l3/pg_l2 pages of
+ * the levels above it.  Virtual page N above virt_base is mapped to guest
+ * pfn N, translated through xc_dom_p2m_guest().  Pages inside
+ * pgtables_seg are mapped without _PAGE_RW.  Always returns 0.
+ */
+static int setup_pgtables_x86_64(struct xc_dom_image *dom)
+{
+ xen_pfn_t l4pfn = dom->pgtables_seg.pfn;
+ xen_pfn_t l3pfn = dom->pgtables_seg.pfn + dom->pg_l4;
+ xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3;
+ xen_pfn_t l1pfn =
+ dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3 + dom->pg_l2;
+ l4_pgentry_64_t *l4tab = xc_dom_pfn_to_ptr(dom, l4pfn, 1);
+ /* NULL means "no table in progress at this level"; the loop then takes
+ * the next page of that level and links it into its parent */
+ l3_pgentry_64_t *l3tab = NULL;
+ l2_pgentry_64_t *l2tab = NULL;
+ l1_pgentry_64_t *l1tab = NULL;
+ uint64_t l4off, l3off, l2off, l1off;
+ uint64_t addr;
+ xen_pfn_t pgpfn;
+
+ for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+ addr += PAGE_SIZE_X86)
+ {
+ if (NULL == l3tab)
+ {
+ /* get L3 tab, make L4 entry */
+ l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+ l4off = l4_table_offset_x86_64(addr);
+ l4tab[l4off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
+ l3pfn++;
+ }
+
+ if (NULL == l2tab)
+ {
+ /* get L2 tab, make L3 entry */
+ l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+ l3off = l3_table_offset_x86_64(addr);
+ l3tab[l3off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+ /* last L3 slot just filled: the next L2 table needs a new L3 table */
+ if (L3_PAGETABLE_ENTRIES_X86_64 - 1 == l3off)
+ l3tab = NULL;
+ l2pfn++;
+ }
+
+ if (NULL == l1tab)
+ {
+ /* get L1 tab, make L2 entry */
+ l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+ l2off = l2_table_offset_x86_64(addr);
+ l2tab[l2off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+ /* last L2 slot just filled: the next L1 table needs a new L2 table */
+ if (L2_PAGETABLE_ENTRIES_X86_64 - 1 == l2off)
+ l2tab = NULL;
+ l1pfn++;
+ }
+
+ /* make L1 entry */
+ l1off = l1_table_offset_x86_64(addr);
+ /* page index relative to virt_base is used as the guest pfn */
+ pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+ l1tab[l1off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+ if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+ l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
+ /* last L1 slot used: the next page starts a new L1 table */
+ if (L1_PAGETABLE_ENTRIES_X86_64 - 1 == l1off)
+ l1tab = NULL;
+ }
+ return 0;
+}
+
+#undef L1_PROT
+#undef L2_PROT
+#undef L3_PROT
+#undef L4_PROT
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Reserve the x86 "magic" areas of the guest address space.
+ *
+ * Allocates the phys2mach segment (one sizeof_pfn-byte entry per guest
+ * page), then one page each for start info, xenstore and console, plus a
+ * shared info page when xc_dom_feature_translated() reports true, and
+ * finally requests a boot stack via dom->alloc_bootstack.
+ *
+ * Returns 0 on success, -1 if the phys2mach segment cannot be allocated.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    size_t p2m_bytes;
+
+    p2m_bytes = dom->total_pages * dom->arch_hooks->sizeof_pfn;
+
+    /* phys2mach table */
+    if (xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach", 0, p2m_bytes) != 0)
+    {
+        return -1;
+    }
+    dom->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg);
+
+    /* special pages, allocated in this fixed order */
+    dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
+    dom->xenstore_pfn = xc_dom_alloc_page(dom, "xenstore");
+    dom->console_pfn = xc_dom_alloc_page(dom, "console");
+    if (xc_dom_feature_translated(dom))
+    {
+        dom->shared_info_pfn = xc_dom_alloc_page(dom, "shared info");
+    }
+
+    dom->alloc_bootstack = 1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Fill in the start_info page of a 32-bit x86 guest.
+ *
+ * Stores the guest type string as magic, the page count, the shared_info
+ * address (guest pfn for translated guests, machine frame otherwise), the
+ * page table and phys2mach locations, the xenstore/console frames and
+ * event channels and, when present, the ramdisk location and the kernel
+ * command line (truncated to MAX_GUEST_CMDLINE - 1 characters).
+ * Always returns 0.
+ */
+static int start_info_x86_32(struct xc_dom_image *dom)
+{
+    start_info_x86_32_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo =
+        xc_dom_feature_translated(dom) ? dom->shared_info_pfn : dom->
+        shared_info_mfn;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* guest_type is data, not a format string: copy it through "%s" and
+     * bound the write to the size of the magic field */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+             dom->guest_type);
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = dom->pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+        start_info->mod_start = dom->ramdisk_seg.vstart;
+        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+    }
+    if (dom->cmdline)
+    {
+        /* strncpy() does not terminate on truncation, so do it by hand */
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+    return 0;
+}
+
+/*
+ * Fill in the start_info page of a 64-bit x86 guest.
+ *
+ * Stores the guest type string as magic, the page count, the shared_info
+ * address (guest pfn for translated guests, machine frame otherwise), the
+ * page table and phys2mach locations, the xenstore/console frames and
+ * event channels and, when present, the ramdisk location and the kernel
+ * command line (truncated to MAX_GUEST_CMDLINE - 1 characters).
+ * Always returns 0.
+ */
+static int start_info_x86_64(struct xc_dom_image *dom)
+{
+    start_info_x86_64_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo =
+        xc_dom_feature_translated(dom) ? dom->shared_info_pfn : dom->
+        shared_info_mfn;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* guest_type is data, not a format string: copy it through "%s" and
+     * bound the write to the size of the magic field */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+             dom->guest_type);
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = dom->pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+        start_info->mod_start = dom->ramdisk_seg.vstart;
+        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+    }
+    if (dom->cmdline)
+    {
+        /* strncpy() does not terminate on truncation, so do it by hand */
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+    return 0;
+}
+
+/*
+ * Initialise the shared_info page of a 32-bit x86 guest: zero it and set
+ * evtchn_upcall_mask to 1 for each of the MAX_VIRT_CPUS vcpu_info slots.
+ * Always returns 0.
+ */
+static int shared_info_x86_32(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_32_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    while (cpu < MAX_VIRT_CPUS)
+    {
+        shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+        cpu++;
+    }
+    return 0;
+}
+
+/*
+ * Initialise the shared_info page of a 64-bit x86 guest: zero it and set
+ * evtchn_upcall_mask to 1 for each of the MAX_VIRT_CPUS vcpu_info slots.
+ * Always returns 0.
+ */
+static int shared_info_x86_64(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_64_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    while (cpu < MAX_VIRT_CPUS)
+    {
+        shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+        cpu++;
+    }
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Build the initial vcpu context of a 32-bit x86 guest.
+ *
+ * @ptr points at the vcpu_guest_context_x86_32_t to fill in.  The context
+ * is zeroed, every trap vector and callback gets the flat kernel code
+ * segment, the segment registers get the flat kernel selectors, eip is the
+ * kernel entry point, esp/kernel_sp point at the end of the boot stack
+ * page, esi at the start_info page and cr3 at the first page of
+ * pgtables_seg.  Always returns 0.
+ */
+static int vcpu_x86_32(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_x86_32_t *ctxt = ptr;
+    xen_pfn_t cr3_pfn;
+    int vec;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* start from an all-zero context */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for (vec = 0; vec < 256; vec++)
+    {
+        ctxt->trap_ctxt[vec].cs = FLAT_KERNEL_CS_X86_32;
+        ctxt->trap_ctxt[vec].vector = vec;
+    }
+
+    /* No callback handlers. */
+    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS_X86_32;
+    ctxt->event_callback_cs = FLAT_KERNEL_CS_X86_32;
+
+    /* flat segments */
+    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_32;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_32;
+    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_32;
+
+    /* entry point, stack (end of the boot stack page) and start_info */
+    ctxt->user_regs.eip = dom->parms.virt_entry;
+    ctxt->user_regs.esp =
+        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    ctxt->user_regs.esi =
+        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
+    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
+
+    ctxt->kernel_ss = FLAT_KERNEL_SS_X86_32;
+    ctxt->kernel_sp =
+        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+
+    ctxt->flags = VGCF_in_kernel_X86_32;
+    switch (dom->parms.pae)
+    {
+    case 2: /* extended_cr3 */
+    case 3: /* bimodal */
+        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
+        break;
+    default:
+        break;
+    }
+
+    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
+    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_32(cr3_pfn);
+    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
+                  __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
+
+    return 0;
+}
+
+/*
+ * Build the initial vcpu context of a 64-bit x86 guest.
+ *
+ * @ptr points at the vcpu_guest_context_x86_64_t to fill in.  The context
+ * is zeroed, every trap vector gets the flat kernel code segment, the
+ * segment registers get the flat kernel selectors, rip is the kernel entry
+ * point, rsp/kernel_sp point at the end of the boot stack page, rsi at the
+ * start_info page and cr3 at the first page of pgtables_seg.
+ * Always returns 0.
+ */
+static int vcpu_x86_64(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_x86_64_t *ctxt = ptr;
+    xen_pfn_t cr3_pfn;
+    int vec;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* start from an all-zero context */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for (vec = 0; vec < 256; vec++)
+    {
+        ctxt->trap_ctxt[vec].cs = FLAT_KERNEL_CS_X86_64;
+        ctxt->trap_ctxt[vec].vector = vec;
+    }
+
+    /* flat segments */
+    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_64;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_64;
+    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_64;
+
+    /* entry point, stack (end of the boot stack page) and start_info */
+    ctxt->user_regs.rip = dom->parms.virt_entry;
+    ctxt->user_regs.rsp =
+        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    ctxt->user_regs.rsi =
+        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
+    ctxt->user_regs.rflags = 1 << 9; /* Interrupt Enable */
+
+    ctxt->kernel_ss = FLAT_KERNEL_SS_X86_64;
+    ctxt->kernel_sp =
+        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+
+    ctxt->flags = VGCF_in_kernel_X86_64;
+
+    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
+    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_64(cr3_pfn);
+    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
+                  __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Hook table for non-PAE 32-bit x86 guests. */
+static struct xc_dom_arch xc_dom_32 = {
+ .guest_type = "xen-3.0-x86_32",
+ .page_shift = PAGE_SHIFT_X86,
+ .sizeof_pfn = 4, /* bytes per phys2mach entry, see alloc_magic_pages() */
+ .alloc_magic_pages = alloc_magic_pages,
+ .count_pgtables = count_pgtables_x86_32,
+ .setup_pgtables = setup_pgtables_x86_32,
+ .start_info = start_info_x86_32,
+ .shared_info = shared_info_x86_32,
+ .vcpu = vcpu_x86_32,
+};
+/* Hook table for PAE 32-bit x86 guests; differs from the non-PAE table
+ * only in guest_type and the two page table hooks. */
+static struct xc_dom_arch xc_dom_32_pae = {
+ .guest_type = "xen-3.0-x86_32p",
+ .page_shift = PAGE_SHIFT_X86,
+ .sizeof_pfn = 4, /* bytes per phys2mach entry, see alloc_magic_pages() */
+ .alloc_magic_pages = alloc_magic_pages,
+ .count_pgtables = count_pgtables_x86_32_pae,
+ .setup_pgtables = setup_pgtables_x86_32_pae,
+ .start_info = start_info_x86_32,
+ .shared_info = shared_info_x86_32,
+ .vcpu = vcpu_x86_32,
+};
+
+/* Hook table for 64-bit x86 guests. */
+static struct xc_dom_arch xc_dom_64 = {
+ .guest_type = "xen-3.0-x86_64",
+ .page_shift = PAGE_SHIFT_X86,
+ .sizeof_pfn = 8, /* bytes per phys2mach entry, see alloc_magic_pages() */
+ .alloc_magic_pages = alloc_magic_pages,
+ .count_pgtables = count_pgtables_x86_64,
+ .setup_pgtables = setup_pgtables_x86_64,
+ .start_info = start_info_x86_64,
+ .shared_info = shared_info_x86_64,
+ .vcpu = vcpu_x86_64,
+};
+
+/*
+ * Hand the three x86 hook tables (non-PAE i386, PAE i386, x86_64) to
+ * xc_dom_register_arch_hooks(), in that order.
+ */
+static void __init register_arch_hooks(void)
+{
+    struct xc_dom_arch *const tables[] = {
+        &xc_dom_32,
+        &xc_dom_32_pae,
+        &xc_dom_64,
+    };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(tables) / sizeof(tables[0]); idx++)
+        xc_dom_register_arch_hooks(tables[idx]);
+}