diff options
author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-04-08 16:40:13 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-04-08 16:40:13 +0000 |
commit | f677efb823a2c576236e96510ca45e5c54df96d1 (patch) | |
tree | d95f76f404c96fb8d579520004fddac53b474cd7 | |
parent | 98da02c11d9f6226418a7d692bc961ab24ae9ce6 (diff) | |
download | xen-f677efb823a2c576236e96510ca45e5c54df96d1.tar.gz xen-f677efb823a2c576236e96510ca45e5c54df96d1.tar.bz2 xen-f677efb823a2c576236e96510ca45e5c54df96d1.zip |
bitkeeper revision 1.856 (4075806dGVuPwXtbZgPbDT2-zKk0gw)
New control-interface functionality for Xenolinux. Also
extended start_info_t to include the event-channel index
for the controller interface.
30 files changed, 2076 insertions, 349 deletions
@@ -102,7 +102,7 @@ 40431ac64Hj4ixUnKmlugZKhXPFE_Q tools/xend/Makefile 4055ad95Se-FqttgxollqOAAHB94zA tools/xend/lib/__init__.py 4055ad97wMLUj0BZT0e_T0EwQN0Bvw tools/xend/lib/console.py -4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/control_if.h +4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h 4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py 4055ad9ah9IuC3sJT2c_gYIFY5Tw_g tools/xend/lib/manager.py 40431ac8wrUEj-XM7B8smFtx_HA7lQ tools/xend/lib/utils.c @@ -668,9 +668,14 @@ 40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c 3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.25-sparse/arch/xen/drivers/network/Makefile 3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c +4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/Makefile +4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.c +4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.h +4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/vbd.c 405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/Makefile 405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile +4075806dE5mQwlVUf8-t3YXjiMMWDQ xenolinux-2.4.25-sparse/arch/xen/kernel/ctrl_if.c 3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S 3e5a4e65ibVQmwlOn0j3sVH_j_6hAg xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c 3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg xenolinux-2.4.25-sparse/arch/xen/kernel/head.S @@ -699,7 +704,7 @@ 3e5a4e66rw65CxyolW9PKz4GG42RcA xenolinux-2.4.25-sparse/drivers/char/tty_io.c 3e5a4e669uzIE54VwucPYtGwXLAbzA xenolinux-2.4.25-sparse/fs/exec.c 3e5a4e66wbeCpsJgVf_U8Jde-CNcsA xenolinux-2.4.25-sparse/include/asm-xen/bugs.h -4048c0ddxnIa2GpBAVR-mY6mNSdeJg 
xenolinux-2.4.25-sparse/include/asm-xen/control_if.h +4048c0ddxnIa2GpBAVR-mY6mNSdeJg xenolinux-2.4.25-sparse/include/asm-xen/ctrl_if.h 3e5a4e66HdSkvIV6SJ1evG_xmTmXHA xenolinux-2.4.25-sparse/include/asm-xen/desc.h 4048c0e0_P2wUTiT6UqgPhn0s7yFcA xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h 3e5a4e66SYp_UpAVcF8Lc1wa3Qtgzw xenolinux-2.4.25-sparse/include/asm-xen/fixmap.h diff --git a/tools/examples/xc_dom_create.py b/tools/examples/xc_dom_create.py index 886dd31f0f..9c7b73d94b 100755 --- a/tools/examples/xc_dom_create.py +++ b/tools/examples/xc_dom_create.py @@ -234,24 +234,24 @@ def make_domain(): print "Error creating domain" sys.exit() - ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline )' % builder_fn) + cmsg = 'new_control_interface(dom='+str(id)+')' + xend_response = xenctl.utils.xend_control_message(cmsg) + if not xend_response['success']: + print "Error creating initial event channel" + print "Error type: " + xend_response['error_type'] + if xend_response['error_type'] == 'exception': + print "Exception type: " + xend_response['exception_type'] + print "Exception value: " + xend_response['exception_value'] + xc.domain_destroy ( dom=id ) + sys.exit() + + ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn) if ret < 0: print "Error building Linux guest OS: " print "Return code = " + str(ret) xc.domain_destroy ( dom=id ) sys.exit() - cmsg = 'new_control_interface(dom='+str(id)+')' - xend_response = xenctl.utils.xend_control_message(cmsg) - if not xend_response['success']: - print "Error creating initial event channel" - print "Error type: " + xend_response['error_type'] - if xend_response['error_type'] == 'exception': - print "Exception type: " + xend_response['exception_type'] - print "Exception value: " + xend_response['exception_value'] - xc.domain_destroy ( dom=id ) - sys.exit() - # setup the virtual block devices # set the expertise level 
appropriately diff --git a/tools/misc/Makefile b/tools/misc/Makefile index c572e6d9a4..be9f486bdb 100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -3,6 +3,7 @@ CC = gcc CFLAGS = -Wall -O3 EXTRA_INC = -I../../xen/include/hypervisor-ifs EXTRA_INC += -I../../xenolinux-sparse/include -I../xc/lib +EXTRA_INC += -I../xend/lib HDRS = $(wildcard *.h) SRCS = $(wildcard *.c) diff --git a/tools/xc/lib/Makefile b/tools/xc/lib/Makefile index 35a7b392fa..79dce046df 100644 --- a/tools/xc/lib/Makefile +++ b/tools/xc/lib/Makefile @@ -6,6 +6,7 @@ SONAME = libxc.so.$(MAJOR) CC = gcc CFLAGS = -c -Wall -O3 -fno-strict-aliasing CFLAGS += -I../../../xen/include/hypervisor-ifs +CFLAGS += -I../../xend/lib CFLAGS += -I../../../xenolinux-sparse/include HDRS = $(wildcard *.h) diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h index 48b296b40a..936dd852c0 100644 --- a/tools/xc/lib/xc.h +++ b/tools/xc/lib/xc.h @@ -69,12 +69,14 @@ int xc_linux_build(int xc_handle, u64 domid, const char *image_name, const char *ramdisk_name, - const char *cmdline); + const char *cmdline, + unsigned int control_evtchn); int xc_netbsd_build(int xc_handle, u64 domid, const char *image_name, - const char *cmdline); + const char *cmdline, + unsigned int control_evtchn); int xc_bvtsched_global_set(int xc_handle, unsigned long ctx_allow); diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c index 3acfc2173d..6f10afde1a 100644 --- a/tools/xc/lib/xc_linux_build.c +++ b/tools/xc/lib/xc_linux_build.c @@ -72,7 +72,8 @@ static int setup_guestos(int xc_handle, unsigned long *pvsi, unsigned long *pvke, dom0_builddomain_t *builddomain, const char *cmdline, - unsigned long shared_info_frame) + unsigned long shared_info_frame, + unsigned int control_evtchn) { l1_pgentry_t *vl1tab=NULL, *vl1e=NULL; l2_pgentry_t *vl2tab=NULL, *vl2e=NULL; @@ -80,7 +81,7 @@ static int setup_guestos(int xc_handle, unsigned long l2tab; unsigned long l1tab; unsigned long count, i; - start_info_t *start_info; + 
extended_start_info_t *start_info; shared_info_t *shared_info; mmu_t *mmu = NULL; int pm_handle=-1, rc; @@ -272,13 +273,14 @@ static int setup_guestos(int xc_handle, start_info->pt_base = vpt_start; start_info->nr_pt_frames = nr_pt_pages; start_info->mfn_list = vphysmap_start; + start_info->domain_controller_evtchn = control_evtchn; if ( initrd_len != 0 ) { start_info->mod_start = vinitrd_start; start_info->mod_len = initrd_len; } - strncpy(start_info->cmd_line, cmdline, MAX_CMD_LEN); - start_info->cmd_line[MAX_CMD_LEN-1] = '\0'; + strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE); + start_info->cmd_line[MAX_CMDLINE-1] = '\0'; unmap_pfn(pm_handle, start_info); /* shared_info page starts its life empty. */ @@ -379,7 +381,8 @@ int xc_linux_build(int xc_handle, u64 domid, const char *image_name, const char *ramdisk_name, - const char *cmdline) + const char *cmdline, + unsigned int control_evtchn) { dom0_op_t launch_op, op; int initrd_fd = -1; @@ -436,7 +439,8 @@ int xc_linux_build(int xc_handle, initrd_gfd, initrd_size, nr_pages, &vstartinfo_start, &vkern_entry, &launch_op.u.builddomain, cmdline, - op.u.getdomaininfo.shared_info_frame) < 0 ) + op.u.getdomaininfo.shared_info_frame, + control_evtchn) < 0 ) { ERROR("Error constructing guest OS"); goto error_out; diff --git a/tools/xc/lib/xc_netbsd_build.c b/tools/xc/lib/xc_netbsd_build.c index a53018297c..db5552d26e 100644 --- a/tools/xc/lib/xc_netbsd_build.c +++ b/tools/xc/lib/xc_netbsd_build.c @@ -61,7 +61,8 @@ static int setup_guestos(int xc_handle, unsigned long *virt_load_addr, dom0_builddomain_t *builddomain, const char *cmdline, - unsigned long shared_info_frame) + unsigned long shared_info_frame, + unsigned int control_evtchn) { l1_pgentry_t *vl1tab=NULL, *vl1e=NULL; l2_pgentry_t *vl2tab=NULL, *vl2e=NULL; @@ -71,7 +72,7 @@ static int setup_guestos(int xc_handle, unsigned long l1tab; unsigned long count, pt_start; unsigned long symtab_addr = 0, symtab_len = 0; - start_info_t *start_info; + extended_start_info_t 
*start_info; shared_info_t *shared_info; unsigned long ksize; mmu_t *mmu = NULL; @@ -176,8 +177,9 @@ static int setup_guestos(int xc_handle, start_info->nr_pages = tot_pages; start_info->shared_info = shared_info_frame << PAGE_SHIFT; start_info->flags = 0; - strncpy(start_info->cmd_line, cmdline, MAX_CMD_LEN); - start_info->cmd_line[MAX_CMD_LEN-1] = '\0'; + start_info->domain_controller_evtchn = control_evtchn; + strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE); + start_info->cmd_line[MAX_CMDLINE-1] = '\0'; unmap_pfn(pm_handle, start_info); /* shared_info page starts its life empty. */ @@ -210,7 +212,8 @@ static int setup_guestos(int xc_handle, int xc_netbsd_build(int xc_handle, u64 domid, const char *image_name, - const char *cmdline) + const char *cmdline, + unsigned int control_evtchn) { dom0_op_t launch_op, op; unsigned long load_addr; @@ -259,7 +262,8 @@ int xc_netbsd_build(int xc_handle, if ( setup_guestos(xc_handle, domid, kernel_gfd, tot_pages, &virt_startinfo_addr, &load_addr, &launch_op.u.builddomain, cmdline, - op.u.getdomaininfo.shared_info_frame) < 0 ) + op.u.getdomaininfo.shared_info_frame, + control_evtchn) < 0 ) { ERROR("Error constructing guest OS"); goto error_out; diff --git a/tools/xc/lib/xc_private.h b/tools/xc/lib/xc_private.h index dda04a9f8d..b6c78b74fd 100644 --- a/tools/xc/lib/xc_private.h +++ b/tools/xc/lib/xc_private.h @@ -25,6 +25,9 @@ #include <event_channel.h> #include <sched_ctl.h> +/* from xend/lib */ +#include <domain_controller.h> + #define _PAGE_PRESENT 0x001 #define _PAGE_RW 0x002 #define _PAGE_USER 0x004 diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 5114b52afe..3c6fd7abd9 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -225,14 +225,18 @@ static PyObject *pyxc_linux_build(PyObject *self, u64 dom; char *image, *ramdisk = NULL, *cmdline = ""; + int control_evtchn; - static char *kwd_list[] = { "dom", "image", "ramdisk", "cmdline", NULL }; + static char *kwd_list[] = { "dom", "control_evtchn", + "image", 
"ramdisk", "cmdline", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|ss", kwd_list, - &dom, &image, &ramdisk, &cmdline) ) + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list, + &dom, &control_evtchn, + &image, &ramdisk, &cmdline) ) return NULL; - if ( xc_linux_build(xc->xc_handle, dom, image, ramdisk, cmdline) != 0 ) + if ( xc_linux_build(xc->xc_handle, dom, image, + ramdisk, cmdline, control_evtchn) != 0 ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); @@ -247,14 +251,18 @@ static PyObject *pyxc_netbsd_build(PyObject *self, u64 dom; char *image, *ramdisk = NULL, *cmdline = ""; + int control_evtchn; - static char *kwd_list[] = { "dom", "image", "ramdisk", "cmdline", NULL }; + static char *kwd_list[] = { "dom", "control_evtchn", + "image", "ramdisk", "cmdline", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|ss", kwd_list, - &dom, &image, &ramdisk, &cmdline) ) + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list, + &dom, &control_evtchn, + &image, &ramdisk, &cmdline) ) return NULL; - if ( xc_netbsd_build(xc->xc_handle, dom, image, cmdline) != 0 ) + if ( xc_netbsd_build(xc->xc_handle, dom, image, + cmdline, control_evtchn) != 0 ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); diff --git a/tools/xend/lib/control_if.h b/tools/xend/lib/domain_controller.h index 299feebb8d..a6ac3b4c92 100644 --- a/tools/xend/lib/control_if.h +++ b/tools/xend/lib/domain_controller.h @@ -1,5 +1,5 @@ /****************************************************************************** - * control_if.h + * domain_controller.h * * Interface to server controller (e.g., 'xend'). This header file defines the * interface that is shared with guest OSes. @@ -7,8 +7,28 @@ * Copyright (c) 2004, K A Fraser */ -#ifndef __CONTROL_IF_H__ -#define __CONTROL_IF_H__ +#ifndef __DOMAIN_CONTROLLER_H__ +#define __DOMAIN_CONTROLLER_H__ + + +#ifndef BASIC_START_INFO +#error "Xen header file hypervisor-if.h must already be included here." 
+#endif + + +/* + * EXTENDED BOOTSTRAP STRUCTURE FOR NEW DOMAINS. + */ + +typedef struct { + BASIC_START_INFO; + unsigned int domain_controller_evtchn; +} extended_start_info_t; + + +/* + * CONTROLLER MESSAGING INTERFACE. + */ typedef struct { u8 type; /* echoed in response */ @@ -32,4 +52,5 @@ typedef struct { #define CMSG_CONSOLE 0 #define CMSG_CONSOLE_DATA 0 -#endif /* __CONTROL_IF_H__ */ + +#endif /* __DOMAIN_CONTROLLER_H__ */ diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c index 6a531da0cc..ea2cee05d5 100644 --- a/tools/xend/lib/utils.c +++ b/tools/xend/lib/utils.c @@ -21,7 +21,9 @@ #include <errno.h> #include <signal.h> #include <xc.h> -#include "control_if.h" + +#include <hypervisor-if.h> +#include "domain_controller.h" /* Needed for Python versions earlier than 2.3. */ #ifndef PyMODINIT_FUNC diff --git a/tools/xend/setup.py b/tools/xend/setup.py index 80770278e9..1f39cb4572 100644 --- a/tools/xend/setup.py +++ b/tools/xend/setup.py @@ -3,7 +3,8 @@ from distutils.core import setup, Extension utils = Extension("utils", extra_compile_args = ["-fno-strict-aliasing"], - include_dirs = ["../xc/lib"], + include_dirs = ["../xc/lib", + "../../xen/include/hypervisor-ifs"], library_dirs = ["../xc/lib"], libraries = ["xc"], sources = ["lib/utils.c"]) diff --git a/tools/xentrace/Makefile b/tools/xentrace/Makefile index 6c8006ae35..7544e4b262 100644 --- a/tools/xentrace/Makefile +++ b/tools/xentrace/Makefile @@ -3,6 +3,7 @@ CC = gcc CFLAGS = -Wall -O3 CFLAGS += -I../../xen/include/hypervisor-ifs CFLAGS += -I../../xenolinux-sparse/include +CFLAGS += -I../xend/lib HDRS = $(wildcard *.h) OBJS = $(patsubst %.c,%.o,$(wildcard *.c)) diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h index 7dc221ad77..0e43333151 100644 --- a/xen/include/hypervisor-ifs/dom0_ops.h +++ b/xen/include/hypervisor-ifs/dom0_ops.h @@ -20,7 +20,6 @@ */ #define DOM0_INTERFACE_VERSION 0xAAAA000B -#define MAX_CMD_LEN 256 #define MAX_DOMAIN_NAME 16 #define 
DOM0_CREATEDOMAIN 8 diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 3941b34944..a196832eb9 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -299,18 +299,35 @@ typedef struct shared_info_st * bootstrap element. If necessary, the bootstrap virtual region is * extended by an extra 4MB to ensure this. */ -typedef struct start_info_st { - /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ - unsigned long nr_pages; /* total pages allocated to this domain. */ - unsigned long shared_info; /* MACHINE address of shared info struct.*/ - unsigned long flags; /* SIF_xxx flags. */ - /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ - unsigned long pt_base; /* VIRTUAL address of page directory. */ - unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ - unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ - unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ - unsigned char cmd_line[1]; /* Variable-length options. */ + +/* + * This is the basic bootstrap information structure as passed by Xen to the + * initial controller domain. We want this structure to be easily extended by + * more sophisticated domain builders and controllers, so we make the basic + * fields of this structure available via a BASIC_START_INFO macro. + * + * Extended version of start_info_t should be defined as: + * typedef struct { + * BASIC_START_INFO; + * <...extra fields...> + * } extended_start_info_t; + */ +#define MAX_CMDLINE 256 +#define BASIC_START_INFO \ + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ \ + unsigned long nr_pages; /* total pages allocated to this domain. */ \ + unsigned long shared_info; /* MACHINE address of shared info struct.*/ \ + unsigned long flags; /* SIF_xxx flags. 
*/ \ + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ \ + unsigned long pt_base; /* VIRTUAL address of page directory. */ \ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ \ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ \ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ \ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ \ + unsigned char cmd_line[MAX_CMDLINE] + +typedef struct { + BASIC_START_INFO; } start_info_t; /* These flags are passed in the 'flags' field of start_info_t. */ diff --git a/xenolinux-2.4.25-sparse/arch/xen/Makefile b/xenolinux-2.4.25-sparse/arch/xen/Makefile index 3e210c1e54..aba91a2414 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/Makefile +++ b/xenolinux-2.4.25-sparse/arch/xen/Makefile @@ -50,13 +50,15 @@ HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib SUBDIRS += arch/xen/drivers/console -ifndef CONFIG_XEN_PHYSDEV_ACCESS -SUBDIRS += arch/xen/drivers/network -endif SUBDIRS += arch/xen/drivers/evtchn +ifdef CONFIG_XEN_PHYSDEV_ACCESS +SUBDIRS += arch/xen/drivers/vblkif +SUBDIRS += arch/xen/drivers/vnetif +else SUBDIRS += arch/xen/drivers/block +SUBDIRS += arch/xen/drivers/network +endif SUBDIRS += arch/xen/drivers/balloon -SUBDIRS += arch/xen/drivers/vnetif ifdef CONFIG_XEN_PRIVILEGED_GUEST SUBDIRS += arch/xen/drivers/dom0 endif @@ -64,11 +66,13 @@ endif CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o CORE_FILES += arch/xen/drivers/evtchn/drv.o CORE_FILES += arch/xen/drivers/console/drv.o +ifdef CONFIG_XEN_PHYSDEV_ACCESS +CORE_FILES += arch/xen/drivers/vblkif/drv.o +CORE_FILES += arch/xen/drivers/vnetif/drv.o +else CORE_FILES += arch/xen/drivers/block/drv.o -ifndef CONFIG_XEN_PHYSDEV_ACCESS CORE_FILES += arch/xen/drivers/network/drv.o endif -CORE_FILES += arch/xen/drivers/vnetif/drv.o ifdef CONFIG_XEN_PRIVILEGED_GUEST CORE_FILES += arch/xen/drivers/dom0/drv.o endif diff --git 
a/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c index 222097f20d..3287790162 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c @@ -28,54 +28,53 @@ #include <asm/uaccess.h> #include <asm/hypervisor.h> #include <asm/hypervisor-ifs/event_channel.h> -#include <asm/control_if.h> +#include <asm/ctrl_if.h> -static spinlock_t xen_console_lock = SPIN_LOCK_UNLOCKED; +#define XEN_TTY_MINOR 123 + +/* The kernel and user-land drivers share a common transmit buffer. */ +#define WBUF_SIZE 1024 +#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) +static char wbuf[WBUF_SIZE]; +static unsigned int wc, wp; /* write_cons, write_prod */ -static int console_evtchn; +/* This lock protects accesses to the common transmit buffer. */ +static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED; -#define XEN_TTY_MINOR 123 +/* Common transmit-kick routine. */ +static void __xencons_tx_flush(void); + +/* This task is used to defer sending console data until there is space. */ +static void xencons_tx_flush_task_routine(void *data); +static struct tq_struct xencons_tx_flush_task = { + routine: xencons_tx_flush_task_routine +}; /******************** Kernel console driver ********************************/ -static void nonpriv_conwrite(const char *s, unsigned int count) +static void kcons_write( + struct console *c, const char *s, unsigned int count) { - control_if_t *ctrl_if; - evtchn_op_t evtchn_op; - int src, dst, p; - - ctrl_if = (control_if_t *)((char *)HYPERVISOR_shared_info + 2048); + int i; + unsigned long flags; - while ( count != 0 ) + spin_lock_irqsave(&xencons_lock, flags); + + for ( i = 0; i < count; i++ ) { - /* Wait for the request ring to drain. 
*/ - while ( ctrl_if->tx_resp_prod != ctrl_if->tx_req_prod ) - barrier(); - - p = MASK_CONTROL_IDX(ctrl_if->tx_req_prod); - - ctrl_if->tx_ring[p].type = CMSG_CONSOLE; - ctrl_if->tx_ring[p].subtype = CMSG_CONSOLE_DATA; - ctrl_if->tx_ring[p].id = 0xaa; - src = dst = 0; - while ( (src < count) && (dst < (sizeof(ctrl_if->tx_ring[p].msg)-1)) ) - { - if ( (ctrl_if->tx_ring[p].msg[dst++] = s[src++]) == '\n' ) - ctrl_if->tx_ring[p].msg[dst++] = '\r'; - } - ctrl_if->tx_ring[p].length = dst; - - ctrl_if->tx_req_prod++; - evtchn_op.cmd = EVTCHNOP_send; - evtchn_op.u.send.local_port = console_evtchn; - (void)HYPERVISOR_event_channel_op(&evtchn_op); - - s += src; - count -= src; + if ( (wp - wc) >= (WBUF_SIZE - 1) ) + break; + if ( (wbuf[WBUF_MASK(wp++)] = s[i]) == '\n' ) + wbuf[WBUF_MASK(wp++)] = '\r'; } + + __xencons_tx_flush(); + + spin_unlock_irqrestore(&xencons_lock, flags); } -static void priv_conwrite(const char *s, unsigned int count) +static void kcons_write_dom0( + struct console *c, const char *s, unsigned int count) { int rc; @@ -89,18 +88,6 @@ static void priv_conwrite(const char *s, unsigned int count) } } -static void kcons_write(struct console *co, const char *s, - unsigned int count) -{ - unsigned long flags; - spin_lock_irqsave(&xen_console_lock, flags); - if ( !(start_info.flags & SIF_INITDOMAIN) ) - nonpriv_conwrite(s, count); - else - priv_conwrite(s, count); - spin_unlock_irqrestore(&xen_console_lock, flags); -} - static kdev_t kcons_device(struct console *c) { /* @@ -112,7 +99,6 @@ static kdev_t kcons_device(struct console *c) static struct console kcons_info = { name: "xencons", - write: kcons_write, device: kcons_device, flags: CON_PRINTBUFFER, index: -1, @@ -120,28 +106,8 @@ static struct console kcons_info = { void xen_console_init(void) { - evtchn_op_t op; - int i; - - if ( !(start_info.flags & SIF_INITDOMAIN) ) - { - /* Scan the event-channel space to find our control link to DOM0. 
*/ - for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) - { - op.cmd = EVTCHNOP_status; - op.u.status.dom = DOMID_SELF; - op.u.status.port = i; - if ( (HYPERVISOR_event_channel_op(&op) == 0) && - (op.u.status.status == EVTCHNSTAT_interdomain) && - (op.u.status.u.interdomain.dom == 0) ) - break; - } - - /* Bug out if there is no control link. */ - if ( (console_evtchn = i) == NR_EVENT_CHANNELS ) - BUG(); - } - + kcons_info.write = + (start_info.flags & SIF_INITDOMAIN) ? kcons_write_dom0 : kcons_write; register_console(&kcons_info); } @@ -159,7 +125,10 @@ asmlinkage int xprintk(const char *fmt, ...) va_end(args); /* Send the processed output directly to Xen. */ - kcons_write(NULL, printk_buf, printk_len); + if ( start_info.flags & SIF_INITDOMAIN ) + kcons_write_dom0(NULL, printk_buf, printk_len); + else + kcons_write(NULL, printk_buf, printk_len); return 0; } @@ -167,173 +136,186 @@ asmlinkage int xprintk(const char *fmt, ...) /******************** User-space console driver (/dev/console) ************/ -static struct tty_driver xen_console_driver; -static int xen_console_refcount; -static struct tty_struct *xen_console_table[1]; -static struct termios *xen_console_termios[1]; -static struct termios *xen_console_termios_locked[1]; -static struct tty_struct *xen_console_tty; -static int console_irq; +static struct tty_driver xencons_driver; +static int xencons_refcount; +static struct tty_struct *xencons_table[1]; +static struct termios *xencons_termios[1]; +static struct termios *xencons_termios_locked[1]; +static struct tty_struct *xencons_tty; +static int xencons_priv_irq; +static char x_char; + +/* Non-privileged receive callback. 
*/ +static void xencons_rx(ctrl_msg_t *msg, unsigned long id) +{ + int i; + unsigned long flags; -#define WBUF_SIZE 1024 -#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) -static char wbuf[WBUF_SIZE], x_char; -static unsigned int wc, wp; /* write_cons, write_prod */ + spin_lock_irqsave(&xencons_lock, flags); + if ( xencons_tty != NULL ) + { + for ( i = 0; i < msg->length; i++ ) + tty_insert_flip_char(xencons_tty, msg->msg[i], 0); + tty_flip_buffer_push(xencons_tty); + } + spin_unlock_irqrestore(&xencons_lock, flags); -static void __do_console_io(void) -{ - control_if_t *ctrl_if; - control_msg_t *msg; - evtchn_op_t evtchn_op; - CONTROL_RING_IDX c; - int i, l, work_done = 0; - static char rbuf[16]; + msg->length = 0; + ctrl_if_send_response(msg); +} - if ( xen_console_tty == NULL ) - return; +/* Privileged and non-privileged transmit worker. */ +static void __xencons_tx_flush(void) +{ + int sz, work_done = 0; + ctrl_msg_t msg; - /* Special-case I/O handling for domain 0. */ if ( start_info.flags & SIF_INITDOMAIN ) { - /* Receive work. */ - while ( (l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 ) - for ( i = 0; i < l; i++ ) - tty_insert_flip_char(xen_console_tty, rbuf[i], 0); - if ( xen_console_tty->flip.count != 0 ) - tty_flip_buffer_push(xen_console_tty); + if ( x_char ) + { + kcons_write_dom0(NULL, &x_char, 1); + x_char = 0; + work_done = 1; + } - /* Transmit work. 
*/ while ( wc != wp ) { - l = wp - wc; - if ( l > (WBUF_SIZE - WBUF_MASK(wc)) ) - l = WBUF_SIZE - WBUF_MASK(wc); - priv_conwrite(&wbuf[WBUF_MASK(wc)], l); - wc += l; - wake_up_interruptible(&xen_console_tty->write_wait); - if ( (xen_console_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - (xen_console_tty->ldisc.write_wakeup != NULL) ) - (xen_console_tty->ldisc.write_wakeup)(xen_console_tty); + sz = wp - wc; + if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) + sz = WBUF_SIZE - WBUF_MASK(wc); + kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz); + wc += sz; + work_done = 1; } - - return; } - - ctrl_if = (control_if_t *)((char *)HYPERVISOR_shared_info + 2048); - - /* Receive work. */ - for ( c = ctrl_if->rx_resp_prod; c != ctrl_if->rx_req_prod; c++ ) + else { - msg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(c)]; - if ( (msg->type == CMSG_CONSOLE) && - (msg->subtype == CMSG_CONSOLE_DATA) ) + while ( x_char ) { - for ( i = 0; i < msg->length; i++ ) - tty_insert_flip_char(xen_console_tty, msg->msg[i], 0); + msg.type = CMSG_CONSOLE; + msg.subtype = CMSG_CONSOLE_DATA; + msg.length = 1; + msg.msg[0] = x_char; + + if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) + x_char = 0; + else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) ) + break; + + work_done = 1; } - msg->length = 0; - } - if ( ctrl_if->rx_resp_prod != c ) - { - ctrl_if->rx_resp_prod = c; - work_done = 1; - tty_flip_buffer_push(xen_console_tty); - } - - /* Transmit work. */ - for ( c = ctrl_if->tx_req_prod; - (c - ctrl_if->tx_resp_prod) != CONTROL_RING_SIZE; - c++ ) - { - if ( (wc == wp) && (x_char == 0) ) - break; - msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(c)]; - msg->type = CMSG_CONSOLE; - msg->subtype = CMSG_CONSOLE_DATA; - msg->id = 0xaa; - l = 0; - if ( x_char != 0 ) /* Handle XON/XOFF urgently. 
*/ + + while ( wc != wp ) { - msg->msg[l++] = x_char; - x_char = 0; + sz = wp - wc; + if ( sz > sizeof(msg.msg) ) + sz = sizeof(msg.msg); + if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) + sz = WBUF_SIZE - WBUF_MASK(wc); + + msg.type = CMSG_CONSOLE; + msg.subtype = CMSG_CONSOLE_DATA; + msg.length = sz; + memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); + + if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) + wc += sz; + else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) ) + break; + + work_done = 1; } - while ( (l < sizeof(msg->msg)) && (wc != wp) ) - msg->msg[l++] = wbuf[WBUF_MASK(wc++)]; - msg->length = l; - } - if ( ctrl_if->tx_req_prod != c ) - { - ctrl_if->tx_req_prod = c; - work_done = 1; - /* There might be something for waiters to do. */ - wake_up_interruptible(&xen_console_tty->write_wait); - if ( (xen_console_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - (xen_console_tty->ldisc.write_wakeup != NULL) ) - (xen_console_tty->ldisc.write_wakeup)(xen_console_tty); } - if ( work_done ) + if ( work_done && (xencons_tty != NULL) ) { - /* Send a notification to the controller. */ - evtchn_op.cmd = EVTCHNOP_send; - evtchn_op.u.send.local_port = console_evtchn; - (void)HYPERVISOR_event_channel_op(&evtchn_op); + wake_up_interruptible(&xencons_tty->write_wait); + if ( (xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && + (xencons_tty->ldisc.write_wakeup != NULL) ) + (xencons_tty->ldisc.write_wakeup)(xencons_tty); } } -static void console_interrupt(int irq, void *dev_id, struct pt_regs *regs) +/* Non-privileged transmit kicker. */ +static void xencons_tx_flush_task_routine(void *data) +{ + unsigned long flags; + spin_lock_irqsave(&xencons_lock, flags); + __xencons_tx_flush(); + spin_unlock_irqrestore(&xencons_lock, flags); +} + +/* Privileged receive callback and transmit kicker. 
*/ +static void xencons_priv_interrupt(int irq, void *dev_id, struct pt_regs *regs) { + static char rbuf[16]; + int i, l; unsigned long flags; - spin_lock_irqsave(&xen_console_lock, flags); - __do_console_io(); - spin_unlock_irqrestore(&xen_console_lock, flags); + + spin_lock_irqsave(&xencons_lock, flags); + + if ( xencons_tty != NULL ) + { + /* Receive work. */ + while ( (l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 ) + for ( i = 0; i < l; i++ ) + tty_insert_flip_char(xencons_tty, rbuf[i], 0); + if ( xencons_tty->flip.count != 0 ) + tty_flip_buffer_push(xencons_tty); + } + + /* Transmit work. */ + __xencons_tx_flush(); + + spin_unlock_irqrestore(&xencons_lock, flags); } -static int xen_console_write_room(struct tty_struct *tty) +static int xencons_write_room(struct tty_struct *tty) { return WBUF_SIZE - (wp - wc); } -static int xen_console_chars_in_buffer(struct tty_struct *tty) +static int xencons_chars_in_buffer(struct tty_struct *tty) { return wp - wc; } -static void xen_console_send_xchar(struct tty_struct *tty, char ch) +static void xencons_send_xchar(struct tty_struct *tty, char ch) { unsigned long flags; - spin_lock_irqsave(&xen_console_lock, flags); + spin_lock_irqsave(&xencons_lock, flags); x_char = ch; - __do_console_io(); - spin_unlock_irqrestore(&xen_console_lock, flags); + __xencons_tx_flush(); + spin_unlock_irqrestore(&xencons_lock, flags); } -static void xen_console_throttle(struct tty_struct *tty) +static void xencons_throttle(struct tty_struct *tty) { if ( I_IXOFF(tty) ) - xen_console_send_xchar(tty, STOP_CHAR(tty)); + xencons_send_xchar(tty, STOP_CHAR(tty)); } -static void xen_console_unthrottle(struct tty_struct *tty) +static void xencons_unthrottle(struct tty_struct *tty) { if ( I_IXOFF(tty) ) { if ( x_char != 0 ) x_char = 0; else - xen_console_send_xchar(tty, START_CHAR(tty)); + xencons_send_xchar(tty, START_CHAR(tty)); } } -static void xen_console_flush_buffer(struct tty_struct *tty) +static void xencons_flush_buffer(struct 
tty_struct *tty) { unsigned long flags; - spin_lock_irqsave(&xen_console_lock, flags); + spin_lock_irqsave(&xencons_lock, flags); wc = wp = 0; - spin_unlock_irqrestore(&xen_console_lock, flags); + spin_unlock_irqrestore(&xencons_lock, flags); } -static inline int __xen_console_put_char(int ch) +static inline int __xencons_put_char(int ch) { char _ch = (char)ch; if ( (wp - wc) == WBUF_SIZE ) @@ -342,7 +324,7 @@ static inline int __xen_console_put_char(int ch) return 1; } -static int xen_console_write(struct tty_struct *tty, int from_user, +static int xencons_write(struct tty_struct *tty, int from_user, const u_char * buf, int count) { int i; @@ -351,7 +333,7 @@ static int xen_console_write(struct tty_struct *tty, int from_user, if ( from_user && verify_area(VERIFY_READ, buf, count) ) return -EINVAL; - spin_lock_irqsave(&xen_console_lock, flags); + spin_lock_irqsave(&xencons_lock, flags); for ( i = 0; i < count; i++ ) { @@ -360,35 +342,35 @@ static int xen_console_write(struct tty_struct *tty, int from_user, __get_user(ch, buf + i); else ch = buf[i]; - if ( !__xen_console_put_char(ch) ) + if ( !__xencons_put_char(ch) ) break; } if ( i != 0 ) - __do_console_io(); + __xencons_tx_flush(); - spin_unlock_irqrestore(&xen_console_lock, flags); + spin_unlock_irqrestore(&xencons_lock, flags); return i; } -static void xen_console_put_char(struct tty_struct *tty, u_char ch) +static void xencons_put_char(struct tty_struct *tty, u_char ch) { unsigned long flags; - spin_lock_irqsave(&xen_console_lock, flags); - (void)__xen_console_put_char(ch); - spin_unlock_irqrestore(&xen_console_lock, flags); + spin_lock_irqsave(&xencons_lock, flags); + (void)__xencons_put_char(ch); + spin_unlock_irqrestore(&xencons_lock, flags); } -static void xen_console_flush_chars(struct tty_struct *tty) +static void xencons_flush_chars(struct tty_struct *tty) { unsigned long flags; - spin_lock_irqsave(&xen_console_lock, flags); - __do_console_io(); - spin_unlock_irqrestore(&xen_console_lock, flags); + 
spin_lock_irqsave(&xencons_lock, flags); + __xencons_tx_flush(); + spin_unlock_irqrestore(&xencons_lock, flags); } -static void xen_console_wait_until_sent(struct tty_struct *tty, int timeout) +static void xencons_wait_until_sent(struct tty_struct *tty, int timeout) { unsigned long orig_jiffies = jiffies; @@ -405,7 +387,7 @@ static void xen_console_wait_until_sent(struct tty_struct *tty, int timeout) set_current_state(TASK_RUNNING); } -static int xen_console_open(struct tty_struct *tty, struct file *filp) +static int xencons_open(struct tty_struct *tty, struct file *filp) { int line; unsigned long flags; @@ -418,17 +400,17 @@ static int xen_console_open(struct tty_struct *tty, struct file *filp) return -ENODEV; } - spin_lock_irqsave(&xen_console_lock, flags); + spin_lock_irqsave(&xencons_lock, flags); tty->driver_data = NULL; - if ( xen_console_tty == NULL ) - xen_console_tty = tty; - __do_console_io(); - spin_unlock_irqrestore(&xen_console_lock, flags); + if ( xencons_tty == NULL ) + xencons_tty = tty; + __xencons_tx_flush(); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } -static void xen_console_close(struct tty_struct *tty, struct file *filp) +static void xencons_close(struct tty_struct *tty, struct file *filp) { unsigned long flags; @@ -441,77 +423,82 @@ static void xen_console_close(struct tty_struct *tty, struct file *filp) if ( tty->ldisc.flush_buffer != NULL ) tty->ldisc.flush_buffer(tty); tty->closing = 0; - spin_lock_irqsave(&xen_console_lock, flags); - xen_console_tty = NULL; - spin_unlock_irqrestore(&xen_console_lock, flags); + spin_lock_irqsave(&xencons_lock, flags); + xencons_tty = NULL; + spin_unlock_irqrestore(&xencons_lock, flags); } MOD_DEC_USE_COUNT; } -int __init xen_con_init(void) +int __init xencons_init(void) { - memset(&xen_console_driver, 0, sizeof(struct tty_driver)); - xen_console_driver.magic = TTY_DRIVER_MAGIC; - xen_console_driver.name = "xencons"; - xen_console_driver.major = TTY_MAJOR; - xen_console_driver.minor_start = 
XEN_TTY_MINOR; - xen_console_driver.num = 1; - xen_console_driver.type = TTY_DRIVER_TYPE_SERIAL; - xen_console_driver.subtype = SERIAL_TYPE_NORMAL; - xen_console_driver.init_termios = tty_std_termios; - xen_console_driver.flags = + memset(&xencons_driver, 0, sizeof(struct tty_driver)); + xencons_driver.magic = TTY_DRIVER_MAGIC; + xencons_driver.name = "xencons"; + xencons_driver.major = TTY_MAJOR; + xencons_driver.minor_start = XEN_TTY_MINOR; + xencons_driver.num = 1; + xencons_driver.type = TTY_DRIVER_TYPE_SERIAL; + xencons_driver.subtype = SERIAL_TYPE_NORMAL; + xencons_driver.init_termios = tty_std_termios; + xencons_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_NO_DEVFS; - xen_console_driver.refcount = &xen_console_refcount; - xen_console_driver.table = xen_console_table; - xen_console_driver.termios = xen_console_termios; - xen_console_driver.termios_locked = xen_console_termios_locked; - - xen_console_driver.open = xen_console_open; - xen_console_driver.close = xen_console_close; - xen_console_driver.write = xen_console_write; - xen_console_driver.write_room = xen_console_write_room; - xen_console_driver.put_char = xen_console_put_char; - xen_console_driver.flush_chars = xen_console_flush_chars; - xen_console_driver.chars_in_buffer = xen_console_chars_in_buffer; - xen_console_driver.send_xchar = xen_console_send_xchar; - xen_console_driver.flush_buffer = xen_console_flush_buffer; - xen_console_driver.throttle = xen_console_throttle; - xen_console_driver.unthrottle = xen_console_unthrottle; - xen_console_driver.wait_until_sent = xen_console_wait_until_sent; - - if ( tty_register_driver(&xen_console_driver) ) + xencons_driver.refcount = &xencons_refcount; + xencons_driver.table = xencons_table; + xencons_driver.termios = xencons_termios; + xencons_driver.termios_locked = xencons_termios_locked; + + xencons_driver.open = xencons_open; + xencons_driver.close = xencons_close; + xencons_driver.write = xencons_write; + 
xencons_driver.write_room = xencons_write_room; + xencons_driver.put_char = xencons_put_char; + xencons_driver.flush_chars = xencons_flush_chars; + xencons_driver.chars_in_buffer = xencons_chars_in_buffer; + xencons_driver.send_xchar = xencons_send_xchar; + xencons_driver.flush_buffer = xencons_flush_buffer; + xencons_driver.throttle = xencons_throttle; + xencons_driver.unthrottle = xencons_unthrottle; + xencons_driver.wait_until_sent = xencons_wait_until_sent; + + if ( tty_register_driver(&xencons_driver) ) panic("Couldn't register Xen virtual console driver\n"); - if ( !(start_info.flags & SIF_INITDOMAIN) ) - console_irq = bind_evtchn_to_irq(console_evtchn); + if ( start_info.flags & SIF_INITDOMAIN ) + { + xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE); + (void)request_irq(xencons_priv_irq, + xencons_priv_interrupt, 0, "console", NULL); + } else - console_irq = bind_virq_to_irq(VIRQ_CONSOLE); - - (void)request_irq(console_irq, - console_interrupt, 0, "console", NULL); + { + (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx); + } printk("Xen virtual console successfully installed\n"); return 0; } -void __exit xen_con_fini(void) +void __exit xencons_fini(void) { int ret; - ret = tty_unregister_driver(&xen_console_driver); - if ( ret != 0 ) + if ( (ret = tty_unregister_driver(&xencons_driver)) != 0 ) printk(KERN_ERR "Unable to unregister Xen console driver: %d\n", ret); - free_irq(console_irq, NULL); - - if ( !(start_info.flags & SIF_INITDOMAIN) ) - unbind_evtchn_from_irq(console_evtchn); - else + if ( start_info.flags & SIF_INITDOMAIN ) + { + free_irq(xencons_priv_irq, NULL); unbind_virq_from_irq(VIRQ_CONSOLE); + } + else + { + ctrl_if_unregister_receiver(CMSG_CONSOLE, xencons_rx); + } } -module_init(xen_con_init); -module_exit(xen_con_fini); +module_init(xencons_init); +module_exit(xencons_fini); diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/Makefile new file mode 100644 index 
0000000000..35986ca54a --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := block.o vbd.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.c new file mode 100644 index 0000000000..d00dd98f7b --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.c @@ -0,0 +1,625 @@ +/****************************************************************************** + * block.c + * + * Xenolinux virtual block-device driver. + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + */ + +#include "block.h" +#include <linux/blk.h> +#include <linux/cdrom.h> +#include <linux/tqueue.h> +#include <linux/sched.h> +#include <scsi/scsi.h> + +#include <linux/interrupt.h> + +typedef unsigned char byte; /* from linux/ide.h */ + +#define STATE_ACTIVE 0 +#define STATE_SUSPENDED 1 +#define STATE_CLOSED 2 +static unsigned int state = STATE_SUSPENDED; + +/* Dynamically-mapped IRQs. */ +static int xlblk_response_irq, xlblk_update_irq; + +static blk_ring_t *blk_ring; +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ +static BLK_RING_IDX req_prod; /* Private request producer. */ + +/* We plug the I/O ring if the driver is suspended or if the ring is full. */ +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ + (state != STATE_ACTIVE)) + + +/* + * Request queues with outstanding work, but ring is currently full. + * We need no special lock here, as we always access this with the + * io_request_lock held. We only need a small maximum list. 
+ */ +#define MAX_PENDING 8 +static request_queue_t *pending_queues[MAX_PENDING]; +static int nr_pending; + +static kdev_t sg_dev; +static int sg_operation = -1; +static unsigned long sg_next_sect; +#define DISABLE_SCATTERGATHER() (sg_operation = -1) + +static inline void signal_requests_to_xen(void) +{ + block_io_op_t op; + + DISABLE_SCATTERGATHER(); + blk_ring->req_prod = req_prod; + + op.cmd = BLOCK_IO_OP_SIGNAL; + HYPERVISOR_block_io_op(&op); + return; +} + + +/* + * xlblk_update_int/update-vbds_task - handle VBD update events from Xen + * + * Schedule a task for keventd to run, which will update the VBDs and perform + * the corresponding updates to our view of VBD state, so the XenoLinux will + * respond to changes / additions / deletions to the set of VBDs automatically. + */ +static struct tq_struct update_tq; +static void update_vbds_task(void *unused) +{ + xlvbd_update_vbds(); +} +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); +} + + +int xen_block_open(struct inode *inode, struct file *filep) +{ + short xldev = inode->i_rdev; + struct gendisk *gd = get_gendisk(xldev); + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + short minor = MINOR(xldev); + + if ( gd->part[minor].nr_sects == 0 ) + { + /* + * Device either doesn't exist, or has zero capacity; we use a few + * cheesy heuristics to return the relevant error code + */ + if ( (gd->sizes[minor >> gd->minor_shift] != 0) || + ((minor & (gd->max_p - 1)) != 0) ) + { + /* + * We have a real device, but no such partition, or we just have a + * partition number so guess this is the problem. + */ + return -ENXIO; /* no such device or address */ + } + else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) + { + /* This is a removable device => assume that media is missing. 
*/ + return -ENOMEDIUM; /* media not present (this is a guess) */ + } + else + { + /* Just go for the general 'no such device' error. */ + return -ENODEV; /* no such device */ + } + } + + /* Update of usage count is protected by per-device semaphore. */ + disk->usage++; + + return 0; +} + + +int xen_block_release(struct inode *inode, struct file *filep) +{ + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + + /* + * When usage drops to zero it may allow more VBD updates to occur. + * Update of usage count is protected by a per-device semaphore. + */ + if ( --disk->usage == 0 ) + { + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); + } + + return 0; +} + + +int xen_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) +{ + kdev_t dev = inode->i_rdev; + struct hd_geometry *geo = (struct hd_geometry *)argument; + struct gendisk *gd; + struct hd_struct *part; + int i; + + /* NB. No need to check permissions. That is done for us. 
*/ + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", + command, (long) argument, dev); + + gd = get_gendisk(dev); + part = &gd->part[MINOR(dev)]; + + switch ( command ) + { + case BLKGETSIZE: + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); + return put_user(part->nr_sects, (unsigned long *) argument); + + case BLKGETSIZE64: + DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, + (u64)part->nr_sects * 512); + return put_user((u64)part->nr_sects * 512, (u64 *) argument); + + case BLKRRPART: /* re-read partition table */ + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); + return xen_block_revalidate(dev); + + case BLKSSZGET: + return hardsect_size[MAJOR(dev)][MINOR(dev)]; + + case BLKBSZGET: /* get block size */ + DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); + break; + + case BLKBSZSET: /* set block size */ + DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); + break; + + case BLKRASET: /* set read-ahead */ + DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); + break; + + case BLKRAGET: /* get read-ahead */ + DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); + break; + + case HDIO_GETGEO: + /* note: these values are complete garbage */ + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; + return 0; + + case HDIO_GETGEO_BIG: + /* note: these values are complete garbage */ + DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; + return 0; + + case 
CDROMMULTISESSION: + DPRINTK("FIXME: support multisession CDs later\n"); + for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) + if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; + return 0; + + case SCSI_IOCTL_GET_BUS_NUMBER: + DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev"); + return -ENOSYS; + + default: + printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command); + return -ENOSYS; + } + + return 0; +} + +/* check media change: should probably do something here in some cases :-) */ +int xen_block_check(kdev_t dev) +{ + DPRINTK("xen_block_check\n"); + return 0; +} + +int xen_block_revalidate(kdev_t dev) +{ + struct block_device *bd; + struct gendisk *gd; + xl_disk_t *disk; + unsigned long capacity; + int i, rc = 0; + + if ( (bd = bdget(dev)) == NULL ) + return -EINVAL; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((gd = get_gendisk(dev)) == NULL) || + ((disk = xldev_to_xldisk(dev)) == NULL) || + ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) + { + rc = -EINVAL; + goto out; + } + + if ( disk->usage > 1 ) + { + rc = -EBUSY; + goto out; + } + + /* Only reread partition table if VBDs aren't mapped to partitions. */ + if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) + { + for ( i = gd->max_p - 1; i >= 0; i-- ) + { + invalidate_device(dev+i, 1); + gd->part[MINOR(dev+i)].start_sect = 0; + gd->part[MINOR(dev+i)].nr_sects = 0; + gd->sizes[MINOR(dev+i)] = 0; + } + + grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); + } + + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. 
+ */ +static int hypervisor_request(unsigned long id, + int operation, + char * buffer, + unsigned long sector_number, + unsigned short nr_sectors, + kdev_t device) +{ + unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); + struct gendisk *gd; + blk_ring_req_entry_t *req; + struct buffer_head *bh; + + if ( unlikely(nr_sectors >= (1<<9)) ) + BUG(); + if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) + BUG(); + + if ( unlikely(state == STATE_CLOSED) ) + return 1; + + switch ( operation ) + { + + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + gd = get_gendisk(device); + + /* + * Update the sector_number we'll pass down as appropriate; note that + * we could sanity check that resulting sector will be in this + * partition, but this will happen in xen anyhow. + */ + sector_number += gd->part[MINOR(device)].start_sect; + + /* + * If this unit doesn't consist of virtual (i.e., Xen-specified) + * partitions then we clear the partn bits from the device number. + */ + if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & + GENHD_FL_VIRT_PARTNS) ) + device &= ~(gd->max_p - 1); + + if ( (sg_operation == operation) && + (sg_dev == device) && + (sg_next_sect == sector_number) ) + { + req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; + bh = (struct buffer_head *)id; + bh->b_reqnext = (struct buffer_head *)req->id; + req->id = id; + req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; + if ( ++req->nr_segments < MAX_BLK_SEGS ) + sg_next_sect += nr_sectors; + else + DISABLE_SCATTERGATHER(); + return 0; + } + else if ( RING_PLUGGED ) + { + return 1; + } + else + { + sg_operation = operation; + sg_dev = device; + sg_next_sect = sector_number + nr_sectors; + } + break; + + default: + panic("unknown op %d\n", operation); + } + + /* Fill out a communications ring structure. 
*/ + req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; + req->id = id; + req->operation = operation; + req->sector_number = (xen_sector_t)sector_number; + req->device = device; + req->nr_segments = 1; + req->buffer_and_sects[0] = buffer_ma | nr_sectors; + req_prod++; + + return 0; +} + + +/* + * do_xlblk_request + * read a block; request is in a request queue + */ +void do_xlblk_request(request_queue_t *rq) +{ + struct request *req; + struct buffer_head *bh, *next_bh; + int rw, nsect, full, queued = 0; + + DPRINTK("xlblk.c::do_xlblk_request\n"); + + while ( !rq->plugged && !list_empty(&rq->queue_head)) + { + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) + goto out; + + DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->nr_sectors, req->bh); + + rw = req->cmd; + if ( rw == READA ) + rw = READ; + if ( unlikely((rw != READ) && (rw != WRITE)) ) + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); + + req->errors = 0; + + bh = req->bh; + while ( bh != NULL ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + + full = hypervisor_request( + (unsigned long)bh, + (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); + + if ( full ) + { + bh->b_reqnext = next_bh; + pending_queues[nr_pending++] = rq; + if ( unlikely(nr_pending >= MAX_PENDING) ) + BUG(); + goto out; + } + + queued++; + + /* Dequeue the buffer head from the request. */ + nsect = bh->b_size >> 9; + bh = req->bh = next_bh; + + if ( bh != NULL ) + { + /* There's another buffer head to do. Update the request. */ + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + req->current_nr_sectors = bh->b_size >> 9; + req->buffer = bh->b_data; + } + else + { + /* That was the last buffer head. Finalise the request. 
*/ + if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) + BUG(); + blkdev_dequeue_request(req); + end_that_request_last(req); + } + } + } + + out: + if ( queued != 0 ) signal_requests_to_xen(); +} + + +static void kick_pending_request_queues(void) +{ + /* We kick pending request queues if the ring is reasonably empty. */ + if ( (nr_pending != 0) && + ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) + { + /* Attempt to drain the queue, but bail if the ring becomes full. */ + while ( (nr_pending != 0) && !RING_PLUGGED ) + do_xlblk_request(pending_queues[--nr_pending]); + } +} + + +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + BLK_RING_IDX i; + unsigned long flags; + struct buffer_head *bh, *next_bh; + + if ( unlikely(state == STATE_CLOSED) ) + return; + + spin_lock_irqsave(&io_request_lock, flags); + + for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) + { + blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; + switch ( bret->operation ) + { + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + if ( unlikely(bret->status != 0) ) + DPRINTK("Bad return from blkdev data request: %lx\n", + bret->status); + for ( bh = (struct buffer_head *)bret->id; + bh != NULL; + bh = next_bh ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, !bret->status); + } + break; + + default: + BUG(); + } + } + + resp_cons = i; + + kick_pending_request_queues(); + + spin_unlock_irqrestore(&io_request_lock, flags); +} + + +static void reset_xlblk_interface(void) +{ + block_io_op_t op; + + nr_pending = 0; + + op.cmd = BLOCK_IO_OP_RESET; + if ( HYPERVISOR_block_io_op(&op) != 0 ) + printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n"); + + op.cmd = BLOCK_IO_OP_RING_ADDRESS; + (void)HYPERVISOR_block_io_op(&op); + + set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT); + blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; + + 
wmb(); + state = STATE_ACTIVE; +} + + +int __init xlblk_init(void) +{ + int error; + + reset_xlblk_interface(); + + xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV); + xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD); + + error = request_irq(xlblk_response_irq, xlblk_response_int, + SA_SAMPLE_RANDOM, "blkdev", NULL); + if ( error ) + { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + error = request_irq(xlblk_update_irq, xlblk_update_int, + 0, "blkdev", NULL); + + if ( error ) + { + printk(KERN_ALERT "Could not allocate block update interrupt\n"); + goto fail; + } + + (void)xlvbd_init(); + + return 0; + + fail: + return error; +} + + +static void __exit xlblk_cleanup(void) +{ + xlvbd_cleanup(); + free_irq(xlblk_response_irq, NULL); + free_irq(xlblk_update_irq, NULL); + unbind_virq_from_irq(VIRQ_BLKDEV); + unbind_virq_from_irq(VIRQ_VBD_UPD); +} + + +#ifdef MODULE +module_init(xlblk_init); +module_exit(xlblk_cleanup); +#endif + + +void blkdev_suspend(void) +{ + state = STATE_SUSPENDED; + wmb(); + + while ( resp_cons != blk_ring->req_prod ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + wmb(); + state = STATE_CLOSED; + wmb(); + + clear_fixmap(FIX_BLKRING_BASE); +} + + +void blkdev_resume(void) +{ + reset_xlblk_interface(); + spin_lock_irq(&io_request_lock); + kick_pending_request_queues(); + spin_unlock_irq(&io_request_lock); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.h b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.h new file mode 100644 index 0000000000..e41e03970e --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/block.h @@ -0,0 +1,82 @@ +/****************************************************************************** + * block.h + * + * Shared definitions between all levels of XenoLinux Virtual block devices. 
+ */ + +#ifndef __XEN_DRIVERS_BLOCK_H__ +#define __XEN_DRIVERS_BLOCK_H__ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/fs.h> +#include <linux/hdreg.h> +#include <linux/blkdev.h> +#include <linux/major.h> + +#include <asm/hypervisor-ifs/hypervisor-if.h> +#include <asm/hypervisor-ifs/vbd.h> +#include <asm/io.h> +#include <asm/atomic.h> +#include <asm/uaccess.h> + +#if 0 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#if 0 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK_IOCTL(_f, _a...) ((void)0) +#endif + +/* Private gendisk->flags[] values. */ +#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ + +/* + * We have one of these per vbd, whether ide, scsi or 'other'. + * They hang in an array off the gendisk structure. We may end up putting + * all kinds of interesting stuff here :-) + */ +typedef struct xl_disk { + int usage; +} xl_disk_t; + +extern int xen_control_msg(int operration, char *buffer, int size); +extern int xen_block_open(struct inode *inode, struct file *filep); +extern int xen_block_release(struct inode *inode, struct file *filep); +extern int xen_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument); +extern int xen_block_check(kdev_t dev); +extern int xen_block_revalidate(kdev_t dev); +extern void do_xlblk_request (request_queue_t *rq); + +extern void xlvbd_update_vbds(void); + +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) +{ + struct gendisk *gd = get_gendisk(xldev); + + if ( gd == NULL ) + return NULL; + + return (xl_disk_t *)gd->real_devices + + (MINOR(xldev) >> gd->minor_shift); +} + + +/* Virtual block-device subsystem. 
*/ +extern int xlvbd_init(void); +extern void xlvbd_cleanup(void); + +#endif /* __XEN_DRIVERS_BLOCK_H__ */ diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/vbd.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/vbd.c new file mode 100644 index 0000000000..e08b976c56 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/vblkif/vbd.c @@ -0,0 +1,561 @@ +/****************************************************************************** + * vbd.c + * + * Xenolinux virtual block-device driver (xvd). + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + */ + +#include "block.h" +#include <linux/blk.h> + +/* + * For convenience we distinguish between ide, scsi and 'other' (i.e. + * potentially combinations of the two) in the naming scheme and in a few + * other places (like default readahead, etc). + */ +#define XLIDE_MAJOR_NAME "hd" +#define XLSCSI_MAJOR_NAME "sd" +#define XLVBD_MAJOR_NAME "xvd" + +#define XLIDE_DEVS_PER_MAJOR 2 +#define XLSCSI_DEVS_PER_MAJOR 16 +#define XLVBD_DEVS_PER_MAJOR 16 + +#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ +#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */ + +#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ +#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ + +#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ +#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ + +/* The below are for the generic drivers/block/ll_rw_block.c code. 
*/ +static int xlide_blksize_size[256]; +static int xlide_hardsect_size[256]; +static int xlide_max_sectors[256]; +static int xlscsi_blksize_size[256]; +static int xlscsi_hardsect_size[256]; +static int xlscsi_max_sectors[256]; +static int xlvbd_blksize_size[256]; +static int xlvbd_hardsect_size[256]; +static int xlvbd_max_sectors[256]; + +/* Information from Xen about our VBDs. */ +#define MAX_VBDS 64 +static int nr_vbds; +static xen_disk_t *vbd_info; + +static struct block_device_operations xlvbd_block_fops = +{ + open: xen_block_open, + release: xen_block_release, + ioctl: xen_block_ioctl, + check_media_change: xen_block_check, + revalidate: xen_block_revalidate, +}; + +static int xlvbd_get_vbd_info(xen_disk_t *disk_info) +{ + int error; + block_io_op_t op; + + /* Probe for disk information. */ + memset(&op, 0, sizeof(op)); + op.cmd = BLOCK_IO_OP_VBD_PROBE; + op.u.probe_params.domain = 0; + op.u.probe_params.xdi.max = MAX_VBDS; + op.u.probe_params.xdi.disks = disk_info; + op.u.probe_params.xdi.count = 0; + + if ( (error = HYPERVISOR_block_io_op(&op)) != 0 ) + { + printk(KERN_ALERT "Could not probe disks (%d)\n", error); + return -1; + } + + return op.u.probe_params.xdi.count; +} + +/* + * xlvbd_init_device - initialise a VBD device + * @disk: a xen_disk_t describing the VBD + * + * Takes a xen_disk_t * that describes a VBD the domain has access to. + * Performs appropriate initialisation and registration of the device. + * + * Care needs to be taken when making re-entrant calls to ensure that + * corruption does not occur. Also, devices that are in use should not have + * their details updated. This is the caller's responsibility. + */ +static int xlvbd_init_device(xen_disk_t *xd) +{ + int device = xd->device; + int major = MAJOR(device); + int minor = MINOR(device); + int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ + int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? 
*/ + char *major_name; + struct gendisk *gd; + struct block_device *bd; + xl_disk_t *disk; + int i, rc = 0, max_part, partno; + unsigned long capacity; + + unsigned char buf[64]; + + if ( (bd = bdget(device)) == NULL ) + return -1; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) ) + { + printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device); + rc = -1; + goto out; + } + + if ( is_ide ) { + + major_name = XLIDE_MAJOR_NAME; + max_part = XLIDE_MAX_PART; + + } else if ( is_scsi ) { + + major_name = XLSCSI_MAJOR_NAME; + max_part = XLSCSI_MAX_PART; + + } else if (XD_VIRTUAL(xd->info)) { + + major_name = XLVBD_MAJOR_NAME; + max_part = XLVBD_MAX_PART; + + } else { + + /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */ + printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", + major, minor); + is_scsi = 1; + major_name = "cciss"; + max_part = XLSCSI_MAX_PART; + + } + + partno = minor & (max_part - 1); + + if ( (gd = get_gendisk(device)) == NULL ) + { + rc = register_blkdev(major, major_name, &xlvbd_block_fops); + if ( rc < 0 ) + { + printk(KERN_ALERT "XL VBD: can't get major %d\n", major); + goto out; + } + + if ( is_ide ) + { + blksize_size[major] = xlide_blksize_size; + hardsect_size[major] = xlide_hardsect_size; + max_sectors[major] = xlide_max_sectors; + read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */ + } + else if ( is_scsi ) + { + blksize_size[major] = xlscsi_blksize_size; + hardsect_size[major] = xlscsi_hardsect_size; + max_sectors[major] = xlscsi_max_sectors; + read_ahead[major] = 0; /* XXX 8; -- guessing */ + } + else + { + blksize_size[major] = xlvbd_blksize_size; + hardsect_size[major] = xlvbd_hardsect_size; + max_sectors[major] = xlvbd_max_sectors; + read_ahead[major] = 8; + } + + blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); + + /* + * 
Turn off barking 'headactive' mode. We dequeue buffer heads as + * soon as we pass them down to Xen. + */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); + + /* Construct an appropriate gendisk structure. */ + gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + gd->major = major; + gd->major_name = major_name; + + gd->max_p = max_part; + if ( is_ide ) + { + gd->minor_shift = XLIDE_PARTN_SHIFT; + gd->nr_real = XLIDE_DEVS_PER_MAJOR; + } + else if ( is_scsi ) + { + gd->minor_shift = XLSCSI_PARTN_SHIFT; + gd->nr_real = XLSCSI_DEVS_PER_MAJOR; + } + else + { + gd->minor_shift = XLVBD_PARTN_SHIFT; + gd->nr_real = XLVBD_DEVS_PER_MAJOR; + } + + /* + ** The sizes[] and part[] arrays hold the sizes and other + ** information about every partition with this 'major' (i.e. + ** every disk sharing the 8 bit prefix * max partns per disk) + */ + gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); + gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), + GFP_KERNEL); + memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); + memset(gd->part, 0, max_part * gd->nr_real + * sizeof(struct hd_struct)); + + + gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), + GFP_KERNEL); + memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); + + gd->next = NULL; + gd->fops = &xlvbd_block_fops; + + gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), + GFP_KERNEL); + gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); + + memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); + memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); + + add_gendisk(gd); + + blk_size[major] = gd->sizes; + } + + if ( XD_READONLY(xd->info) ) + set_device_ro(device, 1); + + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN; + + /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. 
*/ + capacity = (unsigned long)xd->capacity; + + if ( partno != 0 ) + { + /* + * If this was previously set up as a real disc we will have set + * up partition-table information. Virtual partitions override + * 'real' partitions, and the two cannot coexist on a device. + */ + if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && + (gd->sizes[minor & ~(max_part-1)] != 0) ) + { + /* + * Any non-zero sub-partition entries must be cleaned out before + * installing 'virtual' partition entries. The two types cannot + * coexist, and virtual partitions are favoured. + */ + kdev_t dev = device & ~(max_part-1); + for ( i = max_part - 1; i > 0; i-- ) + { + invalidate_device(dev+i, 1); + gd->part[MINOR(dev+i)].start_sect = 0; + gd->part[MINOR(dev+i)].nr_sects = 0; + gd->sizes[MINOR(dev+i)] = 0; + } + printk(KERN_ALERT + "Virtual partitions found for /dev/%s - ignoring any " + "real partition information we may have found.\n", + disk_name(gd, MINOR(device), buf)); + } + + /* Need to skankily setup 'partition' information */ + gd->part[minor].start_sect = 0; + gd->part[minor].nr_sects = capacity; + gd->sizes[minor] = capacity; + + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; + } + else + { + gd->part[minor].nr_sects = capacity; + gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9); + + /* Some final fix-ups depending on the device type */ + switch ( XD_TYPE(xd->info) ) + { + case XD_TYPE_CDROM: + case XD_TYPE_FLOPPY: + case XD_TYPE_TAPE: + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; + printk(KERN_ALERT + "Skipping partition check on %s /dev/%s\n", + XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : + (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : + "floppy"), disk_name(gd, MINOR(device), buf)); + break; + + case XD_TYPE_DISK: + /* Only check partitions on real discs (not virtual!). 
*/ + if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) + { + printk(KERN_ALERT + "Skipping partition check on virtual /dev/%s\n", + disk_name(gd, MINOR(device), buf)); + break; + } + register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity); + break; + + default: + printk(KERN_ALERT "XenoLinux: unknown device type %d\n", + XD_TYPE(xd->info)); + break; + } + } + + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + + +/* + * xlvbd_remove_device - remove a device node if possible + * @device: numeric device ID + * + * Updates the gendisk structure and invalidates devices. + * + * This is OK for now but in future, should perhaps consider where this should + * deallocate gendisks / unregister devices. + */ +static int xlvbd_remove_device(int device) +{ + int i, rc = 0, minor = MINOR(device); + struct gendisk *gd; + struct block_device *bd; + xl_disk_t *disk = NULL; + + if ( (bd = bdget(device)) == NULL ) + return -1; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((gd = get_gendisk(device)) == NULL) || + ((disk = xldev_to_xldisk(device)) == NULL) ) + BUG(); + + if ( disk->usage != 0 ) + { + printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device); + rc = -1; + goto out; + } + + if ( (minor & (gd->max_p-1)) != 0 ) + { + /* 1: The VBD is mapped to a partition rather than a whole unit. */ + invalidate_device(device, 1); + gd->part[minor].start_sect = 0; + gd->part[minor].nr_sects = 0; + gd->sizes[minor] = 0; + + /* Clear the consists-of-virtual-partitions flag if possible. */ + gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; + for ( i = 1; i < gd->max_p; i++ ) + if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 ) + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; + + /* + * If all virtual partitions are now gone, and a 'whole unit' VBD is + * present, then we can try to grok the unit's real partition table. 
+ */ + if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && + (gd->sizes[minor & ~(gd->max_p-1)] != 0) && + !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) ) + { + register_disk(gd, + device&~(gd->max_p-1), + gd->max_p, + &xlvbd_block_fops, + gd->part[minor&~(gd->max_p-1)].nr_sects); + } + } + else + { + /* + * 2: The VBD is mapped to an entire 'unit'. Clear all partitions. + * NB. The partition entries are only cleared if there are no VBDs + * mapped to individual partitions on this unit. + */ + i = gd->max_p - 1; /* Default: clear subpartitions as well. */ + if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) + i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */ + while ( i >= 0 ) + { + invalidate_device(device+i, 1); + gd->part[minor+i].start_sect = 0; + gd->part[minor+i].nr_sects = 0; + gd->sizes[minor+i] = 0; + i--; + } + } + + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + +/* + * xlvbd_update_vbds - reprobes the VBD status and updates driver + * state. The VBDs need to be updated in this way when the domain is + * initialised and also each time we receive an XLBLK_UPDATE event. + */ +void xlvbd_update_vbds(void) +{ + int i, j, k, old_nr, new_nr; + xen_disk_t *old_info, *new_info, *merged_info; + + old_info = vbd_info; + old_nr = nr_vbds; + + new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) + { + kfree(new_info); + return; + } + + /* + * Final list maximum size is old list + new list. This occurs only when + * old list and new list do not overlap at all, and we cannot yet destroy + * VBDs in the old list because the usage counts are busy. + */ + merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL); + + /* @i tracks old list; @j tracks new list; @k tracks merged list. 
*/ + i = j = k = 0; + + while ( (i < old_nr) && (j < new_nr) ) + { + if ( old_info[i].device < new_info[j].device ) + { + if ( xlvbd_remove_device(old_info[i].device) != 0 ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + i++; + } + else if ( old_info[i].device > new_info[j].device ) + { + if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + j++; + } + else + { + if ( ((old_info[i].capacity == new_info[j].capacity) && + (old_info[i].info == new_info[j].info)) || + (xlvbd_remove_device(old_info[i].device) != 0) ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + else if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + i++; j++; + } + } + + for ( ; i < old_nr; i++ ) + { + if ( xlvbd_remove_device(old_info[i].device) != 0 ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + } + + for ( ; j < new_nr; j++ ) + { + if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + } + + vbd_info = merged_info; + nr_vbds = k; + + kfree(old_info); + kfree(new_info); +} + + +/* + * Set up all the linux device goop for the virtual block devices (vbd's) that + * xen tells us about. Note that although from xen's pov VBDs are addressed + * simply by an opaque 16-bit device number, the domain creation tools + * conventionally allocate these numbers to correspond to those used by 'real' + * linux -- this is just for convenience as it means e.g. that the same + * /etc/fstab can be used when booting with or without xen. + */ +int __init xlvbd_init(void) +{ + int i; + + /* + * If compiled as a module, we don't support unloading yet. We therefore + * permanently increment the reference count to disallow it. + */ + SET_MODULE_OWNER(&xlvbd_block_fops); + MOD_INC_USE_COUNT; + + /* Initialize the global arrays. 
*/ + for ( i = 0; i < 256; i++ ) + { + /* from the generic ide code (drivers/ide/ide-probe.c, etc) */ + xlide_blksize_size[i] = 1024; + xlide_hardsect_size[i] = 512; + xlide_max_sectors[i] = 128; /* 'hwif->rqsize' if we knew it */ + + /* from the generic scsi disk code (drivers/scsi/sd.c) */ + xlscsi_blksize_size[i] = 1024; /* XXX 512; */ + xlscsi_hardsect_size[i] = 512; + xlscsi_max_sectors[i] = 128*8; /* XXX 128; */ + + /* we don't really know what to set these to since it depends */ + xlvbd_blksize_size[i] = 512; + xlvbd_hardsect_size[i] = 512; + xlvbd_max_sectors[i] = 128; + } + + vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + nr_vbds = xlvbd_get_vbd_info(vbd_info); + + if ( nr_vbds < 0 ) + { + kfree(vbd_info); + vbd_info = NULL; + nr_vbds = 0; + } + else + { + for ( i = 0; i < nr_vbds; i++ ) + xlvbd_init_device(&vbd_info[i]); + } + + return 0; +} + + +#ifdef MODULE +module_init(xlvbd_init); +#endif diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile b/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile index 78f5314004..90438fa623 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile @@ -10,7 +10,7 @@ export-objs := i386_ksyms.o obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ - i386_ksyms.o i387.o evtchn.o pci-dma.o + i386_ksyms.o i387.o evtchn.o ctrl_if.o pci-dma.o ifdef CONFIG_PCI obj-y += pci-i386.o pci-pc.o diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/ctrl_if.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/ctrl_if.c new file mode 100644 index 0000000000..4c43d091e7 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/ctrl_if.c @@ -0,0 +1,317 @@ +/****************************************************************************** + * ctrl_if.c + * + * Management functions for special interface to the domain controller. 
+ * + * Copyright (c) 2004, K A Fraser + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <asm/ctrl_if.h> +#include <asm/hypervisor.h> +#include <asm/hypervisor-ifs/event_channel.h> + +static int ctrl_if_evtchn; +static int ctrl_if_irq; +static spinlock_t ctrl_if_lock; + +static struct irqaction ctrl_if_irq_action; + +static CONTROL_RING_IDX ctrl_if_tx_resp_cons; +static CONTROL_RING_IDX ctrl_if_rx_req_cons; + +/* Incoming message requests: primary message type -> message handler. */ +static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256]; + +/* Incoming message responses: message identifier -> message handler/id. */ +static struct { + ctrl_msg_handler_t fn; + unsigned long id; +} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE]; + +static DECLARE_TASK_QUEUE(ctrl_if_tx_tq); +static DECLARE_WAIT_QUEUE_HEAD(ctrl_if_tx_wait); +static void __ctrl_if_tx_tasklet(unsigned long data); +static DECLARE_TASKLET(ctrl_if_tx_tasklet, __ctrl_if_tx_tasklet, 0); + +static void __ctrl_if_rx_tasklet(unsigned long data); +static DECLARE_TASKLET(ctrl_if_rx_tasklet, __ctrl_if_rx_tasklet, 0); + +#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048)) +#define TX_FULL(_c) \ + (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE) + +static void ctrl_if_notify_controller(void) +{ + evtchn_op_t evtchn_op; + evtchn_op.cmd = EVTCHNOP_send; + evtchn_op.u.send.local_port = ctrl_if_evtchn; + (void)HYPERVISOR_event_channel_op(&evtchn_op); +} + +static void ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id) +{ + msg->length = 0; + ctrl_if_send_response(msg); +} + +static void __ctrl_if_tx_tasklet(unsigned long data) +{ + control_if_t *ctrl_if = get_ctrl_if(); + ctrl_msg_t *msg; + int was_full = TX_FULL(ctrl_if); + + while ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod ) + { + msg 
= &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)]; + + /* Execute the callback handler, if one was specified. */ + if ( msg->id != 0xFF ) + { + (*ctrl_if_txmsg_id_mapping[msg->id].fn)( + msg, ctrl_if_txmsg_id_mapping[msg->id].id); + smp_mb(); /* Execute, /then/ free. */ + ctrl_if_txmsg_id_mapping[msg->id].fn = NULL; + } + + /* + * Step over the message in the ring /after/ finishing reading it. As + * soon as the index is updated then the message may get blown away. + */ + smp_mb(); + ctrl_if_tx_resp_cons++; + } + + if ( was_full && !TX_FULL(ctrl_if) ) + { + wake_up(&ctrl_if_tx_wait); + run_task_queue(&ctrl_if_tx_tq); + } +} + +static void __ctrl_if_rx_tasklet(unsigned long data) +{ + control_if_t *ctrl_if = get_ctrl_if(); + ctrl_msg_t *msg; + + while ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod ) + { + /* + * We need no locking or barriers here. There will be one and only one + * response as a result of each callback, so the callback handler + * doesn't need to worry about the 'msg' being overwritten until: + * 1. It returns (if the message must persist then it must be copied). + * 2. A response is sent (the response may overwrite the request). 
+ */ + msg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)]; + (*ctrl_if_rxmsg_handler[msg->type])(msg, 0); + } +} + +static void ctrl_if_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + control_if_t *ctrl_if = get_ctrl_if(); + + if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod ) + tasklet_schedule(&ctrl_if_tx_tasklet); + + if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod ) + tasklet_schedule(&ctrl_if_rx_tasklet); +} + +int ctrl_if_send_message_noblock( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id) +{ + control_if_t *ctrl_if = get_ctrl_if(); + unsigned long flags; + int i; + + spin_lock_irqsave(&ctrl_if_lock, flags); + + if ( TX_FULL(ctrl_if) ) + { + spin_unlock_irqrestore(&ctrl_if_lock, flags); + return -EAGAIN; + } + + msg->id = 0xFF; + if ( hnd != NULL ) + { + for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ ) + continue; + ctrl_if_txmsg_id_mapping[i].fn = hnd; + ctrl_if_txmsg_id_mapping[i].id = id; + msg->id = i; + } + + memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)], + msg, sizeof(*msg)); + wmb(); /* Write the message before letting the controller peek at it. */ + ctrl_if->tx_req_prod++; + + spin_unlock_irqrestore(&ctrl_if_lock, flags); + + ctrl_if_notify_controller(); + + return 0; +} + +int ctrl_if_send_message_block( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id, + long wait_state) +{ + DECLARE_WAITQUEUE(wait, current); + int rc; + + /* Fast path. 
*/ + if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != -EAGAIN ) + return rc; + + add_wait_queue(&ctrl_if_tx_wait, &wait); + + for ( ; ; ) + { + set_current_state(wait_state); + + if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != -EAGAIN ) + break; + + rc = -ERESTARTSYS; + if ( signal_pending(current) && (wait_state == TASK_INTERRUPTIBLE) ) + break; + + schedule(); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&ctrl_if_tx_wait, &wait); + + return rc; +} + +int ctrl_if_enqueue_space_callback(struct tq_struct *task) +{ + control_if_t *ctrl_if = get_ctrl_if(); + + /* Fast path. */ + if ( !TX_FULL(ctrl_if) ) + return 0; + + (void)queue_task(task, &ctrl_if_tx_tq); + + /* + * We may race execution of the task queue, so return re-checked status. If + * the task is not executed despite the ring being non-full then we will + * certainly return 'not full'. + */ + smp_mb(); + return TX_FULL(ctrl_if); +} + +void ctrl_if_send_response(ctrl_msg_t *msg) +{ + control_if_t *ctrl_if = get_ctrl_if(); + unsigned long flags; + ctrl_msg_t *dmsg; + + /* + * NB. The response may be the original request message, modified in-place. + * In this situation we may have src==dst, so no copying is required. + */ + spin_lock_irqsave(&ctrl_if_lock, flags); + dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)]; + if ( dmsg != msg ) + memcpy(dmsg, msg, sizeof(*msg)); + wmb(); /* Write the message before letting the controller peek at it. 
*/ + ctrl_if->rx_resp_prod++; + spin_unlock_irqrestore(&ctrl_if_lock, flags); + + ctrl_if_notify_controller(); +} + +int ctrl_if_register_receiver(u8 type, ctrl_msg_handler_t hnd) +{ + unsigned long flags; + int inuse; + + spin_lock_irqsave(&ctrl_if_lock, flags); + + inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler); + + if ( inuse ) + printk(KERN_INFO "Receiver %p already established for control " + "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type); + else + ctrl_if_rxmsg_handler[type] = hnd; + + spin_unlock_irqrestore(&ctrl_if_lock, flags); + + return !inuse; +} + +void ctrl_if_unregister_receiver(u8 type, ctrl_msg_handler_t hnd) +{ + unsigned long flags; + + spin_lock_irqsave(&ctrl_if_lock, flags); + + if ( ctrl_if_rxmsg_handler[type] != hnd ) + printk(KERN_INFO "Receiver %p is not registered for control " + "messages of type %d.\n", hnd, type); + else + ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler; + + spin_unlock_irqrestore(&ctrl_if_lock, flags); + + /* Ensure that @hnd will not be executed after this function returns. 
*/ + tasklet_unlock_wait(&ctrl_if_rx_tasklet); +} + +void ctrl_if_suspend(void) +{ + if ( start_info.flags & SIF_INITDOMAIN ) + return; + + free_irq(ctrl_if_irq, NULL); + unbind_evtchn_from_irq(ctrl_if_evtchn); +} + +void ctrl_if_resume(void) +{ + if ( start_info.flags & SIF_INITDOMAIN ) + return; + + ctrl_if_tx_resp_cons = 0; + ctrl_if_rx_req_cons = 0; + + ctrl_if_evtchn = start_info.domain_controller_evtchn; + ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn); + + memset(&ctrl_if_irq_action, 0, sizeof(ctrl_if_irq_action)); + ctrl_if_irq_action.handler = ctrl_if_interrupt; + ctrl_if_irq_action.name = "ctrl-if"; + (void)setup_irq(ctrl_if_irq, &ctrl_if_irq_action); +} + +void __init ctrl_if_init(void) +{ + int i; + + for ( i = 0; i < 256; i++ ) + ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler; + + spin_lock_init(&ctrl_if_lock); + + ctrl_if_resume(); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c index 7425f92047..a9c8991fb9 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c @@ -15,6 +15,7 @@ #include <asm/system.h> #include <asm/ptrace.h> #include <asm/synch_bitops.h> +#include <asm/ctrl_if.h> #include <asm/hypervisor.h> #include <asm/hypervisor-ifs/event_channel.h> #include <asm/hypervisor-ifs/physdev.h> @@ -396,4 +397,7 @@ void __init init_IRQ(void) } (void)setup_irq(bind_virq_to_irq(VIRQ_MISDIRECT), &misdirect_action); + + /* This needs to be done early, but after the IRQ subsystem is alive. 
*/ + ctrl_if_init(); } diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c index bd65655c48..36faf53832 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c @@ -43,6 +43,7 @@ #include <asm/dma.h> #include <asm/mpspec.h> #include <asm/mmu_context.h> +#include <asm/ctrl_if.h> #include <asm/hypervisor.h> #include <asm/hypervisor-ifs/dom0_ops.h> #include <linux/netdevice.h> @@ -1187,6 +1188,8 @@ static void stop_task(void *unused) time_suspend(); + ctrl_if_suspend(); + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; clear_fixmap(FIX_SHARED_INFO); @@ -1200,6 +1203,8 @@ static void stop_task(void *unused) HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); memset(empty_zero_page, 0, PAGE_SIZE); + ctrl_if_resume(); + time_resume(); __sti(); diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/control_if.h b/xenolinux-2.4.25-sparse/include/asm-xen/control_if.h deleted file mode 100644 index dd15a96bff..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xen/control_if.h +++ /dev/null @@ -1,32 +0,0 @@ -/****************************************************************************** - * control_if.h - * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __CONTROL_IF_H__ -#define __CONTROL_IF_H__ - -typedef struct { - u8 type; /* echoed in response */ - u8 subtype; /* echoed in response */ - u8 id; /* echoed in response */ - u8 length; /* number of bytes in 'msg' */ - unsigned char msg[60]; /* type-specific message data */ -} control_msg_t; - -#define CONTROL_RING_SIZE 8 -typedef unsigned int CONTROL_RING_IDX; -#define MASK_CONTROL_IDX(_i) ((_i)&(CONTROL_RING_SIZE-1)) - -typedef struct { - control_msg_t tx_ring[CONTROL_RING_SIZE]; /* guest-OS -> controller */ - control_msg_t rx_ring[CONTROL_RING_SIZE]; /* controller -> guest-OS */ - CONTROL_RING_IDX tx_req_prod, tx_resp_prod; - CONTROL_RING_IDX rx_req_prod, rx_resp_prod; -} 
control_if_t; - -#define CMSG_CONSOLE 0 -#define CMSG_CONSOLE_DATA 0 - -#endif /* __CONTROL_IF_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/ctrl_if.h b/xenolinux-2.4.25-sparse/include/asm-xen/ctrl_if.h new file mode 100644 index 0000000000..c9e874bb0d --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/ctrl_if.h @@ -0,0 +1,98 @@ +/****************************************************************************** + * ctrl_if.h + * + * Management functions for special interface to the domain controller. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __ASM_XEN__CTRL_IF_H__ +#define __ASM_XEN__CTRL_IF_H__ + +#include <asm/hypervisor.h> + +typedef control_msg_t ctrl_msg_t; + +/* + * Callback function type. Called for asynchronous processing of received + * request messages, and responses to previously-transmitted request messages. + * The parameters are (@msg, @id). + * @msg: Original request/response message (not a copy). The message can be + * modified in-place by the handler (e.g., a response callback can + * turn a request message into a response message in place). The message + * is no longer accessible after the callback handler returns -- if the + * message is required to persist for longer then it must be copied. + * @id: (Response callbacks only) The 'id' that was specified when the + * original request message was queued for transmission. + */ +typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long); + +/* + * Send @msg to the domain controller. Execute @hnd when a response is + * received, passing the response message and the specified @id. This + * operation will not block: it will return -EAGAIN if there is no space. + * Notes: + * 1. The @msg is copied if it is transmitted and so can be freed after this + * function returns. + * 2. If @hnd is NULL then no callback is executed. 
+ */ +int ctrl_if_send_message_noblock( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id); + +/* + * Send @msg to the domain controller. Execute @hnd when a response is + * received, passing the response message and the specified @id. This + * operation will block until the message is sent, or a signal is received + * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE). + * Notes: + * 1. The @msg is copied if it is transmitted and so can be freed after this + * function returns. + * 2. If @hnd is NULL then no callback is executed. + */ +int ctrl_if_send_message( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id, + long wait_state); + +/* + * Request a callback when there is /possibly/ space to immediately send a + * message to the domain controller. This function returns 0 if there is + * already space to transmit a message --- in this case the callback task /may/ + * still be executed. If this function returns 1 then the callback /will/ be + * executed when space becomes available. + */ +int ctrl_if_enqueue_space_callback(struct tq_struct *task); + +/* + * Send a response (@msg) to a message from the domain controller. This will + * never block. + * Notes: + * 1. The @msg is copied and so can be freed after this function returns. + * 2. The @msg may be the original request message, modified in-place. + */ +void ctrl_if_send_response(ctrl_msg_t *msg); + +/* + * Register a receiver for typed messages from the domain controller. The + * handler (@hnd) is called for every received message of specified @type. + * Returns TRUE (non-zero) if the handler was successfully registered. + */ +int ctrl_if_register_receiver(u8 type, ctrl_msg_handler_t hnd); + +/* + * Unregister a receiver for typed messages from the domain controller. The + * handler (@hnd) will not be executed after this function returns. + */ +void ctrl_if_unregister_receiver(u8 type, ctrl_msg_handler_t hnd); + +/* Suspend/resume notifications. 
*/ +void ctrl_if_suspend(void); +void ctrl_if_resume(void); + +/* Start-of-day setup. */ +void ctrl_if_init(void); + +#endif /* __ASM_XEN__CTRL_IF_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h b/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h index 73149d5426..e20f67e651 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h @@ -13,13 +13,14 @@ #include <linux/kernel.h> #include <asm/hypervisor-ifs/hypervisor-if.h> #include <asm/hypervisor-ifs/dom0_ops.h> +#include <asm/domain_controller.h> #include <asm/ptrace.h> #include <asm/page.h> /* arch/xen/kernel/setup.c */ union start_info_union { - start_info_t start_info; + extended_start_info_t start_info; char padding[512]; }; extern union start_info_union start_info_union; diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/suspend.h b/xenolinux-2.4.25-sparse/include/asm-xen/suspend.h index 937137f005..0a9c8e74a9 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/suspend.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/suspend.h @@ -12,7 +12,7 @@ typedef struct suspend_record_st { /* To be filled in before resume. */ - start_info_t resume_info; + extended_start_info_t resume_info; /* * The number of a machine frame containing, in sequence, the number of * each machine frame that contains PFN -> MFN translation table data. diff --git a/xenolinux-2.4.25-sparse/mkbuildtree b/xenolinux-2.4.25-sparse/mkbuildtree index faaf923c30..46fe4784ad 100755 --- a/xenolinux-2.4.25-sparse/mkbuildtree +++ b/xenolinux-2.4.25-sparse/mkbuildtree @@ -112,8 +112,11 @@ mkdir ${AD}/include/asm-xen/hypervisor-ifs cd ${AD}/include/asm-xen/hypervisor-ifs relative_lndir ../../../${RS}/../xen/include/hypervisor-ifs -# The remainder are the i386 -> xen-i386 links +# Another special symlink: to the shared definitions for the control interface cd .. 
+ln -sf ../../${RS}/../tools/xend/lib/domain_controller.h + +# The remainder are the i386 -> xen-i386 links ln -sf ../asm-i386/a.out.h ln -sf ../asm-i386/apicdef.h ln -sf ../asm-i386/apic.h |