From 163ce6261c54cbfe6fec6f011497a3d14d30b5e4 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Thu, 25 Mar 2004 11:51:43 +0000 Subject: bitkeeper revision 1.825.1.2 (4062c7cfNjG5kiKHfguNA2SIXnllng) Many files: New IRQ upcall world. evtchn.c: Rename: xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c -> xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c --- .rootkeys | 2 +- tools/xc/lib/xc.h | 39 ++- tools/xc/lib/xc_evtchn.c | 51 ++-- tools/xc/lib/xc_linux_build.c | 1 + tools/xc/lib/xc_netbsd_build.c | 1 + tools/xc/py/Xc.c | 57 ++-- tools/xend/lib/main.py | 8 +- tools/xend/lib/utils.c | 3 +- xen/arch/i386/entry.S | 17 +- xen/arch/i386/traps.c | 8 +- xen/common/debug.c | 6 +- xen/common/domain.c | 35 +-- xen/common/event_channel.c | 218 +++++++++----- xen/common/keyhandler.c | 10 +- xen/common/physdev.c | 20 +- xen/common/schedule.c | 24 +- xen/drivers/block/xen_block.c | 4 +- xen/drivers/block/xen_vbd.c | 24 +- xen/drivers/char/console.c | 4 +- xen/drivers/char/keyboard.c | 7 +- xen/include/hypervisor-ifs/event_channel.h | 67 +++-- xen/include/hypervisor-ifs/hypervisor-if.h | 131 ++++----- xen/include/xen/event.h | 132 ++++----- xen/include/xen/sched.h | 36 ++- xen/net/dev.c | 10 +- .../arch/xen/drivers/block/block.c | 22 +- .../arch/xen/drivers/console/console.c | 44 ++- .../arch/xen/drivers/evtchn/evtchn.c | 182 +++--------- .../arch/xen/drivers/network/network.c | 16 +- .../arch/xen/drivers/vnetif/vnetif.c | 16 +- xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile | 2 +- xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S | 30 +- xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c | 317 +++++++++++++++++++++ .../arch/xen/kernel/hypervisor.c | 170 ----------- .../arch/xen/kernel/i386_ksyms.c | 2 +- xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c | 9 +- xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c | 22 +- xenolinux-2.4.25-sparse/arch/xen/kernel/time.c | 10 +- xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h | 87 +++++- .../include/asm-xen/hypervisor.h 
| 4 - xenolinux-2.4.25-sparse/include/asm-xen/irq.h | 27 +- xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h | 7 +- xenolinux-2.4.25-sparse/include/asm-xen/system.h | 25 +- 43 files changed, 1080 insertions(+), 827 deletions(-) create mode 100644 xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c delete mode 100644 xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c diff --git a/.rootkeys b/.rootkeys index b3ac44531b..e61ecf8776 100644 --- a/.rootkeys +++ b/.rootkeys @@ -628,8 +628,8 @@ 405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile 3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S +3e5a4e65ibVQmwlOn0j3sVH_j_6hAg xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c 3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg xenolinux-2.4.25-sparse/arch/xen/kernel/head.S -3e5a4e65ibVQmwlOn0j3sVH_j_6hAg xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c 3e5a4e65RMGcuA-HCn3-wNx3fFQwdg xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c 3e5a4e65MEvZhlr070sK5JsfAQlv7Q xenolinux-2.4.25-sparse/arch/xen/kernel/ioport.c 3e5a4e653U6cELGv528IxOLHvCq8iA xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h index aba2906842..1045be69d0 100644 --- a/tools/xc/lib/xc.h +++ b/tools/xc/lib/xc.h @@ -165,25 +165,38 @@ int xc_vbd_probe(int xc_handle, xc_vbd_t *vbds); #define DOMID_SELF (~1ULL) -#define EVTCHNSTAT_closed 0 /* Chennel is not in use. */ -#define EVTCHNSTAT_disconnected 1 /* Channel is not connected to remote. */ -#define EVTCHNSTAT_connected 2 /* Channel is connected to remote. */ -int xc_evtchn_open(int xc_handle, - u64 dom1, /* may be DOMID_SELF */ - u64 dom2, /* may be DOMID_SELF */ - int *port1, - int *port2); + +typedef struct { +#define EVTCHNSTAT_closed 0 /* Chennel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is not bound to a source. 
*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ + int status; + union { + struct { + u64 dom; + int port; + } interdomain; + int pirq; + int virq; + } u; +} xc_evtchn_status_t; + +int xc_evtchn_bind_interdomain(int xc_handle, + u64 dom1, /* may be DOMID_SELF */ + u64 dom2, /* may be DOMID_SELF */ + int *port1, + int *port2); int xc_evtchn_close(int xc_handle, u64 dom, /* may be DOMID_SELF */ int port); int xc_evtchn_send(int xc_handle, int local_port); int xc_evtchn_status(int xc_handle, - u64 dom1, /* may be DOMID_SELF */ - int port1, - u64 *dom2, - int *port2, - int *chn_status); + u64 dom, /* may be DOMID_SELF */ + int port, + xc_evtchn_status_t *status); int xc_physdev_pci_access_modify(int xc_handle, u64 domid, diff --git a/tools/xc/lib/xc_evtchn.c b/tools/xc/lib/xc_evtchn.c index b3e17f8951..a92cc5b903 100644 --- a/tools/xc/lib/xc_evtchn.c +++ b/tools/xc/lib/xc_evtchn.c @@ -29,25 +29,25 @@ static int do_evtchn_op(int xc_handle, evtchn_op_t *op) out1: return ret; } -int xc_evtchn_open(int xc_handle, - u64 dom1, - u64 dom2, - int *port1, - int *port2) +int xc_evtchn_bind_interdomain(int xc_handle, + u64 dom1, + u64 dom2, + int *port1, + int *port2) { evtchn_op_t op; int rc; - op.cmd = EVTCHNOP_open; - op.u.open.dom1 = (domid_t)dom1; - op.u.open.dom2 = (domid_t)dom2; + op.cmd = EVTCHNOP_bind_interdomain; + op.u.bind_interdomain.dom1 = (domid_t)dom1; + op.u.bind_interdomain.dom2 = (domid_t)dom2; if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) { if ( port1 != NULL ) - *port1 = op.u.open.port1; + *port1 = op.u.bind_interdomain.port1; if ( port2 != NULL ) - *port2 = op.u.open.port2; + *port2 = op.u.bind_interdomain.port2; } return rc; @@ -77,27 +77,32 @@ int xc_evtchn_send(int xc_handle, int xc_evtchn_status(int xc_handle, - u64 dom1, - int port1, - u64 *dom2, - int *port2, - int *chn_status) 
+ u64 dom, + int port, + xc_evtchn_status_t *status) { evtchn_op_t op; int rc; op.cmd = EVTCHNOP_status; - op.u.status.dom1 = (domid_t)dom1; - op.u.status.port1 = port1; + op.u.status.dom = (domid_t)dom; + op.u.status.port = port; if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) { - if ( dom2 != NULL ) - *dom2 = (u64)op.u.status.dom2; - if ( port2 != NULL ) - *port2 = op.u.status.port2; - if ( chn_status != NULL ) - *chn_status = op.u.status.status; + switch ( status->status = op.u.status.status ) + { + case EVTCHNSTAT_interdomain: + status->u.interdomain.dom = (u64)op.u.status.u.interdomain.dom; + status->u.interdomain.port = op.u.status.u.interdomain.port; + break; + case EVTCHNSTAT_pirq: + status->u.pirq = op.u.status.u.pirq; + break; + case EVTCHNSTAT_virq: + status->u.virq = op.u.status.u.virq; + break; + } } return rc; diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c index fa388d52ef..f9f6949348 100644 --- a/tools/xc/lib/xc_linux_build.c +++ b/tools/xc/lib/xc_linux_build.c @@ -248,6 +248,7 @@ static int setup_guestos(int xc_handle, /* shared_info page starts its life empty. */ shared_info = map_pfn_writeable(pm_handle, shared_info_frame); memset(shared_info, 0, PAGE_SIZE); + shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */ unmap_pfn(pm_handle, shared_info); /* Send the page update requests down to the hypervisor. */ diff --git a/tools/xc/lib/xc_netbsd_build.c b/tools/xc/lib/xc_netbsd_build.c index 3472f32257..ae7ebecc6e 100644 --- a/tools/xc/lib/xc_netbsd_build.c +++ b/tools/xc/lib/xc_netbsd_build.c @@ -183,6 +183,7 @@ static int setup_guestos(int xc_handle, /* shared_info page starts its life empty. */ shared_info = map_pfn_writeable(pm_handle, shared_info_frame); memset(shared_info, 0, PAGE_SIZE); + shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */ unmap_pfn(pm_handle, shared_info); /* Send the page update requests down to the hypervisor. 
*/ diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 85ad49de9a..96b9bf491a 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -687,9 +687,9 @@ static PyObject *pyxc_vbd_probe(PyObject *self, return list; } -static PyObject *pyxc_evtchn_open(PyObject *self, - PyObject *args, - PyObject *kwds) +static PyObject *pyxc_evtchn_bind_interdomain(PyObject *self, + PyObject *args, + PyObject *kwds) { XcObject *xc = (XcObject *)self; @@ -702,7 +702,8 @@ static PyObject *pyxc_evtchn_open(PyObject *self, &dom1, &dom2) ) return NULL; - if ( xc_evtchn_open(xc->xc_handle, dom1, dom2, &port1, &port2) != 0 ) + if ( xc_evtchn_bind_interdomain(xc->xc_handle, dom1, + dom2, &port1, &port2) != 0 ) return PyErr_SetFromErrno(xc_error); return Py_BuildValue("{s:i,s:i}", @@ -759,34 +760,45 @@ static PyObject *pyxc_evtchn_status(PyObject *self, XcObject *xc = (XcObject *)self; PyObject *dict; - u64 dom1 = DOMID_SELF, dom2; - int port1, port2, status, ret; + u64 dom = DOMID_SELF; + int port, ret; + xc_evtchn_status_t status; static char *kwd_list[] = { "port", "dom", NULL }; if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|L", kwd_list, - &port1, &dom1) ) + &port, &dom) ) return NULL; - ret = xc_evtchn_status(xc->xc_handle, dom1, port1, &dom2, &port2, &status); + ret = xc_evtchn_status(xc->xc_handle, dom, port, &status); if ( ret != 0 ) return PyErr_SetFromErrno(xc_error); - switch ( status ) + switch ( status.status ) { case EVTCHNSTAT_closed: dict = Py_BuildValue("{s:s}", "status", "closed"); break; - case EVTCHNSTAT_disconnected: + case EVTCHNSTAT_unbound: dict = Py_BuildValue("{s:s}", - "status", "disconnected"); + "status", "unbound"); break; - case EVTCHNSTAT_connected: + case EVTCHNSTAT_interdomain: dict = Py_BuildValue("{s:s,s:L,s:i}", - "status", "connected", - "dom", dom2, - "port", port2); + "status", "interdomain", + "dom", status.u.interdomain.dom, + "port", status.u.interdomain.port); + break; + case EVTCHNSTAT_pirq: + dict = Py_BuildValue("{s:s,s:i}", + "status", 
"pirq", + "irq", status.u.pirq); + break; + case EVTCHNSTAT_virq: + dict = Py_BuildValue("{s:s,s:i}", + "status", "virq", + "irq", status.u.virq); break; default: dict = Py_BuildValue("{}"); @@ -1134,8 +1146,8 @@ static PyMethodDef pyxc_methods[] = { " writeable [int]: Bool - is this VBD writeable?\n" " nr_sectors [long]: Size of this VBD, in 512-byte sectors.\n" }, - { "evtchn_open", - (PyCFunction)pyxc_evtchn_open, + { "evtchn_bind_interdomain", + (PyCFunction)pyxc_evtchn_bind_interdomain, METH_VARARGS | METH_KEYWORDS, "\n" "Open an event channel between two domains.\n" " dom1 [long, SELF]: First domain to be connected.\n" @@ -1166,10 +1178,13 @@ static PyMethodDef pyxc_methods[] = { " dom [long, SELF]: Dom-id of one endpoint of the channel.\n" " port [int]: Port-id of one endpoint of the channel.\n\n" "Returns: [dict] dictionary is empty on failure.\n" - " status [str]: 'closed', 'disconnected', or 'connected'.\n" - "The following are also returned if 'status' is 'connected':\n" - " dom [long]: Port-id for endpoint at dom1.\n" - " port [int]: Port-id for endpoint at dom2.\n" }, + " status [str]: 'closed', 'unbound', 'interdomain', 'pirq'," + " or 'virq'.\n" + "The following are returned if 'status' is 'interdomain':\n" + " dom [long]: Dom-id of remote endpoint.\n" + " port [int]: Port-id of remote endpoint.\n" + "The following are returned if 'status' is 'pirq' or 'virq':\n" + " irq [int]: IRQ number.\n" }, { "physdev_pci_access_modify", (PyCFunction)pyxc_physdev_pci_access_modify, diff --git a/tools/xend/lib/main.py b/tools/xend/lib/main.py index 15b2f089cc..6cbfa5ad35 100755 --- a/tools/xend/lib/main.py +++ b/tools/xend/lib/main.py @@ -175,16 +175,16 @@ def daemon_loop(): # getting clogged with stale connections. 
if type == notifier.DISCONNECT: ret = xc.evtchn_status(idx) - if ret['status'] != 'connected': + if ret['status'] == 'interdomain': notifier.clear(idx, notifier.NORMAL) notifier.clear(idx, notifier.DISCONNECT) if control_list.has_key(idx): (port, rbuf, wbuf, con_if) = control_list[idx] con_if.close() del control_list[idx], port, rbuf, wbuf, con_if - elif ret['status'] == 'disconnected': - # There's noone to do the closure for us... - xc.evtchn_close(idx) + elif ret['status'] == 'unbound': + # There's noone to do the closure for us... + xc.evtchn_close(idx) # A standard notification: probably means there are messages to # read or that there is space to write messages. diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c index 07cba257f5..e57b7ed9d3 100644 --- a/tools/xend/lib/utils.c +++ b/tools/xend/lib/utils.c @@ -632,7 +632,8 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args) goto fail2; } - if ( xc_evtchn_open(xup->xc_handle, DOMID_SELF, dom, &port1, &port2) != 0 ) + if ( xc_evtchn_bind_interdomain(xup->xc_handle, + DOMID_SELF, dom, &port1, &port2) != 0 ) { PyErr_SetString(port_error, "Could not open channel to domain"); goto fail3; diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index 0f0a826128..af7c7fa6f5 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -112,8 +112,8 @@ FAILSAFE_SEL = 32 FAILSAFE_ADDR = 36 /* Offsets in shared_info_t */ -EVENTS = 0 -EVENTS_MASK = 4 +UPCALL_PENDING = 0 +UPCALL_MASK = 4 /* Offsets in guest_trap_bounce */ GTB_ERROR_CODE = 0 @@ -368,15 +368,12 @@ test_all_events: jnz process_hyp_events /*test_guest_events:*/ movl SHARED_INFO(%ebx),%eax - shl $31,%ecx # %ecx = EVENTS_MASTER_ENABLE_MASK - test %ecx,EVENTS_MASK(%eax) - jz restore_all_guest # only notify if master switch enabled - movl EVENTS(%eax),%ecx - andl EVENTS_MASK(%eax),%ecx - jz restore_all_guest # skip if no events to deliver + movl UPCALL_MASK(%eax),%ecx notl %ecx - btrl $31,%ecx # NB. 
We clear all events that are - andl %ecx,EVENTS_MASK(%eax) # being delivered + master enable. + andl UPCALL_PENDING(%eax),%ecx # ECX = pending & ~mask + andl $1,%ecx # Is bit 0 pending and not masked? + jz restore_all_guest + orl %ecx,UPCALL_MASK(%eax) # Upcalls are masked during delivery /*process_guest_events:*/ movzwl PROCESSOR(%ebx),%edx shl $4,%edx # sizeof(guest_trap_bounce) == 16 diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c index 205827e098..1c514aea23 100644 --- a/xen/arch/i386/traps.c +++ b/xen/arch/i386/traps.c @@ -205,7 +205,7 @@ static inline void do_trap(int trapnr, char *str, gtb->cs = ti->cs; gtb->eip = ti->address; if ( TI_GET_IF(ti) ) - clear_bit(EVENTS_MASTER_ENABLE_BIT, &p->shared_info->events_mask); + set_bit(0, &p->shared_info->evtchn_upcall_mask); return; fault_in_hypervisor: @@ -276,7 +276,7 @@ asmlinkage void do_int3(struct pt_regs *regs, long error_code) gtb->cs = ti->cs; gtb->eip = ti->address; if ( TI_GET_IF(ti) ) - clear_bit(EVENTS_MASTER_ENABLE_BIT, &p->shared_info->events_mask); + set_bit(0, &p->shared_info->evtchn_upcall_mask); return; } @@ -346,7 +346,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code) gtb->cs = ti->cs; gtb->eip = ti->address; if ( TI_GET_IF(ti) ) - clear_bit(EVENTS_MASTER_ENABLE_BIT, &p->shared_info->events_mask); + set_bit(0, &p->shared_info->evtchn_upcall_mask); return; fault_in_hypervisor: @@ -434,7 +434,7 @@ asmlinkage void do_general_protection(struct pt_regs *regs, long error_code) gtb->cs = ti->cs; gtb->eip = ti->address; if ( TI_GET_IF(ti) ) - clear_bit(EVENTS_MASTER_ENABLE_BIT, &p->shared_info->events_mask); + set_bit(0, &p->shared_info->evtchn_upcall_mask); return; gp_in_kernel: diff --git a/xen/common/debug.c b/xen/common/debug.c index 2fcaf26c2e..988e09d2e8 100644 --- a/xen/common/debug.c +++ b/xen/common/debug.c @@ -224,16 +224,12 @@ void pdb_do_debug (dom0_op_t *op) case 's' : { - unsigned long cpu_mask; struct task_struct * p = find_domain_by_id(op->u.debug.domain); 
if (p != NULL) { if (p->state != TASK_STOPPED) - { - cpu_mask = mark_guest_event(p, _EVENT_STOP); - guest_event_notify(cpu_mask); - } + send_guest_virq(p, VIRQ_STOP); put_task_struct(p); } else diff --git a/xen/common/domain.c b/xen/common/domain.c index 32f1840266..1fd3dbc46b 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -53,14 +53,19 @@ struct task_struct *do_createdomain(domid_t dom_id, unsigned int cpu) if ( p->domain != IDLE_DOMAIN_ID ) { + if ( init_event_channels(p) != 0 ) + { + free_task_struct(p); + return NULL; + } + /* We use a large intermediate to avoid overflow in sprintf. */ sprintf(buf, "Domain-%llu", dom_id); strncpy(p->name, buf, MAX_DOMAIN_NAME); p->name[MAX_DOMAIN_NAME-1] = '\0'; spin_lock_init(&p->blk_ring_lock); - spin_lock_init(&p->event_channel_lock); - + p->addr_limit = USER_DS; spin_lock_init(&p->page_list_lock); @@ -133,8 +138,6 @@ void kill_domain_with_errmsg(const char *err) void __kill_domain(struct task_struct *p) { - extern void destroy_event_channels(struct task_struct *); - int i; struct task_struct **pp; unsigned long flags; @@ -197,25 +200,16 @@ void kill_domain(void) long kill_other_domain(domid_t dom, int force) { struct task_struct *p; - unsigned long cpu_mask = 0; - p = find_domain_by_id(dom); - if ( p == NULL ) return -ESRCH; + if ( (p = find_domain_by_id(dom)) == NULL ) + return -ESRCH; if ( p->state == TASK_STOPPED ) - { __kill_domain(p); - } else if ( force ) - { - cpu_mask = mark_hyp_event(p, _HYP_EVENT_DIE); - hyp_event_notify(cpu_mask); - } + send_hyp_event(p, _HYP_EVENT_DIE); else - { - cpu_mask = mark_guest_event(p, _EVENT_DIE); - guest_event_notify(cpu_mask); - } + send_guest_virq(p, VIRQ_DIE); put_task_struct(p); return 0; @@ -234,7 +228,6 @@ void stop_domain(void) long stop_other_domain(domid_t dom) { - unsigned long cpu_mask; struct task_struct *p; if ( dom == 0 ) @@ -244,10 +237,7 @@ long stop_other_domain(domid_t dom) if ( p == NULL) return -ESRCH; if ( p->state != TASK_STOPPED ) - { - cpu_mask 
= mark_guest_event(p, _EVENT_STOP); - guest_event_notify(cpu_mask); - } + send_guest_virq(p, VIRQ_STOP); put_task_struct(p); return 0; @@ -757,6 +747,7 @@ int setup_guestos(struct task_struct *p, dom0_createdomain_t *params, /* Set up shared info area. */ update_dom_time(p->shared_info); p->shared_info->domain_time = 0; + p->shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */ virt_startinfo_address = (start_info_t *) (virt_load_address + ((alloc_index - 1) << PAGE_SHIFT)); diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index babd168cb5..f5e48f366e 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -25,7 +25,8 @@ #include #include -#define MAX_EVENT_CHANNELS 1024 +#define INIT_EVENT_CHANNELS 16 +#define MAX_EVENT_CHANNELS 1024 static int get_free_port(struct task_struct *p) { @@ -65,28 +66,11 @@ static int get_free_port(struct task_struct *p) return port; } -static inline unsigned long set_event_pending(struct task_struct *p, int port) -{ - if ( !test_and_set_bit(port, &p->shared_info->event_channel_pend[0]) && - !test_and_set_bit(port>>5, &p->shared_info->event_channel_pend_sel) ) - return mark_guest_event(p, _EVENT_EVTCHN); - return 0; -} - -static inline unsigned long set_event_disc(struct task_struct *p, int port) -{ - if ( !test_and_set_bit(port, &p->shared_info->event_channel_disc[0]) && - !test_and_set_bit(port>>5, &p->shared_info->event_channel_disc_sel) ) - return mark_guest_event(p, _EVENT_EVTCHN); - return 0; -} - -static long event_channel_open(evtchn_open_t *open) +static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) { struct task_struct *p1, *p2; int port1 = 0, port2 = 0; - unsigned long cpu_mask; - domid_t dom1 = open->dom1, dom2 = open->dom2; + domid_t dom1 = bind->dom1, dom2 = bind->dom2; long rc = 0; if ( !IS_PRIV(current) ) @@ -130,21 +114,16 @@ static long event_channel_open(evtchn_open_t *open) goto out; } - p1->event_channel[port1].remote_dom = p2; - 
p1->event_channel[port1].remote_port = (u16)port2; - p1->event_channel[port1].state = ECS_CONNECTED; - - p2->event_channel[port2].remote_dom = p1; - p2->event_channel[port2].remote_port = (u16)port1; - p2->event_channel[port2].state = ECS_CONNECTED; + p1->event_channel[port1].u.remote.dom = p2; + p1->event_channel[port1].u.remote.port = (u16)port2; + p1->event_channel[port1].state = ECS_INTERDOMAIN; - /* Ensure that the disconnect signal is not asserted. */ - clear_bit(port1, &p1->shared_info->event_channel_disc[0]); - clear_bit(port2, &p2->shared_info->event_channel_disc[0]); + p2->event_channel[port2].u.remote.dom = p1; + p2->event_channel[port2].u.remote.port = (u16)port1; + p2->event_channel[port2].state = ECS_INTERDOMAIN; - cpu_mask = set_event_pending(p1, port1); - cpu_mask |= set_event_pending(p2, port2); - guest_event_notify(cpu_mask); + evtchn_set_pending(p1, port1); + evtchn_set_pending(p2, port2); out: spin_unlock(&p1->event_channel_lock); @@ -154,19 +133,55 @@ static long event_channel_open(evtchn_open_t *open) put_task_struct(p1); put_task_struct(p2); - open->port1 = port1; - open->port2 = port2; + bind->port1 = port1; + bind->port2 = port2; return rc; } -static long __event_channel_close(struct task_struct *p1, int port1) +static long evtchn_bind_virq(evtchn_bind_virq_t *bind) +{ + struct task_struct *p = current; + int virq = bind->virq; + int port; + + if ( virq >= NR_VIRQS ) + return -EINVAL; + + spin_lock(&p->event_channel_lock); + + /* + * Port 0 is the fallback port for VIRQs that haven't been explicitly + * bound yet. The exception is the 'error VIRQ', which is permanently + * bound to port 0. 
+ */ + if ( ((port = p->virq_to_evtchn[virq]) != 0) || + (virq == VIRQ_ERROR) || + ((port = get_free_port(p)) < 0) ) + goto out; + + p->event_channel[port].state = ECS_VIRQ; + p->event_channel[port].u.virq = virq; + + p->virq_to_evtchn[virq] = port; + + out: + spin_unlock(&p->event_channel_lock); + + if ( port < 0 ) + return port; + + bind->port = port; + return 0; +} + + +static long __evtchn_close(struct task_struct *p1, int port1) { struct task_struct *p2 = NULL; event_channel_t *chn1, *chn2; int port2; - unsigned long cpu_mask = 0; long rc = 0; again: @@ -174,18 +189,34 @@ static long __event_channel_close(struct task_struct *p1, int port1) chn1 = p1->event_channel; - if ( (port1 < 0) || (port1 >= p1->max_event_channel) || - (chn1[port1].state == ECS_FREE) ) + /* NB. Port 0 is special (VIRQ_ERROR). Never let it be closed. */ + if ( (port1 <= 0) || (port1 >= p1->max_event_channel) ) { rc = -EINVAL; goto out; } - if ( chn1[port1].state == ECS_CONNECTED ) + switch ( chn1[port1].state ) { + case ECS_FREE: + rc = -EINVAL; + goto out; + + case ECS_UNBOUND: + break; + + case ECS_PIRQ: + p1->pirq_to_evtchn[chn1[port1].u.pirq] = 0; + break; + + case ECS_VIRQ: + p1->virq_to_evtchn[chn1[port1].u.virq] = 0; + break; + + case ECS_INTERDOMAIN: if ( p2 == NULL ) { - p2 = chn1[port1].remote_dom; + p2 = chn1[port1].u.remote.dom; get_task_struct(p2); if ( p1->domain < p2->domain ) @@ -199,35 +230,39 @@ static long __event_channel_close(struct task_struct *p1, int port1) goto again; } } - else if ( p2 != chn1[port1].remote_dom ) + else if ( p2 != chn1[port1].u.remote.dom ) { rc = -EINVAL; goto out; } chn2 = p2->event_channel; - port2 = chn1[port1].remote_port; + port2 = chn1[port1].u.remote.port; if ( port2 >= p2->max_event_channel ) BUG(); - if ( chn2[port2].state != ECS_CONNECTED ) + if ( chn2[port2].state != ECS_INTERDOMAIN ) BUG(); - if ( chn2[port2].remote_dom != p1 ) + if ( chn2[port2].u.remote.dom != p1 ) BUG(); - chn2[port2].state = ECS_DISCONNECTED; - 
chn2[port2].remote_dom = NULL; - chn2[port2].remote_port = 0xFFFF; + chn2[port2].state = ECS_UNBOUND; + chn2[port2].u.remote.dom = NULL; + chn2[port2].u.remote.port = 0xFFFF; + + evtchn_set_exception(p2, port2); + + break; - cpu_mask |= set_event_disc(p2, port2); + default: + BUG(); } - chn1[port1].state = ECS_FREE; - chn1[port1].remote_dom = NULL; - chn1[port1].remote_port = 0xFFFF; + chn1[port1].state = ECS_FREE; + chn1[port1].u.remote.dom = NULL; + chn1[port1].u.remote.port = 0xFFFF; - cpu_mask |= set_event_disc(p1, port1); - guest_event_notify(cpu_mask); + evtchn_set_exception(p1, port1); out: if ( p2 != NULL ) @@ -243,7 +278,7 @@ static long __event_channel_close(struct task_struct *p1, int port1) } -static long event_channel_close(evtchn_close_t *close) +static long evtchn_close(evtchn_close_t *close) { struct task_struct *p; long rc; @@ -257,38 +292,36 @@ static long event_channel_close(evtchn_close_t *close) if ( (p = find_domain_by_id(dom)) == NULL ) return -ESRCH; - rc = __event_channel_close(p, close->port); + rc = __evtchn_close(p, close->port); put_task_struct(p); return rc; } -static long event_channel_send(int lport) +static long evtchn_send(int lport) { struct task_struct *lp = current, *rp; int rport; - unsigned long cpu_mask; spin_lock(&lp->event_channel_lock); if ( unlikely(lport < 0) || unlikely(lport >= lp->max_event_channel) || - unlikely(lp->event_channel[lport].state != ECS_CONNECTED) ) + unlikely(lp->event_channel[lport].state != ECS_INTERDOMAIN) ) { spin_unlock(&lp->event_channel_lock); return -EINVAL; } - rp = lp->event_channel[lport].remote_dom; - rport = lp->event_channel[lport].remote_port; + rp = lp->event_channel[lport].u.remote.dom; + rport = lp->event_channel[lport].u.remote.port; get_task_struct(rp); spin_unlock(&lp->event_channel_lock); - cpu_mask = set_event_pending(rp, rport); - guest_event_notify(cpu_mask); + evtchn_set_pending(rp, rport); put_task_struct(rp); @@ -296,11 +329,11 @@ static long event_channel_send(int lport) } 
-static long event_channel_status(evtchn_status_t *status) +static long evtchn_status(evtchn_status_t *status) { struct task_struct *p; - domid_t dom = status->dom1; - int port = status->port1; + domid_t dom = status->dom; + int port = status->port; event_channel_t *chn; if ( dom == DOMID_SELF ) @@ -326,13 +359,21 @@ static long event_channel_status(evtchn_status_t *status) case ECS_FREE: status->status = EVTCHNSTAT_closed; break; - case ECS_DISCONNECTED: - status->status = EVTCHNSTAT_disconnected; + case ECS_UNBOUND: + status->status = EVTCHNSTAT_unbound; + break; + case ECS_INTERDOMAIN: + status->status = EVTCHNSTAT_interdomain; + status->u.interdomain.dom = chn[port].u.remote.dom->domain; + status->u.interdomain.port = chn[port].u.remote.port; + break; + case ECS_PIRQ: + status->status = EVTCHNSTAT_pirq; + status->u.pirq = chn[port].u.pirq; break; - case ECS_CONNECTED: - status->status = EVTCHNSTAT_connected; - status->dom2 = chn[port].remote_dom->domain; - status->port2 = chn[port].remote_port; + case ECS_VIRQ: + status->status = EVTCHNSTAT_virq; + status->u.virq = chn[port].u.virq; break; default: BUG(); @@ -353,22 +394,28 @@ long do_event_channel_op(evtchn_op_t *uop) switch ( op.cmd ) { - case EVTCHNOP_open: - rc = event_channel_open(&op.u.open); + case EVTCHNOP_bind_interdomain: + rc = evtchn_bind_interdomain(&op.u.bind_interdomain); + if ( copy_to_user(uop, &op, sizeof(op)) != 0 ) + rc = -EFAULT; /* Cleaning up here would be a mess! */ + break; + + case EVTCHNOP_bind_virq: + rc = evtchn_bind_virq(&op.u.bind_virq); if ( copy_to_user(uop, &op, sizeof(op)) != 0 ) rc = -EFAULT; /* Cleaning up here would be a mess! 
*/ break; case EVTCHNOP_close: - rc = event_channel_close(&op.u.close); + rc = evtchn_close(&op.u.close); break; case EVTCHNOP_send: - rc = event_channel_send(op.u.send.local_port); + rc = evtchn_send(op.u.send.local_port); break; case EVTCHNOP_status: - rc = event_channel_status(&op.u.status); + rc = evtchn_status(&op.u.status); if ( copy_to_user(uop, &op, sizeof(op)) != 0 ) rc = -EFAULT; break; @@ -382,13 +429,28 @@ long do_event_channel_op(evtchn_op_t *uop) } +int init_event_channels(struct task_struct *p) +{ + spin_lock_init(&p->event_channel_lock); + p->event_channel = kmalloc(INIT_EVENT_CHANNELS * sizeof(event_channel_t), + GFP_KERNEL); + if ( unlikely(p->event_channel == NULL) ) + return -ENOMEM; + p->max_event_channel = INIT_EVENT_CHANNELS; + memset(p->event_channel, 0, INIT_EVENT_CHANNELS * sizeof(event_channel_t)); + p->event_channel[0].state = ECS_VIRQ; + p->event_channel[0].u.virq = VIRQ_ERROR; + return 0; +} + + void destroy_event_channels(struct task_struct *p) { int i; if ( p->event_channel != NULL ) { for ( i = 0; i < p->max_event_channel; i++ ) - (void)__event_channel_close(p, i); + (void)__evtchn_close(p, i); kfree(p->event_channel); } } diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index 69c3ff686b..e2eed7a85c 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -99,7 +99,7 @@ static char *task_states[] = void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) { - unsigned long flags, cpu_mask = 0; + unsigned long flags; struct task_struct *p; shared_info_t *s; s_time_t now = NOW(); @@ -116,15 +116,13 @@ void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) p->domain, p->processor, p->has_cpu ? 
'T':'F', task_states[p->state], p->hyp_events); s = p->shared_info; - printk("Guest: events = %08lx, events_mask = %08lx\n", - s->events, s->events_mask); + printk("Guest: upcall_pend = %08lx, upcall_mask = %08lx\n", + s->evtchn_upcall_pending, s->evtchn_upcall_mask); printk("Notifying guest...\n"); - cpu_mask |= mark_guest_event(p, _EVENT_DEBUG); + send_guest_virq(p, VIRQ_DEBUG); } read_unlock_irqrestore(&tasklist_lock, flags); - - guest_event_notify(cpu_mask); } extern void perfc_printall (u_char key, void *dev_id, struct pt_regs *regs); diff --git a/xen/common/physdev.c b/xen/common/physdev.c index e13dce8c2f..da22f88cfe 100644 --- a/xen/common/physdev.c +++ b/xen/common/physdev.c @@ -576,28 +576,16 @@ static long pci_find_irq(int seg, int bus, int dev, int func, u32 *val) static void phys_dev_interrupt(int irq, void *dev_id, struct pt_regs *ptregs) { phys_dev_t *pdev; - struct task_struct *p; - unsigned long cpu_mask = 0; - if ( !(pdev = (phys_dev_t *)dev_id) ) + if ( (pdev = (phys_dev_t *)dev_id) == NULL ) { printk("spurious interrupt, no proper device id, %d\n", irq); return; } - p = pdev->owner; - - if ( test_bit(irq, &p->shared_info->physirq_pend) ) - { - /* Some interrupt already delivered to guest */ - return; - } - - /* notify guest */ - set_bit(irq, &p->shared_info->physirq_pend); + /* XXX KAF: introduced race here? */ set_bit(ST_IRQ_DELIVERED, &pdev->state); - cpu_mask |= mark_guest_event(p, _EVENT_PHYSIRQ); - guest_event_notify(cpu_mask); + send_guest_pirq(pdev->owner, irq); } /* this is called instead of the PICs original end handler. @@ -767,11 +755,13 @@ static long pci_finished_irq(int irq) return -EINVAL; } +#if 0 /* XXX KAF: do we need this? 
*/ if ( test_bit(irq, ¤t->shared_info->physirq_pend) ) { printk("finished_irq called for un-acknowleged irq %d\n", irq); return -EINVAL; } +#endif clear_bit(ST_IRQ_DELIVERED, &pdev->state); diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 0738a843db..7b06d3a109 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -218,7 +218,7 @@ void wake_up(struct task_struct *p) static long do_block(void) { ASSERT(current->domain != IDLE_DOMAIN_ID); - set_bit(EVENTS_MASTER_ENABLE_BIT, ¤t->shared_info->events_mask); + clear_bit(0, ¤t->shared_info->evtchn_upcall_mask); current->state = TASK_INTERRUPTIBLE; TRACE_2D(TRC_SCHED_BLOCK, current->domain, current); __enter_scheduler(); @@ -351,7 +351,7 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd) */ unsigned long __reschedule(struct task_struct *p) { - int cpu = p->processor; + int cpu = p->processor; struct task_struct *curr; s_time_t now, min_time; @@ -385,9 +385,13 @@ void reschedule(struct task_struct *p) spin_lock_irqsave(&schedule_lock[p->processor], flags); cpu_mask = __reschedule(p); - spin_unlock_irqrestore(&schedule_lock[p->processor], flags); - hyp_event_notify(cpu_mask); + +#ifdef CONFIG_SMP + cpu_mask &= ~(1 << smp_processor_id()); + if ( cpu_mask != 0 ) + smp_send_event_check_mask(cpu_mask); +#endif } /* @@ -489,7 +493,7 @@ asmlinkage void __enter_scheduler(void) /* Mark a timer event for the newly-scheduled domain. 
*/ if ( !is_idle_task(next) ) - set_bit(_EVENT_TIMER, &next->shared_info->events); + evtchn_set_pending(next, VIRQ_TIMER); schedule_tail(next); @@ -528,8 +532,8 @@ static void t_timer_fn(unsigned long unused) TRACE_0D(TRC_SCHED_T_TIMER_FN); - if ( !is_idle_task(p) ) - set_bit(_EVENT_TIMER, &p->shared_info->events); + if ( !is_idle_task(p) ) + send_guest_virq(p, VIRQ_TIMER); t_timer[p->processor].expires = NOW() + MILLISECS(10); add_ac_timer(&t_timer[p->processor]); @@ -538,13 +542,9 @@ static void t_timer_fn(unsigned long unused) /* Domain timer function, sends a virtual timer interrupt to domain */ static void dom_timer_fn(unsigned long data) { - unsigned long cpu_mask = 0; struct task_struct *p = (struct task_struct *)data; - TRACE_0D(TRC_SCHED_DOM_TIMER_FN); - - cpu_mask |= mark_guest_event(p, _EVENT_TIMER); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_TIMER); } diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c index 2d8055142d..f44902b1c9 100644 --- a/xen/drivers/block/xen_block.c +++ b/xen/drivers/block/xen_block.c @@ -539,7 +539,6 @@ static void dispatch_rw_block_io(struct task_struct *p, static void make_response(struct task_struct *p, unsigned long id, unsigned short op, unsigned long st) { - unsigned long cpu_mask; blk_ring_resp_entry_t *resp; /* Place on the response ring for the relevant domain. */ @@ -553,8 +552,7 @@ static void make_response(struct task_struct *p, unsigned long id, spin_unlock(&p->blk_ring_lock); /* Kick the relevant domain. 
*/ - cpu_mask = mark_guest_event(p, _EVENT_BLKDEV); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_BLKDEV); } static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs) diff --git a/xen/drivers/block/xen_vbd.c b/xen/drivers/block/xen_vbd.c index d7bb53631d..8d150f5872 100644 --- a/xen/drivers/block/xen_vbd.c +++ b/xen/drivers/block/xen_vbd.c @@ -29,7 +29,6 @@ long __vbd_create(struct task_struct *p, vbd_t *vbd; rb_node_t **rb_p, *rb_parent = NULL; long ret = 0; - unsigned long cpu_mask; spin_lock(&p->vbd_lock); @@ -69,8 +68,7 @@ long __vbd_create(struct task_struct *p, rb_link_node(&vbd->rb, rb_parent, rb_p); rb_insert_color(&vbd->rb, &p->vbd_rb); - cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_VBD_UPD); out: spin_unlock(&p->vbd_lock); @@ -110,7 +108,6 @@ long __vbd_grow(struct task_struct *p, vbd_t *vbd = NULL; rb_node_t *rb; long ret = 0; - unsigned long cpu_mask; spin_lock(&p->vbd_lock); @@ -150,8 +147,7 @@ long __vbd_grow(struct task_struct *p, *px = x; - cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_VBD_UPD); out: spin_unlock(&p->vbd_lock); @@ -190,7 +186,6 @@ long vbd_shrink(vbd_shrink_t *shrink) vbd_t *vbd = NULL; rb_node_t *rb; long ret = 0; - unsigned long cpu_mask; if ( !IS_PRIV(current) ) return -EPERM; @@ -233,8 +228,7 @@ long vbd_shrink(vbd_shrink_t *shrink) *px = x->next; kfree(x); - cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_VBD_UPD); out: spin_unlock(&p->vbd_lock); @@ -252,7 +246,6 @@ long vbd_setextents(vbd_setextents_t *setextents) rb_node_t *rb; int i; long ret = 0; - unsigned long cpu_mask; if ( !IS_PRIV(current) ) return -EPERM; @@ -323,8 +316,7 @@ long vbd_setextents(vbd_setextents_t *setextents) /* Make the new list visible. 
*/ vbd->extents = new_extents; - cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_VBD_UPD); out: spin_unlock(&p->vbd_lock); @@ -348,7 +340,6 @@ long vbd_delete(vbd_delete_t *delete) vbd_t *vbd; rb_node_t *rb; xen_extent_le_t *x, *t; - unsigned long cpu_mask; if( !IS_PRIV(current) ) return -EPERM; @@ -392,8 +383,7 @@ long vbd_delete(vbd_delete_t *delete) x = t; } - cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_VBD_UPD); spin_unlock(&p->vbd_lock); put_task_struct(p); @@ -406,7 +396,6 @@ void destroy_all_vbds(struct task_struct *p) vbd_t *vbd; rb_node_t *rb; xen_extent_le_t *x, *t; - unsigned long cpu_mask; spin_lock(&p->vbd_lock); @@ -426,8 +415,7 @@ void destroy_all_vbds(struct task_struct *p) } } - cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_VBD_UPD); spin_unlock(&p->vbd_lock); } diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index 027e22faa8..d0fe21acb0 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -243,7 +243,6 @@ static void switch_serial_input(void) static void __serial_rx(unsigned char c, struct pt_regs *regs) { key_handler *handler; - unsigned long cpu_mask; struct task_struct *p; if ( xen_rx ) @@ -257,8 +256,7 @@ static void __serial_rx(unsigned char c, struct pt_regs *regs) if ( serial_rx_prod++ == serial_rx_cons ) { p = find_domain_by_id(0); /* only DOM0 reads the serial buffer */ - cpu_mask = mark_guest_event(p, _EVENT_CONSOLE); - guest_event_notify(cpu_mask); + send_guest_virq(p, VIRQ_CONSOLE); put_task_struct(p); } } diff --git a/xen/drivers/char/keyboard.c b/xen/drivers/char/keyboard.c index ed933462f0..960bc5f773 100644 --- a/xen/drivers/char/keyboard.c +++ b/xen/drivers/char/keyboard.c @@ -193,7 +193,7 @@ static void keyboard_interrupt(int irq, void *dev_id, struct pt_regs *regs) { unsigned char status=0, scancode; unsigned 
int work = 1000; - unsigned long cpu_mask = 0, flags; + unsigned long flags; struct task_struct *p = CONSOLE_OWNER; spin_lock_irqsave(&kbd_lock, flags); @@ -227,7 +227,7 @@ static void keyboard_interrupt(int irq, void *dev_id, struct pt_regs *regs) if ( p != NULL ) { kbd_ring_push(status, scancode); - cpu_mask |= mark_guest_event(p, _EVENT_PS2); + send_guest_virq(p, VIRQ_PS2); } } @@ -237,10 +237,7 @@ static void keyboard_interrupt(int irq, void *dev_id, struct pt_regs *regs) spin_unlock_irqrestore(&kbd_lock, flags); if ( p != NULL ) - { put_task_struct(p); - guest_event_notify(cpu_mask); - } } diff --git a/xen/include/hypervisor-ifs/event_channel.h b/xen/include/hypervisor-ifs/event_channel.h index 159f146b2f..6fae72cc38 100644 --- a/xen/include/hypervisor-ifs/event_channel.h +++ b/xen/include/hypervisor-ifs/event_channel.h @@ -10,20 +10,34 @@ #define __HYPERVISOR_IFS__EVENT_CHANNEL_H__ /* - * EVTCHNOP_open: Open a communication channel between and . + * EVTCHNOP_bind_interdomain: Open an event channel between and . * NOTES: * 1. and/or may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may create an event channel. * 3. and are only supplied if the op succeeds. */ -#define EVTCHNOP_open 0 -typedef struct evtchn_open +#define EVTCHNOP_bind_interdomain 0 +typedef struct evtchn_bind_interdomain { /* IN parameters. */ domid_t dom1, dom2; /* OUT parameters. */ int port1, port2; -} evtchn_open_t; +} evtchn_bind_interdomain_t; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to IRQ . + * NOTES: + * 1. A virtual IRQ may be bound to at most one event channel per domain. + */ +#define EVTCHNOP_bind_virq 1 +typedef struct evtchn_bind_virq +{ + /* IN parameters. */ + int virq; + /* OUT parameters. */ + int port; +} evtchn_bind_virq_t; /* * EVTCHNOP_close: Close the communication channel which has an endpoint at @@ -33,7 +47,7 @@ typedef struct evtchn_open * 2. 
Only a sufficiently-privileged domain may close an event channel * for which is not DOMID_SELF. */ -#define EVTCHNOP_close 1 +#define EVTCHNOP_close 2 typedef struct evtchn_close { /* IN parameters. */ @@ -46,7 +60,7 @@ typedef struct evtchn_close * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ -#define EVTCHNOP_send 2 +#define EVTCHNOP_send 3 typedef struct evtchn_send { /* IN parameters. */ @@ -56,36 +70,45 @@ typedef struct evtchn_send /* * EVTCHNOP_status: Get the current status of the communication channel which - * has an endpoint at . + * has an endpoint at . * NOTES: - * 1. may be specified as DOMID_SELF. + * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may obtain the status of an event - * channel for which is not DOMID_SELF. - * 3. is only supplied if status is 'connected'. + * channel for which is not DOMID_SELF. */ -#define EVTCHNOP_status 3 /* Get status of . */ +#define EVTCHNOP_status 4 typedef struct evtchn_status { /* IN parameters */ - domid_t dom1; - int port1; + domid_t dom; + int port; /* OUT parameters */ - domid_t dom2; - int port2; -#define EVTCHNSTAT_closed 0 /* Chennel is not in use. */ -#define EVTCHNSTAT_disconnected 1 /* Channel is not connected to remote. */ -#define EVTCHNSTAT_connected 2 /* Channel is connected to remote. */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is not bound to a source. */ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. 
*/ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ int status; + union { + int __none; /* EVTCHNSTAT_closed, EVTCHNSTAT_unbound */ + struct { + domid_t dom; + int port; + } interdomain; /* EVTCHNSTAT_interdomain */ + int pirq; /* EVTCHNSTAT_pirq */ + int virq; /* EVTCHNSTAT_virq */ + } u; } evtchn_status_t; typedef struct evtchn_op { int cmd; /* EVTCHNOP_* */ union { - evtchn_open_t open; - evtchn_close_t close; - evtchn_send_t send; - evtchn_status_t status; + evtchn_bind_interdomain_t bind_interdomain; + evtchn_bind_virq_t bind_virq; + evtchn_close_t close; + evtchn_send_t send; + evtchn_status_t status; } u; } evtchn_op_t; diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 5b15808694..12065a0bcc 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -50,40 +50,25 @@ #define ARGS_PER_MULTICALL_ENTRY 8 -/* EVENT MESSAGES - * - * Here, as in the interrupts to the guestos, additional network interfaces - * are defined. These definitions server as placeholders for the event bits, - * however, in the code these events will allways be referred to as shifted - * offsets from the base NET events. +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from the hypervisor. */ -/* Events that a guest OS may receive from the hypervisor. */ -#define EVENT_BLKDEV 0x01 /* A block device response has been queued. */ -#define EVENT_TIMER 0x02 /* A timeout has been updated. */ -#define EVENT_DIE 0x04 /* OS is about to be killed. Clean up please! */ -#define EVENT_DEBUG 0x08 /* Request guest to dump debug info (gross!) */ -#define EVENT_NET 0x10 /* There are packets for transmission. 
*/ -#define EVENT_PS2 0x20 /* PS/2 keyboard or mouse event(s) */ -#define EVENT_STOP 0x40 /* Prepare for stopping and possible pickling */ -#define EVENT_EVTCHN 0x80 /* Event pending on an event channel */ -#define EVENT_VBD_UPD 0x100 /* Event to signal VBDs should be reprobed */ -#define EVENT_CONSOLE 0x200 /* This is only for domain-0 initial console. */ -#define EVENT_PHYSIRQ 0x400 /* Event to signal pending physical IRQs. */ - -/* Bit offsets, as opposed to the above masks. */ -#define _EVENT_BLKDEV 0 -#define _EVENT_TIMER 1 -#define _EVENT_DIE 2 -#define _EVENT_DEBUG 3 -#define _EVENT_NET 4 -#define _EVENT_PS2 5 -#define _EVENT_STOP 6 -#define _EVENT_EVTCHN 7 -#define _EVENT_VBD_UPD 8 -#define _EVENT_CONSOLE 9 -#define _EVENT_PHYSIRQ 10 - +#define VIRQ_BLKDEV 0 /* A block device response has been queued. */ +#define VIRQ_TIMER 1 /* A timeout has been updated. */ +#define VIRQ_DIE 2 /* OS is about to be killed. Clean up please! */ +#define VIRQ_DEBUG 3 /* Request guest to dump debug info (gross!) */ +#define VIRQ_NET 4 /* There are packets for transmission. */ +#define VIRQ_PS2 5 /* PS/2 keyboard or mouse event(s) */ +#define VIRQ_STOP 6 /* Prepare for stopping and possible pickling */ +#define VIRQ_EVTCHN 7 /* Event pending on an event channel */ +#define VIRQ_VBD_UPD 8 /* Event to signal VBDs should be reprobed */ +#define VIRQ_CONSOLE 9 /* This is only for domain-0 initial console. */ +#define VIRQ_PHYSIRQ 10 /* Event to signal pending physical IRQs. */ +#define VIRQ_ERROR 11 /* Catch-all virtual interrupt. */ +#define NR_VIRQS 12 /* * MMU_XXX: specified in least 2 bits of 'ptr' field. These bits are masked @@ -121,12 +106,6 @@ #define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */ #define UVMF_INVLPG 2 /* Flush the VA mapping being updated. */ -/* - * Master "switch" for enabling/disabling event delivery. - */ -#define EVENTS_MASTER_ENABLE_MASK 0x80000000UL -#define EVENTS_MASTER_ENABLE_BIT 31 - /* * SCHEDOP_* - Scheduler hypercall operations. 
@@ -172,45 +151,57 @@ typedef struct * Xen/guestos shared data -- pointer provided in start_info. * NB. We expect that this struct is smaller than a page. */ -typedef struct shared_info_st { - - /* Bitmask of outstanding event notifications hypervisor -> guest OS. */ - unsigned long events; +typedef struct shared_info_st +{ /* - * Hypervisor will only signal event delivery via the "callback exception" - * when a pending event is not masked. The mask also contains a "master - * enable" which prevents any event delivery. This mask can be used to - * prevent unbounded reentrancy and stack overflow (in this way, acts as a - * kind of interrupt-enable flag). + * If bit 0 in evtchn_upcall_pending is transitioned 0->1, and bit 0 in + * evtchn_upcall_mask is clear, then an asynchronous upcall is scheduled. + * The upcall mask can be used to prevent unbounded reentrancy and stack + * overflow (in this way, acts as a kind of interrupt-enable flag). */ - unsigned long events_mask; + unsigned long evtchn_upcall_pending; + unsigned long evtchn_upcall_mask; /* - * A domain can have up to 1024 bidirectional event channels to/from other - * domains. Domains must agree out-of-band to set up a connection, and then - * each must explicitly request a connection to the other. When both have - * made the request the channel is fully allocated and set up. + * A domain can have up to 1024 "event channels" on which it can send + * and receive asynchronous event notifications. There are three classes + * of event that are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually the setup + * is initiated and organised by a privileged third party such as + * software running in domain 0). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). 
A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. * - * An event channel is a single sticky 'bit' of information. Setting the - * sticky bit also causes an upcall into the target domain. In this way - * events can be seen as an IPI [Inter-Process(or) Interrupt]. + * Event channels are addressed by a "port index" between 0 and 1023. + * Each channel is associated with three bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. EXCEPTION -- notifies the domain that there has been some + * exceptional event associated with this channel (e.g. remote + * disconnect, physical IRQ error). This bit is cleared by the guest. + * 3. MASK -- if this bit is clear then a 0->1 transition of PENDING + * or EXCEPTION will cause an asynchronous upcall to be scheduled. + * This bit is only updated by the guest. It is read-only within Xen. + * If a channel becomes pending or an exceptional event occurs while + * the channel is masked then the 'edge' is lost (i.e., when the + * channel is unmasked, the guest must manually handle pending + * notifications as no upcall will be scheduled by Xen). * - * A guest can see which of its event channels are pending by reading the - * 'event_channel_pend' bitfield. To avoid a linear scan of the entire - * bitfield there is a 'selector' which indicates which words in the - * bitfield contain at least one set bit. - * - * There is a similar bitfield to indicate which event channels have been - * disconnected by the remote end. There is also a 'selector' for this - * field. + * To expedite scanning of pending notifications and exceptions, any + * 0->1 transition on an unmasked channel causes a corresponding bit in + * a 32-bit selector to be set. Each bit in the selector covers a 32-bit + * word in the PENDING or EXCEPTION bitfield array. 
*/ - u32 event_channel_pend[32]; - u32 event_channel_pend_sel; - u32 event_channel_disc[32]; - u32 event_channel_disc_sel; - - /* Bitmask of physical IRQ lines that are pending for this domain. */ - unsigned long physirq_pend; + u32 evtchn_pending[32]; + u32 evtchn_pending_sel; + u32 evtchn_exception[32]; + u32 evtchn_exception_sel; + u32 evtchn_mask[32]; /* * Time: The following abstractions are exposed: System Time, Clock Time, diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h index fd57576d9e..e471e56afd 100644 --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -6,33 +6,23 @@ * Copyright (c) 2002, K A Fraser */ +#ifndef __XEN_EVENT_H__ +#define __XEN_EVENT_H__ + #include #include #include -#ifdef CONFIG_SMP - /* - * mark_guest_event: - * @p: Domain to which event should be passed - * @event: Event number - * RETURNS: "Bitmask" of CPU on which process is currently running - * - * Idea is that caller may loop on task_list, looking for domains - * to pass events to (using this function). The caller accumulates the - * bits returned by this function (ORing them together) then calls - * event_notify(). - * - * Guest_events are per-domain events passed directly to the guest OS - * in ring 1. + * GENERIC SCHEDULING CALLBACK MECHANISMS */ -static inline unsigned long mark_guest_event(struct task_struct *p, int event) + +/* Schedule an asynchronous callback for the specified domain. 
*/ +static inline void __guest_notify(struct task_struct *p) { +#ifdef CONFIG_SMP unsigned long flags, cpu_mask; - if ( test_and_set_bit(event, &p->shared_info->events) ) - return 0; - spin_lock_irqsave(&schedule_lock[p->processor], flags); if ( p->state == TASK_INTERRUPTIBLE ) __wake_up(p); @@ -41,75 +31,85 @@ static inline unsigned long mark_guest_event(struct task_struct *p, int event) cpu_mask |= 1 << p->processor; spin_unlock_irqrestore(&schedule_lock[p->processor], flags); - return cpu_mask; + cpu_mask &= ~(1 << smp_processor_id()); + if ( cpu_mask != 0 ) + smp_send_event_check_mask(cpu_mask); +#else + if ( p->state == TASK_INTERRUPTIBLE ) + wake_up(p); + reschedule(p); +#endif } -/* As above, but hyp_events are handled within the hypervisor. */ -static inline unsigned long mark_hyp_event(struct task_struct *p, int event) +static inline void guest_notify(struct task_struct *p) { - unsigned long flags, cpu_mask; + /* + * Upcall already pending or upcalls masked? + * NB. Suitably synchronised on x86: + * We must set the pending bit before checking the mask, but this is + * guaranteed to occur because test_and_set_bit() is an ordering barrier. + */ + if ( !test_and_set_bit(0, &p->shared_info->evtchn_upcall_pending) && + !test_bit(0, &p->shared_info->evtchn_upcall_mask) ) + __guest_notify(p); +} - if ( test_and_set_bit(event, &p->hyp_events) ) - return 0; - spin_lock_irqsave(&schedule_lock[p->processor], flags); - if ( p->state == TASK_INTERRUPTIBLE ) - __wake_up(p); - cpu_mask = __reschedule(p); - if ( p->has_cpu ) - cpu_mask |= 1 << p->processor; - spin_unlock_irqrestore(&schedule_lock[p->processor], flags); +/* + * EVENT-CHANNEL NOTIFICATIONS + * NB. As in guest_notify, evtchn_set_* is suitably synchronised on x86. 
+ */ - return cpu_mask; +static inline void evtchn_set_pending(struct task_struct *p, int port) +{ + shared_info_t *s = p->shared_info; + if ( !test_and_set_bit(port, &s->evtchn_pending[0]) && + !test_bit (port, &s->evtchn_mask[0]) && + !test_and_set_bit(port>>5, &s->evtchn_pending_sel) ) + guest_notify(p); } -/* Notify the given set of CPUs that guest events may be outstanding. */ -static inline void guest_event_notify(unsigned long cpu_mask) +static inline void evtchn_set_exception(struct task_struct *p, int port) { - cpu_mask &= ~(1 << smp_processor_id()); - if ( cpu_mask != 0 ) smp_send_event_check_mask(cpu_mask); + shared_info_t *s = p->shared_info; + if ( !test_and_set_bit(port, &s->evtchn_exception[0]) && + !test_bit (port, &s->evtchn_mask[0]) && + !test_and_set_bit(port>>5, &s->evtchn_exception_sel) ) + guest_notify(p); } -#else - -static inline unsigned long mark_guest_event(struct task_struct *p, int event) +/* + * send_guest_virq: + * @p: Domain to which virtual IRQ should be sent + * @virq: Virtual IRQ number (VIRQ_*) + */ +static inline void send_guest_virq(struct task_struct *p, int virq) { - if ( !test_and_set_bit(event, &p->shared_info->events) ) - { - if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p); - reschedule(p); - } - return 0; + evtchn_set_pending(p, p->virq_to_evtchn[virq]); } -static inline unsigned long mark_hyp_event(struct task_struct *p, int event) +/* + * send_guest_pirq: + * @p: Domain to which physical IRQ should be sent + * @pirq: Physical IRQ number + */ +static inline void send_guest_pirq(struct task_struct *p, int pirq) { - if ( !test_and_set_bit(event, &p->hyp_events) ) - { - if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p); - reschedule(p); - } - return 0; + evtchn_set_pending(p, p->pirq_to_evtchn[pirq]); } -#define guest_event_notify(_mask) ((void)0) - -#endif -/* Notify hypervisor events in thesame way as for guest OS events. 
*/ -#define hyp_event_notify(_mask) guest_event_notify(_mask) - -/* Clear a guest-OS event from a per-domain mask. */ -static inline void clear_guest_event(struct task_struct *p, int event) -{ - clear_bit(event, &p->shared_info->events); -} +/* + * HYPERVISOR-HANDLED EVENTS + */ -/* Clear a hypervisor event from a per-domain mask. */ -static inline void clear_hyp_event(struct task_struct *p, int event) +static inline void send_hyp_event(struct task_struct *p, int event) { - clear_bit(event, &p->hyp_events); + if ( !test_and_set_bit(event, &p->hyp_events) ) + __guest_notify(p); } /* Called on return from (architecture-dependent) entry.S. */ void do_hyp_events(void); + +#endif /* __XEN_EVENT_H__ */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index c41123ebc4..192c2fcc22 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -51,14 +51,25 @@ struct task_struct; typedef struct event_channel_st { - struct task_struct *remote_dom; - u16 remote_port; -#define ECS_FREE 0 /* Available for use. */ -#define ECS_DISCONNECTED 1 /* Connection is closed. Remote is disconnected. */ -#define ECS_CONNECTED 2 /* Connected to remote end. */ - u16 state; +#define ECS_FREE 0 /* Channel is available for use. */ +#define ECS_UNBOUND 1 /* Channel is not bound to a particular source. */ +#define ECS_INTERDOMAIN 2 /* Channel is bound to another domain. */ +#define ECS_PIRQ 3 /* Channel is bound to a physical IRQ line. */ +#define ECS_VIRQ 4 /* Channel is bound to a virtual IRQ line. 
*/ + u16 state; + union { + struct { + u16 port; + struct task_struct *dom; + } __attribute__ ((packed)) remote; /* state == ECS_CONNECTED */ + u16 pirq; /* state == ECS_PIRQ */ + u16 virq; /* state == ECS_VIRQ */ + } u; } event_channel_t; +int init_event_channels(struct task_struct *p); +void destroy_event_channels(struct task_struct *p); + struct task_struct { /* @@ -145,6 +156,14 @@ struct task_struct unsigned int max_event_channel; spinlock_t event_channel_lock; + /* + * Interrupt to event-channel mappings. Updates should be protected by the + * domain's event-channel spinlock. Read accesses can also synchronise on + * the lock, but races don't usually matter. + */ + u16 pirq_to_evtchn[64]; + u16 virq_to_evtchn[NR_VIRQS]; + /* Physical I/O */ spinlock_t pcidev_lock; struct list_head pcidev_list; @@ -270,8 +289,9 @@ static inline long schedule_timeout(long timeout) } #define signal_pending(_p) \ - ((_p)->hyp_events || \ - ((_p)->shared_info->events & (_p)->shared_info->events_mask)) + (((_p)->hyp_events != 0) || \ + (test_bit(0, &(_p)->shared_info->evtchn_upcall_pending) && \ + !test_bit(0, &(_p)->shared_info->evtchn_upcall_mask))) void domain_init(void); diff --git a/xen/net/dev.c b/xen/net/dev.c index 0761ba1343..05d474f98b 100644 --- a/xen/net/dev.c +++ b/xen/net/dev.c @@ -2335,10 +2335,7 @@ static void make_tx_response(net_vif_t *vif, smp_mb(); /* Update producer before checking event threshold. */ if ( i == vif->shared_idxs->tx_event ) - { - unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET); - guest_event_notify(cpu_mask); - } + send_guest_virq(vif->domain, VIRQ_NET); } @@ -2361,10 +2358,7 @@ static void make_rx_response(net_vif_t *vif, smp_mb(); /* Update producer before checking event threshold. 
*/ if ( i == vif->shared_idxs->rx_event ) - { - unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET); - guest_event_notify(cpu_mask); - } + send_guest_virq(vif->domain, VIRQ_NET); } diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c index 1297fe1a03..d00dd98f7b 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c @@ -18,15 +18,14 @@ typedef unsigned char byte; /* from linux/ide.h */ -#define XLBLK_RESPONSE_IRQ HYPEREVENT_IRQ(_EVENT_BLKDEV) -#define XLBLK_UPDATE_IRQ HYPEREVENT_IRQ(_EVENT_VBD_UPD) -#define DEBUG_IRQ HYPEREVENT_IRQ(_EVENT_DEBUG) - #define STATE_ACTIVE 0 #define STATE_SUSPENDED 1 #define STATE_CLOSED 2 static unsigned int state = STATE_SUSPENDED; +/* Dynamically-mapped IRQs. */ +static int xlblk_response_irq, xlblk_update_irq; + static blk_ring_t *blk_ring; static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ static BLK_RING_IDX req_prod; /* Private request producer. 
*/ @@ -552,7 +551,10 @@ int __init xlblk_init(void) reset_xlblk_interface(); - error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, + xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV); + xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD); + + error = request_irq(xlblk_response_irq, xlblk_response_int, SA_SAMPLE_RANDOM, "blkdev", NULL); if ( error ) { @@ -560,8 +562,8 @@ int __init xlblk_init(void) goto fail; } - error = request_irq(XLBLK_UPDATE_IRQ, xlblk_update_int, - SA_INTERRUPT, "blkdev", NULL); + error = request_irq(xlblk_update_irq, xlblk_update_int, + 0, "blkdev", NULL); if ( error ) { @@ -581,8 +583,10 @@ int __init xlblk_init(void) static void __exit xlblk_cleanup(void) { xlvbd_cleanup(); - free_irq(XLBLK_RESPONSE_IRQ, NULL); - free_irq(XLBLK_UPDATE_IRQ, NULL); + free_irq(xlblk_response_irq, NULL); + free_irq(xlblk_update_irq, NULL); + unbind_virq_from_irq(VIRQ_BLKDEV); + unbind_virq_from_irq(VIRQ_VBD_UPD); } diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c index 2a7ff12adf..a994f461d8 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c @@ -119,6 +119,13 @@ static struct console kcons_info = { void xen_console_init(void) { register_console(&kcons_info); + + /* + * XXX This prevents a bogus 'VIRQ_ERROR' when interrupts are enabled + * for the first time. This works because by this point all important + * VIRQs (eg. timer) have been properly bound. 
+ */ + clear_bit(0, &HYPERVISOR_shared_info->evtchn_pending[0]); } @@ -149,6 +156,7 @@ static struct tty_struct *xen_console_table[1]; static struct termios *xen_console_termios[1]; static struct termios *xen_console_termios_locked[1]; static struct tty_struct *xen_console_tty; +static int console_irq; #define WBUF_SIZE 1024 #define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) @@ -194,9 +202,6 @@ static void __do_console_io(void) return; } - /* Acknowledge the notification. */ - evtchn_clear_port(0); - ctrl_if = (control_if_t *)((char *)HYPERVISOR_shared_info + 2048); /* Receive work. */ @@ -259,17 +264,7 @@ static void __do_console_io(void) } } -/* This is the callback entry point for domains != 0. */ -static void control_event(unsigned int port) -{ - unsigned long flags; - spin_lock_irqsave(&xen_console_lock, flags); - __do_console_io(); - spin_unlock_irqrestore(&xen_console_lock, flags); -} - -/* This is the callback entry point for domain 0. */ -static void control_irq(int irq, void *dev_id, struct pt_regs *regs) +static void console_interrupt(int irq, void *dev_id, struct pt_regs *regs) { unsigned long flags; spin_lock_irqsave(&xen_console_lock, flags); @@ -472,17 +467,12 @@ int __init xen_con_init(void) panic("Couldn't register Xen virtual console driver\n"); if ( !(start_info.flags & SIF_INITDOMAIN) ) - { - if ( evtchn_request_port(0, control_event) != 0 ) - BUG(); - control_event(0); /* kickstart the console */ - } + console_irq = bind_evtchn_to_irq(1); else - { - request_irq(HYPEREVENT_IRQ(_EVENT_CONSOLE), - control_irq, 0, "console", NULL); - control_irq(0, NULL, NULL); /* kickstart the console */ - } + console_irq = bind_virq_to_irq(VIRQ_CONSOLE); + + (void)request_irq(console_irq, + console_interrupt, 0, "console", NULL); printk("Xen virtual console successfully installed\n"); @@ -497,8 +487,12 @@ void __exit xen_con_fini(void) if ( ret != 0 ) printk(KERN_ERR "Unable to unregister Xen console driver: %d\n", ret); + free_irq(console_irq, NULL); + if ( 
!(start_info.flags & SIF_INITDOMAIN) ) - (void)evtchn_free_port(0); + unbind_evtchn_from_irq(1); + else + unbind_virq_from_irq(VIRQ_CONSOLE); } module_init(xen_con_init); diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c index a7978ee8d2..faa1bb0b61 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c @@ -40,148 +40,54 @@ static unsigned int ring_cons, ring_prod, ring_overflow; static DECLARE_WAIT_QUEUE_HEAD(evtchn_wait); static struct fasync_struct *evtchn_async_queue; -static evtchn_receiver_t rx_fns[1024]; - -static u32 pend_outstanding[32]; -static u32 disc_outstanding[32]; +/* + * Pending normal notifications and pending exceptional notifications. + * 'Pending' means that we received an upcall but this is not yet ack'ed + * from userspace by writing to /dev/xen/evtchn. + */ +static u32 pend_nrm[32], pend_exc[32]; static spinlock_t lock; -int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn) +void evtchn_device_upcall(int port, int exception) { - unsigned long flags; - int rc; + u16 port_subtype; - spin_lock_irqsave(&lock, flags); + spin_lock(&lock); - if ( rx_fns[port] != NULL ) - { - printk(KERN_ALERT "Event channel port %d already in use.\n", port); - rc = -EINVAL; - } - else - { - rx_fns[port] = rx_fn; - rc = 0; - } + mask_evtchn(port); - spin_unlock_irqrestore(&lock, flags); - - return rc; -} - -int evtchn_free_port(unsigned int port) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave(&lock, flags); - - if ( rx_fns[port] == NULL ) + if ( likely(!exception) ) { - printk(KERN_ALERT "Event channel port %d not in use.\n", port); - rc = -EINVAL; + clear_evtchn(port); + set_bit(port, &pend_nrm[0]); + port_subtype = PORT_NORMAL; } else { - rx_fns[port] = NULL; - rc = 0; + clear_evtchn_exception(port); + set_bit(port, &pend_exc[0]); + port_subtype = PORT_EXCEPTION; } - 
spin_unlock_irqrestore(&lock, flags); - - return rc; -} - -/* - * NB. Clearing port can race a notification from remote end. Caller must - * therefore recheck notification status on return to avoid missing events. - */ -void evtchn_clear_port(unsigned int port) -{ - unsigned int p = port & PORTIDX_MASK; - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - - if ( unlikely(port & PORT_DISCONNECT) ) - { - clear_bit(p, &disc_outstanding[0]); - clear_bit(p, &HYPERVISOR_shared_info->event_channel_disc[0]); - } - else - { - clear_bit(p, &pend_outstanding[0]); - clear_bit(p, &HYPERVISOR_shared_info->event_channel_pend[0]); - } - - spin_unlock_irqrestore(&lock, flags); -} - -static inline void process_bitmask(u32 *sel, - u32 *mask, - u32 *outstanding, - unsigned int port_subtype) -{ - unsigned long l1, l2; - unsigned int l1_idx, l2_idx, port; - - l1 = xchg(sel, 0); - while ( (l1_idx = ffs(l1)) != 0 ) + if ( ring != NULL ) { - l1_idx--; - l1 &= ~(1 << l1_idx); - - l2 = mask[l1_idx] & ~outstanding[l1_idx]; - outstanding[l1_idx] |= l2; - while ( (l2_idx = ffs(l2)) != 0 ) + if ( (ring_prod - ring_cons) < RING_SIZE ) { - l2_idx--; - l2 &= ~(1 << l2_idx); - - port = (l1_idx * 32) + l2_idx; - if ( rx_fns[port] != NULL ) - { - (*rx_fns[port])(port | port_subtype); - } - else if ( ring != NULL ) + ring[RING_MASK(ring_prod)] = (u16)port | port_subtype; + if ( ring_cons == ring_prod++ ) { - if ( (ring_prod - ring_cons) < RING_SIZE ) - { - ring[RING_MASK(ring_prod)] = (u16)(port | port_subtype); - if ( ring_cons == ring_prod++ ) - { - wake_up_interruptible(&evtchn_wait); - kill_fasync(&evtchn_async_queue, SIGIO, POLL_IN); - } - } - else - { - ring_overflow = 1; - } + wake_up_interruptible(&evtchn_wait); + kill_fasync(&evtchn_async_queue, SIGIO, POLL_IN); } } + else + { + ring_overflow = 1; + } } -} -static void evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - shared_info_t *si = HYPERVISOR_shared_info; - unsigned long flags; - - spin_lock_irqsave(&lock, 
flags); - - process_bitmask(&si->event_channel_pend_sel, - &si->event_channel_pend[0], - &pend_outstanding[0], - PORT_NORMAL); - - process_bitmask(&si->event_channel_disc_sel, - &si->event_channel_disc[0], - &disc_outstanding[0], - PORT_DISCONNECT); - - spin_unlock_irqrestore(&lock, flags); + spin_unlock(&lock); } static void __evtchn_reset_buffer_ring(void) @@ -194,20 +100,18 @@ static void __evtchn_reset_buffer_ring(void) for ( i = 0; i < 32; i++ ) { - m = pend_outstanding[i]; + m = pend_exc[i]; while ( (j = ffs(m)) != 0 ) { m &= ~(1 << --j); - if ( rx_fns[(i * 32) + j] == NULL ) - ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_NORMAL); + ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_EXCEPTION); } - m = disc_outstanding[i]; + m = pend_nrm[i]; while ( (j = ffs(m)) != 0 ) { m &= ~(1 << --j); - if ( rx_fns[(i * 32) + j] == NULL ) - ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_DISCONNECT); + ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_NORMAL); } } } @@ -326,8 +230,14 @@ static ssize_t evtchn_write(struct file *file, const char *buf, goto out; } + spin_lock_irq(&lock); for ( i = 0; i < (count/2); i++ ) - evtchn_clear_port(kbuf[i]); + { + clear_bit(kbuf[i]&PORTIDX_MASK, + (kbuf[i]&PORT_EXCEPTION) ? &pend_exc[0] : &pend_nrm[0]); + unmask_evtchn(kbuf[i]&PORTIDX_MASK); + } + spin_unlock_irq(&lock); rc = count; @@ -455,25 +365,13 @@ static int __init init_module(void) /* (DEVFS) automatically destroy the symlink with its destination. */ devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); - err = request_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), - evtchn_interrupt, 0, "evtchn", NULL); - if ( err != 0 ) - { - printk(KERN_ALERT "Could not allocate evtchn receive interrupt\n"); - return err; - } - - /* Kickstart servicing of notifications. 
*/ - evtchn_interrupt(0, NULL, NULL); - - printk("Event-channel driver installed.\n"); + printk("Event-channel device installed.\n"); return 0; } static void cleanup_module(void) { - free_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), NULL); misc_deregister(&evtchn_miscdev); } diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c index c5d25442e2..ef54ff7fe9 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c @@ -32,6 +32,9 @@ static void network_tx_buf_gc(struct net_device *dev); static void network_alloc_rx_buffers(struct net_device *dev); static void cleanup_module(void); +/* Dynamically-mapped IRQs. */ +static int network_irq, debug_irq; + static struct list_head dev_list; struct net_private @@ -545,7 +548,10 @@ static int __init init_module(void) if ( start_info.flags & SIF_INITDOMAIN ) (void)register_inetaddr_notifier(¬ifier_inetdev); - err = request_irq(HYPEREVENT_IRQ(_EVENT_NET), network_interrupt, + network_irq = bind_virq_to_irq(VIRQ_NET); + debug_irq = bind_virq_to_irq(VIRQ_DEBUG); + + err = request_irq(network_irq, network_interrupt, SA_SAMPLE_RANDOM, "network", NULL); if ( err ) { @@ -553,7 +559,7 @@ static int __init init_module(void) goto fail; } - err = request_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), dbg_network_int, + err = request_irq(debug_irq, dbg_network_int, SA_SHIRQ, "net_dbg", &dbg_network_int); if ( err ) printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); @@ -624,6 +630,12 @@ static void cleanup_module(void) if ( start_info.flags & SIF_INITDOMAIN ) (void)unregister_inetaddr_notifier(¬ifier_inetdev); + + free_irq(network_irq, NULL); + free_irq(debug_irq, NULL); + + unbind_virq_from_irq(VIRQ_NET); + unbind_virq_from_irq(VIRQ_DEBUG); } diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c index 
91f3c5c17e..d1a4b21ad0 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c @@ -32,6 +32,9 @@ static void network_tx_buf_gc(struct net_device *dev); static void network_alloc_rx_buffers(struct net_device *dev); static void cleanup_module(void); +/* Dynamically-mapped IRQs. */ +static int network_irq, debug_irq; + static struct list_head dev_list; struct net_private @@ -468,7 +471,10 @@ static int __init init_module(void) INIT_LIST_HEAD(&dev_list); - err = request_irq(HYPEREVENT_IRQ(_EVENT_NET), network_interrupt, + network_irq = bind_virq_to_irq(VIRQ_NET); + debug_irq = bind_virq_to_irq(VIRQ_DEBUG); + + err = request_irq(network_irq, network_interrupt, SA_SAMPLE_RANDOM, "network", NULL); if ( err ) { @@ -476,7 +482,7 @@ static int __init init_module(void) goto fail; } - err = request_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), dbg_network_int, + err = request_irq(debug_irq, dbg_network_int, SA_SHIRQ, "net_dbg", &dbg_network_int); if ( err ) printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); @@ -546,6 +552,12 @@ static void cleanup_module(void) unregister_netdev(dev); kfree(dev); } + + free_irq(network_irq, NULL); + free_irq(debug_irq, NULL); + + unbind_virq_from_irq(VIRQ_NET); + unbind_virq_from_irq(VIRQ_DEBUG); } diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile b/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile index 10fc43e742..9f86b6d297 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile @@ -10,7 +10,7 @@ export-objs := i386_ksyms.o obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ - i386_ksyms.o i387.o hypervisor.o physirq.o pci-dma.o + i386_ksyms.o i387.o evtchn.o physirq.o pci-dma.o ifdef CONFIG_PCI obj-y += pci-i386.o pci-pc.o pci-irq.o diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S 
b/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S index 9b1a77d4c1..5f8dcae2fe 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S @@ -68,9 +68,7 @@ IF_MASK = 0x00000200 DF_MASK = 0x00000400 NT_MASK = 0x00004000 -/* - * these are offsets into the task-struct. - */ +/* Offsets into task_struct. */ state = 0 flags = 4 sigpending = 8 @@ -80,6 +78,10 @@ need_resched = 20 tsk_ptrace = 24 processor = 52 +/* Offsets into shared_info_t. */ +#define evtchn_upcall_pending /* 0 */ +#define evtchn_upcall_mask 4 + ENOSYS = 38 @@ -208,14 +210,14 @@ ENTRY(system_call) movl %eax,EAX(%esp) # save the return value ENTRY(ret_from_sys_call) movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # make tests atomic + btsl $0,evtchn_upcall_mask(%esi) # make tests atomic ret_syscall_tests: cmpl $0,need_resched(%ebx) jne reschedule cmpl $0,sigpending(%ebx) je safesti # ensure need_resched updates are seen signal_return: - btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks + btrl $0,evtchn_upcall_mask(%esi) # reenable event callbacks movl %esp,%eax xorl %edx,%edx call SYMBOL_NAME(do_signal) @@ -252,7 +254,7 @@ ret_from_exception: ALIGN reschedule: - btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks + btrl $0,evtchn_upcall_mask(%esi) # reenable event callbacks call SYMBOL_NAME(schedule) # test jmp ret_from_sys_call @@ -309,18 +311,18 @@ ENTRY(hypervisor_callback) cmpl $ecrit,%eax jb critical_region_fixup 11: push %esp - call do_hypervisor_callback + call evtchn_do_upcall add $4,%esp movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi movb CS(%esp),%cl test $2,%cl # slow return to ring 2 or 3 jne ret_syscall_tests -safesti:btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks +safesti:btrl $0,evtchn_upcall_mask(%esi) # reenable event callbacks scrit: /**** START OF CRITICAL REGION ****/ - cmpl $0,(%esi) - jne 14f # process more events if necessary... 
+ testb $1,evtchn_upcall_pending(%esi) + jnz 14f # process more events if necessary... RESTORE_ALL -14: btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) +14: btsl $0,evtchn_upcall_mask(%esi) jmp 11b ecrit: /**** END OF CRITICAL REGION ****/ # [How we do the fixup]. We want to merge the current stack frame with the @@ -349,8 +351,8 @@ critical_region_fixup: jmp 11b critical_fixup_table: - .byte 0x00,0x00,0x00 # cmpl $0,(%esi) - .byte 0x00,0x00 # jne 14f + .byte 0x00,0x00,0x00 # testb $1,(%esi) + .byte 0x00,0x00 # jnz 14f .byte 0x00 # pop %ebx .byte 0x04 # pop %ecx .byte 0x08 # pop %edx @@ -362,7 +364,7 @@ critical_fixup_table: .byte 0x20 # pop %es .byte 0x24,0x24,0x24 # add $4,%esp .byte 0x28 # iret - .byte 0x00,0x00,0x00,0x00,0x00 # btrl $31,4(%esi) + .byte 0x00,0x00,0x00,0x00,0x00 # btsl $0,4(%esi) .byte 0x00,0x00 # jmp 11b # Hypervisor uses this for application faults while it executes. diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c new file mode 100644 index 0000000000..d312bf0d4f --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c @@ -0,0 +1,317 @@ +/****************************************************************************** + * evtchn.c + * + * Communication via Xen event channels. + * + * Copyright (c) 2002-2004, K A Fraser + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Dynamic IRQ <-> event-channel mappings. */ +static int evtchn_to_dynirq[1024]; +static int dynirq_to_evtchn[NR_IRQS]; + +/* Dynamic IRQ <-> VIRQ mapping. */ +static int virq_to_dynirq[NR_VIRQS]; + +/* + * Reference counts for bindings to dynamic IRQs. + * NB. This array is referenced with respect to DYNIRQ_BASE! + */ +static int dynirq_bindcount[NR_DYNIRQS]; +static spinlock_t dynirq_lock; + +/* Upcall to generic IRQ layer. 
*/ +extern asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs); + +static void evtchn_handle_normal(shared_info_t *s, struct pt_regs *regs) +{ + unsigned long l1, l2; + unsigned int l1i, l2i, port; + int dynirq; + + l1 = xchg(&s->evtchn_pending_sel, 0); + while ( (l1i = ffs(l1)) != 0 ) + { + l1i--; + l1 &= ~(1 << l1i); + + l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i]; + while ( (l2i = ffs(l2)) != 0 ) + { + l2i--; + l2 &= ~(1 << l2i); + + port = (l1i << 5) + l2i; + if ( (dynirq = evtchn_to_dynirq[port]) != -1 ) + do_IRQ(dynirq + DYNIRQ_BASE, regs); + else + evtchn_device_upcall(port, 0); + } + } +} + +static void evtchn_handle_exceptions(shared_info_t *s, struct pt_regs *regs) +{ + unsigned long l1, l2; + unsigned int l1i, l2i, port; + int dynirq; + + l1 = xchg(&s->evtchn_exception_sel, 0); + while ( (l1i = ffs(l1)) != 0 ) + { + l1i--; + l1 &= ~(1 << l1i); + + l2 = s->evtchn_exception[l1i] & ~s->evtchn_mask[l1i]; + while ( (l2i = ffs(l2)) != 0 ) + { + l2i--; + l2 &= ~(1 << l2i); + + port = (l1i << 5) + l2i; + if ( (dynirq = evtchn_to_dynirq[port]) != -1 ) + { + printk(KERN_ALERT "Error on IRQ line %d!\n", + dynirq + DYNIRQ_BASE); + clear_bit(port, &s->evtchn_exception[0]); + } + else + evtchn_device_upcall(port, 1); + } + } +} + +void evtchn_do_upcall(struct pt_regs *regs) +{ + unsigned long flags; + shared_info_t *s = HYPERVISOR_shared_info; + + local_irq_save(flags); + + while ( test_and_clear_bit(0, &s->evtchn_upcall_pending) ) + { + if ( s->evtchn_pending_sel != 0 ) + evtchn_handle_normal(s, regs); + if ( s->evtchn_exception_sel != 0 ) + evtchn_handle_exceptions(s, regs); + } + + local_irq_restore(flags); +} + + +static int find_unbound_dynirq(void) +{ + int i; + + for ( i = 0; i < NR_DYNIRQS; i++ ) + if ( dynirq_bindcount[i] == 0 ) + break; + + if ( i == NR_DYNIRQS ) + BUG(); + + return i; +} + +int bind_virq_to_irq(int virq) +{ + evtchn_op_t op; + int evtchn, dynirq; + + spin_lock(&dynirq_lock); + + if ( (dynirq = virq_to_dynirq[virq]) == -1 ) 
+ { + op.cmd = EVTCHNOP_bind_virq; + op.u.bind_virq.virq = virq; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + BUG(); + evtchn = op.u.bind_virq.port; + + dynirq = find_unbound_dynirq(); + evtchn_to_dynirq[evtchn] = dynirq; + dynirq_to_evtchn[dynirq] = evtchn; + + virq_to_dynirq[virq] = dynirq; + } + + dynirq_bindcount[dynirq]++; + + spin_unlock(&dynirq_lock); + + return dynirq + DYNIRQ_BASE; +} + +void unbind_virq_from_irq(int virq) +{ + evtchn_op_t op; + int dynirq = virq_to_dynirq[virq]; + int evtchn = dynirq_to_evtchn[dynirq]; + + spin_lock(&dynirq_lock); + + if ( --dynirq_bindcount[dynirq] == 0 ) + { + op.cmd = EVTCHNOP_close; + op.u.close.dom = DOMID_SELF; + op.u.close.port = evtchn; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + BUG(); + + evtchn_to_dynirq[evtchn] = -1; + dynirq_to_evtchn[dynirq] = -1; + virq_to_dynirq[virq] = -1; + } + + spin_unlock(&dynirq_lock); +} + +int bind_evtchn_to_irq(int evtchn) +{ + int dynirq; + + spin_lock(&dynirq_lock); + + if ( (dynirq = evtchn_to_dynirq[evtchn]) == -1 ) + { + dynirq = find_unbound_dynirq(); + evtchn_to_dynirq[evtchn] = dynirq; + dynirq_to_evtchn[dynirq] = evtchn; + } + + dynirq_bindcount[dynirq]++; + + spin_unlock(&dynirq_lock); + + return dynirq + DYNIRQ_BASE; +} + +void unbind_evtchn_from_irq(int evtchn) +{ + int dynirq = evtchn_to_dynirq[evtchn]; + + spin_lock(&dynirq_lock); + + if ( --dynirq_bindcount[dynirq] == 0 ) + { + evtchn_to_dynirq[evtchn] = -1; + dynirq_to_evtchn[dynirq] = -1; + } + + spin_unlock(&dynirq_lock); +} + + +/* + * Interface to generic handling in irq.c + */ + +static unsigned int startup_dynirq(unsigned int irq) +{ + int dynirq = irq - DYNIRQ_BASE; + unmask_evtchn(dynirq_to_evtchn[dynirq]); + return 0; +} + +static void shutdown_dynirq(unsigned int irq) +{ + int dynirq = irq - DYNIRQ_BASE; + mask_evtchn(dynirq_to_evtchn[dynirq]); +} + +static void enable_dynirq(unsigned int irq) +{ + int dynirq = irq - DYNIRQ_BASE; + unmask_evtchn(dynirq_to_evtchn[dynirq]); +} + +static void 
disable_dynirq(unsigned int irq) +{ + int dynirq = irq - DYNIRQ_BASE; + mask_evtchn(dynirq_to_evtchn[dynirq]); +} + +static void ack_dynirq(unsigned int irq) +{ + int dynirq = irq - DYNIRQ_BASE; + mask_evtchn(dynirq_to_evtchn[dynirq]); + clear_evtchn(dynirq_to_evtchn[dynirq]); +} + +static void end_dynirq(unsigned int irq) +{ + int dynirq = irq - DYNIRQ_BASE; + if ( !(irq_desc[irq].status & IRQ_DISABLED) ) + unmask_evtchn(dynirq_to_evtchn[dynirq]); +} + +static struct hw_interrupt_type dynirq_type = { + "Dynamic-irq", + startup_dynirq, + shutdown_dynirq, + enable_dynirq, + disable_dynirq, + ack_dynirq, + end_dynirq, + NULL +}; + +static void error_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + printk(KERN_ALERT "unexpected VIRQ_ERROR trap to vector %d\n", irq); +} + +static struct irqaction error_action = { + error_interrupt, + SA_INTERRUPT, + 0, + "error", + NULL, + NULL +}; + +void __init init_IRQ(void) +{ + int i; + + for ( i = 0; i < NR_VIRQS; i++ ) + virq_to_dynirq[i] = -1; + + for ( i = 0; i < 1024; i++ ) + evtchn_to_dynirq[i] = -1; + + for ( i = 0; i < NR_DYNIRQS; i++ ) + { + dynirq_to_evtchn[i] = -1; + dynirq_bindcount[i] = 0; + } + + spin_lock_init(&dynirq_lock); + + for ( i = 0; i < NR_DYNIRQS; i++ ) + { + irq_desc[i + DYNIRQ_BASE].status = IRQ_DISABLED; + irq_desc[i + DYNIRQ_BASE].action = 0; + irq_desc[i + DYNIRQ_BASE].depth = 1; + irq_desc[i + DYNIRQ_BASE].handler = &dynirq_type; + } + + (void)setup_irq(bind_virq_to_irq(VIRQ_ERROR), &error_action); + +#ifdef CONFIG_PCI + /* Also initialise the physical IRQ handlers. 
*/ + physirq_init(); +#endif +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c deleted file mode 100644 index 7c6aca05c5..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c +++ /dev/null @@ -1,170 +0,0 @@ -/****************************************************************************** - * hypervisor.c - * - * Communication to/from hypervisor. - * - * Copyright (c) 2002, K A Fraser - */ - -#include -#include -#include -#include -#include -#include -#include - -multicall_entry_t multicall_list[8]; -int nr_multicall_ents = 0; - -static unsigned long event_mask = 0; - -asmlinkage unsigned int do_physirq(int irq, struct pt_regs *regs) -{ - int cpu = smp_processor_id(); - unsigned long irqs; - shared_info_t *shared = HYPERVISOR_shared_info; - - /* do this manually */ - kstat.irqs[cpu][irq]++; - ack_hypervisor_event(irq); - - barrier(); - irqs = xchg(&shared->physirq_pend, 0); - - __asm__ __volatile__ ( - " push %1 ;" - " sub $4,%%esp ;" - " jmp 3f ;" - "1: btrl %%eax,%0 ;" /* clear bit */ - " mov %%eax,(%%esp) ;" - " call do_IRQ ;" /* do_IRQ(event) */ - "3: bsfl %0,%%eax ;" /* %eax == bit # */ - " jnz 1b ;" - " add $8,%%esp ;" - /* we use %ebx because it is callee-saved */ - : : "b" (irqs), "r" (regs) - /* clobbered by callback function calls */ - : "eax", "ecx", "edx", "memory" ); - - /* do this manually */ - end_hypervisor_event(irq); - - return 0; -} - -void do_hypervisor_callback(struct pt_regs *regs) -{ - unsigned long events, flags; - shared_info_t *shared = HYPERVISOR_shared_info; - - do { - /* Specialised local_irq_save(). 
*/ - flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, - &shared->events_mask); - barrier(); - - events = xchg(&shared->events, 0); - events &= event_mask; - - if ( (events & EVENT_PHYSIRQ) != 0 ) - { - do_physirq(_EVENT_PHYSIRQ, regs); - events &= ~EVENT_PHYSIRQ; - } - - __asm__ __volatile__ ( - " push %1 ;" - " sub $4,%%esp ;" - " jmp 2f ;" - "1: btrl %%eax,%0 ;" /* clear bit */ - " add %2,%%eax ;" - " mov %%eax,(%%esp) ;" - " call do_IRQ ;" /* do_IRQ(event) */ - "2: bsfl %0,%%eax ;" /* %eax == bit # */ - " jnz 1b ;" - " add $8,%%esp ;" - /* we use %ebx because it is callee-saved */ - : : "b" (events), "r" (regs), "i" (HYPEREVENT_IRQ_BASE) - /* clobbered by callback function calls */ - : "eax", "ecx", "edx", "memory" ); - - /* Specialised local_irq_restore(). */ - if ( flags ) set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask); - barrier(); - } - while ( shared->events ); -} - -/* - * Define interface to generic handling in irq.c - */ - -static void shutdown_hypervisor_event(unsigned int irq) -{ - clear_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); - clear_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); -} - -static void enable_hypervisor_event(unsigned int irq) -{ - set_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); - set_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); - if ( test_bit(EVENTS_MASTER_ENABLE_BIT, - &HYPERVISOR_shared_info->events_mask) ) - do_hypervisor_callback(NULL); -} - -static void disable_hypervisor_event(unsigned int irq) -{ - clear_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); - clear_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); -} - -static void ack_hypervisor_event(unsigned int irq) -{ - int ev = HYPEREVENT_FROM_IRQ(irq); - if ( !(event_mask & (1<events_mask); -} - -static unsigned int startup_hypervisor_event(unsigned int irq) -{ - enable_hypervisor_event(irq); - return 0; -} - -static void end_hypervisor_event(unsigned int irq) -{ -} - -static struct hw_interrupt_type 
hypervisor_irq_type = { - "Hypervisor-event", - startup_hypervisor_event, - shutdown_hypervisor_event, - enable_hypervisor_event, - disable_hypervisor_event, - ack_hypervisor_event, - end_hypervisor_event, - NULL -}; - -void __init init_IRQ(void) -{ - int i; - - for ( i = 0; i < NR_HYPEREVENT_IRQS; i++ ) - { - irq_desc[i + HYPEREVENT_IRQ_BASE].status = IRQ_DISABLED; - irq_desc[i + HYPEREVENT_IRQ_BASE].action = 0; - irq_desc[i + HYPEREVENT_IRQ_BASE].depth = 1; - irq_desc[i + HYPEREVENT_IRQ_BASE].handler = &hypervisor_irq_type; - } - - /* Also initialise the physical IRQ handlers. */ - physirq_init(); -} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c index 267516500f..034b39d859 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c @@ -171,5 +171,5 @@ EXPORT_SYMBOL(xquad_portio); EXPORT_SYMBOL(create_xen_proc_entry); EXPORT_SYMBOL(remove_xen_proc_entry); -EXPORT_SYMBOL(do_hypervisor_callback); +EXPORT_SYMBOL(evtchn_do_upcall); EXPORT_SYMBOL(HYPERVISOR_shared_info); diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c index 1f7a8e4fee..7c04c9d9dc 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c @@ -13,6 +13,7 @@ * by the virq irq type. 
*/ +#ifdef CONFIG_PCI #include #include @@ -46,7 +47,7 @@ static unsigned int startup_physirq_event(unsigned int irq) { printk("startup_physirq_event %d: setup event handler\n", irq); /* set up a event handler to demux virtualised physical interrupts */ - err = request_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ), physirq_interrupt, + err = request_irq(IRQ_FROM_XEN_VIRQ(VIRQ_PHYSIRQ), physirq_interrupt, SA_SAMPLE_RANDOM, "physirq", NULL); if ( err ) { @@ -106,13 +107,13 @@ static void shutdown_physirq_event(unsigned int irq) static void enable_physirq_event(unsigned int irq) { /* XXX just enable all phys interrupts for now */ - enable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); + enable_irq(IRQ_FROM_XEN_VIRQ(VIRQ_PHYSIRQ)); } static void disable_physirq_event(unsigned int irq) { /* XXX just disable all phys interrupts for now */ - disable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); + disable_irq(IRQ_FROM_XEN_VIRQ(VIRQ_PHYSIRQ)); } static void ack_physirq_event(unsigned int irq) @@ -170,3 +171,5 @@ void __init physirq_init(void) irq_desc[i + PHYS_IRQ_BASE].handler = &physirq_irq_type; } } + +#endif diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c index c593bddec7..ad4a2fea4a 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c @@ -58,6 +58,9 @@ shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; unsigned long *phys_to_machine_mapping; +multicall_entry_t multicall_list[8]; +int nr_multicall_ents = 0; + /* * Machine setup.. */ @@ -860,7 +863,8 @@ void __init identify_cpu(struct cpuinfo_x86 *c) break; default: - printk("Unsupported CPU vendor (%d) -- please report!\n"); + printk("Unsupported CPU vendor (%d) -- please report!\n", + c->x86_vendor); } printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", @@ -1116,7 +1120,10 @@ void __init cpu_init (void) * Time-to-die callback handling. 
*/ -static void die_irq(int irq, void *unused, struct pt_regs *regs) +/* Dynamically-mapped IRQ. */ +static int die_irq; + +static void die_interrupt(int irq, void *unused, struct pt_regs *regs) { extern void ctrl_alt_del(void); ctrl_alt_del(); @@ -1124,7 +1131,8 @@ static void die_irq(int irq, void *unused, struct pt_regs *regs) static int __init setup_die_event(void) { - (void)request_irq(HYPEREVENT_IRQ(_EVENT_DIE), die_irq, 0, "die", NULL); + die_irq = bind_virq_to_irq(VIRQ_DIE); + (void)request_irq(die_irq, die_interrupt, 0, "die", NULL); return 0; } @@ -1241,7 +1249,10 @@ static void stop_task(void *unused) static struct tq_struct stop_tq; -static void stop_irq(int irq, void *unused, struct pt_regs *regs) +/* Dynamically-mapped IRQ. */ +static int stop_irq; + +static void stop_interrupt(int irq, void *unused, struct pt_regs *regs) { stop_tq.routine = stop_task; schedule_task(&stop_tq); @@ -1249,7 +1260,8 @@ static void stop_irq(int irq, void *unused, struct pt_regs *regs) static int __init setup_stop_event(void) { - (void)request_irq(HYPEREVENT_IRQ(_EVENT_STOP), stop_irq, 0, "stop", NULL); + stop_irq = bind_virq_to_irq(VIRQ_STOP); + (void)request_irq(stop_irq, stop_interrupt, 0, "stop", NULL); return 0; } diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/time.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/time.c index bff0f26b4f..663fdf2bad 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/time.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/time.c @@ -115,6 +115,8 @@ static u64 processed_system_time; /* System time (ns) at last processing. */ } \ } while ( 0 ) +/* Dynamically-mapped IRQs. */ +static int time_irq, debug_irq; /* Does this guest OS track Xen time, or set its wall clock independently? 
*/ static int independent_wallclock = 0; @@ -623,13 +625,13 @@ void __init time_init(void) __get_time_values_from_xen(); processed_system_time = shadow_system_time; - (void)setup_irq(HYPEREVENT_IRQ(_EVENT_TIMER), &irq_timer); + time_irq = bind_virq_to_irq(VIRQ_TIMER); + debug_irq = bind_virq_to_irq(VIRQ_DEBUG); - (void)setup_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), &dbg_time); + (void)setup_irq(time_irq, &irq_timer); + (void)setup_irq(debug_irq, &dbg_time); rdtscll(alarm); - - clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); } diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h b/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h index 88c278d86e..2aea319dd5 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h @@ -1,7 +1,8 @@ /****************************************************************************** * evtchn.h * - * Driver for receiving and demuxing event-channel signals. + * Communication via Xen event channels. + * Also definitions for the device that demuxes notifications to userspace. * * Copyright (c) 2004, K A Fraser */ @@ -9,10 +10,81 @@ #ifndef __ASM_EVTCHN_H__ #define __ASM_EVTCHN_H__ -typedef void (*evtchn_receiver_t)(unsigned int); -#define PORT_NORMAL 0x0000 -#define PORT_DISCONNECT 0x8000 -#define PORTIDX_MASK 0x7fff +#include +#include +#include + +/* + * LOW-LEVEL DEFINITIONS + */ + +/* Entry point for notifications into Linux subsystems. */ +void evtchn_do_upcall(struct pt_regs *regs); + +/* Entry point for notifications into the userland character device. */ +void evtchn_device_upcall(int port, int exception); + +static inline void mask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + set_bit(port, &s->evtchn_mask[0]); +} + +/* + * I haven't thought too much about the synchronisation in here against + * other CPUs, but all the bit-update operations are reorder barriers on + * x86 so reordering concerns aren't a problem for now. 
Some mb() calls + * would be required on weaker architectures I think. -- KAF (24/3/2004) + */ +static inline void unmask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + int need_upcall = 0; + + clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of 'hw_resend_irq'. Just like + * a real IO-APIC we 'lose the interrupt edge' if the channel is masked. + */ + + /* Asserted a standard notification? */ + if ( test_bit (port, &s->evtchn_pending[0]) && + !test_and_set_bit(port>>5, &s->evtchn_pending_sel) ) + need_upcall = 1; + + /* Asserted an exceptional notification? */ + if ( test_bit (port, &s->evtchn_exception[0]) && + !test_and_set_bit(port>>5, &s->evtchn_exception_sel) ) + need_upcall = 1; + + /* If asserted either type of notification, check the master flags. */ + if ( need_upcall && + !test_and_set_bit(0, &s->evtchn_upcall_pending) && + !test_bit (0, &s->evtchn_upcall_mask) ) + evtchn_do_upcall(NULL); +} + +static inline void clear_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + clear_bit(port, &s->evtchn_pending[0]); +} + +static inline void clear_evtchn_exception(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + clear_bit(port, &s->evtchn_exception[0]); +} + + +/* + * CHARACTER-DEVICE DEFINITIONS + */ + +#define PORT_NORMAL 0x0000 +#define PORT_EXCEPTION 0x8000 +#define PORTIDX_MASK 0x7fff /* /dev/xen/evtchn resides at device number major=10, minor=200 */ #define EVTCHN_MINOR 200 @@ -21,9 +93,4 @@ typedef void (*evtchn_receiver_t)(unsigned int); /* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. 
 */ #define EVTCHN_RESET _IO('E', 1) -int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn); -int evtchn_free_port(unsigned int port); -void evtchn_clear_port(unsigned int port); - - #endif /* __ASM_EVTCHN_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h b/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h index 34d0974471..73149d5426 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h @@ -25,10 +25,6 @@ union start_info_union extern union start_info_union start_info_union; #define start_info (start_info_union.start_info) -/* arch/xen/kernel/hypervisor.c */ -void do_hypervisor_callback(struct pt_regs *regs); - - /* arch/xen/mm/hypervisor.c */ /* * NB. ptr values should be PHYSICAL, not MACHINE. 'vals' should be already diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/irq.h b/xenolinux-2.4.25-sparse/include/asm-xen/irq.h index 917a05334d..6d175ce6ab 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/irq.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/irq.h @@ -14,19 +14,32 @@ #include #include -#define NR_IRQS 256 +/* + * The flat IRQ space is divided into two regions: + * 1. A one-to-one mapping of real physical IRQs. This space is only used + * if we have physical device-access privilege. This region is at the + * start of the IRQ space so that existing device drivers do not need + * to be modified to translate physical IRQ numbers into our IRQ space. + * 2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These + * are bound using the provided bind/unbind functions. 
+ */ -#define PHYS_IRQ_BASE 0 -#define NR_PHYS_IRQS 128 +#define PIRQ_BASE 0 +#define NR_PIRQS 128 -#define HYPEREVENT_IRQ_BASE 128 -#define NR_HYPEREVENT_IRQS 128 +#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) +#define NR_DYNIRQS 128 -#define HYPEREVENT_IRQ(_ev) ((_ev) + HYPEREVENT_IRQ_BASE) -#define HYPEREVENT_FROM_IRQ(_irq) ((_irq) - HYPEREVENT_IRQ_BASE) +#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) extern void physirq_init(void); +/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */ +extern int bind_virq_to_irq(int virq); +extern void unbind_virq_from_irq(int virq); +extern int bind_evtchn_to_irq(int evtchn); +extern void unbind_evtchn_from_irq(int evtchn); + #define irq_cannonicalize(_irq) (_irq) extern void disable_irq(unsigned int); diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h b/xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h index 79d72da929..9066a3bada 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h @@ -58,8 +58,11 @@ static inline int xen_kbd_controller_present () /* resource allocation */ #define kbd_request_region() \ do { } while (0) -#define kbd_request_irq(handler) \ - request_irq(HYPEREVENT_IRQ(_EVENT_PS2), handler, 0, "ps/2", NULL) +#define kbd_request_irq(handler) \ + do { \ + int irq = bind_virq_to_irq(VIRQ_PS2); \ + request_irq(irq, handler, 0, "ps/2", NULL); \ + } while ( 0 ) /* could implement these with command to xen to filter mouse stuff... 
 */ #define aux_request_irq(hand, dev_id) 0 diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/system.h b/xenolinux-2.4.25-sparse/include/asm-xen/system.h index 3b59252ca3..2c1194a781 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/system.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/system.h @@ -7,6 +7,7 @@ #include #include #include /* for LOCK_PREFIX */ +#include #ifdef __KERNEL__ @@ -319,29 +320,38 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, #define set_wmb(var, value) do { var = value; wmb(); } while (0) +/* + * NB. All the following routines are SMP-safe on x86, even where they look + * possibly racy. For example, we must ensure that we clear the mask bit and + * /then/ check the pending bit. But this will happen because the bit-update + * operations are ordering barriers. + * + * For this reason also, many uses of 'barrier' here are rather anal. But + * they do no harm. + */ #define __cli() \ do { \ - clear_bit(EVENTS_MASTER_ENABLE_BIT, &HYPERVISOR_shared_info->events_mask);\ + set_bit(0, &HYPERVISOR_shared_info->evtchn_upcall_mask); \ barrier(); \ } while (0) #define __sti() \ do { \ shared_info_t *_shared = HYPERVISOR_shared_info; \ - set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask); \ + clear_bit(0, &_shared->evtchn_upcall_mask); \ barrier(); \ - if ( unlikely(_shared->events) ) do_hypervisor_callback(NULL); \ + if ( unlikely(test_bit(0, &_shared->evtchn_upcall_pending)) ) \ + evtchn_do_upcall(NULL); \ } while (0) #define __save_flags(x) \ do { \ - (x) = test_bit(EVENTS_MASTER_ENABLE_BIT, \ - &HYPERVISOR_shared_info->events_mask); \ + (x) = test_bit(0, &HYPERVISOR_shared_info->evtchn_upcall_mask); \ barrier(); \ } while (0) -#define __restore_flags(x) do { if (x) __sti(); } while (0) +#define __restore_flags(x) do { if (x) __cli(); else __sti(); } while (0) #define safe_halt() ((void)0) @@ -350,8 +360,7 @@ do { \ #define local_irq_save(x) \ do { \ - (x) = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, \ - 
&HYPERVISOR_shared_info->events_mask); \ + (x) = test_and_set_bit(0, &HYPERVISOR_shared_info->evtchn_upcall_mask); \ barrier(); \ } while (0) #define local_irq_restore(x) __restore_flags(x) -- cgit v1.2.3