-rw-r--r--  tools/xc/lib/xc_linux_build.c                     |  4
-rw-r--r--  tools/xc/lib/xc_netbsd_build.c                    |  6
-rw-r--r--  xen/arch/i386/entry.S                             | 13
-rw-r--r--  xen/arch/i386/traps.c                             | 10
-rw-r--r--  xen/common/domain.c                               |  4
-rw-r--r--  xen/common/keyhandler.c                           |  5
-rw-r--r--  xen/common/schedule.c                             |  2
-rw-r--r--  xen/include/hypervisor-ifs/hypervisor-if.h        | 41
-rw-r--r--  xen/include/xen/event.h                           | 28
-rw-r--r--  xen/include/xen/sched.h                           |  8
-rw-r--r--  xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S   | 22
-rw-r--r--  xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c  |  4
-rw-r--r--  xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h  | 10
-rw-r--r--  xenolinux-2.4.25-sparse/include/asm-xen/system.h  | 39
14 files changed, 119 insertions(+), 77 deletions(-)
diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c
index 92fff33a6a..3acfc2173d 100644
--- a/tools/xc/lib/xc_linux_build.c
+++ b/tools/xc/lib/xc_linux_build.c
@@ -284,7 +284,9 @@ static int setup_guestos(int xc_handle,
     /* shared_info page starts its life empty. */
     shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
     memset(shared_info, 0, PAGE_SIZE);
-    shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
     unmap_pfn(pm_handle, shared_info);
 
     /* Send the page update requests down to the hypervisor. */
diff --git a/tools/xc/lib/xc_netbsd_build.c b/tools/xc/lib/xc_netbsd_build.c
index ae7ebecc6e..a53018297c 100644
--- a/tools/xc/lib/xc_netbsd_build.c
+++ b/tools/xc/lib/xc_netbsd_build.c
@@ -75,7 +75,7 @@ static int setup_guestos(int xc_handle,
     shared_info_t *shared_info;
     unsigned long ksize;
     mmu_t *mmu = NULL;
-    int pm_handle;
+    int pm_handle, i;
 
     memset(builddomain, 0, sizeof(*builddomain));
 
@@ -183,7 +183,9 @@ static int setup_guestos(int xc_handle,
     /* shared_info page starts its life empty. */
     shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
     memset(shared_info, 0, PAGE_SIZE);
-    shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
     unmap_pfn(pm_handle, shared_info);
 
     /* Send the page update requests down to the hypervisor. */
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
index eab11e5ad8..0141991704 100644
--- a/xen/arch/i386/entry.S
+++ b/xen/arch/i386/entry.S
@@ -112,8 +112,8 @@ FAILSAFE_SEL = 32
 FAILSAFE_ADDR = 36
 
 /* Offsets in shared_info_t */
-UPCALL_PENDING = 0
-UPCALL_MASK    = 4
+#define UPCALL_PENDING /* 0 */
+#define UPCALL_MASK    1
 
 /* Offsets in guest_trap_bounce */
 GTB_ERROR_CODE   =  0
@@ -368,12 +368,11 @@ test_all_events:
        jnz  process_hyp_events
/*test_guest_events:*/
        movl SHARED_INFO(%ebx),%eax
-       movl UPCALL_MASK(%eax),%ecx
-       notl %ecx
-       andl UPCALL_PENDING(%eax),%ecx  # ECX = pending & ~mask
-       andl $1,%ecx                    # Is bit 0 pending and not masked?
+       testb $0xFF,UPCALL_MASK(%eax)
+       jnz  restore_all_guest
+       testb $0xFF,UPCALL_PENDING(%eax)
        jz   restore_all_guest
-       lock btsl $0,UPCALL_MASK(%eax)  # Upcalls are masked during delivery
+       movb $1,UPCALL_MASK(%eax)       # Upcalls are masked during delivery
/*process_guest_events:*/
        movzwl PROCESSOR(%ebx),%edx
        shl  $4,%edx                    # sizeof(guest_trap_bounce) == 16
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
index d10292f618..d50b101f3a 100644
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -206,7 +206,7 @@ static inline void do_trap(int trapnr, char *str,
     gtb->cs         = ti->cs;
     gtb->eip        = ti->address;
     if ( TI_GET_IF(ti) )
-        set_bit(0, &p->shared_info->evtchn_upcall_mask);
+        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
     return;
 
 fault_in_hypervisor:
@@ -277,9 +277,7 @@ asmlinkage void do_int3(struct pt_regs *regs, long error_code)
     gtb->cs         = ti->cs;
     gtb->eip        = ti->address;
     if ( TI_GET_IF(ti) )
-        set_bit(0, &p->shared_info->evtchn_upcall_mask);
-    return;
-
+        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
 }
 
 asmlinkage void do_double_fault(void)
@@ -353,7 +351,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
     gtb->cs         = ti->cs;
     gtb->eip        = ti->address;
     if ( TI_GET_IF(ti) )
-        set_bit(0, &p->shared_info->evtchn_upcall_mask);
+        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
     return;
 
 fault_in_hypervisor:
@@ -452,7 +450,7 @@ asmlinkage void do_general_protection(struct pt_regs *regs, long error_code)
     gtb->cs         = ti->cs;
     gtb->eip        = ti->address;
     if ( TI_GET_IF(ti) )
-        set_bit(0, &p->shared_info->evtchn_upcall_mask);
+        p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
     return;
 
 gp_in_kernel:
diff --git a/xen/common/domain.c b/xen/common/domain.c
index f83562a903..e86a5eba27 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -918,7 +918,9 @@ int construct_dom0(struct task_struct *p,
     /* Set up shared-info area. */
     update_dom_time(p->shared_info);
     p->shared_info->domain_time = 0;
-    p->shared_info->evtchn_upcall_mask = ~0UL; /* mask all upcalls */
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
 
     /* Install the new page tables. */
     __cli();
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 734df5cffa..2f6a38417c 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -94,8 +94,9 @@ void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs)
         sched_prn_state(p->state);
         printk(", hyp_events = %08x\n", p->hyp_events);
         s = p->shared_info;
-        printk("Guest: upcall_pend = %08lx, upcall_mask = %08lx\n",
-               s->evtchn_upcall_pending, s->evtchn_upcall_mask);
+        printk("Guest: upcall_pend = %02x, upcall_mask = %02x\n",
+               s->vcpu_data[0].evtchn_upcall_pending,
+               s->vcpu_data[0].evtchn_upcall_mask);
         printk("Notifying guest...\n");
         send_guest_virq(p, VIRQ_DEBUG);
     }
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 496b35b9a8..7e8d03dbc0 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -220,7 +220,7 @@ void wake_up(struct task_struct *p)
 static long do_block(void)
 {
     ASSERT(current->domain != IDLE_DOMAIN_ID);
-    clear_bit(0, &current->shared_info->evtchn_upcall_mask);
+    current->shared_info->vcpu_data[0].evtchn_upcall_mask = 0;
     current->state = TASK_INTERRUPTIBLE;
     TRACE_2D(TRC_SCHED_BLOCK, current->domain, current);
     __enter_scheduler();
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
index 2335ed5ad2..3941b34944 100644
--- a/xen/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -150,6 +150,9 @@ typedef struct
 /* Event channel endpoints per domain. */
 #define NR_EVENT_CHANNELS 1024
 
+/* No support for multi-processor guests. */
+#define MAX_VIRT_CPUS 1
+
 /*
  * Xen/guestos shared data -- pointer provided in start_info.
  * NB. We expect that this struct is smaller than a page.
@@ -157,13 +160,39 @@ typedef struct
 typedef struct shared_info_st {
 
     /*
-     * If bit 0 in evtchn_upcall_pending is transitioned 0->1, and bit 0 in
-     * evtchn_upcall_mask is clear, then an asynchronous upcall is scheduled.
-     * The upcall mask can be used to prevent unbounded reentrancy and stack
-     * overflow (in this way, acts as a kind of interrupt-enable flag).
+     * Per-VCPU information goes here. This will be cleaned up more when Xen
+     * actually supports multi-VCPU guests.
      */
-    unsigned long evtchn_upcall_pending;
-    unsigned long evtchn_upcall_mask;
+    struct {
+        /*
+         * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+         * a pending notification for a particular VCPU. It is then cleared
+         * by the guest OS /before/ checking for pending work, thus avoiding
+         * a set-and-check race. Note that the mask is only accessed by Xen
+         * on the CPU that is currently hosting the VCPU. This means that the
+         * pending and mask flags can be updated by the guest without special
+         * synchronisation (i.e., no need for the x86 LOCK prefix).
+         * This may seem suboptimal because if the pending flag is set by
+         * a different CPU then an IPI may be scheduled even when the mask
+         * is set. However, note:
+         *  1. The task of 'interrupt holdoff' is covered by the per-event-
+         *     channel mask bits. A 'noisy' event that is continually being
+         *     triggered can be masked at source at this very precise
+         *     granularity.
+         *  2. The main purpose of the per-VCPU mask is therefore to restrict
+         *     reentrant execution: whether for concurrency control, or to
+         *     prevent unbounded stack usage. Whatever the purpose, we expect
+         *     that the mask will be asserted only for short periods at a time,
+         *     and so the likelihood of a 'spurious' IPI is suitably small.
+         * The mask is read before making an event upcall to the guest: a
+         * non-zero mask therefore guarantees that the VCPU will not receive
+         * an upcall activation. The mask is cleared when the VCPU requests
+         * to block: this avoids wakeup-waiting races.
+         */
+        u8 evtchn_upcall_pending;
+        u8 evtchn_upcall_mask;
+        u8 pad0, pad1;
+    } vcpu_data[MAX_VIRT_CPUS];
 
     /*
      * A domain can have up to 1024 "event channels" on which it can send
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 3dd4cf383e..542cd3c6ef 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -18,7 +18,7 @@
  */
 
 /* Schedule an asynchronous callback for the specified domain. */
-static inline void __guest_notify(struct task_struct *p)
+static inline void guest_schedule_to_run(struct task_struct *p)
 {
 #ifdef CONFIG_SMP
     unsigned long flags, cpu_mask;
@@ -41,23 +41,11 @@ static inline void __guest_notify(struct task_struct *p)
 #endif
 }
 
-static inline void guest_notify(struct task_struct *p)
-{
-    /*
-     * Upcall already pending or upcalls masked?
-     * NB. Suitably synchronised on x86:
-     *  We must set the pending bit before checking the mask, but this is
-     *  guaranteed to occur because test_and_set_bit() is an ordering barrier.
-     */
-    if ( !test_and_set_bit(0, &p->shared_info->evtchn_upcall_pending) &&
-         !test_bit(0, &p->shared_info->evtchn_upcall_mask) )
-        __guest_notify(p);
-}
-
-
 /*
  * EVENT-CHANNEL NOTIFICATIONS
- * NB. As in guest_notify, evtchn_set_* is suitably synchronised on x86.
+ * NB. On x86, the atomic bit operations also act as memory barriers. There
+ * is therefore sufficiently strict ordering for this architecture -- others
+ * may require explicit memory barriers.
  */
 
 static inline void evtchn_set_pending(struct task_struct *p, int port)
@@ -66,7 +54,11 @@ static inline void evtchn_set_pending(struct task_struct *p, int port)
     if ( !test_and_set_bit(port, &s->evtchn_pending[0]) &&
          !test_bit        (port, &s->evtchn_mask[0])    &&
          !test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
-        guest_notify(p);
+    {
+        /* The VCPU pending flag must be set /after/ update to evtchn-pend. */
+        p->shared_info->vcpu_data[0].evtchn_upcall_pending = 1;
+        guest_schedule_to_run(p);
+    }
 }
 
 static inline void evtchn_set_exception(struct task_struct *p, int port)
@@ -103,7 +95,7 @@ static inline void send_guest_pirq(struct task_struct *p, int pirq)
 static inline void send_hyp_event(struct task_struct *p, int event)
 {
     if ( !test_and_set_bit(event, &p->hyp_events) )
-        __guest_notify(p);
+        guest_schedule_to_run(p);
 }
 
 /* Called on return from (architecture-dependent) entry.S. */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 4f506df04b..033f860c01 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -297,10 +297,10 @@ static inline long schedule_timeout(long timeout)
     return 0;
 }
 
-#define signal_pending(_p) \
-    (((_p)->hyp_events != 0) ||                                     \
-     (test_bit(0, &(_p)->shared_info->evtchn_upcall_pending) &&     \
-      !test_bit(0, &(_p)->shared_info->evtchn_upcall_mask)))
+#define signal_pending(_p)                                          \
+    ( (_p)->hyp_events ||                                           \
+      ((_p)->shared_info->vcpu_data[0].evtchn_upcall_pending &&     \
+       !(_p)->shared_info->vcpu_data[0].evtchn_upcall_mask) )
 
 void domain_init(void);
 
diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S b/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S
index c744f1bdcb..b78c74fd9c 100644
--- a/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S
+++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S
@@ -80,7 +80,7 @@ processor = 52
 
 /* Offsets into shared_info_t. */
 #define evtchn_upcall_pending /* 0 */
-#define evtchn_upcall_mask    4
+#define evtchn_upcall_mask    1
 
 ENOSYS = 38
 
@@ -210,14 +210,14 @@ ENTRY(system_call)
        movl %eax,EAX(%esp)             # save the return value
ENTRY(ret_from_sys_call)
        movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi
-       lock btsl $0,evtchn_upcall_mask(%esi)   # make tests atomic
+       movb $1,evtchn_upcall_mask(%esi)        # make tests atomic
ret_syscall_tests:
        cmpl $0,need_resched(%ebx)
        jne reschedule
        cmpl $0,sigpending(%ebx)
        je safesti                      # ensure need_resched updates are seen
signal_return:
-       lock btrl $0,evtchn_upcall_mask(%esi)   # reenable event callbacks
+       movb $0,evtchn_upcall_mask(%esi)        # reenable event callbacks
        movl %esp,%eax
        xorl %edx,%edx
        call SYMBOL_NAME(do_signal)
@@ -254,9 +254,9 @@ ret_from_exception:
 
        ALIGN
reschedule:
-       lock btrl $0,evtchn_upcall_mask(%esi)   # reenable event callbacks
-       call SYMBOL_NAME(schedule)      # test
-       jmp ret_from_sys_call
+       movb $0,evtchn_upcall_mask(%esi)        # reenable event callbacks
+       call SYMBOL_NAME(schedule)              # test
+       jmp ret_from_sys_call
 
ENTRY(divide_error)
        pushl $0                        # no error code
@@ -317,12 +317,12 @@ ENTRY(hypervisor_callback)
        movb CS(%esp),%cl
        test $2,%cl                     # slow return to ring 2 or 3
        jne  ret_syscall_tests
-safesti:lock btrl $0,evtchn_upcall_mask(%esi)  # reenable event callbacks
+safesti:movb $0,evtchn_upcall_mask(%esi)       # reenable event callbacks
scrit:  /**** START OF CRITICAL REGION ****/
-       testb $1,evtchn_upcall_pending(%esi)
+       testb $0xFF,evtchn_upcall_pending(%esi)
        jnz  14f                        # process more events if necessary...
        RESTORE_ALL
-14:    lock btsl $0,evtchn_upcall_mask(%esi)
+14:    movb $1,evtchn_upcall_mask(%esi)
        jmp  11b
ecrit:  /**** END OF CRITICAL REGION ****/
# [How we do the fixup]. We want to merge the current stack frame with the
@@ -351,7 +351,7 @@ critical_region_fixup:
        jmp  11b
 
critical_fixup_table:
-       .byte 0x00,0x00,0x00                  # testb $1,(%esi)
+       .byte 0x00,0x00,0x00                  # testb $0xFF,(%esi)
        .byte 0x00,0x00                       # jnz 14f
        .byte 0x00                            # pop %ebx
        .byte 0x04                            # pop %ecx
@@ -364,7 +364,7 @@ critical_fixup_table:
        .byte 0x20                            # pop %es
        .byte 0x24,0x24,0x24                  # add $4,%esp
        .byte 0x28                            # iret
-       .byte 0x00,0x00,0x00,0x00,0x00,0x00   # lock btsl $0,4(%esi)
+       .byte 0x00,0x00,0x00,0x00             # movb $1,4(%esi)
        .byte 0x00,0x00                       # jmp 11b
 
# Hypervisor uses this for application faults while it executes.
diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c
index c65806a7d9..7425f92047 100644
--- a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c
+++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c
@@ -50,8 +50,10 @@ void evtchn_do_upcall(struct pt_regs *regs)
 
     local_irq_save(flags);
 
-    while ( synch_test_and_clear_bit(0, &s->evtchn_upcall_pending) )
+    while ( s->vcpu_data[0].evtchn_upcall_pending )
     {
+        s->vcpu_data[0].evtchn_upcall_pending = 0;
+        /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
         l1 = xchg(&s->evtchn_pending_sel, 0);
         while ( (l1i = ffs(l1)) != 0 )
         {
diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h b/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h
index 8dbb460cda..ececad9447 100644
--- a/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h
+++ b/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h
@@ -42,10 +42,12 @@ static inline void unmask_evtchn(int port)
      * a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
      */
     if (  synch_test_bit        (port,    &s->evtchn_pending[0]) &&
-          !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) &&
-          !synch_test_and_set_bit(0,       &s->evtchn_upcall_pending) &&
-          !synch_test_bit        (0,       &s->evtchn_upcall_mask) )
-        evtchn_do_upcall(NULL);
+          !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+    {
+        s->vcpu_data[0].evtchn_upcall_pending = 1;
+        if ( !s->vcpu_data[0].evtchn_upcall_mask )
+            evtchn_do_upcall(NULL);
+    }
 }
 
 static inline void clear_evtchn(int port)
diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/system.h b/xenolinux-2.4.25-sparse/include/asm-xen/system.h
index 77b325d61a..86d6c7b150 100644
--- a/xenolinux-2.4.25-sparse/include/asm-xen/system.h
+++ b/xenolinux-2.4.25-sparse/include/asm-xen/system.h
@@ -302,42 +302,55 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 
 #define safe_halt()             ((void)0)
 
-/*
- * Note the use of synch_*_bit() operations in the following. These operations
- * ensure correct serialisation of checks and updates w.r.t. Xen executing on
- * a different CPU.
+/* 
+ * The use of 'barrier' in the following reflects their use as local-lock
+ * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
+ * critical operations are executed. All critical operatiosn must complete
+ * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
+ * includes these barriers, for example.
  */
 
#define __cli()                                                               \
do {                                                                          \
-    synch_set_bit(0, &HYPERVISOR_shared_info->evtchn_upcall_mask);           \
+    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1;             \
+    barrier();                                                                \
} while (0)
 
#define __sti()                                                               \
do {                                                                          \
    shared_info_t *_shared = HYPERVISOR_shared_info;                          \
-    synch_clear_bit(0, &_shared->evtchn_upcall_mask);                        \
-    if ( unlikely(synch_test_bit(0, &_shared->evtchn_upcall_pending)) )      \
+    barrier();                                                                \
+    _shared->vcpu_data[0].evtchn_upcall_mask = 0;                            \
+    if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) )             \
        evtchn_do_upcall(NULL);                                               \
} while (0)
 
#define __save_flags(x)                                                       \
do {                                                                          \
-    (x) = synch_test_bit(0, &HYPERVISOR_shared_info->evtchn_upcall_mask);    \
+    (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask;           \
} while (0)
 
-#define __restore_flags(x) do { if (x) __cli(); else __sti(); } while (0)
+#define __restore_flags(x)                                                    \
+do {                                                                          \
+    shared_info_t *_shared = HYPERVISOR_shared_info;                          \
+    barrier();                                                                \
+    if ( (_shared->vcpu_data[0].evtchn_upcall_mask = x) == 0 )                \
+        if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) )          \
+            evtchn_do_upcall(NULL);                                           \
+} while (0)
 
#define __save_and_cli(x)                                                     \
do {                                                                          \
-    (x) = synch_test_and_set_bit(                                            \
-        0, &HYPERVISOR_shared_info->evtchn_upcall_mask);                     \
+    (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask;           \
+    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1;             \
+    barrier();                                                                \
} while (0)
 
#define __save_and_sti(x)                                                     \
do {                                                                          \
-    (x) = synch_test_and_clear_bit(                                          \
-        0, &HYPERVISOR_shared_info->evtchn_upcall_mask);                     \
+    barrier();                                                                \
+    (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask;           \
+    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 0;             \
} while (0)
 
#define local_irq_save(x)       __save_and_cli(x)
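
The guest-side locking idiom that the new per-VCPU u8 fields enable can be seen outside the patch context. The sketch below is a minimal, self-contained illustration and is not Xen code: only the field names evtchn_upcall_pending/evtchn_upcall_mask come from the interface above, while the demo_* names and the plain volatile struct standing in for the real shared-info page are assumptions made for the example.

    #include <stdio.h>

    /* Simplified stand-in for the per-VCPU flags introduced by this patch. */
    struct demo_vcpu_info {
        volatile unsigned char evtchn_upcall_pending;
        volatile unsigned char evtchn_upcall_mask;
    };

    static struct demo_vcpu_info vcpu0;

    /* Stand-in for evtchn_do_upcall(): drain pending work. */
    static void demo_do_upcall(void)
    {
        while ( vcpu0.evtchn_upcall_pending )
        {
            /* Clear /before/ processing, mirroring the set-and-check ordering. */
            vcpu0.evtchn_upcall_pending = 0;
            printf("processing events\n");
        }
    }

    /* __cli()-style: mask upcalls before entering a critical region. */
    static void demo_cli(void)
    {
        vcpu0.evtchn_upcall_mask = 1;
        /* A compiler barrier would follow here; plain C has no barrier(). */
    }

    /* __sti()-style: unmask, then deliver anything that arrived while masked. */
    static void demo_sti(void)
    {
        vcpu0.evtchn_upcall_mask = 0;
        if ( vcpu0.evtchn_upcall_pending )
            demo_do_upcall();
    }

    int main(void)
    {
        demo_cli();
        vcpu0.evtchn_upcall_pending = 1;   /* as if Xen set it while we were masked */
        demo_sti();                        /* delivery happens here, not earlier */
        return 0;
    }

Because only byte-sized stores and loads on the VCPU's own flags are involved, this pattern needs no LOCK-prefixed instructions, which is exactly the simplification the patch makes in the xenolinux entry.S and system.h hunks.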
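On the sending side, the ordering that event.h now documents -- per-port pending bit, then selector word, then the per-VCPU pending flag, and only then the cross-CPU kick -- can also be shown in miniature. Again this is an illustrative sketch rather than Xen code: the demo_* names, the reduced field layout, and the use of C11 atomics in place of Xen's bit operations are assumptions made for the example.

    #include <stdatomic.h>
    #include <stdio.h>

    /* Simplified stand-in for the notification state in shared_info_t. */
    struct demo_shared {
        atomic_ulong evtchn_pending[32];   /* 1024 ports, 32 per word */
        atomic_ulong evtchn_mask[32];
        atomic_ulong evtchn_pending_sel;   /* one bit per pending word */
        atomic_uchar vcpu_upcall_pending;  /* per-VCPU flag (VCPU 0 only) */
    };

    /* Stand-in for guest_schedule_to_run(): kick the CPU hosting the guest. */
    static void demo_kick_vcpu(void)
    {
        printf("kick VCPU\n");
    }

    /* Mirrors evtchn_set_pending(): only the first setter at each level kicks. */
    static void demo_set_pending(struct demo_shared *s, int port)
    {
        int word = port >> 5;
        unsigned long bit = 1UL << (port & 31);
        unsigned long sel = 1UL << word;

        if ( !(atomic_fetch_or(&s->evtchn_pending[word], bit) & bit) &&
             !(atomic_load(&s->evtchn_mask[word]) & bit) &&
             !(atomic_fetch_or(&s->evtchn_pending_sel, sel) & sel) )
        {
            /* The per-VCPU flag is set only after the channel state above. */
            atomic_store(&s->vcpu_upcall_pending, 1);
            demo_kick_vcpu();
        }
    }

    int main(void)
    {
        static struct demo_shared s;     /* zero-initialised */
        demo_set_pending(&s, 3);         /* first event on port 3 kicks the VCPU */
        demo_set_pending(&s, 3);         /* already pending: no second kick */
        return 0;
    }

Setting the per-VCPU flag last means a consumer that clears it before scanning the channel bitmaps (as evtchn_do_upcall does above) cannot observe the flag without the underlying per-port state already being visible.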