author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>    2004-05-15 09:12:51 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>    2004-05-15 09:12:51 +0000
commit     8a1284a0b37dc0b0522b80ac78f84047ab6278ae (patch)
tree       8a482b4ea9b5851bd7bd40681423ed76e80c7c0d /xen
parent     aea7233c353457abe7ae567e08835d8352a83b8d (diff)
bitkeeper revision 1.891.1.27 (40a5df13MH3cGLFKtrv3l6TuX9uLGw)
Fix the TLB flush during fault handling problem. We can now safely handle a fault taken during return to guest context, which previously required us to re-enable interrupts. Fixed by rejigging the SMP inter-CPU flush code.
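
The core of the rejig is the deadlock-avoidance loop now used in flush_tlb_mask(): rather than blocking on the lock, the sender keeps servicing any flush aimed at its own CPU while it waits, so faults with interrupts disabled no longer need the forced STI in entry.S. The following is a minimal sketch of that pattern, condensed from the smp.c hunks below (identifiers match the patched file; this is not the complete source):

    static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
    static volatile unsigned long flush_cpumask;

    /* IPI handler: flush only if someone actually asked this CPU to. */
    asmlinkage void smp_invalidate_interrupt(void)
    {
        ack_APIC_irq();
        if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
            local_flush_tlb();
    }

    void flush_tlb_mask(unsigned long mask)
    {
        ASSERT(!in_irq());

        if ( mask & (1 << smp_processor_id()) )
        {
            local_flush_tlb();
            mask &= ~(1 << smp_processor_id());
        }

        if ( mask != 0 )
        {
            /*
             * Do not spin blindly: the current lock holder may be waiting
             * for us to flush, and our interrupts may be disabled, so we
             * do the IPI handler's work inline while waiting for the lock.
             */
            while ( !spin_trylock(&flush_lock) )
            {
                if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
                    local_flush_tlb();
                rep_nop();
            }

            flush_cpumask = mask;
            send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
            while ( flush_cpumask != 0 )
            {
                rep_nop();
                barrier();
            }

            spin_unlock(&flush_lock);
        }
    }
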
Diffstat (limited to 'xen')
-rw-r--r--  xen/arch/i386/entry.S    8
-rw-r--r--  xen/arch/i386/smp.c     58
-rw-r--r--  xen/arch/i386/traps.c   36
-rw-r--r--  xen/common/domain.c     14
4 files changed, 63 insertions(+), 53 deletions(-)
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
index 758848a67c..d63f383017 100644
--- a/xen/arch/i386/entry.S
+++ b/xen/arch/i386/entry.S
@@ -537,14 +537,6 @@ error_code:
movl %edx,%es
movl %edx,%fs
movl %edx,%gs
- # We force a STI here. In most cases it is illegal to fault with
- # interrupts disabled, so no need to check EFLAGS. There is one
- # case when it /is/ valid -- on final return to guest context, we
- # CLI so we can atomically check for events to notify guest about and
- # return, all in one go. If we fault it is necessary to STI and the
- # worst that will happen is that our return code is no longer atomic.
- # This will do -- noone will ever notice. :-)
- sti
movl %esp,%edx
pushl %esi # push the error code
pushl %edx # push the pt_regs pointer
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
index 57869c7b77..4989fc5085 100644
--- a/xen/arch/i386/smp.c
+++ b/xen/arch/i386/smp.c
@@ -21,15 +21,6 @@
#ifdef CONFIG_SMP
/*
- * This lock must be acquired before sending a synchronous IPI to another
- * CPU (i.e., IPI + spin waiting for acknowledgement). The only safe ways of
- * acquiring the lock are spin_lock() and spin_trylock(). The former is only
- * safe if local interrupts are enabled (otherwise we will never see an IPI
- * destined for us which we must acknowledge for the lock to be released).
- */
-static spinlock_t synchronous_ipi_lock = SPIN_LOCK_UNLOCKED;
-
-/*
* Some notes on x86 processor bugs affecting SMP operation:
*
* Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
@@ -220,17 +211,18 @@ static inline void send_IPI_allbutself(int vector)
* 2) Leave the mm if we are in the lazy tlb mode.
*/
+static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
static volatile unsigned long flush_cpumask;
-#define FLUSH_ALL 0xffffffff
asmlinkage void smp_invalidate_interrupt(void)
{
ack_APIC_irq();
perfc_incrc(ipis);
- local_flush_tlb();
- clear_bit(smp_processor_id(), &flush_cpumask);
+ if ( likely(test_and_clear_bit(smp_processor_id(), &flush_cpumask)) )
+ local_flush_tlb();
}
+#ifndef NO_DEVICES_IN_XEN
int try_flush_tlb_mask(unsigned long mask)
{
if ( mask & (1 << smp_processor_id()) )
@@ -241,7 +233,7 @@ int try_flush_tlb_mask(unsigned long mask)
if ( mask != 0 )
{
- if ( unlikely(!spin_trylock(&synchronous_ipi_lock)) )
+ if ( unlikely(!spin_trylock(&flush_lock)) )
return 0;
flush_cpumask = mask;
send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
@@ -250,15 +242,16 @@ int try_flush_tlb_mask(unsigned long mask)
rep_nop();
barrier();
}
- spin_unlock(&synchronous_ipi_lock);
+ spin_unlock(&flush_lock);
}
return 1;
}
+#endif
void flush_tlb_mask(unsigned long mask)
{
- ASSERT(local_irq_is_enabled());
+ ASSERT(!in_irq());
if ( mask & (1 << smp_processor_id()) )
{
@@ -268,7 +261,21 @@ void flush_tlb_mask(unsigned long mask)
if ( mask != 0 )
{
- spin_lock(&synchronous_ipi_lock);
+ /*
+ * We are certainly not reentering a flush_lock region on this CPU
+ * because we are not in an IRQ context. We can therefore wait for the
+ * other guy to release the lock. This is harder than it sounds because
+ * local interrupts might be disabled, and he may be waiting for us to
+ * execute smp_invalidate_interrupt(). We deal with this possibility by
+ * inlining the meat of that function here.
+ */
+ while ( unlikely(!spin_trylock(&flush_lock)) )
+ {
+ if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
+ local_flush_tlb();
+ rep_nop();
+ }
+
flush_cpumask = mask;
send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
while ( flush_cpumask != 0 )
@@ -276,13 +283,15 @@ void flush_tlb_mask(unsigned long mask)
rep_nop();
barrier();
}
- spin_unlock(&synchronous_ipi_lock);
+
+ spin_unlock(&flush_lock);
}
}
void new_tlbflush_clock_period(void)
{
- if ( unlikely(!spin_trylock(&synchronous_ipi_lock)) )
+ /* Avoid deadlock because we might be reentering a flush_lock region. */
+ if ( unlikely(!spin_trylock(&flush_lock)) )
return;
/* Someone may acquire the lock and execute the flush before us. */
@@ -305,7 +314,7 @@ void new_tlbflush_clock_period(void)
tlbflush_clock++;
out:
- spin_unlock(&synchronous_ipi_lock);
+ spin_unlock(&flush_lock);
}
static void flush_tlb_all_pge_ipi(void* info)
@@ -324,6 +333,12 @@ void smp_send_event_check_mask(unsigned long cpu_mask)
send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
}
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
struct call_data_struct {
void (*func) (void *info);
void *info;
@@ -369,7 +384,8 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
ASSERT(local_irq_is_enabled());
- spin_lock(&synchronous_ipi_lock);
+ spin_lock(&call_lock);
+
call_data = &data;
wmb();
/* Send a message to all other CPUs and wait for them to respond */
@@ -383,7 +399,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
while (atomic_read(&data.finished) != cpus)
barrier();
- spin_unlock(&synchronous_ipi_lock);
+ spin_unlock(&call_lock);
return 0;
}
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
index b38c2921e3..707f6415ef 100644
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -107,28 +107,9 @@ static inline int kernel_text_address(unsigned long addr)
}
-void show_trace(unsigned long * stack)
-{
- int i;
- unsigned long addr;
-
- printk("Call Trace: ");
- i = 1;
- while (((long) stack & (STACK_SIZE-1)) != 0) {
- addr = *stack++;
- if (kernel_text_address(addr)) {
- if (i && ((i % 6) == 0))
- printk("\n ");
- printk("[<%08lx>] ", addr);
- i++;
- }
- }
- printk("\n");
-}
-
void show_stack(unsigned long *esp)
{
- unsigned long *stack;
+ unsigned long *stack, addr;
int i;
printk("Stack trace from ESP=%p:\n", esp);
@@ -146,6 +127,20 @@ void show_stack(unsigned long *esp)
printk("%08lx ", *stack++);
}
printk("\n");
+
+ printk("Call Trace from ESP=%p: ", esp);
+ stack = esp;
+ i = 0;
+ while (((long) stack & (STACK_SIZE-1)) != 0) {
+ addr = *stack++;
+ if (kernel_text_address(addr)) {
+ if (i && ((i % 6) == 0))
+ printk("\n ");
+ printk("[<%08lx>] ", addr);
+ i++;
+ }
+ }
+ printk("\n");
}
void show_registers(struct pt_regs *regs)
@@ -241,7 +236,6 @@ DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
-DO_ERROR_NOCODE( 7, "device not available", device_not_available)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
diff --git a/xen/common/domain.c b/xen/common/domain.c
index f907a8c104..71294c9417 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -284,6 +284,12 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
unsigned long flags, mask, pfn_stamp, cpu_stamp;
int i;
+#ifdef NO_DEVICES_IN_XEN
+ ASSERT(!in_irq());
+#else
+ ASSERT((p != NULL) || !in_irq());
+#endif
+
spin_lock_irqsave(&free_list_lock, flags);
if ( likely(!list_empty(&free_list)) )
{
@@ -311,11 +317,15 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
if ( unlikely(mask != 0) )
{
+#ifdef NO_DEVICES_IN_XEN
+ flush_tlb_mask(mask);
+#else
/* In IRQ ctxt, flushing is best-effort only, to avoid deadlock. */
if ( likely(!in_irq()) )
flush_tlb_mask(mask);
else if ( unlikely(!try_flush_tlb_mask(mask)) )
goto free_and_exit;
+#endif
perfc_incrc(need_flush_tlb_flush);
}
}
@@ -324,7 +334,6 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
page->type_and_flags = 0;
if ( p != NULL )
{
- ASSERT(!in_irq());
wmb(); /* Domain pointer must be visible before updating refcnt. */
spin_lock(&p->page_list_lock);
if ( unlikely(p->tot_pages >= p->max_pages) )
@@ -355,8 +364,7 @@ void free_domain_page(struct pfn_info *page)
unsigned long flags;
struct task_struct *p = page->u.domain;
- if ( unlikely(in_irq()) )
- BUG();
+ ASSERT(!in_irq());
if ( likely(!IS_XEN_HEAP_FRAME(page)) )
{