author     iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-15 09:55:40 +0000
committer  iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-15 09:55:40 +0000
commit     5080a3f4b2b79dfba61e45ebbefc83531338e20e (patch)
tree       c041e3970d3180d897d08f3797305926f9ebd383 /xen
parent     5a258f039214b3d6cc25477ba02a48ee3df14732 (diff)
parent     8a1284a0b37dc0b0522b80ac78f84047ab6278ae (diff)
download   xen-5080a3f4b2b79dfba61e45ebbefc83531338e20e.tar.gz
           xen-5080a3f4b2b79dfba61e45ebbefc83531338e20e.tar.bz2
           xen-5080a3f4b2b79dfba61e45ebbefc83531338e20e.zip
bitkeeper revision 1.906 (40a5e91cnvIS_3gLwfnD2G3HV3odHA)
manual merge
Diffstat (limited to 'xen')
-rw-r--r--  xen/arch/i386/entry.S             |    9
-rw-r--r--  xen/arch/i386/pdb-linux.c         |  100
-rw-r--r--  xen/arch/i386/pdb-stub.c          |  174
-rw-r--r--  xen/arch/i386/smp.c               |   62
-rw-r--r--  xen/arch/i386/traps.c             |   57
-rw-r--r--  xen/common/dom0_ops.c             |   29
-rw-r--r--  xen/common/domain.c               |   16
-rw-r--r--  xen/common/memory.c               |   46
-rw-r--r--  xen/include/asm-i386/pdb.h        |   29
-rw-r--r--  xen/include/asm-i386/processor.h  |    6
-rw-r--r--  xen/include/xen/perfc_defn.h      |    1
11 files changed, 361 insertions(+), 168 deletions(-)
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
index 1d55a51617..d63f383017 100644
--- a/xen/arch/i386/entry.S
+++ b/xen/arch/i386/entry.S
@@ -537,14 +537,7 @@ error_code:
movl %edx,%es
movl %edx,%fs
movl %edx,%gs
- # We force a STI here. In most cases it is illegal to fault with
- # interrupts disabled, so no need to check EFLAGS. There is one
- # case when it /is/ valid -- on final return to guest context, we
- # CLI so we can atomically check for events to notify guest about and
- # return, all in one go. If we fault it is necessary to STI and the
- # worst that will happen is that our return code is no longer atomic.
- # This will do -- noone will ever notice. :-)
- sti
+ movl %esp,%edx
pushl %esi # push the error code
pushl %edx # push the pt_regs pointer
GET_CURRENT(%ebx)
diff --git a/xen/arch/i386/pdb-linux.c b/xen/arch/i386/pdb-linux.c
new file mode 100644
index 0000000000..fd0fc5ed78
--- /dev/null
+++ b/xen/arch/i386/pdb-linux.c
@@ -0,0 +1,100 @@
+
+/*
+ * pervasive debugger
+ * www.cl.cam.ac.uk/netos/pdb
+ *
+ * alex ho
+ * 2004
+ * university of cambridge computer laboratory
+ *
+ * linux & i386 dependent code. bleech.
+ */
+
+#include <asm/pdb.h>
+
+/* offset to the first instruction in the linux system call code
+ where we can safely set a breakpoint */
+unsigned int pdb_linux_syscall_enter_bkpt_offset = 20;
+
+/* offset to eflags saved on the stack after an int 80 */
+unsigned int pdb_linux_syscall_eflags_offset = 48;
+
+/* offset to the instruction pointer saved on the stack after an int 80 */
+unsigned int pdb_linux_syscall_eip_offset = 40;
+
+unsigned char
+pdb_linux_set_bkpt (unsigned long addr)
+{
+ unsigned char old_instruction = *(unsigned char *)addr;
+ *(unsigned char *)addr = 0xcc;
+ return old_instruction;
+}
+
+void
+pdb_linux_clr_bkpt (unsigned long addr, unsigned char value)
+{
+ *(unsigned char *)addr = value;
+}
+
+void
+pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code,
+ trap_info_t *ti)
+{
+ /* set a breakpoint at the beginning of the
+ system call in the target domain */
+
+ pdb_system_call_enter_instr = pdb_linux_set_bkpt(ti->address +
+ pdb_linux_syscall_enter_bkpt_offset);
+ pdb_system_call = 1;
+}
+
+void
+pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, struct pdb_context *pdb_ctx)
+{
+ /*
+ we've hit an int 0x80 in a user's program, jumped into xen
+ (traps.c::do_general_protection()) which re-wrote the next
+ instruction in the os kernel to 0xcc, and then hit that
+ exception.
+
+ we need to re-write the return instruction in the user's
+ program so that we know when we have finished the system call
+ and are back in the user's program.
+
+ at this point our stack should look something like this:
+
+ esp = 0x80a59f0
+ esp + 4 = 0x0
+ esp + 8 = 0x80485a0
+ esp + 12 = 0x2d
+ esp + 16 = 0x80485f4
+ esp + 20 = 0xbffffa48
+ esp + 24 = 0xd
+ esp + 28 = 0xc00a0833
+ esp + 32 = 0x833
+ esp + 36 = 0xd
+ esp + 40 = 0x804dcdd saved eip
+ esp + 44 = 0x82b saved cs
+ esp + 48 = 0x213392 saved eflags
+ esp + 52 = 0xbffffa2c saved esp
+ esp + 56 = 0x833 saved ss
+ esp + 60 = 0x1000000
+ */
+
+ /* restore the entry instruction for the system call */
+ pdb_linux_clr_bkpt(regs->eip - 1, pdb_system_call_enter_instr);
+
+ /* save the address of eflags that was saved on the stack */
+ pdb_system_call_eflags_addr = (regs->esp +
+ pdb_linux_syscall_eflags_offset);
+
+ /* muck with the return instruction so that we trap back into the
+ debugger when re-entering user space */
+ pdb_system_call_next_addr = *(unsigned long *)(regs->esp +
+ pdb_linux_syscall_eip_offset);
+ pdb_linux_get_values (&pdb_system_call_leave_instr, 1,
+ pdb_system_call_next_addr,
+ pdb_ctx->process, pdb_ctx->ptbr);
+ pdb_linux_set_values ("cc", 1, pdb_system_call_next_addr,
+ pdb_ctx->process, pdb_ctx->ptbr);
+}
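
The new pdb-linux.c above rests on one primitive: save the original opcode byte, overwrite it with 0xcc (int3) so execution traps into the debugger, and restore it afterwards. A minimal user-space sketch of that byte-patching pattern (it patches a writable buffer in the current process; everything apart from the 0xcc opcode is illustrative):

    #include <stdio.h>

    /* Save the original byte and plant an int3 in its place. */
    static unsigned char set_bkpt(unsigned char *addr)
    {
        unsigned char old_instruction = *addr;
        *addr = 0xcc;                      /* int3: traps into the debugger */
        return old_instruction;
    }

    /* Put the saved byte back once the breakpoint has fired. */
    static void clr_bkpt(unsigned char *addr, unsigned char value)
    {
        *addr = value;
    }

    int main(void)
    {
        unsigned char code[] = { 0x90, 0x90, 0xc3 };   /* nop; nop; ret */
        unsigned char saved = set_bkpt(&code[0]);

        printf("patched byte 0x%02x (saved 0x%02x)\n", code[0], saved);
        clr_bkpt(&code[0], saved);
        printf("restored byte 0x%02x\n", code[0]);
        return 0;
    }
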
diff --git a/xen/arch/i386/pdb-stub.c b/xen/arch/i386/pdb-stub.c
index 5b42e9a746..63320561dc 100644
--- a/xen/arch/i386/pdb-stub.c
+++ b/xen/arch/i386/pdb-stub.c
@@ -47,26 +47,13 @@ static int pdb_in_buffer_ptr;
static unsigned char pdb_in_checksum;
static unsigned char pdb_xmit_checksum;
-/* function pointers in the near future... */
-unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
-void pdb_linux_get_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3);
-void pdb_linux_set_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3);
-
-struct pdb_context
-{
- int valid;
- int domain;
- int process;
- unsigned long ptbr; /* cached page table base register */
-};
struct pdb_context pdb_ctx;
-
int pdb_continue_thread = 0;
int pdb_general_thread = 0;
void pdb_put_packet (unsigned char *buffer, int ack);
+void pdb_bkpt_check (u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr);
int pdb_initialized = 0;
int pdb_page_fault_possible = 0;
@@ -75,6 +62,12 @@ int pdb_page_fault = 0;
static int pdb_serhnd = -1;
static int pdb_stepping = 0;
+int pdb_system_call = 0;
+unsigned char pdb_system_call_enter_instr = 0; /* original enter instr */
+unsigned char pdb_system_call_leave_instr = 0; /* original next instr */
+unsigned long pdb_system_call_next_addr = 0; /* instr after int 0x80 */
+unsigned long pdb_system_call_eflags_addr = 0; /* saved eflags on stack */
+
static inline void pdb_put_char(unsigned char c)
{
serial_putc(pdb_serhnd, c);
@@ -406,15 +399,49 @@ pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3,
break;
case 'S': /* step with signal */
case 's': /* step */
+ {
+ if ( pdb_system_call_eflags_addr != 0 )
+ {
+ unsigned long eflags;
+ char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */
+
+ pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ eflags |= X86_EFLAGS_TF;
+ mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
+ pdb_linux_set_values(eflags_buf, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ }
+
regs->eflags |= X86_EFLAGS_TF;
pdb_stepping = 1;
return 1;
/* not reached */
+ }
case 'C': /* continue with signal */
case 'c': /* continue */
+ {
+ if ( pdb_system_call_eflags_addr != 0 )
+ {
+ unsigned long eflags;
+ char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */
+
+ pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ eflags &= ~X86_EFLAGS_TF;
+ mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
+ pdb_linux_set_values(eflags_buf, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ }
+
regs->eflags &= ~X86_EFLAGS_TF;
return 1; /* jump out before replying to gdb */
/* not reached */
+ }
case 'd':
remote_debug = !(remote_debug); /* toggle debug flag */
break;
@@ -424,54 +451,11 @@ pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3,
case 'g': /* return the value of the CPU registers */
{
pdb_x86_to_gdb_regs (pdb_out_buffer, regs);
-
- /*
- printk (" reg: %s", pdb_out_buffer);
- printk ("\n");
- printk (" eax: 0x%08lx\n", regs->eax);
- printk (" ecx: 0x%08lx\n", regs->ecx);
- printk (" edx: 0x%08lx\n", regs->edx);
- printk (" ebx: 0x%08lx\n", regs->ebx);
- printk (" esp: 0x%08lx\n", regs->esp);
- printk (" ebp: 0x%08lx\n", regs->ebp);
- printk (" esi: 0x%08lx\n", regs->esi);
- printk (" edi: 0x%08lx\n", regs->edi);
- printk (" eip: 0x%08lx\n", regs->eip);
- printk (" efl: 0x%08lx\n", regs->eflags);
- printk (" xcs: 0x%08x\n", regs->xcs);
- printk (" xss: 0x%08x\n", regs->xss);
- printk (" xds: 0x%08x\n", regs->xds);
- printk (" xes: 0x%08x\n", regs->xes);
- printk (" xfs: 0x%08x\n", regs->xfs);
- printk (" xgs: 0x%08x\n", regs->xgs);
- */
-
break;
}
case 'G': /* set the value of the CPU registers - return OK */
{
pdb_gdb_to_x86_regs (regs, ptr);
-
- /*
- printk (" ptr: %s \n\n", ptr);
- printk (" eax: 0x%08lx\n", regs->eax);
- printk (" ecx: 0x%08lx\n", regs->ecx);
- printk (" edx: 0x%08lx\n", regs->edx);
- printk (" ebx: 0x%08lx\n", regs->ebx);
- printk (" esp: 0x%08lx\n", regs->esp);
- printk (" ebp: 0x%08lx\n", regs->ebp);
- printk (" esi: 0x%08lx\n", regs->esi);
- printk (" edi: 0x%08lx\n", regs->edi);
- printk (" eip: 0x%08lx\n", regs->eip);
- printk (" efl: 0x%08lx\n", regs->eflags);
- printk (" xcs: 0x%08x\n", regs->xcs);
- printk (" xss: 0x%08x\n", regs->xss);
- printk (" xds: 0x%08x\n", regs->xds);
- printk (" xes: 0x%08x\n", regs->xes);
- printk (" xfs: 0x%08x\n", regs->xfs);
- printk (" xgs: 0x%08x\n", regs->xgs);
- */
-
break;
}
case 'H':
@@ -572,17 +556,20 @@ pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3,
if (addr >= PAGE_OFFSET)
{
hex2mem (ptr, (char *)addr, length);
+ pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
}
else if (pdb_ctx.process != -1)
{
pdb_linux_set_values(ptr, length, addr,
pdb_ctx.process,
pdb_ctx.ptbr);
+ pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
}
else
{
pdb_set_values (ptr, length,
pdb_ctx.ptbr, addr);
+ pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
}
pdb_page_fault_possible = 0;
if (pdb_page_fault)
@@ -936,7 +923,6 @@ int pdb_set_values(u_char *buffer, int length,
unsigned long cr3, unsigned long addr)
{
int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL);
- pdb_bkpt_check(buffer, length, cr3, addr);
return count;
}
@@ -1176,16 +1162,35 @@ int pdb_handle_exception(int exceptionVector,
__asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+ /* If the exception is an int3 from user space then pdb is only
+ interested if it re-wrote an instruction to set the breakpoint.
+ This occurs when leaving a system call from a domain.
+ */
+ if ( exceptionVector == 3 &&
+ (xen_regs->xcs & 3) == 3 &&
+ xen_regs->eip != pdb_system_call_next_addr + 1)
+ {
+ TRC(printf("pdb: user bkpt (0x%x) at 0x%x:0x%lx:0x%lx\n",
+ exceptionVector, xen_regs->xcs & 3, cr3, xen_regs->eip));
+ return 1;
+ }
+
/*
- * If PDB didn't set the breakpoint, is not single stepping, and the user
- * didn't press the magic debug key, then we don't handle the exception.
+ * If PDB didn't set the breakpoint, is not single stepping,
+ * is not entering a system call in a domain,
+ * and the user didn't press the magic debug key,
+ * then we don't handle the exception.
*/
bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1);
if ( (bkpt == NULL) &&
- !pdb_stepping && (exceptionVector != KEYPRESS_EXCEPTION) &&
+ !pdb_stepping &&
+ !pdb_system_call &&
+ xen_regs->eip != pdb_system_call_next_addr + 1 &&
+ (exceptionVector != KEYPRESS_EXCEPTION) &&
xen_regs->eip < 0xc0000000) /* xenolinux for now! */
{
- TRC(printf("pdb: user bkpt at 0x%lx:0x%lx\n", cr3, xen_regs->eip));
+ TRC(printf("pdb: user bkpt (0x%x) at 0x%lx:0x%lx\n",
+ exceptionVector, cr3, xen_regs->eip));
return 1;
}
@@ -1199,12 +1204,54 @@ int pdb_handle_exception(int exceptionVector,
pdb_stepping = 0;
}
+ if ( pdb_system_call )
+ {
+ pdb_system_call = 0;
+
+ pdb_linux_syscall_exit_bkpt (xen_regs, &pdb_ctx);
+
+ /* we don't have a saved breakpoint so we need to rewind eip */
+ xen_regs->eip--;
+
+ /* if the user doesn't care about breaking when entering a
+ system call then we'll just ignore the exception */
+ if ( (pdb_ctx.system_call & 0x01) == 0 )
+ {
+ return 0;
+ }
+ }
+
if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL)
{
/* Executed Int3: replace breakpoint byte with real program byte. */
xen_regs->eip--;
}
+ /* returning to user space after a system call */
+ if ( xen_regs->eip == pdb_system_call_next_addr + 1)
+ {
+ u_char instr[2]; /* REALLY REALLY REALLY STUPID */
+
+ mem2hex (&pdb_system_call_leave_instr, instr, sizeof(instr));
+
+ pdb_linux_set_values (instr, 1, pdb_system_call_next_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+
+ pdb_system_call_next_addr = 0;
+ pdb_system_call_leave_instr = 0;
+
+ /* manually rewind eip */
+ xen_regs->eip--;
+
+ /* if the user doesn't care about breaking when returning
+ to user space after a system call then we'll just ignore
+ the exception */
+ if ( (pdb_ctx.system_call & 0x02) == 0 )
+ {
+ return 0;
+ }
+ }
+
/* Generate a signal for GDB. */
switch ( exceptionVector )
{
@@ -1267,6 +1314,7 @@ void initialize_pdb()
pdb_ctx.valid = 1;
pdb_ctx.domain = -1;
pdb_ctx.process = -1;
+ pdb_ctx.system_call = 0;
pdb_ctx.ptbr = 0;
printk("pdb: pervasive debugger (%s) www.cl.cam.ac.uk/netos/pdb\n",
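
The new 's' and 'c' cases earlier in this file toggle the trap flag in two places: in the live register frame and, while a system call is being traced, in the eflags word saved on the guest kernel stack. A stand-alone sketch of the TF manipulation itself (the saved frame is reduced to a plain struct; all values are invented):

    #include <stdio.h>

    #define X86_EFLAGS_TF 0x00000100u   /* trap flag: fault after each instruction */

    struct saved_frame {
        unsigned long eip;
        unsigned long cs;
        unsigned long eflags;
    };

    /* 's' (step) sets TF so the CPU traps after the next instruction;
       'c' (continue) clears it again. */
    static void set_single_step(struct saved_frame *f, int on)
    {
        if (on)
            f->eflags |= X86_EFLAGS_TF;
        else
            f->eflags &= ~X86_EFLAGS_TF;
    }

    int main(void)
    {
        struct saved_frame f = { 0x0804dcddUL, 0x2bUL, 0x200292UL };

        set_single_step(&f, 1);
        printf("step:     eflags = 0x%08lx\n", f.eflags);
        set_single_step(&f, 0);
        printf("continue: eflags = 0x%08lx\n", f.eflags);
        return 0;
    }
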
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
index 5ed43d5551..4989fc5085 100644
--- a/xen/arch/i386/smp.c
+++ b/xen/arch/i386/smp.c
@@ -21,15 +21,6 @@
#ifdef CONFIG_SMP
/*
- * This lock must be acquired before sending a synchronous IPI to another
- * CPU (i.e., IPI + spin waiting for acknowledgement). The only safe ways of
- * acquiring the lock are spin_lock() and spin_trylock(). The former is only
- * safe if local interrupts are enabled (otherwise we will never see an IPI
- * destined for us which we must acknowledge for the lock to be released).
- */
-static spinlock_t synchronous_ipi_lock = SPIN_LOCK_UNLOCKED;
-
-/*
* Some notes on x86 processor bugs affecting SMP operation:
*
* Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
@@ -220,16 +211,18 @@ static inline void send_IPI_allbutself(int vector)
* 2) Leave the mm if we are in the lazy tlb mode.
*/
+static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
static volatile unsigned long flush_cpumask;
-#define FLUSH_ALL 0xffffffff
asmlinkage void smp_invalidate_interrupt(void)
{
ack_APIC_irq();
- local_flush_tlb();
- clear_bit(smp_processor_id(), &flush_cpumask);
+ perfc_incrc(ipis);
+ if ( likely(test_and_clear_bit(smp_processor_id(), &flush_cpumask)) )
+ local_flush_tlb();
}
+#ifndef NO_DEVICES_IN_XEN
int try_flush_tlb_mask(unsigned long mask)
{
if ( mask & (1 << smp_processor_id()) )
@@ -240,7 +233,7 @@ int try_flush_tlb_mask(unsigned long mask)
if ( mask != 0 )
{
- if ( unlikely(!spin_trylock(&synchronous_ipi_lock)) )
+ if ( unlikely(!spin_trylock(&flush_lock)) )
return 0;
flush_cpumask = mask;
send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
@@ -249,15 +242,16 @@ int try_flush_tlb_mask(unsigned long mask)
rep_nop();
barrier();
}
- spin_unlock(&synchronous_ipi_lock);
+ spin_unlock(&flush_lock);
}
return 1;
}
+#endif
void flush_tlb_mask(unsigned long mask)
{
- ASSERT(local_irq_is_enabled());
+ ASSERT(!in_irq());
if ( mask & (1 << smp_processor_id()) )
{
@@ -267,7 +261,21 @@ void flush_tlb_mask(unsigned long mask)
if ( mask != 0 )
{
- spin_lock(&synchronous_ipi_lock);
+ /*
+ * We are certainly not reentering a flush_lock region on this CPU
+ * because we are not in an IRQ context. We can therefore wait for the
+ * other guy to release the lock. This is harder than it sounds because
+ * local interrupts might be disabled, and he may be waiting for us to
+ * execute smp_invalidate_interrupt(). We deal with this possibility by
+ * inlining the meat of that function here.
+ */
+ while ( unlikely(!spin_trylock(&flush_lock)) )
+ {
+ if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
+ local_flush_tlb();
+ rep_nop();
+ }
+
flush_cpumask = mask;
send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
while ( flush_cpumask != 0 )
@@ -275,13 +283,15 @@ void flush_tlb_mask(unsigned long mask)
rep_nop();
barrier();
}
- spin_unlock(&synchronous_ipi_lock);
+
+ spin_unlock(&flush_lock);
}
}
void new_tlbflush_clock_period(void)
{
- if ( unlikely(!spin_trylock(&synchronous_ipi_lock)) )
+ /* Avoid deadlock because we might be reentering a flush_lock region. */
+ if ( unlikely(!spin_trylock(&flush_lock)) )
return;
/* Someone may acquire the lock and execute the flush before us. */
@@ -304,7 +314,7 @@ void new_tlbflush_clock_period(void)
tlbflush_clock++;
out:
- spin_unlock(&synchronous_ipi_lock);
+ spin_unlock(&flush_lock);
}
static void flush_tlb_all_pge_ipi(void* info)
@@ -323,6 +333,12 @@ void smp_send_event_check_mask(unsigned long cpu_mask)
send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
}
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
struct call_data_struct {
void (*func) (void *info);
void *info;
@@ -368,7 +384,8 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
ASSERT(local_irq_is_enabled());
- spin_lock(&synchronous_ipi_lock);
+ spin_lock(&call_lock);
+
call_data = &data;
wmb();
/* Send a message to all other CPUs and wait for them to respond */
@@ -382,7 +399,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
while (atomic_read(&data.finished) != cpus)
barrier();
- spin_unlock(&synchronous_ipi_lock);
+ spin_unlock(&call_lock);
return 0;
}
@@ -419,6 +436,7 @@ void smp_send_stop(void)
asmlinkage void smp_event_check_interrupt(void)
{
ack_APIC_irq();
+ perfc_incrc(ipis);
}
asmlinkage void smp_call_function_interrupt(void)
@@ -428,6 +446,8 @@ asmlinkage void smp_call_function_interrupt(void)
int wait = call_data->wait;
ack_APIC_irq();
+ perfc_incrc(ipis);
+
/*
* Notify initiating CPU that I've grabbed the data and am
* about to execute the function
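
The comment added to flush_tlb_mask() above captures the key idea of this smp.c rework: a CPU that cannot take flush_lock must keep servicing any flush request aimed at it while it spins, otherwise the lock holder may wait forever for an acknowledgement. A toy sketch of that wait loop, with the lock, the cpumask and the TLB flush all simulated so only the control flow matches the real code:

    #include <stdio.h>

    static int pretend_lock_held = 3;           /* the "other CPU" holds flush_lock for a while */
    static unsigned long flush_cpumask = 0x1;   /* ...and has a flush request pending for CPU 0 */

    static int spin_trylock_sim(void)
    {
        return pretend_lock_held-- <= 0;        /* succeeds once the holder lets go */
    }

    static int test_and_clear_bit_sim(int cpu, unsigned long *mask)
    {
        int was_set = (*mask >> cpu) & 1;
        *mask &= ~(1UL << cpu);
        return was_set;
    }

    static void local_flush_tlb_sim(void)
    {
        puts("  serviced the pending flush for this CPU");
    }

    int main(void)
    {
        int cpu = 0;

        while (!spin_trylock_sim())             /* cannot take flush_lock yet...        */
        {
            if (test_and_clear_bit_sim(cpu, &flush_cpumask))
                local_flush_tlb_sim();          /* ...so do the holder's work ourselves */
            puts("  still waiting for flush_lock");
        }
        puts("flush_lock acquired; safe to send our own IPI now");
        return 0;
    }
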
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
index 1457617d29..7250074420 100644
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -107,36 +107,9 @@ static inline int kernel_text_address(unsigned long addr)
}
-
-void show_trace(unsigned long * stack)
-{
- int i;
- unsigned long addr;
-
- printk("Call Trace: ");
- i = 1;
- while (((long) stack & (STACK_SIZE-1)) != 0) {
- addr = *stack++;
- if (kernel_text_address(addr)) {
- if (i && ((i % 6) == 0))
- printk("\n ");
- printk("[<%08lx>] ", addr);
- i++;
- }
- }
- printk("\n");
-}
-
-void show_traceX(void)
-{
- unsigned long *addr;
- __asm__ __volatile__ ("movl %%esp,%0" : "=r" (addr) : );
- show_trace(addr);
-}
-
void show_stack(unsigned long *esp)
{
- unsigned long *stack;
+ unsigned long *stack, addr;
int i;
printk("Stack trace from ESP=%p:\n", esp);
@@ -154,6 +127,20 @@ void show_stack(unsigned long *esp)
printk("%08lx ", *stack++);
}
printk("\n");
+
+ printk("Call Trace from ESP=%p: ", esp);
+ stack = esp;
+ i = 0;
+ while (((long) stack & (STACK_SIZE-1)) != 0) {
+ addr = *stack++;
+ if (kernel_text_address(addr)) {
+ if (i && ((i % 6) == 0))
+ printk("\n ");
+ printk("[<%08lx>] ", addr);
+ i++;
+ }
+ }
+ printk("\n");
}
void show_registers(struct pt_regs *regs)
@@ -250,7 +237,6 @@ DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
-DO_ERROR_NOCODE( 7, "device not available", device_not_available)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
@@ -267,10 +253,10 @@ asmlinkage void do_int3(struct pt_regs *regs, long error_code)
struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
trap_info_t *ti;
+ if ( pdb_handle_exception(3, regs) == 0 )
+ return;
if ( (regs->xcs & 3) != 3 )
{
- if ( pdb_handle_exception(3, regs) == 0 )
- return;
if ( unlikely((regs->xcs & 3) == 0) )
{
show_registers(regs);
@@ -445,6 +431,15 @@ asmlinkage void do_general_protection(struct pt_regs *regs, long error_code)
ti = current->thread.traps + (error_code>>3);
if ( TI_GET_DPL(ti) >= (regs->xcs & 3) )
{
+ unsigned long cr3;
+
+ __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+ if (pdb_initialized && pdb_ctx.system_call != 0 &&
+ cr3 == pdb_ctx.ptbr)
+ {
+ pdb_linux_syscall_enter_bkpt(regs, error_code, ti);
+ }
+
gtb->flags = GTBF_TRAP_NOCODE;
regs->eip += 2;
goto finish_propagation;
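
The call trace folded into show_stack() above is a simple heuristic: walk the words on the stack and print any that land inside the kernel text segment. A user-space sketch of that scan (the text range and stack contents are made up):

    #include <stdio.h>

    #define TEXT_START 0xc0100000UL   /* illustrative kernel text range */
    #define TEXT_END   0xc0200000UL

    static int kernel_text_address(unsigned long addr)
    {
        return addr >= TEXT_START && addr < TEXT_END;
    }

    int main(void)
    {
        unsigned long stack[] = { 0xbffffa48, 0xc0111234, 0x2d, 0xc01ffab0, 0x833 };
        int i, n = sizeof(stack) / sizeof(stack[0]);

        printf("Call Trace: ");
        for (i = 0; i < n; i++)
            if (kernel_text_address(stack[i]))
                printf("[<%08lx>] ", stack[i]);   /* looks like a return address */
        printf("\n");
        return 0;
    }
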
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index f768edd5ff..32fa532c9a 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -28,15 +28,6 @@
extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int);
-/* Basically used to protect the domain-id space. */
-static spinlock_t create_dom_lock = SPIN_LOCK_UNLOCKED;
-
-static domid_t get_domnr(void)
-{
- static domid_t domnr = 0;
- return ++domnr;
-}
-
static int msr_cpu_mask;
static unsigned long msr_addr;
static unsigned long msr_lo;
@@ -117,23 +108,24 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
case DOM0_CREATEDOMAIN:
{
struct task_struct *p;
- static unsigned int pro = 0;
+ static domid_t domnr = 0;
+ static spinlock_t domnr_lock = SPIN_LOCK_UNLOCKED;
+ unsigned int pro;
domid_t dom;
ret = -ENOMEM;
- spin_lock_irq(&create_dom_lock);
-
- if ( (dom = get_domnr()) == 0 )
- goto exit_create;
+ spin_lock(&domnr_lock);
+ dom = ++domnr;
+ spin_unlock(&domnr_lock);
if (op->u.createdomain.cpu == -1 )
- pro = (pro+1) % smp_num_cpus;
+ pro = (unsigned int)dom % smp_num_cpus;
else
pro = op->u.createdomain.cpu % smp_num_cpus;
p = do_createdomain(dom, pro);
if ( p == NULL )
- goto exit_create;
+ break;
if ( op->u.createdomain.name[0] )
{
@@ -145,16 +137,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
if ( ret != 0 )
{
__kill_domain(p);
- goto exit_create;
+ break;
}
ret = 0;
op->u.createdomain.domain = p->domain;
copy_to_user(u_dom0_op, op, sizeof(*op));
-
- exit_create:
- spin_unlock_irq(&create_dom_lock);
}
break;
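
The DOM0_CREATEDOMAIN rework above shrinks the critical section to the domain-id counter itself and derives a default CPU from the new id rather than from shared state. A user-space sketch of the same allocation pattern (a pthread mutex stands in for the spinlock; smp_num_cpus is invented):

    #include <pthread.h>
    #include <stdio.h>

    static unsigned int domnr = 0;
    static pthread_mutex_t domnr_lock = PTHREAD_MUTEX_INITIALIZER;
    static const unsigned int smp_num_cpus = 4;

    static unsigned int alloc_domid(void)
    {
        unsigned int dom;
        pthread_mutex_lock(&domnr_lock);    /* only the counter is protected */
        dom = ++domnr;
        pthread_mutex_unlock(&domnr_lock);
        return dom;
    }

    int main(void)
    {
        unsigned int i;
        for (i = 0; i < 6; i++)
        {
            unsigned int dom = alloc_domid();
            printf("domain %u -> cpu %u\n", dom, dom % smp_num_cpus);
        }
        return 0;
    }
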
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 88cc659711..df44c4600b 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -292,6 +292,12 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
unsigned long flags, mask, pfn_stamp, cpu_stamp;
int i;
+#ifdef NO_DEVICES_IN_XEN
+ ASSERT(!in_irq());
+#else
+ ASSERT((p != NULL) || !in_irq());
+#endif
+
spin_lock_irqsave(&free_list_lock, flags);
if ( likely(!list_empty(&free_list)) )
{
@@ -307,7 +313,7 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
if ( (mask = page->u.cpu_mask) != 0 )
{
pfn_stamp = page->tlbflush_timestamp;
- for ( i = 0; (mask != 0) && (i < NR_CPUS); i++ )
+ for ( i = 0; (mask != 0) && (i < smp_num_cpus); i++ )
{
if ( mask & (1<<i) )
{
@@ -319,11 +325,15 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
if ( unlikely(mask != 0) )
{
+#ifdef NO_DEVICES_IN_XEN
+ flush_tlb_mask(mask);
+#else
/* In IRQ ctxt, flushing is best-effort only, to avoid deadlock. */
if ( likely(!in_irq()) )
flush_tlb_mask(mask);
else if ( unlikely(!try_flush_tlb_mask(mask)) )
goto free_and_exit;
+#endif
perfc_incrc(need_flush_tlb_flush);
}
}
@@ -332,7 +342,6 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
page->type_and_flags = 0;
if ( p != NULL )
{
- ASSERT(!in_irq());
wmb(); /* Domain pointer must be visible before updating refcnt. */
spin_lock(&p->page_list_lock);
if ( unlikely(p->tot_pages >= p->max_pages) )
@@ -363,8 +372,7 @@ void free_domain_page(struct pfn_info *page)
unsigned long flags;
struct task_struct *p = page->u.domain;
- if ( unlikely(in_irq()) )
- BUG();
+ ASSERT(!in_irq());
if ( likely(!IS_XEN_HEAP_FRAME(page)) )
{
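
The loop touched above in alloc_domain_page() walks the page's cpu_mask and compares each CPU's TLB-flush clock with the stamp recorded when the page was freed, so CPUs that have flushed since then can be dropped from the mask. The exact comparison is not visible in this hunk; the sketch below assumes a simple "flushed after the stamp" rule and ignores clock wrap:

    #include <stdio.h>

    #define SMP_NUM_CPUS 4

    int main(void)
    {
        unsigned long tlbflush_time[SMP_NUM_CPUS] = { 10, 7, 12, 7 };  /* per-CPU flush clocks */
        unsigned long pfn_stamp = 8;   /* clock value stamped on the page when it was freed    */
        unsigned long mask = 0xf;      /* CPUs that may still cache a mapping of the page      */
        int i;

        for (i = 0; (mask != 0) && (i < SMP_NUM_CPUS); i++)
            if ((mask & (1UL << i)) && (tlbflush_time[i] > pfn_stamp))
                mask &= ~(1UL << i);   /* this CPU flushed after the stamp: no IPI needed      */

        printf("CPUs still needing a flush: 0x%lx\n", mask);
        return 0;
    }
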
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 15560b6609..e5d4db2e81 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -804,6 +804,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
unsigned long pfn = ptr >> PAGE_SHIFT;
unsigned long old_base_pfn;
struct pfn_info *page = &frame_table[pfn];
+ struct task_struct *p = current, *q;
switch ( cmd )
{
@@ -852,18 +853,18 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
break;
case MMUEXT_NEW_BASEPTR:
- okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, current);
+ okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, p);
if ( likely(okay) )
{
invalidate_shadow_ldt();
percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
- old_base_pfn = pagetable_val(current->mm.pagetable) >> PAGE_SHIFT;
- current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
+ old_base_pfn = pagetable_val(p->mm.pagetable) >> PAGE_SHIFT;
+ p->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
- shadow_mk_pagetable(&current->mm);
+ shadow_mk_pagetable(&p->mm);
- write_ptbase(&current->mm);
+ write_ptbase(&p->mm);
put_page_and_type(&frame_table[old_base_pfn]);
@@ -899,13 +900,13 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
okay = 0;
MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents);
}
- else if ( (current->mm.ldt_ents != ents) ||
- (current->mm.ldt_base != ptr) )
+ else if ( (p->mm.ldt_ents != ents) ||
+ (p->mm.ldt_base != ptr) )
{
invalidate_shadow_ldt();
- current->mm.ldt_base = ptr;
- current->mm.ldt_ents = ents;
- load_LDT(current);
+ p->mm.ldt_base = ptr;
+ p->mm.ldt_ents = ents;
+ load_LDT(p);
percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
if ( ents != 0 )
percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
@@ -921,10 +922,10 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
percpu_info[cpu].subject_id |=
((domid_t)((ptr&~0xFFFF)|(val>>16)))<<32;
- if ( !IS_PRIV(current) )
+ if ( !IS_PRIV(p) )
{
MEM_LOG("Dom %llu has no privilege to set subject domain",
- current->domain);
+ p->domain);
okay = 0;
}
else
@@ -943,19 +944,26 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
}
break;
- /* XXX This function is racey! */
case MMUEXT_REASSIGN_PAGE:
- if ( unlikely(!IS_PRIV(current)) )
+ if ( unlikely(!IS_PRIV(p)) )
{
MEM_LOG("Dom %llu has no privilege to reassign page ownership",
- current->domain);
+ p->domain);
okay = 0;
}
- else if ( likely(percpu_info[cpu].gps != NULL) )
+ else if ( likely((q = percpu_info[cpu].gps) != NULL) &&
+ likely(test_bit(_PGC_allocated, &page->count_and_flags)) &&
+ likely(page->u.domain == p) ) /* won't be smp-guest safe */
{
- current->tot_pages--;
- percpu_info[cpu].gps->tot_pages++;
- page->u.domain = percpu_info[cpu].gps;
+ spin_lock(&p->page_list_lock);
+ p->tot_pages--;
+ list_del(&page->list);
+ spin_unlock(&p->page_list_lock);
+ page->u.domain = q;
+ spin_lock(&q->page_list_lock);
+ q->tot_pages++;
+ list_add_tail(&page->list, &q->page_list);
+ spin_unlock(&q->page_list_lock);
}
else
{
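
The MMUEXT_REASSIGN_PAGE change above closes the old race by checking that the caller really owns the page and then moving it between the two domains' page lists, each side under its own page_list_lock. A reduced sketch of that hand-over (domains shrink to a counter plus a mutex, the page to an owner pointer; the list manipulation itself is omitted):

    #include <pthread.h>
    #include <stdio.h>

    struct domain {
        const char     *name;
        int             tot_pages;
        pthread_mutex_t page_list_lock;
    };

    struct pfn_info {
        struct domain  *owner;
    };

    static int reassign_page(struct pfn_info *page, struct domain *p, struct domain *q)
    {
        if (page->owner != p)              /* caller must own the page */
            return 0;

        pthread_mutex_lock(&p->page_list_lock);
        p->tot_pages--;                    /* detach from the old owner */
        pthread_mutex_unlock(&p->page_list_lock);

        page->owner = q;

        pthread_mutex_lock(&q->page_list_lock);
        q->tot_pages++;                    /* attach to the new owner */
        pthread_mutex_unlock(&q->page_list_lock);
        return 1;
    }

    int main(void)
    {
        struct domain dom0 = { "dom0", 1, PTHREAD_MUTEX_INITIALIZER };
        struct domain domU = { "domU", 0, PTHREAD_MUTEX_INITIALIZER };
        struct pfn_info page = { &dom0 };

        if (reassign_page(&page, &dom0, &domU))
            printf("%s: %d pages, %s: %d pages\n",
                   dom0.name, dom0.tot_pages, domU.name, domU.tot_pages);
        return 0;
    }
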
diff --git a/xen/include/asm-i386/pdb.h b/xen/include/asm-i386/pdb.h
index 68efcbccaf..2ed6a9a318 100644
--- a/xen/include/asm-i386/pdb.h
+++ b/xen/include/asm-i386/pdb.h
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <xen/list.h>
+#include <hypervisor-ifs/dom0_ops.h>
#include <hypervisor-ifs/hypervisor-if.h> /* for domain id */
extern int pdb_initialized;
@@ -37,6 +38,17 @@ extern int pdb_handle_exception(int exceptionVector,
extern int pdb_serial_input(u_char c, struct pt_regs *regs);
extern void pdb_do_debug(dom0_op_t *op);
+/* PDB Context. */
+struct pdb_context
+{
+ int valid;
+ int domain;
+ int process;
+ int system_call; /* 0x01 break on enter, 0x02 break on exit */
+ unsigned long ptbr;
+};
+extern struct pdb_context pdb_ctx;
+
/* Breakpoints. */
struct pdb_breakpoint
{
@@ -56,4 +68,21 @@ extern char *mem2hex (char *, char *, int);
extern char *hex2mem (char *, char *, int);
extern int hexToInt (char **ptr, int *intValue);
+/* Temporary Linux specific definitions */
+extern int pdb_system_call;
+extern unsigned char pdb_system_call_enter_instr; /* original enter instr */
+extern unsigned char pdb_system_call_leave_instr; /* original next instr */
+extern unsigned long pdb_system_call_next_addr; /* instr after int 0x80 */
+extern unsigned long pdb_system_call_eflags_addr; /* saved eflags on stack */
+
+unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
+void pdb_linux_get_values(char *buffer, int length, unsigned long address,
+ int pid, unsigned long cr3);
+void pdb_linux_set_values(char *buffer, int length, unsigned long address,
+ int pid, unsigned long cr3);
+void pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code,
+ trap_info_t *ti);
+void pdb_linux_syscall_exit_bkpt (struct pt_regs *regs,
+ struct pdb_context *pdb_ctx);
+
#endif /* __PDB_H__ */
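
The system_call field added to struct pdb_context above is a two-bit mask: 0x01 requests a break when a traced process enters a system call, 0x02 when it returns to user space. A trivial sketch of how a handler might test it (the macro and function names are invented for the example):

    #include <stdio.h>

    #define PDB_BREAK_ON_SYSCALL_ENTER 0x01
    #define PDB_BREAK_ON_SYSCALL_EXIT  0x02

    static void report(int system_call)
    {
        printf("break on enter: %s, break on exit: %s\n",
               (system_call & PDB_BREAK_ON_SYSCALL_ENTER) ? "yes" : "no",
               (system_call & PDB_BREAK_ON_SYSCALL_EXIT)  ? "yes" : "no");
    }

    int main(void)
    {
        report(0x00);
        report(0x01);
        report(0x03);
        return 0;
    }
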
diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h
index f16acb19c3..c2e36d6191 100644
--- a/xen/include/asm-i386/processor.h
+++ b/xen/include/asm-i386/processor.h
@@ -12,6 +12,7 @@
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/flushtlb.h>
+#include <asm/pdb.h>
#include <xen/config.h>
#include <xen/spinlock.h>
#include <hypervisor-ifs/hypervisor-if.h>
@@ -406,8 +407,9 @@ extern struct desc_struct *idt_tables[];
0, 8))
#define SET_FAST_TRAP(_p) \
- (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- &((_p)->fast_trap_desc), 8))
+ (pdb_initialized ? (void *) 0 : \
+ (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ &((_p)->fast_trap_desc), 8)))
long set_fast_trap(struct task_struct *p, int idx);
diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h
index bf06719e35..850edf96c0 100644
--- a/xen/include/xen/perfc_defn.h
+++ b/xen/include/xen/perfc_defn.h
@@ -1,5 +1,6 @@
PERFCOUNTER_CPU( irqs, "#interrupts" )
+PERFCOUNTER_CPU( ipis, "#IPIs" )
PERFCOUNTER_CPU( irq_time, "cycles spent in irq handler" )
PERFCOUNTER_CPU( apic_timer, "apic timer interrupts" )