diff options
author | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2004-05-15 09:55:40 +0000 |
---|---|---|
committer | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2004-05-15 09:55:40 +0000 |
commit | 5080a3f4b2b79dfba61e45ebbefc83531338e20e (patch) | |
tree | c041e3970d3180d897d08f3797305926f9ebd383 /xen | |
parent | 5a258f039214b3d6cc25477ba02a48ee3df14732 (diff) | |
parent | 8a1284a0b37dc0b0522b80ac78f84047ab6278ae (diff) | |
download | xen-5080a3f4b2b79dfba61e45ebbefc83531338e20e.tar.gz xen-5080a3f4b2b79dfba61e45ebbefc83531338e20e.tar.bz2 xen-5080a3f4b2b79dfba61e45ebbefc83531338e20e.zip |
bitkeeper revision 1.906 (40a5e91cnvIS_3gLwfnD2G3HV3odHA)
manual merge
Diffstat (limited to 'xen')
-rw-r--r-- | xen/arch/i386/entry.S | 9 | ||||
-rw-r--r-- | xen/arch/i386/pdb-linux.c | 100 | ||||
-rw-r--r-- | xen/arch/i386/pdb-stub.c | 174 | ||||
-rw-r--r-- | xen/arch/i386/smp.c | 62 | ||||
-rw-r--r-- | xen/arch/i386/traps.c | 57 | ||||
-rw-r--r-- | xen/common/dom0_ops.c | 29 | ||||
-rw-r--r-- | xen/common/domain.c | 16 | ||||
-rw-r--r-- | xen/common/memory.c | 46 | ||||
-rw-r--r-- | xen/include/asm-i386/pdb.h | 29 | ||||
-rw-r--r-- | xen/include/asm-i386/processor.h | 6 | ||||
-rw-r--r-- | xen/include/xen/perfc_defn.h | 1 |
11 files changed, 361 insertions, 168 deletions
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index 1d55a51617..d63f383017 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -537,14 +537,7 @@ error_code: movl %edx,%es movl %edx,%fs movl %edx,%gs - # We force a STI here. In most cases it is illegal to fault with - # interrupts disabled, so no need to check EFLAGS. There is one - # case when it /is/ valid -- on final return to guest context, we - # CLI so we can atomically check for events to notify guest about and - # return, all in one go. If we fault it is necessary to STI and the - # worst that will happen is that our return code is no longer atomic. - # This will do -- noone will ever notice. :-) - sti + movl %esp,%edx pushl %esi # push the error code pushl %edx # push the pt_regs pointer GET_CURRENT(%ebx) diff --git a/xen/arch/i386/pdb-linux.c b/xen/arch/i386/pdb-linux.c new file mode 100644 index 0000000000..fd0fc5ed78 --- /dev/null +++ b/xen/arch/i386/pdb-linux.c @@ -0,0 +1,100 @@ + +/* + * pervasive debugger + * www.cl.cam.ac.uk/netos/pdb + * + * alex ho + * 2004 + * university of cambridge computer laboratory + * + * linux & i386 dependent code. bleech. 
+ */ + +#include <asm/pdb.h> + +/* offset to the first instruction in the linux system call code + where we can safely set a breakpoint */ +unsigned int pdb_linux_syscall_enter_bkpt_offset = 20; + +/* offset to eflags saved on the stack after an int 80 */ +unsigned int pdb_linux_syscall_eflags_offset = 48; + +/* offset to the instruction pointer saved on the stack after an int 80 */ +unsigned int pdb_linux_syscall_eip_offset = 40; + +unsigned char +pdb_linux_set_bkpt (unsigned long addr) +{ + unsigned char old_instruction = *(unsigned char *)addr; + *(unsigned char *)addr = 0xcc; + return old_instruction; +} + +void +pdb_linux_clr_bkpt (unsigned long addr, unsigned char value) +{ + *(unsigned char *)addr = value; +} + +void +pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code, + trap_info_t *ti) +{ + /* set at breakpoint at the beginning of the + system call in the target domain */ + + pdb_system_call_enter_instr = pdb_linux_set_bkpt(ti->address + + pdb_linux_syscall_enter_bkpt_offset); + pdb_system_call = 1; +} + +void +pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, struct pdb_context *pdb_ctx) +{ + /* + we've hit an int 0x80 in a user's program, jumped into xen + (traps.c::do_general_protection()) which re-wrote the next + instruction in the os kernel to 0xcc, and then hit that + exception. + + we need to re-write the return instruction in the user's + program so that we know when we have finished the system call + and are back in the user's program. 
+ + at this point our stack should look something like this: + + esp = 0x80a59f0 + esp + 4 = 0x0 + esp + 8 = 0x80485a0 + esp + 12 = 0x2d + esp + 16 = 0x80485f4 + esp + 20 = 0xbffffa48 + esp + 24 = 0xd + esp + 28 = 0xc00a0833 + esp + 32 = 0x833 + esp + 36 = 0xd + esp + 40 = 0x804dcdd saved eip + esp + 44 = 0x82b saved cs + esp + 48 = 0x213392 saved eflags + esp + 52 = 0xbffffa2c saved esp + esp + 56 = 0x833 saved ss + esp + 60 = 0x1000000 + */ + + /* restore the entry instruction for the system call */ + pdb_linux_clr_bkpt(regs->eip - 1, pdb_system_call_enter_instr); + + /* save the address of eflags that was saved on the stack */ + pdb_system_call_eflags_addr = (regs->esp + + pdb_linux_syscall_eflags_offset); + + /* muck with the return instruction so that we trap back into the + debugger when re-entering user space */ + pdb_system_call_next_addr = *(unsigned long *)(regs->esp + + pdb_linux_syscall_eip_offset); + pdb_linux_get_values (&pdb_system_call_leave_instr, 1, + pdb_system_call_next_addr, + pdb_ctx->process, pdb_ctx->ptbr); + pdb_linux_set_values ("cc", 1, pdb_system_call_next_addr, + pdb_ctx->process, pdb_ctx->ptbr); +} diff --git a/xen/arch/i386/pdb-stub.c b/xen/arch/i386/pdb-stub.c index 5b42e9a746..63320561dc 100644 --- a/xen/arch/i386/pdb-stub.c +++ b/xen/arch/i386/pdb-stub.c @@ -47,26 +47,13 @@ static int pdb_in_buffer_ptr; static unsigned char pdb_in_checksum; static unsigned char pdb_xmit_checksum; -/* function pointers in the near future... 
*/ -unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid); -void pdb_linux_get_values(char *buffer, int length, unsigned long address, - int pid, unsigned long cr3); -void pdb_linux_set_values(char *buffer, int length, unsigned long address, - int pid, unsigned long cr3); - -struct pdb_context -{ - int valid; - int domain; - int process; - unsigned long ptbr; /* cached page table base register */ -}; struct pdb_context pdb_ctx; - int pdb_continue_thread = 0; int pdb_general_thread = 0; void pdb_put_packet (unsigned char *buffer, int ack); +void pdb_bkpt_check (u_char *buffer, int length, + unsigned long cr3, unsigned long addr); int pdb_initialized = 0; int pdb_page_fault_possible = 0; @@ -75,6 +62,12 @@ int pdb_page_fault = 0; static int pdb_serhnd = -1; static int pdb_stepping = 0; +int pdb_system_call = 0; +unsigned char pdb_system_call_enter_instr = 0; /* original enter instr */ +unsigned char pdb_system_call_leave_instr = 0; /* original next instr */ +unsigned long pdb_system_call_next_addr = 0; /* instr after int 0x80 */ +unsigned long pdb_system_call_eflags_addr = 0; /* saved eflags on stack */ + static inline void pdb_put_char(unsigned char c) { serial_putc(pdb_serhnd, c); @@ -406,15 +399,49 @@ pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3, break; case 'S': /* step with signal */ case 's': /* step */ + { + if ( pdb_system_call_eflags_addr != 0 ) + { + unsigned long eflags; + char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */ + + pdb_linux_get_values((u_char*)&eflags, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + eflags |= X86_EFLAGS_TF; + mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags)); + pdb_linux_set_values(eflags_buf, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + } + regs->eflags |= X86_EFLAGS_TF; pdb_stepping = 1; return 1; /* not reached */ + } case 'C': /* continue with signal */ case 'c': /* continue */ + { + if ( 
pdb_system_call_eflags_addr != 0 ) + { + unsigned long eflags; + char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */ + + pdb_linux_get_values((u_char*)&eflags, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + eflags &= ~X86_EFLAGS_TF; + mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags)); + pdb_linux_set_values(eflags_buf, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + } + regs->eflags &= ~X86_EFLAGS_TF; return 1; /* jump out before replying to gdb */ /* not reached */ + } case 'd': remote_debug = !(remote_debug); /* toggle debug flag */ break; @@ -424,54 +451,11 @@ pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3, case 'g': /* return the value of the CPU registers */ { pdb_x86_to_gdb_regs (pdb_out_buffer, regs); - - /* - printk (" reg: %s", pdb_out_buffer); - printk ("\n"); - printk (" eax: 0x%08lx\n", regs->eax); - printk (" ecx: 0x%08lx\n", regs->ecx); - printk (" edx: 0x%08lx\n", regs->edx); - printk (" ebx: 0x%08lx\n", regs->ebx); - printk (" esp: 0x%08lx\n", regs->esp); - printk (" ebp: 0x%08lx\n", regs->ebp); - printk (" esi: 0x%08lx\n", regs->esi); - printk (" edi: 0x%08lx\n", regs->edi); - printk (" eip: 0x%08lx\n", regs->eip); - printk (" efl: 0x%08lx\n", regs->eflags); - printk (" xcs: 0x%08x\n", regs->xcs); - printk (" xss: 0x%08x\n", regs->xss); - printk (" xds: 0x%08x\n", regs->xds); - printk (" xes: 0x%08x\n", regs->xes); - printk (" xfs: 0x%08x\n", regs->xfs); - printk (" xgs: 0x%08x\n", regs->xgs); - */ - break; } case 'G': /* set the value of the CPU registers - return OK */ { pdb_gdb_to_x86_regs (regs, ptr); - - /* - printk (" ptr: %s \n\n", ptr); - printk (" eax: 0x%08lx\n", regs->eax); - printk (" ecx: 0x%08lx\n", regs->ecx); - printk (" edx: 0x%08lx\n", regs->edx); - printk (" ebx: 0x%08lx\n", regs->ebx); - printk (" esp: 0x%08lx\n", regs->esp); - printk (" ebp: 0x%08lx\n", regs->ebp); - printk (" esi: 0x%08lx\n", regs->esi); - printk (" 
edi: 0x%08lx\n", regs->edi); - printk (" eip: 0x%08lx\n", regs->eip); - printk (" efl: 0x%08lx\n", regs->eflags); - printk (" xcs: 0x%08x\n", regs->xcs); - printk (" xss: 0x%08x\n", regs->xss); - printk (" xds: 0x%08x\n", regs->xds); - printk (" xes: 0x%08x\n", regs->xes); - printk (" xfs: 0x%08x\n", regs->xfs); - printk (" xgs: 0x%08x\n", regs->xgs); - */ - break; } case 'H': @@ -572,17 +556,20 @@ pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3, if (addr >= PAGE_OFFSET) { hex2mem (ptr, (char *)addr, length); + pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); } else if (pdb_ctx.process != -1) { pdb_linux_set_values(ptr, length, addr, pdb_ctx.process, pdb_ctx.ptbr); + pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); } else { pdb_set_values (ptr, length, pdb_ctx.ptbr, addr); + pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); } pdb_page_fault_possible = 0; if (pdb_page_fault) @@ -936,7 +923,6 @@ int pdb_set_values(u_char *buffer, int length, unsigned long cr3, unsigned long addr) { int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL); - pdb_bkpt_check(buffer, length, cr3, addr); return count; } @@ -1176,16 +1162,35 @@ int pdb_handle_exception(int exceptionVector, __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + /* If the exception is an int3 from user space then pdb is only + interested if it re-wrote an instruction set the breakpoint. + This occurs when leaving a system call from a domain. + */ + if ( exceptionVector == 3 && + (xen_regs->xcs & 3) == 3 && + xen_regs->eip != pdb_system_call_next_addr + 1) + { + TRC(printf("pdb: user bkpt (0x%x) at 0x%x:0x%lx:0x%lx\n", + exceptionVector, xen_regs->xcs & 3, cr3, xen_regs->eip)); + return 1; + } + /* - * If PDB didn't set the breakpoint, is not single stepping, and the user - * didn't press the magic debug key, then we don't handle the exception. 
+ * If PDB didn't set the breakpoint, is not single stepping, + * is not entering a system call in a domain, + * the user didn't press the magic debug key, + * then we don't handle the exception. */ bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1); if ( (bkpt == NULL) && - !pdb_stepping && (exceptionVector != KEYPRESS_EXCEPTION) && + !pdb_stepping && + !pdb_system_call && + xen_regs->eip != pdb_system_call_next_addr + 1 && + (exceptionVector != KEYPRESS_EXCEPTION) && xen_regs->eip < 0xc0000000) /* xenolinux for now! */ { - TRC(printf("pdb: user bkpt at 0x%lx:0x%lx\n", cr3, xen_regs->eip)); + TRC(printf("pdb: user bkpt (0x%x) at 0x%lx:0x%lx\n", + exceptionVector, cr3, xen_regs->eip)); return 1; } @@ -1199,12 +1204,54 @@ int pdb_handle_exception(int exceptionVector, pdb_stepping = 0; } + if ( pdb_system_call ) + { + pdb_system_call = 0; + + pdb_linux_syscall_exit_bkpt (xen_regs, &pdb_ctx); + + /* we don't have a saved breakpoint so we need to rewind eip */ + xen_regs->eip--; + + /* if ther user doesn't care about breaking when entering a + system call then we'll just ignore the exception */ + if ( (pdb_ctx.system_call & 0x01) == 0 ) + { + return 0; + } + } + if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL) { /* Executed Int3: replace breakpoint byte with real program byte. 
*/ xen_regs->eip--; } + /* returning to user space after a system call */ + if ( xen_regs->eip == pdb_system_call_next_addr + 1) + { + u_char instr[2]; /* REALLY REALLY REALLY STUPID */ + + mem2hex (&pdb_system_call_leave_instr, instr, sizeof(instr)); + + pdb_linux_set_values (instr, 1, pdb_system_call_next_addr, + pdb_ctx.process, pdb_ctx.ptbr); + + pdb_system_call_next_addr = 0; + pdb_system_call_leave_instr = 0; + + /* manually rewind eip */ + xen_regs->eip--; + + /* if the user doesn't care about breaking when returning + to user space after a system call then we'll just ignore + the exception */ + if ( (pdb_ctx.system_call & 0x02) == 0 ) + { + return 0; + } + } + /* Generate a signal for GDB. */ switch ( exceptionVector ) { @@ -1267,6 +1314,7 @@ void initialize_pdb() pdb_ctx.valid = 1; pdb_ctx.domain = -1; pdb_ctx.process = -1; + pdb_ctx.system_call = 0; pdb_ctx.ptbr = 0; printk("pdb: pervasive debugger (%s) www.cl.cam.ac.uk/netos/pdb\n", diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c index 5ed43d5551..4989fc5085 100644 --- a/xen/arch/i386/smp.c +++ b/xen/arch/i386/smp.c @@ -21,15 +21,6 @@ #ifdef CONFIG_SMP /* - * This lock must be acquired before sending a synchronous IPI to another - * CPU (i.e., IPI + spin waiting for acknowledgement). The only safe ways of - * acquiring the lock are spin_lock() and spin_trylock(). The former is only - * safe if local interrupts are enabled (otherwise we will never see an IPI - * destined for us which we must acknowledge for the lock to be released). - */ -static spinlock_t synchronous_ipi_lock = SPIN_LOCK_UNLOCKED; - -/* * Some notes on x86 processor bugs affecting SMP operation: * * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. @@ -220,16 +211,18 @@ static inline void send_IPI_allbutself(int vector) * 2) Leave the mm if we are in the lazy tlb mode. 
*/ +static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; static volatile unsigned long flush_cpumask; -#define FLUSH_ALL 0xffffffff asmlinkage void smp_invalidate_interrupt(void) { ack_APIC_irq(); - local_flush_tlb(); - clear_bit(smp_processor_id(), &flush_cpumask); + perfc_incrc(ipis); + if ( likely(test_and_clear_bit(smp_processor_id(), &flush_cpumask)) ) + local_flush_tlb(); } +#ifndef NO_DEVICES_IN_XEN int try_flush_tlb_mask(unsigned long mask) { if ( mask & (1 << smp_processor_id()) ) @@ -240,7 +233,7 @@ int try_flush_tlb_mask(unsigned long mask) if ( mask != 0 ) { - if ( unlikely(!spin_trylock(&synchronous_ipi_lock)) ) + if ( unlikely(!spin_trylock(&flush_lock)) ) return 0; flush_cpumask = mask; send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); @@ -249,15 +242,16 @@ int try_flush_tlb_mask(unsigned long mask) rep_nop(); barrier(); } - spin_unlock(&synchronous_ipi_lock); + spin_unlock(&flush_lock); } return 1; } +#endif void flush_tlb_mask(unsigned long mask) { - ASSERT(local_irq_is_enabled()); + ASSERT(!in_irq()); if ( mask & (1 << smp_processor_id()) ) { @@ -267,7 +261,21 @@ void flush_tlb_mask(unsigned long mask) if ( mask != 0 ) { - spin_lock(&synchronous_ipi_lock); + /* + * We are certainly not reentering a flush_lock region on this CPU + * because we are not in an IRQ context. We can therefore wait for the + * other guy to release the lock. This is harder than it sounds because + * local interrupts might be disabled, and he may be waiting for us to + * execute smp_invalidate_interrupt(). We deal with this possibility by + * inlining the meat of that function here. 
+ */ + while ( unlikely(!spin_trylock(&flush_lock)) ) + { + if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) ) + local_flush_tlb(); + rep_nop(); + } + flush_cpumask = mask; send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); while ( flush_cpumask != 0 ) @@ -275,13 +283,15 @@ void flush_tlb_mask(unsigned long mask) rep_nop(); barrier(); } - spin_unlock(&synchronous_ipi_lock); + + spin_unlock(&flush_lock); } } void new_tlbflush_clock_period(void) { - if ( unlikely(!spin_trylock(&synchronous_ipi_lock)) ) + /* Avoid deadlock because we might be reentering a flush_lock region. */ + if ( unlikely(!spin_trylock(&flush_lock)) ) return; /* Someone may acquire the lock and execute the flush before us. */ @@ -304,7 +314,7 @@ void new_tlbflush_clock_period(void) tlbflush_clock++; out: - spin_unlock(&synchronous_ipi_lock); + spin_unlock(&flush_lock); } static void flush_tlb_all_pge_ipi(void* info) @@ -323,6 +333,12 @@ void smp_send_event_check_mask(unsigned long cpu_mask) send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR); } +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. 
+ */ +static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; + struct call_data_struct { void (*func) (void *info); void *info; @@ -368,7 +384,8 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, ASSERT(local_irq_is_enabled()); - spin_lock(&synchronous_ipi_lock); + spin_lock(&call_lock); + call_data = &data; wmb(); /* Send a message to all other CPUs and wait for them to respond */ @@ -382,7 +399,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, while (atomic_read(&data.finished) != cpus) barrier(); - spin_unlock(&synchronous_ipi_lock); + spin_unlock(&call_lock); return 0; } @@ -419,6 +436,7 @@ void smp_send_stop(void) asmlinkage void smp_event_check_interrupt(void) { ack_APIC_irq(); + perfc_incrc(ipis); } asmlinkage void smp_call_function_interrupt(void) @@ -428,6 +446,8 @@ asmlinkage void smp_call_function_interrupt(void) int wait = call_data->wait; ack_APIC_irq(); + perfc_incrc(ipis); + /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c index 1457617d29..7250074420 100644 --- a/xen/arch/i386/traps.c +++ b/xen/arch/i386/traps.c @@ -107,36 +107,9 @@ static inline int kernel_text_address(unsigned long addr) } - -void show_trace(unsigned long * stack) -{ - int i; - unsigned long addr; - - printk("Call Trace: "); - i = 1; - while (((long) stack & (STACK_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - if (i && ((i % 6) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); - i++; - } - } - printk("\n"); -} - -void show_traceX(void) -{ - unsigned long *addr; - __asm__ __volatile__ ("movl %%esp,%0" : "=r" (addr) : ); - show_trace(addr); -} - void show_stack(unsigned long *esp) { - unsigned long *stack; + unsigned long *stack, addr; int i; printk("Stack trace from ESP=%p:\n", esp); @@ -154,6 +127,20 @@ void show_stack(unsigned long *esp) printk("%08lx ", *stack++); } printk("\n"); + + 
printk("Call Trace from ESP=%p: ", esp); + stack = esp; + i = 0; + while (((long) stack & (STACK_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); } void show_registers(struct pt_regs *regs) @@ -250,7 +237,6 @@ DO_ERROR_NOCODE( 0, "divide error", divide_error) DO_ERROR_NOCODE( 4, "overflow", overflow) DO_ERROR_NOCODE( 5, "bounds", bounds) DO_ERROR_NOCODE( 6, "invalid operand", invalid_op) -DO_ERROR_NOCODE( 7, "device not available", device_not_available) DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, "invalid TSS", invalid_TSS) DO_ERROR(11, "segment not present", segment_not_present) @@ -267,10 +253,10 @@ asmlinkage void do_int3(struct pt_regs *regs, long error_code) struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); trap_info_t *ti; + if ( pdb_handle_exception(3, regs) == 0 ) + return; if ( (regs->xcs & 3) != 3 ) { - if ( pdb_handle_exception(3, regs) == 0 ) - return; if ( unlikely((regs->xcs & 3) == 0) ) { show_registers(regs); @@ -445,6 +431,15 @@ asmlinkage void do_general_protection(struct pt_regs *regs, long error_code) ti = current->thread.traps + (error_code>>3); if ( TI_GET_DPL(ti) >= (regs->xcs & 3) ) { + unsigned long cr3; + + __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + if (pdb_initialized && pdb_ctx.system_call != 0 && + cr3 == pdb_ctx.ptbr) + { + pdb_linux_syscall_enter_bkpt(regs, error_code, ti); + } + gtb->flags = GTBF_TRAP_NOCODE; regs->eip += 2; goto finish_propagation; diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index f768edd5ff..32fa532c9a 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -28,15 +28,6 @@ extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int); -/* Basically used to protect the domain-id space. 
*/ -static spinlock_t create_dom_lock = SPIN_LOCK_UNLOCKED; - -static domid_t get_domnr(void) -{ - static domid_t domnr = 0; - return ++domnr; -} - static int msr_cpu_mask; static unsigned long msr_addr; static unsigned long msr_lo; @@ -117,23 +108,24 @@ long do_dom0_op(dom0_op_t *u_dom0_op) case DOM0_CREATEDOMAIN: { struct task_struct *p; - static unsigned int pro = 0; + static domid_t domnr = 0; + static spinlock_t domnr_lock = SPIN_LOCK_UNLOCKED; + unsigned int pro; domid_t dom; ret = -ENOMEM; - spin_lock_irq(&create_dom_lock); - - if ( (dom = get_domnr()) == 0 ) - goto exit_create; + spin_lock(&domnr_lock); + dom = ++domnr; + spin_unlock(&domnr_lock); if (op->u.createdomain.cpu == -1 ) - pro = (pro+1) % smp_num_cpus; + pro = (unsigned int)dom % smp_num_cpus; else pro = op->u.createdomain.cpu % smp_num_cpus; p = do_createdomain(dom, pro); if ( p == NULL ) - goto exit_create; + break; if ( op->u.createdomain.name[0] ) { @@ -145,16 +137,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( ret != 0 ) { __kill_domain(p); - goto exit_create; + break; } ret = 0; op->u.createdomain.domain = p->domain; copy_to_user(u_dom0_op, op, sizeof(*op)); - - exit_create: - spin_unlock_irq(&create_dom_lock); } break; diff --git a/xen/common/domain.c b/xen/common/domain.c index 88cc659711..df44c4600b 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -292,6 +292,12 @@ struct pfn_info *alloc_domain_page(struct task_struct *p) unsigned long flags, mask, pfn_stamp, cpu_stamp; int i; +#ifdef NO_DEVICES_IN_XEN + ASSERT(!in_irq()); +#else + ASSERT((p != NULL) || !in_irq()); +#endif + spin_lock_irqsave(&free_list_lock, flags); if ( likely(!list_empty(&free_list)) ) { @@ -307,7 +313,7 @@ struct pfn_info *alloc_domain_page(struct task_struct *p) if ( (mask = page->u.cpu_mask) != 0 ) { pfn_stamp = page->tlbflush_timestamp; - for ( i = 0; (mask != 0) && (i < NR_CPUS); i++ ) + for ( i = 0; (mask != 0) && (i < smp_num_cpus); i++ ) { if ( mask & (1<<i) ) { @@ -319,11 +325,15 @@ struct 
pfn_info *alloc_domain_page(struct task_struct *p) if ( unlikely(mask != 0) ) { +#ifdef NO_DEVICES_IN_XEN + flush_tlb_mask(mask); +#else /* In IRQ ctxt, flushing is best-effort only, to avoid deadlock. */ if ( likely(!in_irq()) ) flush_tlb_mask(mask); else if ( unlikely(!try_flush_tlb_mask(mask)) ) goto free_and_exit; +#endif perfc_incrc(need_flush_tlb_flush); } } @@ -332,7 +342,6 @@ struct pfn_info *alloc_domain_page(struct task_struct *p) page->type_and_flags = 0; if ( p != NULL ) { - ASSERT(!in_irq()); wmb(); /* Domain pointer must be visible before updating refcnt. */ spin_lock(&p->page_list_lock); if ( unlikely(p->tot_pages >= p->max_pages) ) @@ -363,8 +372,7 @@ void free_domain_page(struct pfn_info *page) unsigned long flags; struct task_struct *p = page->u.domain; - if ( unlikely(in_irq()) ) - BUG(); + ASSERT(!in_irq()); if ( likely(!IS_XEN_HEAP_FRAME(page)) ) { diff --git a/xen/common/memory.c b/xen/common/memory.c index 15560b6609..e5d4db2e81 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -804,6 +804,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val) unsigned long pfn = ptr >> PAGE_SHIFT; unsigned long old_base_pfn; struct pfn_info *page = &frame_table[pfn]; + struct task_struct *p = current, *q; switch ( cmd ) { @@ -852,18 +853,18 @@ static int do_extended_command(unsigned long ptr, unsigned long val) break; case MMUEXT_NEW_BASEPTR: - okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, current); + okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, p); if ( likely(okay) ) { invalidate_shadow_ldt(); percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; - old_base_pfn = pagetable_val(current->mm.pagetable) >> PAGE_SHIFT; - current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); + old_base_pfn = pagetable_val(p->mm.pagetable) >> PAGE_SHIFT; + p->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); - shadow_mk_pagetable(&current->mm); + shadow_mk_pagetable(&p->mm); - write_ptbase(&current->mm); + write_ptbase(&p->mm);
put_page_and_type(&frame_table[old_base_pfn]); @@ -899,13 +900,13 @@ static int do_extended_command(unsigned long ptr, unsigned long val) okay = 0; MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents); } - else if ( (current->mm.ldt_ents != ents) || - (current->mm.ldt_base != ptr) ) + else if ( (p->mm.ldt_ents != ents) || + (p->mm.ldt_base != ptr) ) { invalidate_shadow_ldt(); - current->mm.ldt_base = ptr; - current->mm.ldt_ents = ents; - load_LDT(current); + p->mm.ldt_base = ptr; + p->mm.ldt_ents = ents; + load_LDT(p); percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; if ( ents != 0 ) percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; @@ -921,10 +922,10 @@ static int do_extended_command(unsigned long ptr, unsigned long val) percpu_info[cpu].subject_id |= ((domid_t)((ptr&~0xFFFF)|(val>>16)))<<32; - if ( !IS_PRIV(current) ) + if ( !IS_PRIV(p) ) { MEM_LOG("Dom %llu has no privilege to set subject domain", - current->domain); + p->domain); okay = 0; } else @@ -943,19 +944,26 @@ static int do_extended_command(unsigned long ptr, unsigned long val) } break; - /* XXX This function is racey! 
*/ case MMUEXT_REASSIGN_PAGE: - if ( unlikely(!IS_PRIV(current)) ) + if ( unlikely(!IS_PRIV(p)) ) { MEM_LOG("Dom %llu has no privilege to reassign page ownership", - current->domain); + p->domain); okay = 0; } - else if ( likely(percpu_info[cpu].gps != NULL) ) + else if ( likely((q = percpu_info[cpu].gps) != NULL) && + likely(test_bit(_PGC_allocated, &page->count_and_flags)) && + likely(page->u.domain == p) ) /* won't be smp-guest safe */ { - current->tot_pages--; - percpu_info[cpu].gps->tot_pages++; - page->u.domain = percpu_info[cpu].gps; + spin_lock(&p->page_list_lock); + p->tot_pages--; + list_del(&page->list); + spin_unlock(&p->page_list_lock); + page->u.domain = q; + spin_lock(&q->page_list_lock); + q->tot_pages++; + list_add_tail(&page->list, &q->page_list); + spin_unlock(&q->page_list_lock); } else { diff --git a/xen/include/asm-i386/pdb.h b/xen/include/asm-i386/pdb.h index 68efcbccaf..2ed6a9a318 100644 --- a/xen/include/asm-i386/pdb.h +++ b/xen/include/asm-i386/pdb.h @@ -14,6 +14,7 @@ #include <asm/ptrace.h> #include <xen/list.h> +#include <hypervisor-ifs/dom0_ops.h> #include <hypervisor-ifs/hypervisor-if.h> /* for domain id */ extern int pdb_initialized; @@ -37,6 +38,17 @@ extern int pdb_handle_exception(int exceptionVector, extern int pdb_serial_input(u_char c, struct pt_regs *regs); extern void pdb_do_debug(dom0_op_t *op); +/* PDB Context. */ +struct pdb_context +{ + int valid; + int domain; + int process; + int system_call; /* 0x01 break on enter, 0x02 break on exit */ + unsigned long ptbr; +}; +extern struct pdb_context pdb_ctx; + /* Breakpoints. 
*/ struct pdb_breakpoint { @@ -56,4 +68,21 @@ extern char *mem2hex (char *, char *, int); extern char *hex2mem (char *, char *, int); extern int hexToInt (char **ptr, int *intValue); +/* Temporary Linux specific definitions */ +extern int pdb_system_call; +extern unsigned char pdb_system_call_enter_instr; /* original enter instr */ +extern unsigned char pdb_system_call_leave_instr; /* original next instr */ +extern unsigned long pdb_system_call_next_addr; /* instr after int 0x80 */ +extern unsigned long pdb_system_call_eflags_addr; /* saved eflags on stack */ + +unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid); +void pdb_linux_get_values(char *buffer, int length, unsigned long address, + int pid, unsigned long cr3); +void pdb_linux_set_values(char *buffer, int length, unsigned long address, + int pid, unsigned long cr3); +void pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code, + trap_info_t *ti); +void pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, + struct pdb_context *pdb_ctx); + #endif /* __PDB_H__ */ diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h index f16acb19c3..c2e36d6191 100644 --- a/xen/include/asm-i386/processor.h +++ b/xen/include/asm-i386/processor.h @@ -12,6 +12,7 @@ #include <asm/cpufeature.h> #include <asm/desc.h> #include <asm/flushtlb.h> +#include <asm/pdb.h> #include <xen/config.h> #include <xen/spinlock.h> #include <hypervisor-ifs/hypervisor-if.h> @@ -406,8 +407,9 @@ extern struct desc_struct *idt_tables[]; 0, 8)) #define SET_FAST_TRAP(_p) \ - (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ - &((_p)->fast_trap_desc), 8)) + (pdb_initialized ? 
(void *) 0 : \ + (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ + &((_p)->fast_trap_desc), 8))) long set_fast_trap(struct task_struct *p, int idx); diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h index bf06719e35..850edf96c0 100644 --- a/xen/include/xen/perfc_defn.h +++ b/xen/include/xen/perfc_defn.h @@ -1,5 +1,6 @@ PERFCOUNTER_CPU( irqs, "#interrupts" ) +PERFCOUNTER_CPU( ipis, "#IPIs" ) PERFCOUNTER_CPU( irq_time, "cycles spent in irq handler" ) PERFCOUNTER_CPU( apic_timer, "apic timer interrupts" ) |