diff options
author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2003-10-03 16:36:21 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2003-10-03 16:36:21 +0000 |
commit | 922fddb0355189b9e0b4a85bca04d03569b7ad56 (patch) | |
tree | a7545d054999735a5ad4289f36fb6960ce546842 /xen | |
parent | 507e2b770da59d177a72eebe7b30b01a07fb1ff4 (diff) | |
download | xen-922fddb0355189b9e0b4a85bca04d03569b7ad56.tar.gz xen-922fddb0355189b9e0b4a85bca04d03569b7ad56.tar.bz2 xen-922fddb0355189b9e0b4a85bca04d03569b7ad56.zip |
bitkeeper revision 1.478 (3f7da585sVsHrnjL3JadH9su8Cax_g)
Many files:
We now have one ring-0 stack per CPU, rather than one per domain. Register state is saved to a guest-accessible context slot, so we can do scheduler activations or similar real soon now...
Diffstat (limited to 'xen')
-rw-r--r-- | xen/arch/i386/boot/boot.S | 8 | ||||
-rw-r--r-- | xen/arch/i386/entry.S | 9 | ||||
-rw-r--r-- | xen/arch/i386/idle0_task.c | 7 | ||||
-rw-r--r-- | xen/arch/i386/process.c | 149 | ||||
-rw-r--r-- | xen/arch/i386/setup.c | 7 | ||||
-rw-r--r-- | xen/arch/i386/smpboot.c | 54 | ||||
-rw-r--r-- | xen/arch/i386/traps.c | 22 | ||||
-rw-r--r-- | xen/common/kernel.c | 16 | ||||
-rw-r--r-- | xen/common/schedule.c | 47 | ||||
-rw-r--r-- | xen/include/asm-i386/current.h | 39 | ||||
-rw-r--r-- | xen/include/asm-i386/irq.h | 4 | ||||
-rw-r--r-- | xen/include/asm-i386/processor.h | 34 | ||||
-rw-r--r-- | xen/include/asm-i386/ptrace.h | 1 | ||||
-rw-r--r-- | xen/include/asm-i386/system.h | 30 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/hypervisor-if.h | 23 | ||||
-rw-r--r-- | xen/include/xeno/perfc_defn.h | 3 | ||||
-rw-r--r-- | xen/include/xeno/sched.h | 27 |
17 files changed, 188 insertions, 292 deletions
diff --git a/xen/arch/i386/boot/boot.S b/xen/arch/i386/boot/boot.S index b27ff6c0f2..e882428a21 100644 --- a/xen/arch/i386/boot/boot.S +++ b/xen/arch/i386/boot/boot.S @@ -69,7 +69,7 @@ hal_entry: call start_paging lidt idt_descr - jmp initialize_secondary + jmp start_secondary #endif continue_boot_cpu: @@ -212,7 +212,7 @@ bad_cpu: /*** STACK LOCATION ***/ ENTRY(stack_start) - .long SYMBOL_NAME(idle0_task_union)+8192-__PAGE_OFFSET + .long SYMBOL_NAME(cpu0_stack) + 4000 - __PAGE_OFFSET .long __HYPERVISOR_DS /*** DESCRIPTOR TABLES ***/ @@ -258,7 +258,7 @@ ENTRY(gdt_table) .org 0x1000 ENTRY(idle0_pg_table) .org 0x2000 -ENTRY(idle0_task_union) - .org 0x4000 +ENTRY(cpu0_stack) + .org 0x3000 ENTRY(stext) ENTRY(_stext) diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index 84dda61578..3dcbff11c6 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -194,11 +194,12 @@ NT_MASK = 0x00004000 .long 5b,10b; \ .previous -#define GET_CURRENT(reg) \ - movl $-8192, reg; \ - andl %esp, reg +#define GET_CURRENT(reg) \ + movl $4096-4, reg; \ + orl %esp, reg; \ + movl (reg),reg \ -ENTRY(ret_from_newdomain) +ENTRY(continue_nonidle_task) GET_CURRENT(%ebx) jmp test_all_events diff --git a/xen/arch/i386/idle0_task.c b/xen/arch/i386/idle0_task.c index 0d2b9e40bf..a4c3f0d5c9 100644 --- a/xen/arch/i386/idle0_task.c +++ b/xen/arch/i386/idle0_task.c @@ -2,12 +2,7 @@ #include <xeno/sched.h> #include <asm/desc.h> -/* - * Initial task structure. XXX KAF: To get this 8192-byte aligned without - * linker tricks I copy it into aligned BSS area at boot time. - * Actual name idle0_task_union now declared in boot.S. - */ -struct task_struct first_task_struct = IDLE0_TASK(idle0_task_union.task); +struct task_struct idle0_task = IDLE0_TASK(idle0_task); /* * per-CPU TSS segments. 
Threads are completely 'soft' on Linux, diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c index 8665961b1e..ec381167ff 100644 --- a/xen/arch/i386/process.c +++ b/xen/arch/i386/process.c @@ -31,11 +31,6 @@ #include <xeno/irq.h> #include <xeno/event.h> -#define GET_SYSCALL_REGS(_p) \ - (((struct pt_regs *)(THREAD_SIZE + (unsigned long)(_p))) - 1) - -asmlinkage void ret_from_newdomain(void) __asm__("ret_from_newdomain"); - int hlt_counter; void disable_hlt(void) @@ -63,16 +58,21 @@ static void default_idle(void) } } -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle (void) +void continue_cpu_idle_loop(void) { int cpu = smp_processor_id(); + for ( ; ; ) + { + irq_stat[cpu].idle_timestamp = jiffies; + while (!current->hyp_events && !softirq_pending(cpu)) + default_idle(); + do_hyp_events(); + do_softirq(); + } +} +void startup_cpu_idle_loop(void) +{ /* Just some sanity to ensure that the scheduler is set up okay. 
*/ ASSERT(current->domain == IDLE_DOMAIN_ID); (void)wake_up(current); @@ -85,14 +85,7 @@ void cpu_idle (void) smp_mb(); init_idle(); - for ( ; ; ) - { - irq_stat[cpu].idle_timestamp = jiffies; - while (!current->hyp_events && !softirq_pending(cpu)) - default_idle(); - do_hyp_events(); - do_softirq(); - } + continue_cpu_idle_loop(); } static long no_idt[2]; @@ -186,43 +179,6 @@ void machine_power_off(void) machine_restart(0); } -extern void show_trace(unsigned long* esp); - -void show_regs(struct pt_regs * regs) -{ - unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; - - printk("\n"); - printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); - if (regs->xcs & 3) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx\n",regs->eflags); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x GS: %04x\n", - 0xffff & regs->xds, 0xffff & regs->xes, - 0xffff & regs->xfs, 0xffff & regs->xgs); - - __asm__("movl %%cr0, %0": "=r" (cr0)); - __asm__("movl %%cr2, %0": "=r" (cr2)); - __asm__("movl %%cr3, %0": "=r" (cr3)); - /* This could fault if %cr4 does not exist */ - __asm__("1: movl %%cr4, %0 \n" - "2: \n" - ".section __ex_table,\"a\" \n" - ".long 1b,2b \n" - ".previous \n" - : "=r" (cr4): "0" (0)); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); - show_trace(&regs->esp); -} - - -/* - * Free current thread data structures etc.. - */ void exit_thread(void) { /* nothing to do ... 
*/ @@ -249,8 +205,7 @@ void new_thread(struct task_struct *p, unsigned long start_stack, unsigned long start_info) { - struct pt_regs *regs = GET_SYSCALL_REGS(p); - memset(regs, 0, sizeof(*regs)); + execution_context_t *ec = &p->shared_info->execution_context; /* * Initial register values: @@ -260,20 +215,14 @@ void new_thread(struct task_struct *p, * ESI = start_info * [EAX,EBX,ECX,EDX,EDI,EBP are zero] */ - p->thread.fs = p->thread.gs = FLAT_RING1_DS; - regs->xds = regs->xes = regs->xfs = regs->xgs = regs->xss = FLAT_RING1_DS; - regs->xcs = FLAT_RING1_CS; - regs->eip = start_pc; - regs->esp = start_stack; - regs->esi = start_info; - - p->thread.esp = (unsigned long) regs; - p->thread.esp0 = (unsigned long) (regs+1); - - p->thread.eip = (unsigned long) ret_from_newdomain; + ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS; + ec->cs = FLAT_RING1_CS; + ec->eip = start_pc; + ec->esp = start_stack; + ec->esi = start_info; - __save_flags(regs->eflags); - regs->eflags |= X86_EFLAGS_IF; + __save_flags(ec->eflags); + ec->eflags |= X86_EFLAGS_IF; /* No fast trap at start of day. */ SET_DEFAULT_FAST_TRAP(&p->thread); @@ -288,34 +237,21 @@ void new_thread(struct task_struct *p, : /* no output */ \ :"r" (thread->debugreg[register])) -/* - * switch_to(x,yn) should switch tasks from x to y. - * - * We fsave/fwait so that an exception goes off at the right time - * (as a call from the fsave or fwait in effect) rather than to - * the wrong process. Lazy FP saving no longer makes any sense - * with modern CPU's, and this simplifies a lot of things (SMP - * and UP become the same). - * - * NOTE! We used to use the x86 hardware context switching. The - * reason for not using it any more becomes apparent when you - * try to recover gracefully from saved state that is no longer - * valid (stale segment register values in particular). With the - * hardware task-switch, there is no way to fix up bad state in - * a reasonable manner. 
- * - * The fact that Intel documents the hardware task-switching to - * be slow is a fairly red herring - this code is not noticeably - * faster. However, there _is_ some room for improvement here, - * so the performance issues may eventually be a valid point. - * More important, however, is the fact that this allows us much - * more flexibility. - */ -/* NB. prev_p passed in %eax, next_p passed in %edx */ -void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +void switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *next = &next_p->thread; struct tss_struct *tss = init_tss + smp_processor_id(); + execution_context_t *stack_ec = get_execution_context(); + + __cli(); + + /* Switch guest general-register state. */ + memcpy(&prev_p->shared_info->execution_context, + stack_ec, + sizeof(*stack_ec)); + memcpy(stack_ec, + &next_p->shared_info->execution_context, + sizeof(*stack_ec)); unlazy_fpu(prev_p); @@ -323,18 +259,22 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) CLEAR_FAST_TRAP(&prev_p->thread); SET_FAST_TRAP(&next_p->thread); - tss->esp0 = next->esp0; + /* Switch the guest OS ring-1 stack. */ tss->esp1 = next->esp1; tss->ss1 = next->ss1; + /* Switch page tables. */ + __write_cr3_counted(pagetable_val(next_p->mm.pagetable)); + + set_current(next_p); + /* Switch GDT and LDT. */ __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt)); load_LDT(); - /* - * Now maybe reload the debug registers - */ - if (next->debugreg[7]){ + /* Maybe switch the debug registers. */ + if ( next->debugreg[7] ) + { loaddebug(next, 0); loaddebug(next, 1); loaddebug(next, 2); @@ -344,13 +284,14 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) loaddebug(next, 7); } + __sti(); } /* XXX Currently the 'domain' field is ignored! 
XXX */ long do_iopl(unsigned int domain, unsigned int new_io_pl) { - struct pt_regs *regs = GET_SYSCALL_REGS(current); - regs->eflags = (regs->eflags & 0xffffcfff) | ((new_io_pl&3) << 12); + execution_context_t *ec = get_execution_context(); + ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12); return 0; } diff --git a/xen/arch/i386/setup.c b/xen/arch/i386/setup.c index ec55c1c3ac..58b8496323 100644 --- a/xen/arch/i386/setup.c +++ b/xen/arch/i386/setup.c @@ -254,7 +254,7 @@ void __init cpu_init(void) /* Set up and load the per-CPU TSS and LDT. */ t->ss0 = __HYPERVISOR_DS; - t->esp0 = current->thread.esp0; + t->esp0 = get_stack_top(); set_tss_desc(nr,t); load_TR(nr); __asm__ __volatile__("lldt %%ax"::"a" (0)); @@ -414,14 +414,11 @@ void __init start_of_day(void) check_nmi_watchdog(); zap_low_mappings(); - kmem_cache_init(); - kmem_cache_sizes_init(max_page); + #ifdef CONFIG_PCI pci_init(); #endif do_initcalls(); - - initialize_serial(); /* setup serial 'driver' (for debugging) */ initialize_keyboard(); /* setup keyboard (also for debugging) */ diff --git a/xen/arch/i386/smpboot.c b/xen/arch/i386/smpboot.c index 2ebab30c72..91b03203d0 100644 --- a/xen/arch/i386/smpboot.c +++ b/xen/arch/i386/smpboot.c @@ -395,19 +395,21 @@ void __init smp_callin(void) synchronize_tsc_ap(); } -int cpucount; +static int cpucount; /* * Activate a secondary processor. */ -int __init start_secondary(void *unused) +void __init start_secondary(void) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu = cpucount; /* 6 bytes suitable for passing to LIDT instruction. 
*/ unsigned char idt_load[6]; extern void cpu_init(void); + set_current(idle_task[cpu]); + /* * Dont put anything before smp_callin(), SMP * booting is too fragile that we want to limit the @@ -435,34 +437,13 @@ int __init start_secondary(void *unused) */ local_flush_tlb(); - cpu_idle(); - BUG(); - - return 0; -} + startup_cpu_idle_loop(); -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __init initialize_secondary(void) -{ - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. - */ - asm volatile( - "movl %0,%%esp\n\t" - "jmp *%1" - : - :"r" (current->thread.esp),"r" (current->thread.eip)); + BUG(); } extern struct { - void * esp; - unsigned short ss; + unsigned long esp, ss; } stack_start; /* which physical APIC ID maps to which logical CPU number */ @@ -688,9 +669,7 @@ static void __init do_boot_cpu (int apicid) l2_pgentry_t *pagetable; cpu = ++cpucount; - /* - * We can't use kernel_thread since we must avoid to reschedule the child. - */ + if ( (idle = do_newdomain(IDLE_DOMAIN_ID, cpu)) == NULL ) panic("failed 'newdomain' for CPU %d", cpu); @@ -701,9 +680,6 @@ static void __init do_boot_cpu (int apicid) map_cpu_to_boot_apicid(cpu, apicid); - idle->thread.esp = idle->thread.esp0 = (unsigned long)idle + THREAD_SIZE; - idle->thread.eip = (unsigned long) start_secondary; - SET_DEFAULT_FAST_TRAP(&idle->thread); idle_task[cpu] = idle; @@ -713,7 +689,7 @@ static void __init do_boot_cpu (int apicid) /* So we see what's up */ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); - stack_start.esp = (void *) (1024+PAGE_SIZE+(char *)idle-__PAGE_OFFSET); + stack_start.esp = __pa(get_free_page(GFP_KERNEL)) + 4000; /* * This grunge runs the startup process for @@ -735,7 +711,8 @@ static void __init do_boot_cpu (int apicid) /* * Be paranoid about clearing APIC errors. 
*/ - if (APIC_INTEGRATED(apic_version[apicid])) { + if ( APIC_INTEGRATED(apic_version[apicid]) ) + { apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); @@ -774,8 +751,8 @@ static void __init do_boot_cpu (int apicid) printk("CPU%d has booted.\n", cpu); } else { boot_error= 1; - if (*((volatile unsigned char *)phys_to_virt(8192)) - == 0xA5) + if (*((volatile unsigned long *)phys_to_virt(start_eip)) + == 0xA5A5A5A5) /* trampoline started but...? */ printk("Stuck ??\n"); else @@ -794,9 +771,6 @@ static void __init do_boot_cpu (int apicid) clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */ cpucount--; } - - /* mark "stuck" area as not stuck */ - *((volatile unsigned long *)phys_to_virt(8192)) = 0; } diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c index 60bd3f7de1..a555a21476 100644 --- a/xen/arch/i386/traps.c +++ b/xen/arch/i386/traps.c @@ -110,12 +110,9 @@ void show_trace(unsigned long * stack) int i; unsigned long addr; - if (!stack) - stack = (unsigned long*)&stack; - printk("Call Trace: "); i = 1; - while (((long) stack & (THREAD_SIZE-1)) != 0) { + while (((long) stack & (STACK_SIZE-1)) != 0) { addr = *stack++; if (kernel_text_address(addr)) { if (i && ((i % 6) == 0)) @@ -127,30 +124,17 @@ void show_trace(unsigned long * stack) printk("\n"); } -void show_trace_task(struct task_struct *tsk) -{ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? 
*/ - if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) - return; - show_trace((unsigned long *)esp); -} - -void show_stack(unsigned long * esp) +void show_stack(unsigned long *esp) { unsigned long *stack; int i; - if ( esp == NULL ) - esp = (unsigned long *)&esp; - printk("Stack trace from ESP=%p:\n", esp); stack = esp; for ( i = 0; i < kstack_depth_to_print; i++ ) { - if ( ((long)stack & (THREAD_SIZE-1)) == 0 ) + if ( ((long)stack & (STACK_SIZE-1)) == 0 ) break; if ( i && ((i % 8) == 0) ) printk("\n "); diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 032c96236e..770e21e248 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -30,6 +30,8 @@ #include <xeno/console.h> #include <xeno/net_headers.h> +kmem_cache_t *task_struct_cachep; + static int xpos, ypos; static volatile unsigned char *video; @@ -176,7 +178,7 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) for ( ; ; ) ; } - memcpy(&idle0_task_union, &first_task_struct, sizeof(first_task_struct)); + set_current(&idle0_task); max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10); init_frametable(max_page); @@ -190,6 +192,16 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) memset(current->shared_info, 0, sizeof(shared_info_t)); set_fs(USER_DS); + /* Initialise the slab allocator. */ + kmem_cache_init(); + kmem_cache_sizes_init(max_page); + + task_struct_cachep = kmem_cache_create( + "task_struct_cache", sizeof(struct task_struct), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if ( task_struct_cachep == NULL ) + panic("No slab cache for task structs."); + start_of_day(); /* Create initial domain 0. 
*/ @@ -215,7 +227,7 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) update_dom_time(new_dom->shared_info); wake_up(new_dom); - cpu_idle(); + startup_cpu_idle_loop(); } diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 8041844d00..12a41111bb 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -46,7 +46,7 @@ typedef struct schedule_data_st { spinlock_t lock; /* lock for protecting this */ struct list_head runqueue; /* runqueue */ - struct task_struct *prev, *curr; /* previous and current task */ + struct task_struct *curr; /* current task */ struct task_struct *idle; /* idle task for this cpu */ u32 svt; /* system virtual time. per CPU??? */ struct ac_timer s_timer; /* scheduling timer */ @@ -254,7 +254,7 @@ void reschedule(struct task_struct *p) unsigned long flags; s_time_t now, min_time; - if (p->has_cpu) + if ( p->has_cpu ) return; spin_lock_irqsave(&schedule_data[cpu].lock, flags); @@ -264,25 +264,25 @@ void reschedule(struct task_struct *p) /* domain should run at least for ctx_allow */ min_time = curr->lastschd + ctx_allow; - if ( is_idle_task(curr) || (min_time <= now) ) { + if ( is_idle_task(curr) || (min_time <= now) ) + { /* reschedule */ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); - if (cpu != smp_processor_id()) + if ( cpu != smp_processor_id() ) smp_send_event_check_cpu(cpu); + return; } /* current hasn't been running for long enough -> reprogram timer. 
* but don't bother if timer would go off soon anyway */ - if (schedule_data[cpu].s_timer.expires > min_time + TIME_SLOP) { + if ( schedule_data[cpu].s_timer.expires > min_time + TIME_SLOP ) mod_ac_timer(&schedule_data[cpu].s_timer, min_time); - } spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); - return; } @@ -304,9 +304,7 @@ asmlinkage void schedule(void) s32 mcus; u32 next_evt, next_prime_evt, min_avt; - perfc_incrc(sched_run1); - need_resched_back: - perfc_incrc(sched_run2); + perfc_incrc(sched_run); prev = current; next = NULL; @@ -325,7 +323,7 @@ asmlinkage void schedule(void) ASSERT(!in_interrupt()); ASSERT(__task_on_runqueue(prev)); - if (is_idle_task(prev)) + if ( is_idle_task(prev) ) goto deschedule_done; /* do some accounting */ @@ -343,9 +341,12 @@ asmlinkage void schedule(void) /* dequeue */ __del_from_runqueue(prev); - switch (prev->state) { + + switch ( prev->state ) + { case TASK_INTERRUPTIBLE: - if (signal_pending(prev)) { + if ( signal_pending(prev) ) + { prev->state = TASK_RUNNING; /* but has events pending */ break; } @@ -362,7 +363,6 @@ asmlinkage void schedule(void) /* requeue */ __add_to_runqueue_tail(prev); - deschedule_done: clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); @@ -456,7 +456,6 @@ asmlinkage void schedule(void) prev->has_cpu = 0; next->has_cpu = 1; - schedule_data[this_cpu].prev = prev; schedule_data[this_cpu].curr = next; next->lastschd = now; @@ -472,7 +471,8 @@ asmlinkage void schedule(void) { /* We won't go through the normal tail, so do this by hand */ prev->policy &= ~SCHED_YIELD; - goto same_process; + update_dom_time(prev->shared_info); + return; } perfc_incrc(sched_ctx); @@ -489,23 +489,17 @@ asmlinkage void schedule(void) } #endif - - prepare_to_switch(); switch_to(prev, next); - prev = schedule_data[this_cpu].prev; prev->policy &= ~SCHED_YIELD; if ( prev->state == TASK_DYING ) put_task_struct(prev); - same_process: - /* update the domains notion of time */ - update_dom_time(current->shared_info); + 
update_dom_time(next->shared_info); - if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) { - goto need_resched_back; - } - return; + schedule_tail(next); + + BUG(); } /* No locking needed -- pointer comparison is safe :-) */ @@ -566,7 +560,6 @@ void __init scheduler_init(void) { INIT_LIST_HEAD(&schedule_data[i].runqueue); spin_lock_init(&schedule_data[i].lock); - schedule_data[i].prev = &idle0_task; schedule_data[i].curr = &idle0_task; /* a timer for each CPU */ diff --git a/xen/include/asm-i386/current.h b/xen/include/asm-i386/current.h index bc1496a2c9..5a12a1201f 100644 --- a/xen/include/asm-i386/current.h +++ b/xen/include/asm-i386/current.h @@ -5,11 +5,42 @@ struct task_struct; static inline struct task_struct * get_current(void) { - struct task_struct *current; - __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); - return current; - } + struct task_struct *current; + __asm__ ( "orl %%esp,%0; movl (%0),%0" + : "=r" (current) : "0" (4092UL) ); + return current; +} #define current get_current() +static inline void set_current(struct task_struct *p) +{ + __asm__ ( "orl %%esp,%0; movl %1,(%0)" + : : "r" (4092UL), "r" (p) ); +} + +static inline execution_context_t *get_execution_context(void) +{ + execution_context_t *execution_context; + __asm__ ( "andl %%esp,%0; addl $4096-72,%0" + : "=r" (execution_context) : "0" (~4095UL) ); + return execution_context; +} + +static inline unsigned long get_stack_top(void) +{ + unsigned long p; + __asm__ ( "orl %%esp,%0" + : "=r" (p) : "0" (4092UL) ); + return p; +} + +#define schedule_tail(_p) \ + __asm__ __volatile__ ( \ + "andl %%esp,%0; addl $4096-72,%0; movl %0,%%esp; jmp *%1" \ + : : "r" (~4095UL), "r" (unlikely(is_idle_task((_p))) ? 
\ + continue_cpu_idle_loop : \ + continue_nonidle_task) ) + + #endif /* !(_I386_CURRENT_H) */ diff --git a/xen/include/asm-i386/irq.h b/xen/include/asm-i386/irq.h index 5f0e75ea4d..31435c8753 100644 --- a/xen/include/asm-i386/irq.h +++ b/xen/include/asm-i386/irq.h @@ -120,10 +120,6 @@ extern char _stext, _etext; #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) -#define GET_CURRENT \ - "movl %esp, %ebx\n\t" \ - "andl $-8192, %ebx\n\t" - /* * SMP has a few special interrupts for IPI messages */ diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h index eb0ca7001a..f358325d1f 100644 --- a/xen/include/asm-i386/processor.h +++ b/xen/include/asm-i386/processor.h @@ -340,16 +340,9 @@ struct tss_struct { }; struct thread_struct { - unsigned long esp0; /* top of the stack */ - unsigned long eip; /* in kernel space, saved on task switch */ - unsigned long esp; /* "" */ - unsigned long fs; /* "" (NB. DS/ES constant in mon, so no save) */ - unsigned long gs; /* "" ("") */ unsigned long esp1, ss1; /* Hardware debugging registers */ unsigned long debugreg[8]; /* %%db0-7 debug registers */ -/* fault info */ - unsigned long cr2, trap_no, error_code; /* floating point info */ union i387_union i387; /* Trap info. */ @@ -376,10 +369,8 @@ extern struct desc_struct *idt_tables[]; &((_p)->fast_trap_desc), 8)) #define INIT_THREAD { \ - sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \ - 0, 0, 0, 0, 0, 0, \ + 0, 0, \ { [0 ... 
7] = 0 }, /* debugging registers */ \ - 0, 0, 0, \ { { 0, }, }, /* 387 state */ \ 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \ { {0} } /* io permissions */ \ @@ -387,8 +378,8 @@ extern struct desc_struct *idt_tables[]; #define INIT_TSS { \ 0,0, /* back_link, __blh */ \ - sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \ - __HYPERVISOR_DS, 0, /* ss0 */ \ + 0, /* esp0 */ \ + 0, 0, /* ss0 */ \ 0,0,0,0,0,0, /* stack1, stack2 */ \ 0, /* cr3 */ \ 0,0, /* eip,eflags */ \ @@ -416,29 +407,10 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); extern void copy_segments(struct task_struct *p, struct mm_struct * mm); extern void release_segments(struct mm_struct * mm); -/* - * Return saved PC of a blocked thread. - */ -static inline unsigned long thread_saved_pc(struct thread_struct *t) -{ - return ((unsigned long *)t->esp)[3]; -} - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) -#define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_task_struct() \ - ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) -#define put_task_struct(_p) \ - if ( atomic_dec_and_test(&(_p)->refcnt) ) release_task(_p) -#define get_task_struct(_p) \ - atomic_inc(&(_p)->refcnt) - -#define idle0_task (idle0_task_union.task) -#define idle0_stack (idle0_task_union.stack) - struct microcode { unsigned int hdrver; unsigned int rev; diff --git a/xen/include/asm-i386/ptrace.h b/xen/include/asm-i386/ptrace.h index 6570cc4e79..26269afcb0 100644 --- a/xen/include/asm-i386/ptrace.h +++ b/xen/include/asm-i386/ptrace.h @@ -46,7 +46,6 @@ enum EFLAGS { #ifdef __KERNEL__ #define user_mode(regs) ((3 & (regs)->xcs)) -extern void show_regs(struct pt_regs *); #endif #endif diff --git a/xen/include/asm-i386/system.h b/xen/include/asm-i386/system.h index 4200a1051d..a19d629d8c 100644 --- a/xen/include/asm-i386/system.h +++ 
b/xen/include/asm-i386/system.h @@ -5,34 +5,8 @@ #include <asm/bitops.h> struct task_struct; -extern void FASTCALL(__switch_to(struct task_struct *prev, - struct task_struct *next)); - -#define prepare_to_switch() do { } while(0) -#define switch_to(prev,next) do { \ - asm volatile("pushl %%ebp\n\t" \ - "pushl %%ebx\n\t" \ - "pushl %%esi\n\t" \ - "pushl %%edi\n\t" \ - "movl %%esp,%0\n\t" /* save ESP */ \ - "cli\n\t" \ - "movl %2,%%esp\n\t" /* restore ESP */ \ - "movl %6,%%cr3\n\t" /* restore pagetables */\ - "sti\n\t" \ - "movl $1f,%1\n\t" /* save EIP */ \ - "pushl %3\n\t" /* restore EIP */ \ - "jmp __switch_to\n" \ - "1:\t" \ - "popl %%edi\n\t" \ - "popl %%esi\n\t" \ - "popl %%ebx\n\t" \ - "popl %%ebp\n\t" \ - :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \ - :"m" (next->thread.esp),"m" (next->thread.eip), \ - "a" (prev), "d" (next), \ - "c" (pagetable_val(next->mm.pagetable)) \ - :"memory"); \ -} while (0) +extern void switch_to(struct task_struct *prev, + struct task_struct *next); /* Clear and set 'TS' bit respectively */ #define clts() __asm__ __volatile__ ("clts") diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index cb4ea195bd..9f6175521a 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -186,6 +186,27 @@ typedef struct unsigned long args[7]; } multicall_entry_t; +typedef struct +{ + unsigned long ebx; + unsigned long ecx; + unsigned long edx; + unsigned long esi; + unsigned long edi; + unsigned long ebp; + unsigned long eax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long _unused; + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; +} execution_context_t; + /* * Xen/guestos shared data -- pointer provided in start_info. * NB. We expect that this struct is smaller than a page. 
@@ -241,6 +262,8 @@ typedef struct shared_info_st { */ net_idx_t net_idx[MAX_DOMAIN_VIFS]; + execution_context_t execution_context; + } shared_info_t; /* diff --git a/xen/include/xeno/perfc_defn.h b/xen/include/xeno/perfc_defn.h index 891be99786..0d38959022 100644 --- a/xen/include/xeno/perfc_defn.h +++ b/xen/include/xeno/perfc_defn.h @@ -5,8 +5,7 @@ PERFCOUNTER_CPU( irq_time, "cycles spent in irq handler" ) PERFCOUNTER_CPU( apic_timer, "apic timer interrupts" ) PERFCOUNTER_CPU( ac_timer_max, "ac_timer max error (ns)" ) PERFCOUNTER_CPU( sched_irq, "sched: timer" ) -PERFCOUNTER_CPU( sched_run1, "sched: calls to schedule" ) -PERFCOUNTER_CPU( sched_run2, "sched: runs through scheduler" ) +PERFCOUNTER_CPU( sched_run, "sched: runs through scheduler" ) PERFCOUNTER_CPU( sched_ctx, "sched: context switches" ) PERFCOUNTER( net_hypercalls, "network hypercalls" ) diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index 5d451fa960..e272e2f890 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -16,6 +16,7 @@ #include <xeno/time.h> #include <xeno/ac_timer.h> #include <xeno/delay.h> +#include <xeno/slab.h> #define MAX_DOMAIN_NAME 16 @@ -209,21 +210,21 @@ struct task_struct next_task: &(_t) \ } +extern struct task_struct idle0_task; + extern struct task_struct *idle_task[NR_CPUS]; #define IDLE_DOMAIN_ID (~0) #define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID) -#ifndef IDLE0_TASK_SIZE -#define IDLE0_TASK_SIZE 2048*sizeof(long) -#endif - -union task_union { - struct task_struct task; - unsigned long stack[IDLE0_TASK_SIZE/sizeof(long)]; -}; +#define STACK_SIZE PAGE_SIZE -extern union task_union idle0_task_union; -extern struct task_struct first_task_struct; +extern kmem_cache_t *task_struct_cachep; +#define alloc_task_struct() \ + ((struct task_struct *)kmem_cache_alloc(task_struct_cachep,GFP_KERNEL)) +#define put_task_struct(_p) \ + if ( atomic_dec_and_test(&(_p)->refcnt) ) release_task(_p) +#define get_task_struct(_p) \ + 
atomic_inc(&(_p)->refcnt) extern struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu); extern int setup_guestos( @@ -292,7 +293,11 @@ static inline long schedule_timeout(long timeout) void domain_init(void); int idle_cpu(int cpu); /* Is CPU 'cpu' idle right now? */ -void cpu_idle(void); /* Idle loop. */ + +void startup_cpu_idle_loop(void); +void continue_cpu_idle_loop(void); + +void continue_nonidle_task(void); /* This hash table is protected by the tasklist_lock. */ #define TASK_HASH_SIZE 256 |