diff options
37 files changed, 659 insertions, 491 deletions
diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c index cc4c0f4561..24c4b2bb23 100644 --- a/tools/libxc/xc_linux_build.c +++ b/tools/libxc/xc_linux_build.c @@ -335,6 +335,7 @@ static int setup_guestos(int xc_handle, /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) shared_info->vcpu_data[i].evtchn_upcall_mask = 1; + shared_info->n_vcpu = 2; munmap(shared_info, PAGE_SIZE); /* Send the page update requests down to the hypervisor. */ diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c index 62b2739d18..0a32ba7b67 100644 --- a/xen/arch/x86/dom0_ops.c +++ b/xen/arch/x86/dom0_ops.c @@ -47,7 +47,7 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op) { long ret = 0; - if ( !IS_PRIV(current) ) + if ( !IS_PRIV(current->domain) ) return -EPERM; switch ( op->cmd ) @@ -101,7 +101,7 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op) return ret; } -void arch_getdomaininfo_ctxt(struct domain *d, full_execution_context_t *c) +void arch_getdomaininfo_ctxt(struct exec_domain *d, full_execution_context_t *c) { int i; @@ -109,7 +109,7 @@ void arch_getdomaininfo_ctxt(struct domain *d, full_execution_context_t *c) memcpy(&c->cpu_ctxt, &d->thread.user_ctxt, sizeof(d->thread.user_ctxt)); - if ( test_bit(DF_DONEFPUINIT, &d->flags) ) + if ( test_bit(EDF_DONEFPUINIT, &d->ed_flags) ) c->flags |= ECF_I387_VALID; memcpy(&c->fpu_ctxt, &d->thread.i387, diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index baa52acb70..af7fdc757e 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -91,7 +91,7 @@ void startup_cpu_idle_loop(void) { /* Just some sanity to ensure that the scheduler is set up okay. 
*/ ASSERT(current->id == IDLE_DOMAIN_ID); - domain_unpause_by_systemcontroller(current); + domain_unpause_by_systemcontroller(current->domain); __enter_scheduler(); /* @@ -210,18 +210,18 @@ void machine_halt(void) __machine_halt(NULL); } -void free_perdomain_pt(struct domain *d) +void free_perdomain_pt(struct exec_domain *d) { free_xenheap_page((unsigned long)d->mm.perdomain_pt); } -void arch_do_createdomain(struct domain *d) +void arch_do_createdomain(struct exec_domain *d) { d->shared_info = (void *)alloc_xenheap_page(); memset(d->shared_info, 0, PAGE_SIZE); d->shared_info->arch.mfn_to_pfn_start = virt_to_phys(&machine_to_phys_mapping[0])>>PAGE_SHIFT; - SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); + SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d->domain); machine_to_phys_mapping[virt_to_phys(d->shared_info) >> PAGE_SHIFT] = INVALID_P2M_ENTRY; @@ -231,14 +231,14 @@ void arch_do_createdomain(struct domain *d) PAGE_SHIFT] = INVALID_P2M_ENTRY; } -int arch_final_setup_guestos(struct domain *d, full_execution_context_t *c) +int arch_final_setup_guestos(struct exec_domain *d, full_execution_context_t *c) { unsigned long phys_basetab; int i, rc; - clear_bit(DF_DONEFPUINIT, &d->flags); + clear_bit(EDF_DONEFPUINIT, &d->ed_flags); if ( c->flags & ECF_I387_VALID ) - set_bit(DF_DONEFPUINIT, &d->flags); + set_bit(EDF_DONEFPUINIT, &d->ed_flags); memcpy(&d->thread.user_ctxt, &c->cpu_ctxt, @@ -283,7 +283,7 @@ int arch_final_setup_guestos(struct domain *d, full_execution_context_t *c) phys_basetab = c->pt_base; d->mm.pagetable = mk_pagetable(phys_basetab); - if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, + if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d->domain, PGT_base_page_table) ) return -EINVAL; @@ -304,7 +304,7 @@ int arch_final_setup_guestos(struct domain *d, full_execution_context_t *c) #if defined(__i386__) -void new_thread(struct domain *d, +void new_thread(struct exec_domain *d, unsigned long start_pc, unsigned 
long start_stack, unsigned long start_info) @@ -342,7 +342,7 @@ void new_thread(struct domain *d, :"r" (thread->debugreg[register])) -void switch_to(struct domain *prev_p, struct domain *next_p) +void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p) { struct thread_struct *next = &next_p->thread; struct tss_struct *tss = init_tss + smp_processor_id(); @@ -352,7 +352,7 @@ void switch_to(struct domain *prev_p, struct domain *next_p) __cli(); /* Switch guest general-register state. */ - if ( !is_idle_task(prev_p) ) + if ( !is_idle_task(prev_p->domain) ) { memcpy(&prev_p->thread.user_ctxt, stack_ec, @@ -361,7 +361,7 @@ void switch_to(struct domain *prev_p, struct domain *next_p) CLEAR_FAST_TRAP(&prev_p->thread); } - if ( !is_idle_task(next_p) ) + if ( !is_idle_task(next_p->domain) ) { memcpy(stack_ec, &next_p->thread.user_ctxt, @@ -389,36 +389,36 @@ void switch_to(struct domain *prev_p, struct domain *next_p) write_ptbase(&next_p->mm); } - if ( unlikely(prev_p->io_bitmap != NULL) || - unlikely(next_p->io_bitmap != NULL) ) + if ( unlikely(prev_p->domain->io_bitmap != NULL) || + unlikely(next_p->domain->io_bitmap != NULL) ) { - if ( next_p->io_bitmap != NULL ) + if ( next_p->domain->io_bitmap != NULL ) { /* Copy in the appropriate parts of the IO bitmap. We use the * selector to copy only the interesting parts of the bitmap. */ u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */ - if ( prev_p->io_bitmap != NULL) + if ( prev_p->domain->io_bitmap != NULL) { - old_sel = prev_p->io_bitmap_sel; + old_sel = prev_p->domain->io_bitmap_sel; /* Replace any areas of the IO bitmap that had bits cleared. 
*/ - for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) - if ( !test_bit(i, &prev_p->io_bitmap_sel) ) + for ( i = 0; i < sizeof(prev_p->domain->io_bitmap_sel) * 8; i++ ) + if ( !test_bit(i, &prev_p->domain->io_bitmap_sel) ) memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], - &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], + &next_p->domain->io_bitmap[i * IOBMP_SELBIT_LWORDS], IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); } /* Copy in any regions of the new task's bitmap that have bits * clear and we haven't already dealt with. */ - for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) + for ( i = 0; i < sizeof(prev_p->domain->io_bitmap_sel) * 8; i++ ) { if ( test_bit(i, &old_sel) - && !test_bit(i, &next_p->io_bitmap_sel) ) + && !test_bit(i, &next_p->domain->io_bitmap_sel) ) memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], - &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], + &next_p->domain->io_bitmap[i * IOBMP_SELBIT_LWORDS], IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); } @@ -430,8 +430,8 @@ void switch_to(struct domain *prev_p, struct domain *next_p) /* In this case, we're switching FROM a task with IO port access, * to a task that doesn't use the IO bitmap. We set any TSS bits * that might have been cleared, ready for future use. */ - for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) - if ( !test_bit(i, &prev_p->io_bitmap_sel) ) + for ( i = 0; i < sizeof(prev_p->domain->io_bitmap_sel) * 8; i++ ) + if ( !test_bit(i, &prev_p->domain->io_bitmap_sel) ) memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], 0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); @@ -536,8 +536,8 @@ void domain_relinquish_memory(struct domain *d) shadow_mode_disable(d); /* Drop the in-use reference to the page-table base. 
*/ - if ( pagetable_val(d->mm.pagetable) != 0 ) - put_page_and_type(&frame_table[pagetable_val(d->mm.pagetable) >> + if ( pagetable_val(d->exec_domain[0]->mm.pagetable) != 0 ) + put_page_and_type(&frame_table[pagetable_val(d->exec_domain[0]->mm.pagetable) >> PAGE_SHIFT]); /* @@ -569,6 +569,7 @@ int construct_dom0(struct domain *p, l1_pgentry_t *l1tab = NULL, *l1start = NULL; struct pfn_info *page = NULL; start_info_t *si; + struct exec_domain *ed = p->exec_domain[0]; /* * This fully describes the memory layout of the initial domain. All @@ -596,7 +597,7 @@ int construct_dom0(struct domain *p, /* Sanity! */ if ( p->id != 0 ) BUG(); - if ( test_bit(DF_CONSTRUCTED, &p->flags) ) + if ( test_bit(DF_CONSTRUCTED, &p->d_flags) ) BUG(); memset(&dsi, 0, sizeof(struct domain_setup_info)); @@ -734,18 +735,18 @@ int construct_dom0(struct domain *p, mpt_alloc = (vpt_start - dsi.v_start) + alloc_start; - SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES); - SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS); + SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES); + SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS); /* * We're basically forcing default RPLs to 1, so that our "what privilege * level are we returning to?" logic works. */ - p->failsafe_selector = FLAT_GUESTOS_CS; - p->event_selector = FLAT_GUESTOS_CS; - p->thread.guestos_ss = FLAT_GUESTOS_DS; + ed->failsafe_selector = FLAT_GUESTOS_CS; + ed->event_selector = FLAT_GUESTOS_CS; + ed->thread.guestos_ss = FLAT_GUESTOS_DS; for ( i = 0; i < 256; i++ ) - p->thread.traps[i].cs = FLAT_GUESTOS_CS; + ed->thread.traps[i].cs = FLAT_GUESTOS_CS; /* WARNING: The new domain must have its 'processor' field filled in! 
*/ l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; @@ -753,8 +754,8 @@ int construct_dom0(struct domain *p, l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR); l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR); - p->mm.pagetable = mk_pagetable((unsigned long)l2start); + mk_l2_pgentry(__pa(ed->mm.perdomain_pt) | __PAGE_HYPERVISOR); + ed->mm.pagetable = mk_pagetable((unsigned long)l2start); l2tab += l2_table_offset(dsi.v_start); mfn = alloc_start >> PAGE_SHIFT; @@ -825,15 +826,16 @@ int construct_dom0(struct domain *p, } /* Set up shared-info area. */ - update_dom_time(p->shared_info); - p->shared_info->domain_time = 0; + update_dom_time(ed->shared_info); + ed->shared_info->domain_time = 0; /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; + ed->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; + ed->shared_info->n_vcpu = 1; /* Install the new page tables. */ __cli(); - write_ptbase(&p->mm); + write_ptbase(&ed->mm); /* Copy the OS image. */ (void)loadelfimage(image_start); @@ -846,7 +848,7 @@ int construct_dom0(struct domain *p, si = (start_info_t *)vstartinfo_start; memset(si, 0, PAGE_SIZE); si->nr_pages = p->tot_pages; - si->shared_info = virt_to_phys(p->shared_info); + si->shared_info = virt_to_phys(ed->shared_info); si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; si->pt_base = vpt_start; si->nr_pt_frames = nr_pt_pages; @@ -898,9 +900,9 @@ int construct_dom0(struct domain *p, /* DOM0 gets access to everything. */ physdev_init_dom0(p); - set_bit(DF_CONSTRUCTED, &p->flags); + set_bit(DF_CONSTRUCTED, &p->d_flags); - new_thread(p, dsi.v_kernentry, vstack_end, vstartinfo_start); + new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start); #if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! 
(but useful for testing so leave) */ shadow_lock(&p->mm); diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c index 6cc15dd3b5..fe7bae3d99 100644 --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -17,10 +17,10 @@ void init_fpu(void) { __asm__("fninit"); if ( cpu_has_xmm ) load_mxcsr(0x1f80); - set_bit(DF_DONEFPUINIT, &current->flags); + set_bit(EDF_DONEFPUINIT, &current->ed_flags); } -static inline void __save_init_fpu( struct domain *tsk ) +static inline void __save_init_fpu( struct exec_domain *tsk ) { if ( cpu_has_fxsr ) { asm volatile( "fxsave %0 ; fnclex" @@ -29,22 +29,22 @@ static inline void __save_init_fpu( struct domain *tsk ) asm volatile( "fnsave %0 ; fwait" : "=m" (tsk->thread.i387) ); } - clear_bit(DF_USEDFPU, &tsk->flags); + clear_bit(EDF_USEDFPU, &tsk->ed_flags); } -void save_init_fpu( struct domain *tsk ) +void save_init_fpu( struct exec_domain *tsk ) { /* * The guest OS may have set the 'virtual STTS' flag. * This causes us to set the real flag, so we'll need * to temporarily clear it while saving f-p state.
*/ - if ( test_bit(DF_GUEST_STTS, &tsk->flags) ) clts(); + if ( test_bit(EDF_GUEST_STTS, &tsk->ed_flags) ) clts(); __save_init_fpu(tsk); stts(); } -void restore_fpu( struct domain *tsk ) +void restore_fpu( struct exec_domain *tsk ) { if ( cpu_has_fxsr ) { asm volatile( "fxrstor %0" diff --git a/xen/arch/x86/idle0_task.c b/xen/arch/x86/idle0_task.c index f86722097f..8dc2784e7c 100644 --- a/xen/arch/x86/idle0_task.c +++ b/xen/arch/x86/idle0_task.c @@ -2,17 +2,24 @@ #include <xen/sched.h> #include <asm/desc.h> -#define IDLE0_TASK(_t) \ -{ \ - processor: 0, \ - id: IDLE_DOMAIN_ID, \ - mm: IDLE0_MM, \ - thread: INIT_THREAD, \ - flags: 1<<DF_IDLETASK, \ - refcnt: ATOMIC_INIT(1) \ +#define IDLE0_EXEC_DOMAIN(_ed,_d) \ +{ \ + processor: 0, \ + mm: IDLE0_MM, \ + thread: INIT_THREAD, \ + domain: (_d) \ } -struct domain idle0_task = IDLE0_TASK(idle0_task); +#define IDLE0_DOMAIN(_t) \ +{ \ + id: IDLE_DOMAIN_ID, \ + d_flags: 1<<DF_IDLETASK, \ + refcnt: ATOMIC_INIT(1) \ +} + +struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain); +struct exec_domain idle0_exec_domain = IDLE0_EXEC_DOMAIN(idle0_exec_domain, + &idle0_domain); /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index 33600b8c58..5bbec081fd 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -212,7 +212,7 @@ int pirq_guest_unmask(struct domain *d) irq_desc_t *desc; unsigned int i, j, pirq; u32 m; - shared_info_t *s = d->shared_info; + shared_info_t *s = d->exec_domain[0]->shared_info; for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ ) { @@ -279,7 +279,7 @@ int pirq_guest_bind(struct domain *d, int irq, int will_share) /* Attempt to bind the interrupt target to the correct CPU. 
*/ if ( desc->handler->set_affinity != NULL ) desc->handler->set_affinity( - irq, apicid_to_phys_cpu_present(d->processor)); + irq, apicid_to_phys_cpu_present(d->exec_domain[0]->processor)); } else if ( !will_share || !action->shareable ) { diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c index 39345ed7a3..e24aea10f8 100644 --- a/xen/arch/x86/memory.c +++ b/xen/arch/x86/memory.c @@ -104,7 +104,7 @@ #ifdef VERBOSE #define MEM_LOG(_f, _a...) \ printk("DOM%u: (file=memory.c, line=%d) " _f "\n", \ - current->id , __LINE__ , ## _a ) + current->domain->id , __LINE__ , ## _a ) #else #define MEM_LOG(_f, _a...) ((void)0) #endif @@ -136,7 +136,7 @@ static struct { * Returns the current foreign domain; defaults to the currently-executing * domain if a foreign override hasn't been specified. */ -#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current) +#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current->domain) /* Private domain structs for DOMID_XEN and DOMID_IO. */ static struct domain *dom_xen, *dom_io; @@ -196,7 +196,7 @@ void arch_init_memory(void) } } -static void __invalidate_shadow_ldt(struct domain *d) +static void __invalidate_shadow_ldt(struct exec_domain *d) { int i; unsigned long pfn; @@ -220,7 +220,7 @@ static void __invalidate_shadow_ldt(struct domain *d) } -static inline void invalidate_shadow_ldt(struct domain *d) +static inline void invalidate_shadow_ldt(struct exec_domain *d) { if ( d->mm.shadow_ldt_mapcnt != 0 ) __invalidate_shadow_ldt(d); @@ -248,13 +248,14 @@ static int alloc_segdesc_page(struct pfn_info *page) /* Map shadow page at offset @off. 
*/ int map_ldt_shadow_page(unsigned int off) { - struct domain *d = current; + struct exec_domain *ed = current; + struct domain *d = ed->domain; unsigned long l1e; if ( unlikely(in_irq()) ) BUG(); - __get_user(l1e, (unsigned long *)&linear_pg_table[(d->mm.ldt_base >> + __get_user(l1e, (unsigned long *)&linear_pg_table[(ed->mm.ldt_base >> PAGE_SHIFT) + off]); if ( unlikely(!(l1e & _PAGE_PRESENT)) || @@ -262,8 +263,8 @@ int map_ldt_shadow_page(unsigned int off) d, PGT_ldt_page)) ) return 0; - d->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW); - d->mm.shadow_ldt_mapcnt++; + ed->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW); + ed->mm.shadow_ldt_mapcnt++; return 1; } @@ -389,7 +390,7 @@ get_page_from_l1e( { /* Revert to caller privileges if FD == DOMID_IO. */ if ( d == dom_io ) - d = current; + d = current->domain; if ( IS_PRIV(d) ) return 1; @@ -474,7 +475,7 @@ static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) == PGT_ldt_page)) && unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) ) - invalidate_shadow_ldt(e); + invalidate_shadow_ldt(e->exec_domain[0]); put_page(page); } } @@ -514,7 +515,7 @@ static int alloc_l2_table(struct pfn_info *page) pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR); pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(page->u.inuse.domain->mm.perdomain_pt) | + mk_l2_pgentry(__pa(page->u.inuse.domain->exec_domain[0]->mm.perdomain_pt) | __PAGE_HYPERVISOR); #endif @@ -625,7 +626,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 ) return update_l2e(pl2e, ol2e, nl2e); - if ( unlikely(!get_page_from_l2e(nl2e, pfn, current, + if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, ((unsigned long)pl2e & ~PAGE_MASK) >> 2)) ) return 0; @@ -672,7 +673,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, 
l1_pgentry_t nl1e) { l1_pgentry_t ol1e; unsigned long _ol1e; - struct domain *d = current; + struct domain *d = current->domain; if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) { @@ -749,11 +750,11 @@ void free_page_type(struct pfn_info *page, unsigned int type) BUG(); } - if ( unlikely(d->mm.shadow_mode) && - (get_shadow_status(&d->mm, page_to_pfn(page)) & PSH_shadowed) ) + if ( unlikely(d->exec_domain[0]->mm.shadow_mode) && + (get_shadow_status(&d->exec_domain[0]->mm, page_to_pfn(page)) & PSH_shadowed) ) { unshadow_table(page_to_pfn(page), type); - put_shadow_status(&d->mm); + put_shadow_status(&d->exec_domain[0]->mm); } } @@ -835,11 +836,11 @@ int get_page_type(struct pfn_info *page, u32 type) * circumstances should be very rare. */ struct domain *d = page->u.inuse.domain; - if ( unlikely(NEED_FLUSH(tlbflush_time[d->processor], + if ( unlikely(NEED_FLUSH(tlbflush_time[d->exec_domain[0]->processor], page->tlbflush_timestamp)) ) { perfc_incr(need_flush_tlb_flush); - flush_tlb_cpu(d->processor); + flush_tlb_cpu(d->exec_domain[0]->processor); } /* We lose existing type, back pointer, and validity. 
*/ @@ -918,7 +919,8 @@ static int do_extended_command(unsigned long ptr, unsigned long val) unsigned long pfn = ptr >> PAGE_SHIFT; unsigned long old_base_pfn; struct pfn_info *page = &frame_table[pfn]; - struct domain *d = current, *nd, *e; + struct exec_domain *ed = current; + struct domain *d = ed->domain, *nd, *e; u32 x, y; domid_t domid; grant_ref_t gntref; @@ -979,15 +981,15 @@ static int do_extended_command(unsigned long ptr, unsigned long val) okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d); if ( likely(okay) ) { - invalidate_shadow_ldt(d); + invalidate_shadow_ldt(ed); percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; - old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT; - d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); + old_base_pfn = pagetable_val(ed->mm.pagetable) >> PAGE_SHIFT; + ed->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); - shadow_mk_pagetable(&d->mm); + shadow_mk_pagetable(&ed->mm); - write_ptbase(&d->mm); + write_ptbase(&ed->mm); put_page_and_type(&frame_table[old_base_pfn]); } @@ -1028,13 +1030,13 @@ static int do_extended_command(unsigned long ptr, unsigned long val) okay = 0; MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents); } - else if ( (d->mm.ldt_ents != ents) || - (d->mm.ldt_base != ptr) ) + else if ( (ed->mm.ldt_ents != ents) || + (ed->mm.ldt_base != ptr) ) { - invalidate_shadow_ldt(d); - d->mm.ldt_base = ptr; - d->mm.ldt_ents = ents; - load_LDT(d); + invalidate_shadow_ldt(ed); + ed->mm.ldt_base = ptr; + ed->mm.ldt_ents = ents; + load_LDT(ed); percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; if ( ents != 0 ) percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; @@ -1146,13 +1148,13 @@ static int do_extended_command(unsigned long ptr, unsigned long val) * Also, a domain mustn't have PGC_allocated pages when it is dying. 
*/ ASSERT(e->tot_pages <= e->max_pages); - if ( unlikely(test_bit(DF_DYING, &e->flags)) || + if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || unlikely(e->tot_pages == e->max_pages) || unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) ) { MEM_LOG("Transferee has no reservation headroom (%d,%d), or " "provided a bad grant ref, or is dying (%08lx).\n", - e->tot_pages, e->max_pages, e->flags); + e->tot_pages, e->max_pages, e->d_flags); spin_unlock(&e->page_alloc_lock); put_domain(e); okay = 0; @@ -1206,7 +1208,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val) } /* A domain shouldn't have PGC_allocated pages when it is dying. */ - if ( unlikely(test_bit(DF_DYING, &e->flags)) || + if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || unlikely(IS_XEN_HEAP_FRAME(page)) ) { MEM_LOG("Reassignment page is Xen heap, or dest dom is dying."); @@ -1287,7 +1289,8 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) unsigned int cmd; unsigned long prev_spfn = 0; l1_pgentry_t *prev_spl1e = 0; - struct domain *d = current; + struct exec_domain *ed = current; + struct domain *d = ed->domain; u32 type_info; perfc_incrc(calls_to_mmu_update); @@ -1318,7 +1321,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table. 
*/ case MMU_NORMAL_PT_UPDATE: - if ( unlikely(!get_page_from_pagenr(pfn, current)) ) + if ( unlikely(!get_page_from_pagenr(pfn, current->domain)) ) { MEM_LOG("Could not get page for normal update"); break; @@ -1346,13 +1349,13 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) okay = mod_l1_entry((l1_pgentry_t *)va, mk_l1_pgentry(req.val)); - if ( unlikely(d->mm.shadow_mode) && okay && - (get_shadow_status(&d->mm, page-frame_table) & + if ( unlikely(ed->mm.shadow_mode) && okay && + (get_shadow_status(&ed->mm, page-frame_table) & PSH_shadowed) ) { shadow_l1_normal_pt_update( req.ptr, req.val, &prev_spfn, &prev_spl1e); - put_shadow_status(&d->mm); + put_shadow_status(&ed->mm); } put_page_type(page); @@ -1365,12 +1368,12 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) mk_l2_pgentry(req.val), pfn); - if ( unlikely(d->mm.shadow_mode) && okay && - (get_shadow_status(&d->mm, page-frame_table) & + if ( unlikely(ed->mm.shadow_mode) && okay && + (get_shadow_status(&ed->mm, page-frame_table) & PSH_shadowed) ) { shadow_l2_normal_pt_update(req.ptr, req.val); - put_shadow_status(&d->mm); + put_shadow_status(&ed->mm); } put_page_type(page); @@ -1403,9 +1406,9 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) * If in log-dirty mode, mark the corresponding pseudo-physical * page as dirty. 
*/ - if ( unlikely(d->mm.shadow_mode == SHM_logdirty) && - mark_dirty(&d->mm, pfn) ) - d->mm.shadow_dirty_block_count++; + if ( unlikely(ed->mm.shadow_mode == SHM_logdirty) && + mark_dirty(&ed->mm, pfn) ) + ed->mm.shadow_dirty_block_count++; put_page(&frame_table[pfn]); break; @@ -1465,9 +1468,10 @@ int do_update_va_mapping(unsigned long page_nr, unsigned long val, unsigned long flags) { - struct domain *d = current; + struct exec_domain *ed = current; + struct domain *d = ed->domain; int err = 0; - unsigned int cpu = d->processor; + unsigned int cpu = ed->processor; unsigned long deferred_ops; perfc_incrc(calls_to_update_va); @@ -1486,11 +1490,11 @@ int do_update_va_mapping(unsigned long page_nr, mk_l1_pgentry(val))) ) err = -EINVAL; - if ( unlikely(d->mm.shadow_mode) ) + if ( unlikely(ed->mm.shadow_mode) ) { unsigned long sval; - l1pte_propagate_from_guest(&d->mm, &val, &sval); + l1pte_propagate_from_guest(&ed->mm, &val, &sval); if ( unlikely(__put_user(sval, ((unsigned long *)( &shadow_linear_pg_table[page_nr])))) ) @@ -1507,10 +1511,10 @@ int do_update_va_mapping(unsigned long page_nr, * the PTE in the PT-holding page. We need the machine frame number * for this. 
*/ - if ( d->mm.shadow_mode == SHM_logdirty ) + if ( ed->mm.shadow_mode == SHM_logdirty ) mark_dirty(&current->mm, va_to_l1mfn(page_nr << PAGE_SHIFT)); - check_pagetable(&d->mm, d->mm.pagetable, "va"); /* debug */ + check_pagetable(&ed->mm, ed->mm.pagetable, "va"); /* debug */ } deferred_ops = percpu_info[cpu].deferred_ops; @@ -1537,7 +1541,7 @@ int do_update_va_mapping_otherdomain(unsigned long page_nr, struct domain *d; int rc; - if ( unlikely(!IS_PRIV(current)) ) + if ( unlikely(!IS_PRIV(current->domain)) ) return -EPERM; percpu_info[cpu].foreign = d = find_domain_by_id(domid); @@ -1579,7 +1583,8 @@ void ptwr_flush(const int which) l1_pgentry_t *sl1e = NULL, *pl1e, ol1e, nl1e; l2_pgentry_t *pl2e, nl2e; int i, cpu = smp_processor_id(); - struct domain *d = current; + struct exec_domain *ed = current; + struct domain *d = ed->domain; l1va = ptwr_info[cpu].ptinfo[which].l1va; ptep = (unsigned long *)&linear_pg_table[l1va>>PAGE_SHIFT]; @@ -1601,15 +1606,15 @@ void ptwr_flush(const int which) PTWR_PRINT_WHICH, ptep, pte); pte &= ~_PAGE_RW; - if ( unlikely(d->mm.shadow_mode) ) + if ( unlikely(ed->mm.shadow_mode) ) { /* Write-protect the p.t. page in the shadow page table. */ - l1pte_propagate_from_guest(&d->mm, &pte, &spte); + l1pte_propagate_from_guest(&ed->mm, &pte, &spte); __put_user( spte, (unsigned long *)&shadow_linear_pg_table[l1va>>PAGE_SHIFT]); /* Is the p.t. page itself shadowed? Map it into Xen space if so.
*/ - sstat = get_shadow_status(&d->mm, pte >> PAGE_SHIFT); + sstat = get_shadow_status(&ed->mm, pte >> PAGE_SHIFT); if ( sstat & PSH_shadowed ) sl1e = map_domain_mem((sstat & PSH_pfn_mask) << PAGE_SHIFT); } @@ -1654,7 +1659,7 @@ void ptwr_flush(const int which) { if ( unlikely(sl1e != NULL) ) l1pte_propagate_from_guest( - &d->mm, &l1_pgentry_val(nl1e), + &ed->mm, &l1_pgentry_val(nl1e), &l1_pgentry_val(sl1e[i])); put_page_type(&frame_table[l1_pgentry_to_pagenr(nl1e)]); } @@ -1677,7 +1682,7 @@ void ptwr_flush(const int which) if ( unlikely(sl1e != NULL) ) l1pte_propagate_from_guest( - &d->mm, &l1_pgentry_val(nl1e), &l1_pgentry_val(sl1e[i])); + &ed->mm, &l1_pgentry_val(nl1e), &l1_pgentry_val(sl1e[i])); if ( unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT) ) put_page_from_l1e(ol1e, d); @@ -1688,7 +1693,7 @@ void ptwr_flush(const int which) * STEP 3. Reattach the L1 p.t. page into the current address space. */ - if ( (which == PTWR_PT_ACTIVE) && likely(!d->mm.shadow_mode) ) + if ( (which == PTWR_PT_ACTIVE) && likely(!ed->mm.shadow_mode) ) { pl2e = &linear_l2_table[ptwr_info[cpu].ptinfo[which].l2_idx]; nl2e = mk_l2_pgentry(l2_pgentry_val(*pl2e) | _PAGE_PRESENT); @@ -1704,7 +1709,7 @@ void ptwr_flush(const int which) if ( unlikely(sl1e != NULL) ) { unmap_domain_mem(sl1e); - put_shadow_status(&d->mm); + put_shadow_status(&ed->mm); } } diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index b565bbd083..ffbc4662ad 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(mmu_cr4_features); unsigned long wait_init_idle; -struct domain *idle_task[NR_CPUS] = { &idle0_task }; +struct exec_domain *idle_task[NR_CPUS] = { &idle0_exec_domain }; #ifdef CONFIG_ACPI_INTERPRETER int acpi_disabled = 0; diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c index 4c0512ade8..fb15881d52 100644 --- a/xen/arch/x86/shadow.c +++ b/xen/arch/x86/shadow.c @@ -171,7 +171,7 @@ void shadow_mode_init(void) int shadow_mode_enable(struct domain *p, unsigned 
int mode) { - struct mm_struct *m = &p->mm; + struct mm_struct *m = &p->exec_domain[0]->mm; m->shadow_ht = xmalloc( shadow_ht_buckets * sizeof(struct shadow_status)); @@ -206,7 +206,7 @@ int shadow_mode_enable(struct domain *p, unsigned int mode) void __shadow_mode_disable(struct domain *d) { - struct mm_struct *m = &d->mm; + struct mm_struct *m = &d->exec_domain[0]->mm; struct shadow_status *x, *n; free_shadow_state(m); @@ -243,7 +243,7 @@ static int shadow_mode_table_op( struct domain *d, dom0_shadow_control_t *sc) { unsigned int op = sc->op; - struct mm_struct *m = &d->mm; + struct mm_struct *m = &d->exec_domain[0]->mm; int i, rc = 0; ASSERT(spin_is_locked(&m->shadow_lock)); @@ -356,7 +356,7 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc) unsigned int op = sc->op; int rc = 0; - if ( unlikely(d == current) ) + if ( unlikely(d == current->domain) ) { DPRINTK("Don't try to do a shadow op on yourself!\n"); return -EINVAL; @@ -365,7 +365,7 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc) domain_pause(d); synchronise_pagetables(~0UL); - shadow_lock(&d->mm); + shadow_lock(&d->exec_domain[0]->mm); switch ( op ) { @@ -384,11 +384,11 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc) break; default: - rc = shadow_mode(d) ? shadow_mode_table_op(d, sc) : -EINVAL; + rc = shadow_mode(d->exec_domain[0]) ? shadow_mode_table_op(d, sc) : -EINVAL; break; } - shadow_unlock(&d->mm); + shadow_unlock(&d->exec_domain[0]->mm); domain_unpause(d); @@ -428,9 +428,9 @@ void unshadow_table(unsigned long gpfn, unsigned int type) * guests there won't be a race here as this CPU was the one that * cmpxchg'ed the page to invalid. 
*/ - spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask; - delete_shadow_status(&d->mm, gpfn); - free_shadow_page(&d->mm, &frame_table[spfn]); + spfn = __shadow_status(&d->exec_domain[0]->mm, gpfn) & PSH_pfn_mask; + delete_shadow_status(&d->exec_domain[0]->mm, gpfn); + free_shadow_page(&d->exec_domain[0]->mm, &frame_table[spfn]); } unsigned long shadow_l2_table( @@ -473,7 +473,7 @@ unsigned long shadow_l2_table( spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | + mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->exec_domain[0]->mm.perdomain_pt) | __PAGE_HYPERVISOR); #endif diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index a8017a4a65..df14479c08 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -647,6 +647,7 @@ static void __init do_boot_cpu (int apicid) */ { struct domain *idle; + struct exec_domain *ed; unsigned long boot_error = 0; int timeout, cpu; unsigned long start_eip, stack; @@ -656,17 +657,19 @@ static void __init do_boot_cpu (int apicid) if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL ) panic("failed 'createdomain' for CPU %d", cpu); - set_bit(DF_IDLETASK, &idle->flags); + ed = idle->exec_domain[0]; - idle->mm.pagetable = mk_pagetable(__pa(idle_pg_table)); + set_bit(DF_IDLETASK, &idle->d_flags); + + ed->mm.pagetable = mk_pagetable(__pa(idle_pg_table)); map_cpu_to_boot_apicid(cpu, apicid); #if defined(__i386__) - SET_DEFAULT_FAST_TRAP(&idle->thread); + SET_DEFAULT_FAST_TRAP(&ed->thread); #endif - idle_task[cpu] = idle; + idle_task[cpu] = ed; /* start_eip had better be page-aligned! 
*/ start_eip = setup_trampoline(); diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 8f6cb3c694..53906b5d3d 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -207,7 +207,7 @@ static inline void do_trap(int trapnr, char *str, struct xen_regs *regs, long error_code, int use_error_code) { - struct domain *p = current; + struct exec_domain *ed = current; struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); trap_info_t *ti; unsigned long fixup; @@ -221,7 +221,7 @@ static inline void do_trap(int trapnr, char *str, gtb->cs = ti->cs; gtb->eip = ti->address; if ( TI_GET_IF(ti) ) - p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; + ed->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; return; xen_fault: @@ -267,7 +267,7 @@ DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error) asmlinkage void do_int3(struct xen_regs *regs, long error_code) { - struct domain *p = current; + struct exec_domain *ed = current; struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); trap_info_t *ti; @@ -293,7 +293,7 @@ asmlinkage void do_int3(struct xen_regs *regs, long error_code) gtb->cs = ti->cs; gtb->eip = ti->address; if ( TI_GET_IF(ti) ) - p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; + ed->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; } asmlinkage void do_double_fault(void) @@ -332,9 +332,10 @@ asmlinkage void do_page_fault(struct xen_regs *regs, long error_code) struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); trap_info_t *ti; unsigned long off, addr, fixup; - struct domain *d = current; + struct exec_domain *ed = current; + struct domain *d = ed->domain; extern int map_ldt_shadow_page(unsigned int); - int cpu = d->processor; + int cpu = ed->processor; __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : ); @@ -356,19 +357,19 @@ asmlinkage void do_page_fault(struct xen_regs *regs, long error_code) return; } - if ( unlikely(d->mm.shadow_mode) && + if ( unlikely(ed->mm.shadow_mode) && (addr < 
PAGE_OFFSET) && shadow_fault(addr, error_code) ) return; /* Returns TRUE if fault was handled. */ if ( unlikely(addr >= LDT_VIRT_START) && - (addr < (LDT_VIRT_START + (d->mm.ldt_ents*LDT_ENTRY_SIZE))) ) + (addr < (LDT_VIRT_START + (ed->mm.ldt_ents*LDT_ENTRY_SIZE))) ) { /* * Copy a mapping from the guest's LDT, if it is valid. Otherwise we * send the fault up to the guest OS to be handled. */ off = addr - LDT_VIRT_START; - addr = d->mm.ldt_base + off; + addr = ed->mm.ldt_base + off; if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) ) return; /* successfully copied the mapping */ } @@ -376,14 +377,14 @@ asmlinkage void do_page_fault(struct xen_regs *regs, long error_code) if ( unlikely(!(regs->xcs & 3)) ) goto xen_fault; - ti = d->thread.traps + 14; + ti = ed->thread.traps + 14; gtb->flags = GTBF_TRAP_CR2; /* page fault pushes %cr2 */ gtb->cr2 = addr; gtb->error_code = error_code; gtb->cs = ti->cs; gtb->eip = ti->address; if ( TI_GET_IF(ti) ) - d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; + ed->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; return; xen_fault: @@ -391,7 +392,7 @@ asmlinkage void do_page_fault(struct xen_regs *regs, long error_code) if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) { perfc_incrc(copy_user_faults); - if ( !d->mm.shadow_mode ) + if ( !ed->mm.shadow_mode ) DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup); regs->eip = fixup; regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS; @@ -435,7 +436,8 @@ asmlinkage void do_page_fault(struct xen_regs *regs, long error_code) asmlinkage void do_general_protection(struct xen_regs *regs, long error_code) { - struct domain *d = current; + struct exec_domain *ed = current; + struct domain *d = ed->domain; struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); trap_info_t *ti; unsigned long fixup; @@ -500,7 +502,7 @@ asmlinkage void do_general_protection(struct xen_regs *regs, long error_code) gtb->cs = ti->cs; gtb->eip = ti->address; if ( 
TI_GET_IF(ti) ) - d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; + ed->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; return; gp_in_kernel: @@ -582,10 +584,10 @@ static void nmi_softirq(void) return; if ( test_and_clear_bit(0, &nmi_softirq_reason) ) - send_guest_virq(dom0, VIRQ_PARITY_ERR); + send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR); if ( test_and_clear_bit(1, &nmi_softirq_reason) ) - send_guest_virq(dom0, VIRQ_IO_ERR); + send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR); } asmlinkage void math_state_restore(struct xen_regs *regs, long error_code) @@ -593,16 +595,16 @@ asmlinkage void math_state_restore(struct xen_regs *regs, long error_code) /* Prevent recursion. */ clts(); - if ( !test_bit(DF_USEDFPU, &current->flags) ) + if ( !test_bit(EDF_USEDFPU, &current->ed_flags) ) { - if ( test_bit(DF_DONEFPUINIT, &current->flags) ) + if ( test_bit(EDF_DONEFPUINIT, &current->ed_flags) ) restore_fpu(current); else init_fpu(); - set_bit(DF_USEDFPU, &current->flags); /* so we fnsave on switch_to() */ + set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */ } - if ( test_and_clear_bit(DF_GUEST_STTS, &current->flags) ) + if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) ) { struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); gtb->flags = GTBF_TRAP_NOCODE; @@ -637,7 +639,7 @@ asmlinkage void do_pdb_debug(struct xen_regs *regs, long error_code) asmlinkage void do_debug(struct xen_regs *regs, long error_code) { unsigned int condition; - struct domain *tsk = current; + struct exec_domain *tsk = current; struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); #ifdef XEN_DEBUGGER @@ -831,7 +833,7 @@ long do_set_callbacks(unsigned long event_selector, unsigned long failsafe_selector, unsigned long failsafe_address) { - struct domain *p = current; + struct exec_domain *p = current; if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) ) return -EPERM; @@ -845,7 +847,7 @@ long do_set_callbacks(unsigned long event_selector, }
-long set_fast_trap(struct domain *p, int idx) +long set_fast_trap(struct exec_domain *p, int idx) { trap_info_t *ti; @@ -898,13 +900,13 @@ long do_set_fast_trap(int idx) long do_fpu_taskswitch(void) { - set_bit(DF_GUEST_STTS, &current->flags); + set_bit(EDF_GUEST_STTS, &current->ed_flags); stts(); return 0; } -long set_debugreg(struct domain *p, int reg, unsigned long value) +long set_debugreg(struct exec_domain *p, int reg, unsigned long value) { int i; diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c index 40acc39c81..ac800e2d50 100644 --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -135,8 +135,8 @@ void __init zap_low_mappings(void) */ static void __synchronise_pagetables(void *mask) { - struct domain *d = current; - if ( ((unsigned long)mask & (1<<d->processor)) && is_idle_task(d) ) + struct exec_domain *d = current; + if ( ((unsigned long)mask & (1<<d->processor)) && is_idle_task(d->domain) ) write_ptbase(&d->mm); } void synchronise_pagetables(unsigned long cpu_mask) @@ -242,22 +242,26 @@ int check_descriptor(unsigned long *d) void destroy_gdt(struct domain *d) { + struct exec_domain *ed; int i; unsigned long pfn; - for ( i = 0; i < 16; i++ ) - { - if ( (pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i])) != 0 ) - put_page_and_type(&frame_table[pfn]); - d->mm.perdomain_pt[i] = mk_l1_pgentry(0); + for_each_exec_domain(d, ed) { + for ( i = 0; i < 16; i++ ) + { + if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_pt[i])) != 0 ) + put_page_and_type(&frame_table[pfn]); + ed->mm.perdomain_pt[i] = mk_l1_pgentry(0); + } } } -long set_gdt(struct domain *d, +long set_gdt(struct exec_domain *ed, unsigned long *frames, unsigned int entries) { + struct domain *d = ed->domain; /* NB. There are 512 8-byte entries per GDT page. */ int i = 0, nr_pages = (entries + 511) / 512; struct desc_struct *vgdt; @@ -302,11 +306,11 @@ long set_gdt(struct domain *d, /* Install the new GDT.
*/ for ( i = 0; i < nr_pages; i++ ) - d->mm.perdomain_pt[i] = + ed->mm.perdomain_pt[i] = mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR); - SET_GDT_ADDRESS(d, GDT_VIRT_START); - SET_GDT_ENTRIES(d, entries); + SET_GDT_ADDRESS(ed, GDT_VIRT_START); + SET_GDT_ENTRIES(ed, entries); return 0; @@ -353,7 +357,7 @@ long do_update_descriptor( return -EINVAL; page = &frame_table[pfn]; - if ( unlikely(!get_page(page, current)) ) + if ( unlikely(!get_page(page, current->domain)) ) return -EINVAL; /* Check if the given frame is in use in an unsafe context. */ diff --git a/xen/arch/x86/x86_32/seg_fixup.c b/xen/arch/x86/x86_32/seg_fixup.c index 30ee3222a5..2b871df124 100644 --- a/xen/arch/x86/x86_32/seg_fixup.c +++ b/xen/arch/x86/x86_32/seg_fixup.c @@ -105,7 +105,7 @@ static unsigned char insn_decode[256] = { */ int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit) { - struct domain *d = current; + struct exec_domain *d = current; unsigned long *table, a, b; int ldt = !!(seg & 4); int idx = (seg >> 3) & 8191; @@ -171,7 +171,7 @@ int linearise_address(u16 seg, unsigned long off, unsigned long *linear) int fixup_seg(u16 seg, int positive_access) { - struct domain *d = current; + struct exec_domain *d = current; unsigned long *table, a, b, base, limit; int ldt = !!(seg & 4); int idx = (seg >> 3) & 8191; @@ -284,7 +284,7 @@ void *decode_reg(struct xen_regs *regs, u8 b) */ int gpf_emulate_4gb(struct xen_regs *regs) { - struct domain *d = current; + struct exec_domain *d = current; trap_info_t *ti; struct guest_trap_bounce *gtb; u8 modrm, mod, reg, rm, decode; @@ -463,7 +463,7 @@ int gpf_emulate_4gb(struct xen_regs *regs) perfc_incrc(seg_fixups); /* If requested, give a callback on otherwise unused vector 15. 
*/ - if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments_notify) ) + if ( VM_ASSIST(d->domain, VMASST_TYPE_4gb_segments_notify) ) { ti = &d->thread.traps[15]; gtb = &guest_trap_bounce[d->processor]; diff --git a/xen/common/Makefile b/xen/common/Makefile index 84e3a1292f..e042d79584 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -19,6 +19,9 @@ ifneq ($(trace),y) OBJS := $(subst trace.o,,$(OBJS)) endif +OBJS := $(subst sched_atropos.o,,$(OBJS)) +OBJS := $(subst sched_rrobin.o,,$(OBJS)) + default: $(OBJS) $(LD) $(LDFLAGS) -r -o common.o $(OBJS) diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index 812814068b..e1046064a7 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -26,7 +26,7 @@ extern unsigned int alloc_new_dom_mem(struct domain *, unsigned int); extern long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op); extern void arch_getdomaininfo_ctxt( - struct domain *, full_execution_context_t *); + struct exec_domain *, full_execution_context_t *); static inline int is_free_domid(domid_t dom) { @@ -96,7 +96,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) long ret = 0; dom0_op_t curop, *op = &curop; - if ( !IS_PRIV(current) ) + if ( !IS_PRIV(current->domain) ) return -EPERM; if ( copy_from_user(op, u_dom0_op, sizeof(*op)) ) @@ -131,7 +131,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( d != NULL ) { ret = -EINVAL; - if ( d != current ) + if ( d != current->domain ) { domain_pause_by_systemcontroller(d); ret = 0; @@ -148,7 +148,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( d != NULL ) { ret = -EINVAL; - if ( test_bit(DF_CONSTRUCTED, &d->flags) ) + if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) { domain_unpause_by_systemcontroller(d); ret = 0; @@ -178,11 +178,14 @@ long do_dom0_op(dom0_op_t *u_dom0_op) { /* Do an initial placement. Pick the least-populated CPU. 
*/ struct domain *d; + struct exec_domain *ed; unsigned int i, cnt[NR_CPUS] = { 0 }; read_lock(&domlist_lock); - for_each_domain ( d ) - cnt[d->processor]++; + for_each_domain ( d ) { + for_each_exec_domain ( d, ed ) + cnt[ed->processor]++; + } read_unlock(&domlist_lock); for ( i = 0; i < smp_num_cpus; i++ ) @@ -217,7 +220,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( d != NULL ) { ret = -EINVAL; - if ( d != current ) + if ( d != current->domain ) { domain_kill(d); ret = 0; @@ -231,6 +234,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) { domid_t dom = op->u.pincpudomain.domain; struct domain *d = find_domain_by_id(dom); + struct exec_domain *ed; int cpu = op->u.pincpudomain.cpu; if ( d == NULL ) @@ -239,7 +243,15 @@ long do_dom0_op(dom0_op_t *u_dom0_op) break; } - if ( d == current ) + ed = d->exec_domain[op->u.pincpudomain.exec_domain]; + if ( ed == NULL ) + { + ret = -ESRCH; + put_domain(d); + break; + } + + if ( ed == current ) { ret = -EINVAL; put_domain(d); @@ -248,17 +260,17 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( cpu == -1 ) { - clear_bit(DF_CPUPINNED, &d->flags); + clear_bit(EDF_CPUPINNED, &ed->ed_flags); } else { - domain_pause(d); + exec_domain_pause(ed); synchronise_pagetables(~0UL); - if ( d->processor != (cpu % smp_num_cpus) ) - set_bit(DF_MIGRATED, &d->flags); - set_bit(DF_CPUPINNED, &d->flags); - d->processor = cpu % smp_num_cpus; - domain_unpause(d); + if ( ed->processor != (cpu % smp_num_cpus) ) + set_bit(EDF_MIGRATED, &ed->ed_flags); + set_bit(EDF_CPUPINNED, &ed->ed_flags); + ed->processor = cpu % smp_num_cpus; + exec_domain_unpause(ed); } put_domain(d); @@ -321,6 +333,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) { full_execution_context_t *c; struct domain *d; + struct exec_domain *ed; read_lock(&domlist_lock); @@ -340,24 +353,26 @@ long do_dom0_op(dom0_op_t *u_dom0_op) read_unlock(&domlist_lock); op->u.getdomaininfo.domain = d->id; - + + ed = d->exec_domain[0]; // op->u.getdomaininfo.exec_domain]; + op->u.getdomaininfo.flags = - 
(test_bit(DF_DYING, &d->flags) ? DOMFLAGS_DYING : 0) | - (test_bit(DF_CRASHED, &d->flags) ? DOMFLAGS_CRASHED : 0) | - (test_bit(DF_SHUTDOWN, &d->flags) ? DOMFLAGS_SHUTDOWN : 0) | - (test_bit(DF_CTRLPAUSE, &d->flags) ? DOMFLAGS_PAUSED : 0) | - (test_bit(DF_BLOCKED, &d->flags) ? DOMFLAGS_BLOCKED : 0) | - (test_bit(DF_RUNNING, &d->flags) ? DOMFLAGS_RUNNING : 0); - - op->u.getdomaininfo.flags |= d->processor << DOMFLAGS_CPUSHIFT; + (test_bit( DF_DYING, &d->d_flags) ? DOMFLAGS_DYING : 0) | + (test_bit( DF_CRASHED, &d->d_flags) ? DOMFLAGS_CRASHED : 0) | + (test_bit( DF_SHUTDOWN, &d->d_flags) ? DOMFLAGS_SHUTDOWN : 0) | + (test_bit(EDF_CTRLPAUSE, &ed->ed_flags) ? DOMFLAGS_PAUSED : 0) | + (test_bit(EDF_BLOCKED, &ed->ed_flags) ? DOMFLAGS_BLOCKED : 0) | + (test_bit(EDF_RUNNING, &ed->ed_flags) ? DOMFLAGS_RUNNING : 0); + + op->u.getdomaininfo.flags |= ed->processor << DOMFLAGS_CPUSHIFT; op->u.getdomaininfo.flags |= d->shutdown_code << DOMFLAGS_SHUTDOWNSHIFT; op->u.getdomaininfo.tot_pages = d->tot_pages; op->u.getdomaininfo.max_pages = d->max_pages; - op->u.getdomaininfo.cpu_time = d->cpu_time; + op->u.getdomaininfo.cpu_time = ed->cpu_time; op->u.getdomaininfo.shared_info_frame = - __pa(d->shared_info) >> PAGE_SHIFT; + __pa(ed->shared_info) >> PAGE_SHIFT; if ( op->u.getdomaininfo.ctxt != NULL ) { @@ -368,13 +383,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op) break; } - if ( d != current ) - domain_pause(d); + if ( ed != current ) + exec_domain_pause(ed); - arch_getdomaininfo_ctxt(d,c); + arch_getdomaininfo_ctxt(ed,c); - if ( d != current ) - domain_unpause(d); + if ( ed != current ) + exec_domain_unpause(ed); if ( copy_to_user(op->u.getdomaininfo.ctxt, c, sizeof(*c)) ) ret = -EINVAL; @@ -524,7 +539,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( d != NULL ) { /* should only be used *before* domain is built. 
*/ - if ( !test_bit(DF_CONSTRUCTED, &d->flags) ) + if ( !test_bit(DF_CONSTRUCTED, &d->d_flags) ) ret = alloc_new_dom_mem( d, op->u.setdomaininitialmem.initial_memkb ); else diff --git a/xen/common/dom_mem_ops.c b/xen/common/dom_mem_ops.c index 2169fad2a1..b19aadd4a3 100644 --- a/xen/common/dom_mem_ops.c +++ b/xen/common/dom_mem_ops.c @@ -27,7 +27,7 @@ static long alloc_dom_mem(struct domain *d, nr_extents*sizeof(*extent_list))) ) return 0; - if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current) ) + if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) ) { DPRINTK("Only I/O-capable domains may allocate > order-0 memory.\n"); return 0; @@ -105,8 +105,8 @@ long do_dom_mem_op(unsigned int op, long rc; if ( likely(domid == DOMID_SELF) ) - d = current; - else if ( unlikely(!IS_PRIV(current)) ) + d = current->domain; + else if ( unlikely(!IS_PRIV(current->domain)) ) return -EPERM; else if ( unlikely((d = find_domain_by_id(domid)) == NULL) ) return -ESRCH; diff --git a/xen/common/domain.c b/xen/common/domain.c index d06eacfa89..5ca8697cbe 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -25,20 +25,23 @@ struct domain *domain_list; struct domain *do_createdomain(domid_t dom_id, unsigned int cpu) { struct domain *d, **pd; + struct exec_domain *ed; if ( (d = alloc_domain_struct()) == NULL ) return NULL; + ed = d->exec_domain[0]; + atomic_set(&d->refcnt, 1); - atomic_set(&d->pausecnt, 0); + atomic_set(&ed->pausecnt, 0); - shadow_lock_init(d); + shadow_lock_init(ed); d->id = dom_id; - d->processor = cpu; + ed->processor = cpu; d->create_time = NOW(); - memcpy(&d->thread, &idle0_task.thread, sizeof(d->thread)); + memcpy(&ed->thread, &idle0_exec_domain.thread, sizeof(ed->thread)); spin_lock_init(&d->page_alloc_lock); INIT_LIST_HEAD(&d->page_list); @@ -57,7 +60,7 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu) return NULL; } - arch_do_createdomain(d); + arch_do_createdomain(ed); sched_add_domain(d); @@ -128,7 +131,7 @@ struct domain 
*find_last_domain(void) void domain_kill(struct domain *d) { domain_pause(d); - if ( !test_and_set_bit(DF_DYING, &d->flags) ) + if ( !test_and_set_bit(DF_DYING, &d->d_flags) ) { sched_rem_domain(d); domain_relinquish_memory(d); @@ -139,12 +142,14 @@ void domain_kill(struct domain *d) void domain_crash(void) { - if ( current->id == 0 ) + struct domain *d = current->domain; + + if ( d->id == 0 ) BUG(); - set_bit(DF_CRASHED, &current->flags); + set_bit(DF_CRASHED, &d->d_flags); - send_guest_virq(dom0, VIRQ_DOM_EXC); + send_guest_virq(dom0->exec_domain[0], VIRQ_DOM_EXC); __enter_scheduler(); BUG(); @@ -152,7 +157,9 @@ void domain_crash(void) void domain_shutdown(u8 reason) { - if ( current->id == 0 ) + struct domain *d = current->domain; + + if ( d->id == 0 ) { extern void machine_restart(char *); extern void machine_halt(void); @@ -169,10 +176,10 @@ void domain_shutdown(u8 reason) } } - current->shutdown_code = reason; - set_bit(DF_SHUTDOWN, &current->flags); + d->shutdown_code = reason; + set_bit(DF_SHUTDOWN, &d->d_flags); - send_guest_virq(dom0, VIRQ_DOM_EXC); + send_guest_virq(dom0->exec_domain[0], VIRQ_DOM_EXC); __enter_scheduler(); } @@ -206,9 +213,10 @@ unsigned int alloc_new_dom_mem(struct domain *d, unsigned int kbytes) void domain_destruct(struct domain *d) { struct domain **pd; + struct exec_domain *ed; atomic_t old, new; - if ( !test_bit(DF_DYING, &d->flags) ) + if ( !test_bit(DF_DYING, &d->d_flags) ) BUG(); /* May be already destructed, or get_domain() can race us.
*/ @@ -233,8 +241,9 @@ void domain_destruct(struct domain *d) destroy_event_channels(d); grant_table_destroy(d); - free_perdomain_pt(d); - free_xenheap_page((unsigned long)d->shared_info); + for_each_exec_domain(d, ed) + free_perdomain_pt(ed); + free_xenheap_page((unsigned long)d->exec_domain[0]->shared_info); free_domain_struct(d); } @@ -253,7 +262,7 @@ int final_setup_guestos(struct domain *p, dom0_builddomain_t *builddomain) if ( (c = xmalloc(sizeof(*c))) == NULL ) return -ENOMEM; - if ( test_bit(DF_CONSTRUCTED, &p->flags) ) + if ( test_bit(DF_CONSTRUCTED, &p->d_flags) ) { rc = -EINVAL; goto out; @@ -265,13 +274,13 @@ int final_setup_guestos(struct domain *p, dom0_builddomain_t *builddomain) goto out; } - if ( (rc = arch_final_setup_guestos(p,c)) != 0 ) + if ( (rc = arch_final_setup_guestos(p->exec_domain[0],c)) != 0 ) goto out; /* Set up the shared info structure. */ - update_dom_time(p->shared_info); + update_dom_time(p->exec_domain[0]->shared_info); - set_bit(DF_CONSTRUCTED, &p->flags); + set_bit(DF_CONSTRUCTED, &p->d_flags); out: if ( c != NULL ) diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index 40e218c731..dd1c49b194 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -71,7 +71,7 @@ static int get_free_port(struct domain *d) static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) { - struct domain *d = current; + struct domain *d = current->domain; int port; spin_lock(&d->event_channel_lock); @@ -100,16 +100,16 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) domid_t dom1 = bind->dom1, dom2 = bind->dom2; long rc = 0; - if ( !IS_PRIV(current) && (dom1 != DOMID_SELF) ) + if ( !IS_PRIV(current->domain) && (dom1 != DOMID_SELF) ) return -EPERM; if ( (port1 < 0) || (port2 < 0) ) return -EINVAL; if ( dom1 == DOMID_SELF ) - dom1 = current->id; + dom1 = current->domain->id; if ( dom2 == DOMID_SELF ) - dom2 = current->id; + dom2 = current->domain->id; if ( ((d1 = find_domain_by_id(dom1)) 
== NULL) || ((d2 = find_domain_by_id(dom2)) == NULL) ) @@ -183,7 +183,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) switch ( d2->event_channel[port2].state ) { case ECS_FREE: - if ( !IS_PRIV(current) && (dom2 != DOMID_SELF) ) + if ( !IS_PRIV(current->domain) && (dom2 != DOMID_SELF) ) ERROR_EXIT(-EPERM); break; @@ -235,7 +235,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) static long evtchn_bind_virq(evtchn_bind_virq_t *bind) { - struct domain *d = current; + struct domain *d = current->domain; int port, virq = bind->virq; if ( virq >= ARRAY_SIZE(d->virq_to_evtchn) ) @@ -271,7 +271,7 @@ static long evtchn_bind_virq(evtchn_bind_virq_t *bind) static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind) { - struct domain *d = current; + struct domain *d = current->domain; int port, rc, pirq = bind->pirq; if ( pirq >= ARRAY_SIZE(d->pirq_to_evtchn) ) @@ -417,8 +417,8 @@ static long evtchn_close(evtchn_close_t *close) domid_t dom = close->dom; if ( dom == DOMID_SELF ) - dom = current->id; - else if ( !IS_PRIV(current) ) + dom = current->domain->id; + else if ( !IS_PRIV(current->domain) ) return -EPERM; if ( (d = find_domain_by_id(dom)) == NULL ) @@ -433,7 +433,7 @@ static long evtchn_close(evtchn_close_t *close) static long evtchn_send(int lport) { - struct domain *ld = current, *rd; + struct domain *ld = current->domain, *rd; int rport; spin_lock(&ld->event_channel_lock); @@ -466,8 +466,8 @@ static long evtchn_status(evtchn_status_t *status) long rc = 0; if ( dom == DOMID_SELF ) - dom = current->id; - else if ( !IS_PRIV(current) ) + dom = current->domain->id; + else if ( !IS_PRIV(current->domain) ) return -EPERM; if ( (d = find_domain_by_id(dom)) == NULL ) diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 2c948b7433..9da8866a80 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -74,7 +74,7 @@ __gnttab_map_grant_ref( */ int retries = 0; - ld = current; + ld = current->domain; /* 
Bitwise-OR avoids short-circuiting which screws control flow. */ if ( unlikely(__get_user(dom, &uop->dom) | @@ -291,7 +291,7 @@ __gnttab_unmap_grant_ref( s16 rc = 0; unsigned long frame, virt; - ld = current; + ld = current->domain; /* Bitwise-OR avoids short-circuiting which screws control flow. */ if ( unlikely(__get_user(virt, &uop->host_virt_addr) | @@ -404,9 +404,9 @@ gnttab_setup_table( if ( op.dom == DOMID_SELF ) { - op.dom = current->id; + op.dom = current->domain->id; } - else if ( unlikely(!IS_PRIV(current)) ) + else if ( unlikely(!IS_PRIV(current->domain)) ) { (void)put_user(GNTST_permission_denied, &uop->status); return 0; diff --git a/xen/common/kernel.c b/xen/common/kernel.c index c7e6b27a4e..3d43152d98 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -29,6 +29,7 @@ unsigned long xenheap_phys_end; xmem_cache_t *domain_struct_cachep; +xmem_cache_t *exec_domain_struct_cachep; struct domain *dom0; vm_assist_info_t vm_assist_info[MAX_VMASST_TYPE + 1]; @@ -184,7 +185,7 @@ void cmain(multiboot_info_t *mbi) } /* Must do this early -- e.g., spinlocks rely on get_current(). */ - set_current(&idle0_task); + set_current(&idle0_exec_domain); /* We initialise the serial devices very early so we can get debugging. */ serial_init_stage1(); @@ -304,6 +305,12 @@ void cmain(multiboot_info_t *mbi) if ( domain_struct_cachep == NULL ) panic("No slab cache for task structs."); + exec_domain_struct_cachep = xmem_cache_create( + "exec_dom_cache", sizeof(struct exec_domain), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if ( exec_domain_struct_cachep == NULL ) + panic("No slab cache for task structs."); + start_of_day(); grant_table_init(); @@ -313,7 +320,7 @@ void cmain(multiboot_info_t *mbi) if ( dom0 == NULL ) panic("Error creating domain 0\n"); - set_bit(DF_PRIVILEGED, &dom0->flags); + set_bit(DF_PRIVILEGED, &dom0->d_flags); shadow_mode_init(); @@ -352,7 +359,7 @@ void cmain(multiboot_info_t *mbi) /* Give up the VGA console if DOM0 is configured to grab it. 
*/ console_endboot(cmdline && strstr(cmdline, "tty0")); - domain_unpause_by_systemcontroller(current); + domain_unpause_by_systemcontroller(current->domain); domain_unpause_by_systemcontroller(dom0); startup_cpu_idle_loop(); } @@ -370,7 +377,7 @@ long do_xen_version(int cmd) long do_vm_assist(unsigned int cmd, unsigned int type) { - return vm_assist(current, cmd, type); + return vm_assist(current->domain, cmd, type); } long do_ni_hypercall(void) diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index d52aabd771..b8acae71a1 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -67,6 +67,7 @@ static void halt_machine(unsigned char key) void do_task_queues(unsigned char key) { struct domain *d; + struct exec_domain *ed; s_time_t now = NOW(); struct list_head *ent; struct pfn_info *page; @@ -78,10 +79,8 @@ void do_task_queues(unsigned char key) for_each_domain ( d ) { - printk("Xen: DOM %u, CPU %d [has=%c] flags=%lx refcnt=%d nr_pages=%d " - "xenheap_pages=%d\n", - d->id, d->processor, - test_bit(DF_RUNNING, &d->flags) ? 'T':'F', d->flags, + printk("Xen: DOM %u, flags=%lx refcnt=%d nr_pages=%d " + "xenheap_pages=%d\n", d->id, d->d_flags, atomic_read(&d->refcnt), d->tot_pages, d->xenheap_pages); if ( d->tot_pages < 10 ) @@ -95,16 +94,22 @@ void do_task_queues(unsigned char key) } } - page = virt_to_page(d->shared_info); + page = virt_to_page(d->exec_domain[0]->shared_info); printk("Shared_info@%08x: caf=%08x, taf=%08x\n", page_to_phys(page), page->count_info, page->u.inuse.type_info); - printk("Guest: upcall_pend = %02x, upcall_mask = %02x\n", - d->shared_info->vcpu_data[0].evtchn_upcall_pending, - d->shared_info->vcpu_data[0].evtchn_upcall_mask); + for_each_exec_domain ( d, ed ) { + printk("Guest: CPU %d [has=%c] flags=%lx " + "upcall_pend = %02x, upcall_mask = %02x\n", + ed->processor, + test_bit(EDF_RUNNING, &ed->ed_flags) ? 
'T':'F', + ed->ed_flags, + ed->shared_info->vcpu_data[0].evtchn_upcall_pending, + ed->shared_info->vcpu_data[0].evtchn_upcall_mask); + } printk("Notifying guest...\n"); - send_guest_virq(d, VIRQ_DEBUG); + send_guest_virq(d->exec_domain[0], VIRQ_DEBUG); } read_unlock(&domlist_lock); diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index c8c96dbee3..e11ed6786d 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -456,13 +456,13 @@ struct pfn_info *alloc_domheap_pages(struct domain *d, int order) spin_lock(&d->page_alloc_lock); - if ( unlikely(test_bit(DF_DYING, &d->flags)) || + if ( unlikely(test_bit(DF_DYING, &d->d_flags)) || unlikely((d->tot_pages + (1 << order)) > d->max_pages) ) { DPRINTK("Over-allocation for domain %u: %u > %u\n", d->id, d->tot_pages + (1 << order), d->max_pages); DPRINTK("...or the domain is dying (%d)\n", - !!test_bit(DF_DYING, &d->flags)); + !!test_bit(DF_DYING, &d->d_flags)); spin_unlock(&d->page_alloc_lock); free_heap_pages(MEMZONE_DOM, pg, order); return NULL; @@ -491,7 +491,9 @@ void free_domheap_pages(struct pfn_info *pg, int order) { int i, drop_dom_ref; struct domain *d = pg->u.inuse.domain; + struct exec_domain *ed; void *p; + int cpu_mask = 0; ASSERT(!in_irq()); @@ -513,11 +515,14 @@ void free_domheap_pages(struct pfn_info *pg, int order) /* NB. May recursively lock from domain_relinquish_memory(). */ spin_lock_recursive(&d->page_alloc_lock); + for_each_exec_domain(d, ed) + cpu_mask |= 1 << ed->processor; + for ( i = 0; i < (1 << order); i++ ) { ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0); pg[i].tlbflush_timestamp = tlbflush_current_time(); - pg[i].u.free.cpu_mask = 1 << d->processor; + pg[i].u.free.cpu_mask = cpu_mask; list_del(&pg[i].list); /* @@ -525,7 +530,7 @@ void free_domheap_pages(struct pfn_info *pg, int order) * if it cares about the secrecy of their contents. However, after * a domain has died we assume responsibility for erasure. 
*/ - if ( unlikely(test_bit(DF_DYING, &d->flags)) ) + if ( unlikely(test_bit(DF_DYING, &d->d_flags)) ) { p = map_domain_mem(page_to_phys(&pg[i])); clear_page(p); diff --git a/xen/common/physdev.c b/xen/common/physdev.c index b1c16d00b3..7e9c952f44 100644 --- a/xen/common/physdev.c +++ b/xen/common/physdev.c @@ -128,7 +128,7 @@ int physdev_pci_access_modify( struct pci_dev *pdev; int i, j, rc = 0; - if ( !IS_PRIV(current) ) + if ( !IS_PRIV(current->domain) ) BUG(); if ( (bus > PCI_BUSMAX) || (dev > PCI_DEVMAX) || (func > PCI_FUNCMAX) ) @@ -146,10 +146,10 @@ int physdev_pci_access_modify( return -ESRCH; /* Make the domain privileged. */ - set_bit(DF_PHYSDEV, &p->flags); + set_bit(DF_PHYSDEV, &p->d_flags); /* FIXME: MAW for now make the domain REALLY privileged so that it * can run a backend driver (hw access should work OK otherwise) */ - set_bit(DF_PRIVILEGED, &p->flags); + set_bit(DF_PRIVILEGED, &p->d_flags); /* Grant write access to the specified device. */ if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL ) @@ -494,7 +494,7 @@ static long pci_cfgreg_read(int bus, int dev, int func, int reg, int ret; phys_dev_t *pdev; - if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 ) + if ( (ret = check_dev_acc(current->domain, bus, dev, func, &pdev)) != 0 ) { /* PCI spec states that reads from non-existent devices should return * all 1s. 
In this case the domain has no read access, which should @@ -559,7 +559,7 @@ static long pci_cfgreg_write(int bus, int dev, int func, int reg, int ret; phys_dev_t *pdev; - if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 ) + if ( (ret = check_dev_acc(current->domain, bus, dev, func, &pdev)) != 0 ) return ret; /* special treatment for some registers */ @@ -621,7 +621,7 @@ static long pci_probe_root_buses(u32 *busmask) memset(busmask, 0, 256/8); - list_for_each ( tmp, &current->pcidev_list ) + list_for_each ( tmp, &current->domain->pcidev_list ) { pdev = list_entry(tmp, phys_dev_t, node); set_bit(pdev->dev->bus->number, busmask); @@ -665,7 +665,7 @@ long do_physdev_op(physdev_op_t *uop) break; case PHYSDEVOP_PCI_INITIALISE_DEVICE: - if ( (ret = check_dev_acc(current, + if ( (ret = check_dev_acc(current->domain, op.u.pci_initialise_device.bus, op.u.pci_initialise_device.dev, op.u.pci_initialise_device.func, @@ -678,7 +678,7 @@ long do_physdev_op(physdev_op_t *uop) break; case PHYSDEVOP_IRQ_UNMASK_NOTIFY: - ret = pirq_guest_unmask(current); + ret = pirq_guest_unmask(current->domain); break; case PHYSDEVOP_IRQ_STATUS_QUERY: @@ -757,6 +757,6 @@ void physdev_init_dom0(struct domain *p) list_add(&pdev->node, &p->pcidev_list); } - set_bit(DF_PHYSDEV, &p->flags); + set_bit(DF_PHYSDEV, &p->d_flags); } diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c index 1d447b718b..065b91949a 100644 --- a/xen/common/sched_bvt.c +++ b/xen/common/sched_bvt.c @@ -28,13 +28,19 @@ #include <xen/softirq.h> /* all per-domain BVT-specific scheduling info is stored here */ -struct bvt_dom_info +struct bvt_edom_info { - struct domain *domain; /* domain this info belongs to */ struct list_head run_list; /* runqueue list pointers */ - u32 mcu_advance; /* inverse of weight */ u32 avt; /* actual virtual time */ u32 evt; /* effective virtual time */ + struct exec_domain *exec_domain; + struct bvt_dom_info *inf; +}; + +struct bvt_dom_info +{ + struct domain *domain; /* domain this info
belongs to */ + u32 mcu_advance; /* inverse of weight */ int warpback; /* warp? */ int warp; /* warp set and within the warp limits*/ @@ -43,6 +49,8 @@ struct bvt_dom_info struct ac_timer warp_timer; /* deals with warpl */ s_time_t warpu; /* unwarp time requirement */ struct ac_timer unwarp_timer; /* deals with warpu */ + + struct bvt_edom_info ed_inf[MAX_VIRT_CPUS]; }; struct bvt_cpu_info @@ -52,8 +60,9 @@ struct bvt_cpu_info }; #define BVT_INFO(p) ((struct bvt_dom_info *)(p)->sched_priv) +#define EBVT_INFO(p) ((struct bvt_edom_info *)(p)->ed_sched_priv) #define CPU_INFO(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv) -#define RUNLIST(p) ((struct list_head *)&(BVT_INFO(p)->run_list)) +#define RUNLIST(p) ((struct list_head *)&(EBVT_INFO(p)->run_list)) #define RUNQUEUE(cpu) ((struct list_head *)&(CPU_INFO(cpu)->runqueue)) #define CPU_SVT(cpu) (CPU_INFO(cpu)->svt) @@ -64,24 +73,24 @@ static s32 ctx_allow = (s32)MILLISECS(5); /* context switch allowance */ static xmem_cache_t *dom_info_cache; -static inline void __add_to_runqueue_head(struct domain *d) +static inline void __add_to_runqueue_head(struct exec_domain *d) { list_add(RUNLIST(d), RUNQUEUE(d->processor)); } -static inline void __add_to_runqueue_tail(struct domain *d) +static inline void __add_to_runqueue_tail(struct exec_domain *d) { list_add_tail(RUNLIST(d), RUNQUEUE(d->processor)); } -static inline void __del_from_runqueue(struct domain *d) +static inline void __del_from_runqueue(struct exec_domain *d) { struct list_head *runlist = RUNLIST(d); list_del(runlist); runlist->next = NULL; } -static inline int __task_on_runqueue(struct domain *d) +static inline int __task_on_runqueue(struct exec_domain *d) { return (RUNLIST(d))->next != NULL; } @@ -91,7 +100,7 @@ static inline int __task_on_runqueue(struct domain *d) static void warp_timer_fn(unsigned long pointer) { struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer; - unsigned int cpu = inf->domain->processor; + unsigned int cpu = 
inf->domain->exec_domain[0]->processor; spin_lock_irq(&schedule_data[cpu].schedule_lock); @@ -114,7 +123,7 @@ static void warp_timer_fn(unsigned long pointer) static void unwarp_timer_fn(unsigned long pointer) { struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer; - unsigned int cpu = inf->domain->processor; + unsigned int cpu = inf->domain->exec_domain[0]->processor; spin_lock_irq(&schedule_data[cpu].schedule_lock); @@ -127,24 +136,25 @@ static void unwarp_timer_fn(unsigned long pointer) spin_unlock_irq(&schedule_data[cpu].schedule_lock); } -static inline u32 calc_avt(struct domain *d, s_time_t now) +static inline u32 calc_avt(struct exec_domain *d, s_time_t now) { u32 ranfor, mcus; - struct bvt_dom_info *inf = BVT_INFO(d); + struct bvt_dom_info *inf = BVT_INFO(d->domain); + struct bvt_edom_info *einf = EBVT_INFO(d); ranfor = (u32)(now - d->lastschd); mcus = (ranfor + MCU - 1)/MCU; - return inf->avt + mcus * inf->mcu_advance; + return einf->avt + mcus * inf->mcu_advance; } /* * Calculate the effective virtual time for a domain. Take into account * warping limits */ -static inline u32 calc_evt(struct domain *d, u32 avt) +static inline u32 calc_evt(struct exec_domain *d, u32 avt) { - struct bvt_dom_info *inf = BVT_INFO(d); + struct bvt_dom_info *inf = BVT_INFO(d->domain); /* TODO The warp routines need to be rewritten GM */ if ( inf->warp ) @@ -159,25 +169,32 @@ static inline u32 calc_evt(struct domain *d, u32 avt) * * Returns non-zero on failure. 
*/ -int bvt_alloc_task(struct domain *d) +int bvt_alloc_task(struct exec_domain *ed) { - if ( (d->sched_priv = xmem_cache_alloc(dom_info_cache)) == NULL ) - return -1; - memset(d->sched_priv, 0, sizeof(struct bvt_dom_info)); + struct domain *d = ed->domain; + if ( (d->sched_priv == NULL) ) { + if ( (d->sched_priv = xmem_cache_alloc(dom_info_cache)) == NULL ) + return -1; + memset(d->sched_priv, 0, sizeof(struct bvt_dom_info)); + } + ed->ed_sched_priv = &BVT_INFO(d)->ed_inf[ed->eid]; + BVT_INFO(d)->ed_inf[ed->eid].inf = BVT_INFO(d); return 0; } /* * Add and remove a domain */ -void bvt_add_task(struct domain *d) +void bvt_add_task(struct exec_domain *d) { - struct bvt_dom_info *inf = BVT_INFO(d); + struct bvt_dom_info *inf = BVT_INFO(d->domain); + struct bvt_edom_info *einf = EBVT_INFO(d); ASSERT(inf != NULL); ASSERT(d != NULL); inf->mcu_advance = MCU_ADVANCE; - inf->domain = d; + inf->domain = d->domain; + einf->exec_domain = d; inf->warpback = 0; /* Set some default values here. */ inf->warp = 0; @@ -194,36 +211,36 @@ void bvt_add_task(struct domain *d) inf->unwarp_timer.data = (unsigned long)inf; inf->unwarp_timer.function = &unwarp_timer_fn; - if ( d->id == IDLE_DOMAIN_ID ) + if ( d->domain->id == IDLE_DOMAIN_ID ) { - inf->avt = inf->evt = ~0U; + einf->avt = einf->evt = ~0U; } else { /* Set avt and evt to system virtual time. 
*/ - inf->avt = CPU_SVT(d->processor); - inf->evt = CPU_SVT(d->processor); + einf->avt = CPU_SVT(d->processor); + einf->evt = CPU_SVT(d->processor); } } -int bvt_init_idle_task(struct domain *p) +int bvt_init_idle_task(struct exec_domain *p) { if ( bvt_alloc_task(p) < 0 ) return -1; bvt_add_task(p); - set_bit(DF_RUNNING, &p->flags); + set_bit(EDF_RUNNING, &p->ed_flags); if ( !__task_on_runqueue(p) ) __add_to_runqueue_head(p); return 0; } -void bvt_wake(struct domain *d) +void bvt_wake(struct exec_domain *d) { - struct bvt_dom_info *inf = BVT_INFO(d); - struct domain *curr; + struct bvt_edom_info *einf = EBVT_INFO(d); + struct exec_domain *curr; s_time_t now, r_time; int cpu = d->processor; u32 curr_evt; @@ -237,31 +254,31 @@ void bvt_wake(struct domain *d) /* Set the BVT parameters. AVT should always be updated if CPU migration ocurred.*/ - if ( inf->avt < CPU_SVT(cpu) || - unlikely(test_bit(DF_MIGRATED, &d->flags)) ) - inf->avt = CPU_SVT(cpu); + if ( einf->avt < CPU_SVT(cpu) || + unlikely(test_bit(EDF_MIGRATED, &d->ed_flags)) ) + einf->avt = CPU_SVT(cpu); /* Deal with warping here. 
*/ - inf->evt = calc_evt(d, inf->avt); + einf->evt = calc_evt(d, einf->avt); curr = schedule_data[cpu].curr; curr_evt = calc_evt(curr, calc_avt(curr, now)); /* Calculate the time the current domain would run assuming the second smallest evt is of the newly woken domain */ r_time = curr->lastschd + - ((inf->evt - curr_evt) / BVT_INFO(curr)->mcu_advance) + + ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) + ctx_allow; - if ( is_idle_task(curr) || (inf->evt <= curr_evt) ) + if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) ) cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); else if ( schedule_data[cpu].s_timer.expires > r_time ) mod_ac_timer(&schedule_data[cpu].s_timer, r_time); } -static void bvt_sleep(struct domain *d) +static void bvt_sleep(struct exec_domain *d) { - if ( test_bit(DF_RUNNING, &d->flags) ) + if ( test_bit(EDF_RUNNING, &d->ed_flags) ) cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); else if ( __task_on_runqueue(d) ) __del_from_runqueue(d); @@ -347,25 +364,27 @@ int bvt_adjdom( */ static task_slice_t bvt_do_schedule(s_time_t now) { - struct domain *prev = current, *next = NULL, *next_prime, *p; + struct domain *d; + struct exec_domain *prev = current, *next = NULL, *next_prime, *ed; struct list_head *tmp; int cpu = prev->processor; s32 r_time; /* time for new dom to run */ u32 next_evt, next_prime_evt, min_avt; - struct bvt_dom_info *prev_inf = BVT_INFO(prev); - struct bvt_dom_info *p_inf = NULL; - struct bvt_dom_info *next_inf = NULL; - struct bvt_dom_info *next_prime_inf = NULL; + struct bvt_dom_info *prev_inf = BVT_INFO(prev->domain); + struct bvt_edom_info *prev_einf = EBVT_INFO(prev); + struct bvt_edom_info *p_einf = NULL; + struct bvt_edom_info *next_einf = NULL; + struct bvt_edom_info *next_prime_einf = NULL; task_slice_t ret; - ASSERT(prev->sched_priv != NULL); - ASSERT(prev_inf != NULL); + ASSERT(prev->ed_sched_priv != NULL); + ASSERT(prev_einf != NULL); ASSERT(__task_on_runqueue(prev)); - if ( likely(!is_idle_task(prev)) ) 
+ if ( likely(!is_idle_task(prev->domain)) ) { - prev_inf->avt = calc_avt(prev, now); - prev_inf->evt = calc_evt(prev, prev_inf->avt); + prev_einf->avt = calc_avt(prev, now); + prev_einf->evt = calc_evt(prev, prev_einf->avt); if(prev_inf->warpback && prev_inf->warpl > 0) rem_ac_timer(&prev_inf->warp_timer); @@ -385,8 +404,8 @@ static task_slice_t bvt_do_schedule(s_time_t now) * *and* the task the second lowest evt. * this code is O(n) but we expect n to be small. */ - next_inf = BVT_INFO(schedule_data[cpu].idle); - next_prime_inf = NULL; + next_einf = EBVT_INFO(schedule_data[cpu].idle); + next_prime_einf = NULL; next_evt = ~0U; next_prime_evt = ~0U; @@ -394,42 +413,42 @@ static task_slice_t bvt_do_schedule(s_time_t now) list_for_each ( tmp, RUNQUEUE(cpu) ) { - p_inf = list_entry(tmp, struct bvt_dom_info, run_list); + p_einf = list_entry(tmp, struct bvt_edom_info, run_list); - if ( p_inf->evt < next_evt ) + if ( p_einf->evt < next_evt ) { - next_prime_inf = next_inf; + next_prime_einf = next_einf; next_prime_evt = next_evt; - next_inf = p_inf; - next_evt = p_inf->evt; + next_einf = p_einf; + next_evt = p_einf->evt; } else if ( next_prime_evt == ~0U ) { - next_prime_evt = p_inf->evt; - next_prime_inf = p_inf; + next_prime_evt = p_einf->evt; + next_prime_einf = p_einf; } - else if ( p_inf->evt < next_prime_evt ) + else if ( p_einf->evt < next_prime_evt ) { - next_prime_evt = p_inf->evt; - next_prime_inf = p_inf; + next_prime_evt = p_einf->evt; + next_prime_einf = p_einf; } /* Determine system virtual time. 
*/ - if ( p_inf->avt < min_avt ) - min_avt = p_inf->avt; + if ( p_einf->avt < min_avt ) + min_avt = p_einf->avt; } - if(next_inf->warp && next_inf->warpl > 0) + if(next_einf->inf->warp && next_einf->inf->warpl > 0) { /* Set the timer up */ - next_inf->warp_timer.expires = now + next_inf->warpl; + next_einf->inf->warp_timer.expires = now + next_einf->inf->warpl; /* Add it to the heap */ - add_ac_timer(&next_inf->warp_timer); + add_ac_timer(&next_einf->inf->warp_timer); } /* Extract the domain pointers from the dom infos */ - next = next_inf->domain; - next_prime = next_prime_inf->domain; + next = next_einf->exec_domain; + next_prime = next_prime_einf->exec_domain; /* Update system virtual time. */ if ( min_avt != ~0U ) @@ -442,13 +461,15 @@ static task_slice_t bvt_do_schedule(s_time_t now) write_lock(&domlist_lock); - for_each_domain ( p ) + for_each_domain ( d ) { - if ( p->processor == cpu ) - { - p_inf = BVT_INFO(p); - p_inf->evt -= 0xe0000000; - p_inf->avt -= 0xe0000000; + for_each_exec_domain (d, ed) { + if ( ed->processor == cpu ) + { + p_einf = EBVT_INFO(ed); + p_einf->evt -= 0xe0000000; + p_einf->avt -= 0xe0000000; + } } } @@ -458,13 +479,13 @@ static task_slice_t bvt_do_schedule(s_time_t now) } /* work out time for next run through scheduler */ - if ( is_idle_task(next) ) + if ( is_idle_task(next->domain) ) { r_time = ctx_allow; goto sched_done; } - if ( (next_prime == NULL) || is_idle_task(next_prime) ) + if ( (next_prime == NULL) || is_idle_task(next_prime->domain) ) { /* We have only one runnable task besides the idle task. 
*/ r_time = 10 * ctx_allow; /* RN: random constant */ @@ -478,7 +499,7 @@ static task_slice_t bvt_do_schedule(s_time_t now) */ ASSERT(next_prime_inf->evt >= next_inf->evt); - r_time = ((next_prime_inf->evt - next_inf->evt)/next_inf->mcu_advance) + r_time = ((next_prime_einf->evt - next_einf->evt)/next_einf->inf->mcu_advance) + ctx_allow; ASSERT(r_time >= ctx_allow); @@ -490,12 +511,12 @@ static task_slice_t bvt_do_schedule(s_time_t now) } -static void bvt_dump_runq_el(struct domain *p) +static void bvt_dump_runq_el(struct exec_domain *p) { - struct bvt_dom_info *inf = BVT_INFO(p); + struct bvt_edom_info *inf = EBVT_INFO(p); printk("mcua=%d ev=0x%08X av=0x%08X ", - inf->mcu_advance, inf->evt, inf->avt); + inf->inf->mcu_advance, inf->evt, inf->avt); } static void bvt_dump_settings(void) @@ -507,8 +528,8 @@ static void bvt_dump_cpu_state(int i) { struct list_head *list, *queue; int loop = 0; - struct bvt_dom_info *d_inf; - struct domain *d; + struct bvt_edom_info *d_inf; + struct exec_domain *d; printk("svt=0x%08lX ", CPU_SVT(i)); @@ -518,10 +539,10 @@ static void bvt_dump_cpu_state(int i) list_for_each ( list, queue ) { - d_inf = list_entry(list, struct bvt_dom_info, run_list); - d = d_inf->domain; - printk("%3d: %u has=%c ", loop++, d->id, - test_bit(DF_RUNNING, &d->flags) ? 'T':'F'); + d_inf = list_entry(list, struct bvt_edom_info, run_list); + d = d_inf->exec_domain; + printk("%3d: %u has=%c ", loop++, d->domain->id, + test_bit(EDF_RUNNING, &d->ed_flags) ? 
'T':'F'); bvt_dump_runq_el(d); printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time); printk(" l: %lx n: %lx p: %lx\n", diff --git a/xen/common/schedule.c b/xen/common/schedule.c index c037c9e177..3bc93b7bfa 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -68,12 +68,12 @@ static void fallback_timer_fn(unsigned long unused); schedule_data_t schedule_data[NR_CPUS]; extern struct scheduler sched_bvt_def; -extern struct scheduler sched_rrobin_def; -extern struct scheduler sched_atropos_def; +// extern struct scheduler sched_rrobin_def; +// extern struct scheduler sched_atropos_def; static struct scheduler *schedulers[] = { &sched_bvt_def, - &sched_rrobin_def, - &sched_atropos_def, +// &sched_rrobin_def, +// &sched_atropos_def, NULL }; @@ -94,29 +94,46 @@ static struct ac_timer t_timer[NR_CPUS]; static struct ac_timer fallback_timer[NR_CPUS]; extern xmem_cache_t *domain_struct_cachep; +extern xmem_cache_t *exec_domain_struct_cachep; void free_domain_struct(struct domain *d) { + struct exec_domain *ed; + SCHED_OP(free_task, d); + for_each_exec_domain(d, ed) + xmem_cache_free(exec_domain_struct_cachep, ed); xmem_cache_free(domain_struct_cachep, d); } struct domain *alloc_domain_struct(void) { struct domain *d; + struct exec_domain *ed = NULL; if ( (d = xmem_cache_alloc(domain_struct_cachep)) == NULL ) return NULL; memset(d, 0, sizeof(*d)); - if ( SCHED_OP(alloc_task, d) < 0 ) - { - xmem_cache_free(domain_struct_cachep, d); - return NULL; - } + if ( (ed = xmem_cache_alloc(exec_domain_struct_cachep)) == NULL ) + goto out; + + memset(ed, 0, sizeof(*ed)); + + d->exec_domain[0] = ed; + ed->domain = d; + + if ( SCHED_OP(alloc_task, ed) < 0 ) + goto out; return d; + + out: + if ( ed ) + xmem_cache_free(exec_domain_struct_cachep, ed); + xmem_cache_free(domain_struct_cachep, d); + return NULL; } /* @@ -124,23 +141,27 @@ struct domain *alloc_domain_struct(void) */ void sched_add_domain(struct domain *d) { - /* Must be unpaused by control software to 
start execution. */ - set_bit(DF_CTRLPAUSE, &d->flags); + struct exec_domain *ed; + + for_each_exec_domain(d, ed) { + /* Must be unpaused by control software to start execution. */ + set_bit(EDF_CTRLPAUSE, &ed->ed_flags); + } if ( d->id != IDLE_DOMAIN_ID ) { /* Initialise the per-domain timer. */ init_ac_timer(&d->timer); - d->timer.cpu = d->processor; + d->timer.cpu = d->exec_domain[0]->processor; d->timer.data = (unsigned long)d; d->timer.function = &dom_timer_fn; } else { - schedule_data[d->processor].idle = d; + schedule_data[d->exec_domain[0]->processor].idle = d->exec_domain[0]; } - SCHED_OP(add_task, d); + SCHED_OP(add_task, d->exec_domain[0]); TRACE_2D(TRC_SCHED_DOM_ADD, d->id, d); } @@ -158,7 +179,7 @@ void init_idle_task(void) BUG(); } -void domain_sleep(struct domain *d) +void domain_sleep(struct exec_domain *d) { unsigned long flags; @@ -170,14 +191,14 @@ void domain_sleep(struct domain *d) spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags); /* Synchronous. */ - while ( test_bit(DF_RUNNING, &d->flags) && !domain_runnable(d) ) + while ( test_bit(EDF_RUNNING, &d->ed_flags) && !domain_runnable(d) ) { smp_mb(); cpu_relax(); } } -void domain_wake(struct domain *d) +void domain_wake(struct exec_domain *d) { unsigned long flags; @@ -192,7 +213,7 @@ void domain_wake(struct domain *d) #endif } - clear_bit(DF_MIGRATED, &d->flags); + clear_bit(EDF_MIGRATED, &d->ed_flags); spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags); } @@ -202,7 +223,7 @@ long do_block(void) { ASSERT(current->id != IDLE_DOMAIN_ID); current->shared_info->vcpu_data[0].evtchn_upcall_mask = 0; - set_bit(DF_BLOCKED, &current->flags); + set_bit(EDF_BLOCKED, &current->ed_flags); TRACE_2D(TRC_SCHED_BLOCK, current->id, current); __enter_scheduler(); return 0; @@ -254,7 +275,7 @@ long do_sched_op(unsigned long op) /* Per-domain one-shot-timer hypercall.
*/ long do_set_timer_op(unsigned long timeout_hi, unsigned long timeout_lo) { - struct domain *p = current; + struct domain *p = current->domain; rem_ac_timer(&p->timer); @@ -303,9 +324,9 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd) TRACE_1D(TRC_SCHED_ADJDOM, d->id); - spin_lock_irq(&schedule_data[d->processor].schedule_lock); + spin_lock_irq(&schedule_data[d->exec_domain[0]->processor].schedule_lock); SCHED_OP(adjdom, d, cmd); - spin_unlock_irq(&schedule_data[d->processor].schedule_lock); + spin_unlock_irq(&schedule_data[d->exec_domain[0]->processor].schedule_lock); put_domain(d); return 0; @@ -318,14 +339,14 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd) */ void __enter_scheduler(void) { - struct domain *prev = current, *next = NULL; + struct exec_domain *prev = current, *next = NULL; int cpu = prev->processor; s_time_t now; task_slice_t next_slice; s32 r_time; /* time for new dom to run */ cleanup_writable_pagetable( - prev, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); + prev->domain, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); perfc_incrc(sched_run); @@ -337,11 +358,11 @@ void __enter_scheduler(void) ASSERT(!in_irq()); - if ( test_bit(DF_BLOCKED, &prev->flags) ) + if ( test_bit(EDF_BLOCKED, &prev->ed_flags) ) { /* This check is needed to avoid a race condition. */ if ( event_pending(prev) ) - clear_bit(DF_BLOCKED, &prev->flags); + clear_bit(EDF_BLOCKED, &prev->ed_flags); else SCHED_OP(do_block, prev); } @@ -363,12 +384,12 @@ void __enter_scheduler(void) add_ac_timer(&schedule_data[cpu].s_timer); /* Must be protected by the schedule_lock! */ - set_bit(DF_RUNNING, &next->flags); + set_bit(EDF_RUNNING, &next->ed_flags); spin_unlock_irq(&schedule_data[cpu].schedule_lock); /* Ensure that the domain has an up-to-date time base. 
*/ - if ( !is_idle_task(next) ) + if ( !is_idle_task(next->domain) ) update_dom_time(next->shared_info); if ( unlikely(prev == next) ) @@ -404,10 +425,10 @@ void __enter_scheduler(void) * 'prev' (after this point, a dying domain's info structure may be freed * without warning). */ - clear_bit(DF_RUNNING, &prev->flags); + clear_bit(EDF_RUNNING, &prev->ed_flags); /* Mark a timer event for the newly-scheduled domain. */ - if ( !is_idle_task(next) ) + if ( !is_idle_task(next->domain) ) send_guest_virq(next, VIRQ_TIMER); schedule_tail(next); @@ -418,7 +439,7 @@ void __enter_scheduler(void) /* No locking needed -- pointer comparison is safe :-) */ int idle_cpu(int cpu) { - struct domain *p = schedule_data[cpu].curr; + struct exec_domain *p = schedule_data[cpu].curr; return p == idle_task[cpu]; } @@ -442,11 +463,11 @@ static void s_timer_fn(unsigned long unused) /* Periodic tick timer: send timer event to current domain*/ static void t_timer_fn(unsigned long unused) { - struct domain *p = current; + struct exec_domain *p = current; TRACE_0D(TRC_SCHED_T_TIMER_FN); - if ( !is_idle_task(p) ) { + if ( !is_idle_task(p->domain) ) { update_dom_time(p->shared_info); send_guest_virq(p, VIRQ_TIMER); } @@ -459,24 +480,26 @@ static void t_timer_fn(unsigned long unused) static void dom_timer_fn(unsigned long data) { struct domain *p = (struct domain *)data; + struct exec_domain *ed = p->exec_domain[0]; TRACE_0D(TRC_SCHED_DOM_TIMER_FN); - update_dom_time(p->shared_info); - send_guest_virq(p, VIRQ_TIMER); + update_dom_time(ed->shared_info); + send_guest_virq(ed, VIRQ_TIMER); } /* Fallback timer to ensure guests get time updated 'often enough'. 
*/ static void fallback_timer_fn(unsigned long unused) { - struct domain *p = current; + struct exec_domain *ed = current; + struct domain *p = ed->domain; TRACE_0D(TRC_SCHED_FALLBACK_TIMER_FN); if ( !is_idle_task(p) ) - update_dom_time(p->shared_info); + update_dom_time(ed->shared_info); - fallback_timer[p->processor].expires = NOW() + MILLISECS(500); - add_ac_timer(&fallback_timer[p->processor]); + fallback_timer[ed->processor].expires = NOW() + MILLISECS(500); + add_ac_timer(&fallback_timer[ed->processor]); } /* Initialise the data structures. */ @@ -489,7 +512,7 @@ void __init scheduler_init(void) for ( i = 0; i < NR_CPUS; i++ ) { spin_lock_init(&schedule_data[i].schedule_lock); - schedule_data[i].curr = &idle0_task; + schedule_data[i].curr = &idle0_exec_domain; init_ac_timer(&schedule_data[i].s_timer); schedule_data[i].s_timer.cpu = i; @@ -507,7 +530,7 @@ void __init scheduler_init(void) fallback_timer[i].function = &fallback_timer_fn; } - schedule_data[0].idle = &idle0_task; + schedule_data[0].idle = &idle0_exec_domain; extern char opt_sched[]; diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index 51e97fc82f..6fe2dff341 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -253,7 +253,7 @@ static void __serial_rx(unsigned char c, struct xen_regs *regs) { serial_rx_ring[SERIAL_RX_MASK(serial_rx_prod)] = c; if ( serial_rx_prod++ == serial_rx_cons ) - send_guest_virq(dom0, VIRQ_CONSOLE); + send_guest_virq(dom0->exec_domain[0], VIRQ_CONSOLE); } } @@ -286,7 +286,7 @@ long do_console_io(int cmd, int count, char *buffer) #ifndef VERBOSE /* Only domain-0 may access the emergency console. 
*/ - if ( current->id != 0 ) + if ( current->domain->id != 0 ) return -EPERM; #endif diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index 7dde4efa6d..a0dd3e58c2 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -2,12 +2,12 @@ #ifndef __ASM_X86_DOMAIN_H__ #define __ASM_X86_DOMAIN_H__ -extern void arch_do_createdomain(struct domain *d); +extern void arch_do_createdomain(struct exec_domain *d); extern int arch_final_setup_guestos( - struct domain *d, full_execution_context_t *c); + struct exec_domain *d, full_execution_context_t *c); -extern void free_perdomain_pt(struct domain *d); +extern void free_perdomain_pt(struct exec_domain *d); extern void domain_relinquish_memory(struct domain *d); diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h index 5221da49db..a09422d74f 100644 --- a/xen/include/asm-x86/i387.h +++ b/xen/include/asm-x86/i387.h @@ -15,16 +15,16 @@ #include <asm/processor.h> extern void init_fpu(void); -extern void save_init_fpu( struct domain *tsk ); -extern void restore_fpu( struct domain *tsk ); +extern void save_init_fpu( struct exec_domain *tsk ); +extern void restore_fpu( struct exec_domain *tsk ); #define unlazy_fpu( tsk ) do { \ - if ( test_bit(DF_USEDFPU, &tsk->flags) ) \ + if ( test_bit(EDF_USEDFPU, &tsk->ed_flags) ) \ save_init_fpu( tsk ); \ } while (0) #define clear_fpu( tsk ) do { \ - if ( test_and_clear_bit(DF_USEDFPU, &tsk->flags) ) { \ + if ( test_and_clear_bit(EDF_USEDFPU, &tsk->ed_flags) ) { \ asm volatile("fwait"); \ stts(); \ } \ diff --git a/xen/include/asm-x86/ldt.h b/xen/include/asm-x86/ldt.h index 5b13bec994..7546d2b0da 100644 --- a/xen/include/asm-x86/ldt.h +++ b/xen/include/asm-x86/ldt.h @@ -3,7 +3,7 @@ #ifndef __ASSEMBLY__ -static inline void load_LDT(struct domain *p) +static inline void load_LDT(struct exec_domain *p) { unsigned int cpu; struct desc_struct *desc; diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index 
e647aa7139..2476314cfc 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -18,6 +18,7 @@ #include <public/xen.h> struct domain; +struct exec_domain; /* * Default implementation of macro that returns current @@ -327,7 +328,7 @@ extern struct desc_struct *idt_tables[]; &((_p)->fast_trap_desc), 8)) #endif -long set_fast_trap(struct domain *p, int idx); +long set_fast_trap(struct exec_domain *p, int idx); #define INIT_THREAD { \ 0, 0, \ @@ -420,11 +421,11 @@ static inline void write_ptbase(struct mm_struct *mm) #define GET_GDT_ADDRESS(_p) (*(unsigned long *)((_p)->mm.gdt + 2)) void destroy_gdt(struct domain *d); -long set_gdt(struct domain *d, +long set_gdt(struct exec_domain *d, unsigned long *frames, unsigned int entries); -long set_debugreg(struct domain *p, int reg, unsigned long value); +long set_debugreg(struct exec_domain *p, int reg, unsigned long value); struct microcode { unsigned int hdrver; diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index cffcf2862c..2455dec816 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -40,7 +40,7 @@ extern int shadow_mode_enable(struct domain *p, unsigned int mode); extern void __shadow_mode_disable(struct domain *d); static inline void shadow_mode_disable(struct domain *d) { - if ( shadow_mode(d) ) + if ( shadow_mode(d->exec_domain[0]) ) __shadow_mode_disable(d); } @@ -62,7 +62,7 @@ struct shadow_status { #ifdef VERBOSE #define SH_LOG(_f, _a...) \ printk("DOM%u: (file=shadow.c, line=%d) " _f "\n", \ - current->id , __LINE__ , ## _a ) + current->domain->id , __LINE__ , ## _a ) #else #define SH_LOG(_f, _a...) 
#endif diff --git a/xen/include/asm-x86/x86_32/current.h b/xen/include/asm-x86/x86_32/current.h index 7d87b89d72..d7054d694d 100644 --- a/xen/include/asm-x86/x86_32/current.h +++ b/xen/include/asm-x86/x86_32/current.h @@ -6,9 +6,9 @@ struct domain; #define STACK_RESERVED \ (sizeof(execution_context_t) + sizeof(struct domain *)) -static inline struct domain * get_current(void) +static inline struct exec_domain * get_current(void) { - struct domain *current; + struct exec_domain *current; __asm__ ( "orl %%esp,%0; andl $~3,%0; movl (%0),%0" : "=r" (current) : "0" (STACK_SIZE-4) ); return current; @@ -16,7 +16,7 @@ static inline struct domain * get_current(void) #define current get_current() -static inline void set_current(struct domain *p) +static inline void set_current(struct exec_domain *p) { __asm__ ( "orl %%esp,%0; andl $~3,%0; movl %1,(%0)" : : "r" (STACK_SIZE-4), "r" (p) ); @@ -43,7 +43,7 @@ static inline unsigned long get_stack_top(void) __asm__ __volatile__ ( \ "andl %%esp,%0; addl %2,%0; movl %0,%%esp; jmp *%1" \ : : "r" (~(STACK_SIZE-1)), \ - "r" (unlikely(is_idle_task((_p))) ? \ + "r" (unlikely(is_idle_task((_p)->domain)) ? \ continue_cpu_idle_loop : \ continue_nonidle_task), \ "i" (STACK_SIZE-STACK_RESERVED) ) diff --git a/xen/include/public/dom0_ops.h b/xen/include/public/dom0_ops.h index e19141ca02..f388f984b6 100644 --- a/xen/include/public/dom0_ops.h +++ b/xen/include/public/dom0_ops.h @@ -85,7 +85,7 @@ typedef struct { typedef struct { /* IN variables. */ domid_t domain; /* 0 */ /* NB. IN/OUT variable. */ - u16 __pad; + u16 exec_domain; /* OUT variables. */ #define DOMFLAGS_DYING (1<<0) /* Domain is scheduled to die. */ #define DOMFLAGS_CRASHED (1<<1) /* Crashed domain; frozen for postmortem. */ @@ -208,7 +208,7 @@ typedef struct { typedef struct { /* IN variables. 
*/ domid_t domain; /* 0 */ - u16 __pad; + u16 exec_domain; s32 cpu; /* 4: -1 implies unpin */ } PACKED dom0_pincpudomain_t; /* 8 bytes */ diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index f103509b04..ca3cc054d6 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -251,7 +251,7 @@ typedef struct #define NR_EVENT_CHANNELS 1024 /* No support for multi-processor guests. */ -#define MAX_VIRT_CPUS 1 +#define MAX_VIRT_CPUS 4 /* * Xen/guestos shared data -- pointer provided in start_info. @@ -294,6 +294,8 @@ typedef struct shared_info_st u8 pad0, pad1; } PACKED vcpu_data[MAX_VIRT_CPUS]; /* 0 */ + u32 n_vcpu; + /* * A domain can have up to 1024 "event channels" on which it can send * and receive asynchronous event notifications. There are three classes diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h index 1fcef0f03e..86af2954a7 100644 --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -22,7 +22,8 @@ static inline void evtchn_set_pending(struct domain *d, int port) { - shared_info_t *s = d->shared_info; + struct exec_domain *ed = d->exec_domain[0]; + shared_info_t *s = ed->shared_info; int running; /* These three operations must happen in strict order. */ @@ -42,10 +43,10 @@ static inline void evtchn_set_pending(struct domain *d, int port) * NB2. We save DF_RUNNING across the unblock to avoid a needless * IPI for domains that we IPI'd to unblock. 
*/ - running = test_bit(DF_RUNNING, &d->flags); - domain_unblock(d); + running = test_bit(EDF_RUNNING, &ed->ed_flags); + exec_domain_unblock(ed); if ( running ) - smp_send_event_check_cpu(d->processor); + smp_send_event_check_cpu(ed->processor); } } @@ -54,8 +55,9 @@ static inline void evtchn_set_pending(struct domain *d, int port) * @d: Domain to which virtual IRQ should be sent * @virq: Virtual IRQ number (VIRQ_*) */ -static inline void send_guest_virq(struct domain *d, int virq) +static inline void send_guest_virq(struct exec_domain *ed, int virq) { + struct domain *d = ed->domain; evtchn_set_pending(d, d->virq_to_evtchn[virq]); } diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h index 15f992614a..0e6cd69d3b 100644 --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -13,8 +13,8 @@ typedef struct schedule_data_st { spinlock_t schedule_lock; /* spinlock protecting curr pointer TODO check this */ - struct domain *curr; /* current task */ - struct domain *idle; /* idle task for this cpu */ + struct exec_domain *curr; /* current task */ + struct exec_domain *idle; /* idle task for this cpu */ void * sched_priv; struct ac_timer s_timer; /* scheduling timer */ #ifdef BUCKETS @@ -25,7 +25,7 @@ typedef struct schedule_data_st typedef struct task_slice_st { - struct domain *task; + struct exec_domain *task; s_time_t time; } task_slice_t; @@ -36,14 +36,14 @@ struct scheduler unsigned int sched_id; /* ID for this scheduler */ int (*init_scheduler) (); - int (*init_idle_task) (struct domain *); - int (*alloc_task) (struct domain *); - void (*add_task) (struct domain *); + int (*init_idle_task) (struct exec_domain *); + int (*alloc_task) (struct exec_domain *); + void (*add_task) (struct exec_domain *); void (*free_task) (struct domain *); void (*rem_task) (struct domain *); - void (*sleep) (struct domain *); - void (*wake) (struct domain *); - void (*do_block) (struct domain *); + void (*sleep) (struct exec_domain *); + void (*wake) 
(struct exec_domain *); + void (*do_block) (struct exec_domain *); task_slice_t (*do_schedule) (s_time_t); int (*control) (struct sched_ctl_cmd *); int (*adjdom) (struct domain *, diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index e44696bdbe..c1a4ece8ae 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -53,7 +53,7 @@ typedef struct event_channel_st int init_event_channels(struct domain *d); void destroy_event_channels(struct domain *d); -struct domain +struct exec_domain { /* * DO NOT CHANGE THE ORDER OF THE FOLLOWING. @@ -89,6 +89,27 @@ struct domain * From here on things can be added and shuffled without special attention */ + struct domain *domain; + struct exec_domain *ed_next_list; + int eid; + + struct mm_struct mm; + + struct thread_struct thread; + + s_time_t lastschd; /* time this domain was last scheduled */ + s_time_t lastdeschd; /* time this domain was last descheduled */ + s_time_t cpu_time; /* total CPU time received till now */ + s_time_t wokenup; /* time domain got woken up */ + void *ed_sched_priv; /* scheduler-specific data */ + + unsigned long ed_flags; + + atomic_t pausecnt; + +}; + +struct domain { domid_t id; s_time_t create_time; @@ -101,16 +122,9 @@ struct domain /* Scheduling. */ int shutdown_code; /* code value from OS (if DF_SHUTDOWN). */ - s_time_t lastschd; /* time this domain was last scheduled */ - s_time_t lastdeschd; /* time this domain was last descheduled */ - s_time_t cpu_time; /* total CPU time received till now */ - s_time_t wokenup; /* time domain got woken up */ struct ac_timer timer; /* one-shot timer for timeout values */ void *sched_priv; /* scheduler-specific data */ - struct mm_struct mm; - - struct thread_struct thread; struct domain *next_list, *next_hash; /* Event channel information. 
*/ @@ -142,11 +156,12 @@ struct domain #define IOBMP_SELBIT_LWORDS (IO_BITMAP_SIZE / 64) unsigned long *io_bitmap; /* Pointer to task's IO bitmap or NULL */ - unsigned long flags; + unsigned long d_flags; unsigned long vm_assist; atomic_t refcnt; - atomic_t pausecnt; + + struct exec_domain *exec_domain[MAX_VIRT_CPUS]; }; struct domain_setup_info @@ -161,11 +176,12 @@ struct domain_setup_info #include <asm/uaccess.h> /* for KERNEL_DS */ -extern struct domain idle0_task; +extern struct domain idle0_domain; +extern struct exec_domain idle0_exec_domain; -extern struct domain *idle_task[NR_CPUS]; +extern struct exec_domain *idle_task[NR_CPUS]; #define IDLE_DOMAIN_ID (0x7FFFU) -#define is_idle_task(_p) (test_bit(DF_IDLETASK, &(_p)->flags)) +#define is_idle_task(_p) (test_bit(DF_IDLETASK, &(_p)->d_flags)) void free_domain_struct(struct domain *d); struct domain *alloc_domain_struct(); @@ -220,7 +236,7 @@ extern void domain_kill(struct domain *d); extern void domain_crash(void); extern void domain_shutdown(u8 reason); -void new_thread(struct domain *d, +void new_thread(struct exec_domain *d, unsigned long start_pc, unsigned long start_stack, unsigned long start_info); @@ -237,13 +253,13 @@ long sched_ctl(struct sched_ctl_cmd *); long sched_adjdom(struct sched_adjdom_cmd *); int sched_id(); void init_idle_task(void); -void domain_wake(struct domain *d); -void domain_sleep(struct domain *d); +void domain_wake(struct exec_domain *d); +void domain_sleep(struct exec_domain *d); void __enter_scheduler(void); -extern void switch_to(struct domain *prev, - struct domain *next); +extern void switch_to(struct exec_domain *prev, + struct exec_domain *next); void domain_init(void); @@ -263,65 +279,100 @@ extern struct domain *domain_list; #define for_each_domain(_p) \ for ( (_p) = domain_list; (_p) != NULL; (_p) = (_p)->next_list ) -#define DF_DONEFPUINIT 0 /* Has the FPU been initialised for this task? */ -#define DF_USEDFPU 1 /* Has this task used the FPU since last save? 
*/ -#define DF_GUEST_STTS 2 /* Has the guest OS requested 'stts'? */ -#define DF_CONSTRUCTED 3 /* Has the guest OS been fully built yet? */ -#define DF_IDLETASK 4 /* Is this one of the per-CPU idle domains? */ -#define DF_PRIVILEGED 5 /* Is this domain privileged? */ -#define DF_PHYSDEV 6 /* May this domain do IO to physical devices? */ -#define DF_BLOCKED 7 /* Domain is blocked waiting for an event. */ -#define DF_CTRLPAUSE 8 /* Domain is paused by controller software. */ -#define DF_SHUTDOWN 9 /* Guest shut itself down for some reason. */ -#define DF_CRASHED 10 /* Domain crashed inside Xen, cannot continue. */ -#define DF_DYING 11 /* Death rattle. */ -#define DF_RUNNING 12 /* Currently running on a CPU. */ -#define DF_CPUPINNED 13 /* Disables auto-migration. */ -#define DF_MIGRATED 14 /* Domain migrated between CPUs. */ - -static inline int domain_runnable(struct domain *d) +#define for_each_exec_domain(_d,_ed) \ + for ( (_ed) = _d->exec_domain[0]; (_ed) != NULL; (_ed) = (_ed)->ed_next_list ) + +#define EDF_DONEFPUINIT 0 /* Has the FPU been initialised for this task? */ +#define EDF_USEDFPU 1 /* Has this task used the FPU since last save? */ +#define EDF_GUEST_STTS 2 /* Has the guest OS requested 'stts'? */ +#define DF_CONSTRUCTED 3 /* Has the guest OS been fully built yet? */ +#define DF_IDLETASK 4 /* Is this one of the per-CPU idle domains? */ +#define DF_PRIVILEGED 5 /* Is this domain privileged? */ +#define DF_PHYSDEV 6 /* May this domain do IO to physical devices? */ +#define EDF_BLOCKED 7 /* Domain is blocked waiting for an event. */ +#define EDF_CTRLPAUSE 8 /* Domain is paused by controller software. */ +#define DF_SHUTDOWN 9 /* Guest shut itself down for some reason. */ +#define DF_CRASHED 10 /* Domain crashed inside Xen, cannot continue. */ +#define DF_DYING 11 /* Death rattle. */ +#define EDF_RUNNING 12 /* Currently running on a CPU. */ +#define EDF_CPUPINNED 13 /* Disables auto-migration. */ +#define EDF_MIGRATED 14 /* Domain migrated between CPUs. 
*/ + +static inline int domain_runnable(struct exec_domain *d) { return ( (atomic_read(&d->pausecnt) == 0) && - !(d->flags & ((1<<DF_BLOCKED)|(1<<DF_CTRLPAUSE)| - (1<<DF_SHUTDOWN)|(1<<DF_CRASHED))) ); + !(d->ed_flags & ((1<<EDF_BLOCKED)|(1<<EDF_CTRLPAUSE))) && + !(d->domain->d_flags & ((1<<DF_SHUTDOWN)|(1<<DF_CRASHED))) ); +} + +static inline void exec_domain_pause(struct exec_domain *ed) +{ + ASSERT(ed != current); + atomic_inc(&ed->pausecnt); + domain_sleep(ed); } static inline void domain_pause(struct domain *d) { - ASSERT(d != current); - atomic_inc(&d->pausecnt); - domain_sleep(d); + struct exec_domain *ed; + + for_each_exec_domain(d, ed) + exec_domain_pause(ed); +} + +static inline void exec_domain_unpause(struct exec_domain *ed) +{ + ASSERT(ed != current); + if ( atomic_dec_and_test(&ed->pausecnt) ) + domain_wake(ed); } static inline void domain_unpause(struct domain *d) { - ASSERT(d != current); - if ( atomic_dec_and_test(&d->pausecnt) ) - domain_wake(d); + struct exec_domain *ed; + + for_each_exec_domain(d, ed) + exec_domain_unpause(ed); +} + +static inline void exec_domain_unblock(struct exec_domain *ed) +{ + if ( test_and_clear_bit(EDF_BLOCKED, &ed->ed_flags) ) + domain_wake(ed); } static inline void domain_unblock(struct domain *d) { - if ( test_and_clear_bit(DF_BLOCKED, &d->flags) ) - domain_wake(d); + struct exec_domain *ed; + + for_each_exec_domain(d, ed) + exec_domain_unblock(ed); } static inline void domain_pause_by_systemcontroller(struct domain *d) { - ASSERT(d != current); - if ( !test_and_set_bit(DF_CTRLPAUSE, &d->flags) ) - domain_sleep(d); + struct exec_domain *ed; + + for_each_exec_domain(d, ed) { + ASSERT(ed != current); + if ( !test_and_set_bit(EDF_CTRLPAUSE, &ed->ed_flags) ) + domain_sleep(ed); + } } static inline void domain_unpause_by_systemcontroller(struct domain *d) { - if ( test_and_clear_bit(DF_CTRLPAUSE, &d->flags) ) - domain_wake(d); + struct exec_domain *ed; + + for_each_exec_domain(d, ed) { + if ( 
test_and_clear_bit(EDF_CTRLPAUSE, &ed->ed_flags) ) + domain_wake(ed); + } } -#define IS_PRIV(_d) (test_bit(DF_PRIVILEGED, &(_d)->flags)) -#define IS_CAPABLE_PHYSDEV(_d) (test_bit(DF_PHYSDEV, &(_d)->flags)) +#define IS_PRIV(_d) (test_bit(DF_PRIVILEGED, &(_d)->d_flags)) +#define IS_CAPABLE_PHYSDEV(_d) (test_bit(DF_PHYSDEV, &(_d)->d_flags)) #define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist)) |