From a8bcd6b668d01a28f6a14e4f2a96c84861605617 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Thu, 25 Nov 2004 17:24:07 +0000 Subject: bitkeeper revision 1.1159.187.15 (41a61537tODn12flBND8W6jum0b79Q) Fix hypercall preemption. At the same time I reimplemented most of the multi-hypercall in arch-independent C code. --- .rootkeys | 3 ++ xen/arch/x86/domain.c | 29 +++++++--- xen/arch/x86/memory.c | 40 +++++++++++--- xen/arch/x86/traps.c | 7 ++- xen/arch/x86/x86_32/asm-offsets.c | 8 +++ xen/arch/x86/x86_32/entry.S | 101 ----------------------------------- xen/common/dom_mem_ops.c | 96 ++++++++++++++++++++------------- xen/common/grant_table.c | 8 +-- xen/common/multicall.c | 76 ++++++++++++++++++++++++++ xen/include/asm-x86/mm.h | 3 -- xen/include/asm-x86/multicall.h | 26 +++++++++ xen/include/asm-x86/x86_32/uaccess.h | 3 ++ xen/include/xen/multicall.h | 21 ++++++++ xen/include/xen/sched.h | 8 +-- 14 files changed, 263 insertions(+), 166 deletions(-) create mode 100644 xen/common/multicall.c create mode 100644 xen/include/asm-x86/multicall.h create mode 100644 xen/include/xen/multicall.h diff --git a/.rootkeys b/.rootkeys index 94adf6c091..80ab5c2c8f 100644 --- a/.rootkeys +++ b/.rootkeys @@ -697,6 +697,7 @@ 3e4cd9d8LAAghUY0hNIK72uc2ch_Nw xen/common/keyhandler.c 3ddb79bduhSEZI8xa7IbGQCpap5y2A xen/common/lib.c 3ddb79bdS39UXxUtZnaScie83-7VTQ xen/common/memory.c +41a61536SZbR6cj1ukWTb0DYU-vz9w xen/common/multicall.c 3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c 3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c 4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c @@ -792,6 +793,7 @@ 40ec25fd7cSvbP7Biw91zaU_g0xsEQ xen/include/asm-x86/mm.h 3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h 3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h +41a61536MFhNalgbVmYGXAhQsPTZNw xen/include/asm-x86/multicall.h 3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h 3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h 404f1bb41Yl-5ZjIWnG66HDCj6OIWA xen/include/asm-x86/pda.h @@ -853,6 +855,7 @@ 3ddb79c18Ajy7micDGQQfJ0zWgEHtA xen/include/xen/list.h 3ddb79c1gs2VbLbQlw0dcDUXYIepDA xen/include/xen/mm.h 3ddb79c1ieLZfGSFwfvvSQ2NK1BMSg xen/include/xen/multiboot.h +41a61536ii6j2lJ2rXwMOLaG1CHPvw xen/include/xen/multicall.h 3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen/include/xen/pci.h 3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen/include/xen/pci_ids.h 3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xen/perfc.h diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 77a39a2a97..f5627a1ebd 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -32,6 +32,7 @@ #include #include #include +#include #if !defined(CONFIG_X86_64BITMODE) /* No ring-3 access in initial page tables. */ @@ -428,17 +429,31 @@ long do_iopl(domid_t domain, unsigned int new_io_pl) void hypercall_create_continuation(unsigned int op, unsigned int nr_args, ...) { - execution_context_t *ec = get_execution_context(); - unsigned long *preg = &ec->ebx; + struct mc_state *mcs = &mc_state[smp_processor_id()]; + execution_context_t *ec; + unsigned long *preg; unsigned int i; va_list args; - ec->eax = op; - ec->eip -= 2; /* re-execute 'int 0x82' */ - va_start(args, nr_args); - for ( i = 0; i < nr_args; i++ ) - *preg++ = va_arg(args, unsigned long); + + if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) + { + __set_bit(_MCSF_call_preempted, &mcs->flags); + + for ( i = 0; i < nr_args; i++ ) + mcs->call.args[i] = va_arg(args, unsigned long); + } + else + { + ec = get_execution_context(); + ec->eax = op; + ec->eip -= 2; /* re-execute 'int 0x82' */ + + for ( i = 0, preg = &ec->ebx; i < nr_args; i++, preg++ ) + *preg = va_arg(args, unsigned long); + } + va_end(args); } diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c index c6d9951bc5..f1cf17cec2 100644 --- a/xen/arch/x86/memory.c +++ b/xen/arch/x86/memory.c @@ -1277,14 +1277,20 @@ static int do_extended_command(unsigned long ptr, unsigned long val) return okay; } - -int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) +int do_mmu_update( + mmu_update_t *ureqs, unsigned int count, unsigned int *pdone) { +/* + * We steal the m.s.b. of the @count parameter to indicate whether this + * invocation of do_mmu_update() is resuming a previously preempted call. + */ +#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) + mmu_update_t req; unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0; struct pfn_info *page; int rc = 0, okay = 1, i, cpu = smp_processor_id(); - unsigned int cmd; + unsigned int cmd, done = 0; unsigned long prev_spfn = 0; l1_pgentry_t *prev_spl1e = 0; struct domain *d = current; @@ -1295,13 +1301,30 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); - if ( unlikely(!access_ok(VERIFY_READ, ureqs, count * sizeof(req))) ) + /* + * If we are resuming after preemption, read how much work we have already + * done. This allows us to set the @done output parameter correctly. + */ + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) + { + count &= ~MMU_UPDATE_PREEMPTED; + if ( unlikely(pdone != NULL) ) + (void)get_user(done, pdone); + } + + if ( unlikely(!array_access_ok(VERIFY_READ, ureqs, count, sizeof(req))) ) return -EFAULT; for ( i = 0; i < count; i++ ) { - hypercall_may_preempt( - __HYPERVISOR_mmu_update, 3, ureqs, count-i, success_count); + if ( hypercall_preempt_check() ) + { + hypercall_create_continuation( + __HYPERVISOR_mmu_update, 3, ureqs, + (count - i) | MMU_UPDATE_PREEMPTED, pdone); + rc = __HYPERVISOR_mmu_update; + break; + } if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) ) { @@ -1457,8 +1480,9 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) percpu_info[cpu].foreign = NULL; } - if ( unlikely(success_count != NULL) ) - put_user(i, success_count); + /* Add incremental work we have done to the @done output parameter. */ + if ( unlikely(pdone != NULL) ) + __put_user(done + i, pdone); return rc; } diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 3a8f8cdd15..5662a6388e 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -804,7 +804,12 @@ long do_set_trap_table(trap_info_t *traps) for ( ; ; ) { - hypercall_may_preempt(__HYPERVISOR_set_trap_table, 1, traps); + if ( hypercall_preempt_check() ) + { + hypercall_create_continuation( + __HYPERVISOR_set_trap_table, 1, traps); + return __HYPERVISOR_set_trap_table; + } if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT; diff --git a/xen/arch/x86/x86_32/asm-offsets.c b/xen/arch/x86/x86_32/asm-offsets.c index 33cfd2e04c..f187acbc4c 100644 --- a/xen/arch/x86/x86_32/asm-offsets.c +++ b/xen/arch/x86/x86_32/asm-offsets.c @@ -59,4 +59,12 @@ void __dummy__(void) OFFSET(TRAPBOUNCE_cs, struct trap_bounce, cs); OFFSET(TRAPBOUNCE_eip, struct trap_bounce, eip); BLANK(); + + OFFSET(MULTICALL_op, multicall_entry_t, op); + OFFSET(MULTICALL_arg0, multicall_entry_t, args[0]); + OFFSET(MULTICALL_arg1, multicall_entry_t, args[1]); + OFFSET(MULTICALL_arg2, multicall_entry_t, args[2]); + OFFSET(MULTICALL_arg3, multicall_entry_t, args[3]); + OFFSET(MULTICALL_arg4, multicall_entry_t, args[4]); + OFFSET(MULTICALL_result, multicall_entry_t, args[5]); } diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S index c92090a141..b235f710f9 100644 --- a/xen/arch/x86/x86_32/entry.S +++ b/xen/arch/x86/x86_32/entry.S @@ -69,107 +69,6 @@ ENTRY(continue_nonidle_task) GET_CURRENT(%ebx) jmp test_all_events - ALIGN -/* - * HYPERVISOR_multicall(call_list, nr_calls) - * Execute a list of 'nr_calls' hypercalls, pointed at by 'call_list'. - * This is fairly easy except that: - * 1. We may fault reading the call list, and must patch that up; and - * 2. We cannot recursively call HYPERVISOR_multicall, or a malicious - * caller could cause our stack to blow up. - */ -#define MULTICALL_ENTRY_ORDER 5 -do_multicall: - popl %eax - cmpl $SYMBOL_NAME(multicall_return_from_call),%eax - je multicall_return_from_call - pushl %ebx - movl 4(%esp),%ebx /* EBX == call_list */ - movl 8(%esp),%ecx /* ECX == nr_calls */ - /* Ensure the entire multicall list is below HYPERVISOR_VIRT_START. */ - movl %ecx,%eax - shll $MULTICALL_ENTRY_ORDER,%eax - addl %ebx,%eax /* EAX == end of multicall list */ - jc bad_multicall_address - cmpl $__HYPERVISOR_VIRT_START,%eax - jnc bad_multicall_address -multicall_loop: - pushl %ecx - movl 4(%esp),%ecx # %ecx = struct domain - movl DOMAIN_processor(%ecx),%eax - shl $6,%eax # sizeof(irq_cpustat) == 64 - testl $~0,SYMBOL_NAME(irq_stat)(%eax,1) - jnz multicall_preempt -multicall_fault1: - pushl 20(%ebx) # args[4] -multicall_fault2: - pushl 16(%ebx) # args[3] -multicall_fault3: - pushl 12(%ebx) # args[2] -multicall_fault4: - pushl 8(%ebx) # args[1] -multicall_fault5: - pushl 4(%ebx) # args[0] -multicall_fault6: - movl (%ebx),%eax # op - andl $(NR_hypercalls-1),%eax - call *SYMBOL_NAME(hypercall_table)(,%eax,4) -multicall_return_from_call: -multicall_fault7: - movl %eax,24(%ebx) # args[5] == result - addl $20,%esp - popl %ecx - addl $(1< #include -static long alloc_dom_mem(struct domain *d, - unsigned long *extent_list, - unsigned long nr_extents, - unsigned int extent_order) +/* + * To allow safe resume of do_dom_mem_op() after preemption, we need to know + * at what point in the page list to resume. For this purpose I steal the + * high-order bits of the @op parameter, which are otherwise unused and zero. + */ +#define START_EXTENT_SHIFT 4 /* op[:4] == start_extent */ + +#define PREEMPT_CHECK(_op) \ + if ( hypercall_preempt_check() ) { \ + hypercall_create_continuation( \ + __HYPERVISOR_dom_mem_op, 5, \ + (_op) | (i << START_EXTENT_SHIFT), \ + extent_list, nr_extents, extent_order, \ + (d == current) ? DOMID_SELF : d->id); \ + return __HYPERVISOR_dom_mem_op; \ + } + +static long +alloc_dom_mem(struct domain *d, + unsigned long *extent_list, + unsigned long start_extent, + unsigned long nr_extents, + unsigned int extent_order) { struct pfn_info *page; unsigned long i; - if ( unlikely(!access_ok(VERIFY_WRITE, extent_list, - nr_extents*sizeof(*extent_list))) ) - return 0; + if ( unlikely(!array_access_ok(VERIFY_WRITE, extent_list, + nr_extents, sizeof(*extent_list))) ) + return start_extent; if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current) ) { DPRINTK("Only I/O-capable domains may allocate > order-0 memory.\n"); - return 0; + return start_extent; } - for ( i = 0; i < nr_extents; i++ ) + for ( i = start_extent; i < nr_extents; i++ ) { - hypercall_may_preempt( - __HYPERVISOR_dom_mem_op, 5, - MEMOP_increase_reservation, - &extent_list[i], nr_extents-i, extent_order, - (d == current) ? DOMID_SELF : d->id); + PREEMPT_CHECK(MEMOP_increase_reservation); if ( unlikely((page = alloc_domheap_pages(d, extent_order)) == NULL) ) { @@ -55,25 +70,23 @@ static long alloc_dom_mem(struct domain *d, return i; } -static long free_dom_mem(struct domain *d, - unsigned long *extent_list, - unsigned long nr_extents, - unsigned int extent_order) +static long +free_dom_mem(struct domain *d, + unsigned long *extent_list, + unsigned long start_extent, + unsigned long nr_extents, + unsigned int extent_order) { struct pfn_info *page; unsigned long i, j, mpfn; - if ( unlikely(!access_ok(VERIFY_READ, extent_list, - nr_extents*sizeof(*extent_list))) ) - return 0; + if ( unlikely(!array_access_ok(VERIFY_READ, extent_list, + nr_extents, sizeof(*extent_list))) ) + return start_extent; - for ( i = 0; i < nr_extents; i++ ) + for ( i = start_extent; i < nr_extents; i++ ) { - hypercall_may_preempt( - __HYPERVISOR_dom_mem_op, 5, - MEMOP_decrease_reservation, - &extent_list[i], nr_extents-i, extent_order, - (d == current) ? DOMID_SELF : d->id); + PREEMPT_CHECK(MEMOP_decrease_reservation); if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) ) return i; @@ -106,15 +119,24 @@ static long free_dom_mem(struct domain *d, return i; } - -long do_dom_mem_op(unsigned int op, - unsigned long *extent_list, - unsigned long nr_extents, - unsigned int extent_order, - domid_t domid) + +long +do_dom_mem_op(unsigned long op, + unsigned long *extent_list, + unsigned long nr_extents, + unsigned int extent_order, + domid_t domid) { struct domain *d; - long rc; + unsigned long rc, start_extent; + + /* Extract @start_extent from @op. */ + start_extent = op >> START_EXTENT_SHIFT; + op &= (1 << START_EXTENT_SHIFT) - 1; + + if ( unlikely(start_extent > nr_extents) || + unlikely(nr_extents > (~0UL >> START_EXTENT_SHIFT)) ) + return -EINVAL; if ( likely(domid == DOMID_SELF) ) d = current; @@ -126,10 +148,12 @@ long do_dom_mem_op(unsigned int op, switch ( op ) { case MEMOP_increase_reservation: - rc = alloc_dom_mem(d, extent_list, nr_extents, extent_order); + rc = alloc_dom_mem( + d, extent_list, start_extent, nr_extents, extent_order); break; case MEMOP_decrease_reservation: - rc = free_dom_mem(d, extent_list, nr_extents, extent_order); + rc = free_dom_mem( + d, extent_list, start_extent, nr_extents, extent_order); break; default: rc = -ENOSYS; diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index e01cad9e80..1a207711e2 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -446,14 +446,14 @@ do_grant_table_op( switch ( cmd ) { case GNTTABOP_map_grant_ref: - if ( unlikely(!access_ok(VERIFY_WRITE, uop, - count * sizeof(gnttab_map_grant_ref_t))) ) + if ( unlikely(!array_access_ok( + VERIFY_WRITE, uop, count, sizeof(gnttab_map_grant_ref_t))) ) return -EFAULT; rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); break; case GNTTABOP_unmap_grant_ref: - if ( unlikely(!access_ok(VERIFY_WRITE, uop, - count * sizeof(gnttab_unmap_grant_ref_t))) ) + if ( unlikely(!array_access_ok( + VERIFY_WRITE, uop, count, sizeof(gnttab_unmap_grant_ref_t))) ) return -EFAULT; rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count); break; diff --git a/xen/common/multicall.c b/xen/common/multicall.c new file mode 100644 index 0000000000..04b5a7bdd2 --- /dev/null +++ b/xen/common/multicall.c @@ -0,0 +1,76 @@ +/****************************************************************************** + * multicall.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct mc_state mc_state[NR_CPUS]; + +long do_multicall(multicall_entry_t *call_list, unsigned int nr_calls) +{ + struct mc_state *mcs = &mc_state[smp_processor_id()]; + unsigned int i; + + if ( unlikely(__test_and_set_bit(_MCSF_in_multicall, &mcs->flags)) ) + { + DPRINTK("Multicall reentry is disallowed.\n"); + return -EINVAL; + } + + if ( unlikely(!array_access_ok(VERIFY_WRITE, call_list, + nr_calls, sizeof(*call_list))) ) + { + DPRINTK("Bad memory range %p for %u*%u bytes.\n", + call_list, nr_calls, sizeof(*call_list)); + goto fault; + } + + for ( i = 0; i < nr_calls; i++ ) + { + if ( unlikely(__copy_from_user(&mcs->call, &call_list[i], + sizeof(*call_list))) ) + { + DPRINTK("Error copying from user range %p for %u bytes.\n", + &call_list[i], sizeof(*call_list)); + goto fault; + } + + do_multicall_call(&mcs->call); + + if ( unlikely(__put_user(mcs->call.args[5], &call_list[i].args[5])) ) + { + DPRINTK("Error writing result back to multicall block.\n"); + goto fault; + } + + if ( hypercall_preempt_check() ) + { + /* If the sub-call wasn't preempted, skip over it. */ + if ( !test_bit(_MCSF_call_preempted, &mcs->flags) ) + i++; + + /* Only create a continuation if there is work left to be done. */ + if ( i < nr_calls ) + { + mcs->flags = 0; + hypercall_create_continuation( + __HYPERVISOR_multicall, 2, &call_list[i], nr_calls-i); + return __HYPERVISOR_multicall; + } + } + } + + mcs->flags = 0; + return 0; + + fault: + mcs->flags = 0; + return -EFAULT; +} diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 31a669c08d..87ffe1ecc1 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -221,9 +221,6 @@ extern unsigned long *machine_to_phys_mapping; #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START) #endif -/* Part of the domain API. */ -int do_mmu_update(mmu_update_t *updates, int count, int *success_count); - #define DEFAULT_GDT_ENTRIES (LAST_RESERVED_GDT_ENTRY+1) #define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table) diff --git a/xen/include/asm-x86/multicall.h b/xen/include/asm-x86/multicall.h new file mode 100644 index 0000000000..e1a7770354 --- /dev/null +++ b/xen/include/asm-x86/multicall.h @@ -0,0 +1,26 @@ +/****************************************************************************** + * asm-x86/multicall.h + */ + +#ifndef __ASM_X86_MULTICALL_H__ +#define __ASM_X86_MULTICALL_H__ + +#include + +#define do_multicall_call(_call) \ + do { \ + __asm__ __volatile__ ( \ + "pushl "STR(MULTICALL_arg4)"(%0); " \ + "pushl "STR(MULTICALL_arg3)"(%0); " \ + "pushl "STR(MULTICALL_arg2)"(%0); " \ + "pushl "STR(MULTICALL_arg1)"(%0); " \ + "pushl "STR(MULTICALL_arg0)"(%0); " \ + "movl "STR(MULTICALL_op)"(%0),%%eax; " \ + "andl $("STR(NR_hypercalls)"-1),%%eax; " \ + "call *hypercall_table(,%%eax,4); " \ + "movl %%eax,"STR(MULTICALL_result)"(%0); "\ + "addl $20,%%esp; " \ + : : "b" (_call) : "eax", "ecx", "edx" ); \ + } while ( 0 ) + +#endif /* __ASM_X86_MULTICALL_H__ */ diff --git a/xen/include/asm-x86/x86_32/uaccess.h b/xen/include/asm-x86/x86_32/uaccess.h index 7531a29445..650492d59e 100644 --- a/xen/include/asm-x86/x86_32/uaccess.h +++ b/xen/include/asm-x86/x86_32/uaccess.h @@ -66,6 +66,9 @@ extern struct movsl_mask { */ #define access_ok(type,addr,size) (likely(__range_ok(addr,size) == 0)) +#define array_access_ok(type,addr,count,size) \ + (likely(count < (~0UL/size)) && access_ok(type,addr,count*size)) + /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is diff --git a/xen/include/xen/multicall.h b/xen/include/xen/multicall.h new file mode 100644 index 0000000000..5982471330 --- /dev/null +++ b/xen/include/xen/multicall.h @@ -0,0 +1,21 @@ +/****************************************************************************** + * multicall.h + */ + +#ifndef __XEN_MULTICALL_H__ +#define __XEN_MULTICALL_H__ + +#include + +#define _MCSF_in_multicall 0 +#define _MCSF_call_preempted 1 +#define MCSF_in_multicall (1<<_MCSF_in_multicall) +#define MCSF_call_preempted (1<<_MCSF_call_preempted) +struct mc_state { + unsigned long flags; + multicall_entry_t call; +} __cacheline_aligned; + +extern struct mc_state mc_state[NR_CPUS]; + +#endif /* __XEN_MULTICALL_H__ */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index a67547e6b9..b0c8d70397 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -219,12 +219,8 @@ void continue_cpu_idle_loop(void); void continue_nonidle_task(void); void hypercall_create_continuation(unsigned int op, unsigned int nr_args, ...); -#define hypercall_may_preempt(_op, _nr_args, _args...) \ - do { \ - if ( unlikely(softirq_pending(smp_processor_id())) ) { \ - hypercall_create_continuation(_op , _nr_args , ##_args); \ - return _op; \ - } } while ( 0 ) +#define hypercall_preempt_check() \ + (unlikely(softirq_pending(smp_processor_id()))) /* This domain_hash and domain_list are protected by the domlist_lock. */ #define DOMAIN_HASH_SIZE 256 -- cgit v1.2.3