diff options
Diffstat (limited to 'linux-2.6-xen-sparse/arch/i386')
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/Kconfig | 5 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/kernel/Makefile | 11 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c | 7 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S | 43 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c | 14 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c | 20 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S | 2 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 14 | ||||
-rw-r--r-- | linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c | 138 |
9 files changed, 167 insertions, 87 deletions
diff --git a/linux-2.6-xen-sparse/arch/i386/Kconfig b/linux-2.6-xen-sparse/arch/i386/Kconfig index 9687edf9f0..661d0bbecd 100644 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig @@ -1180,11 +1180,6 @@ config X86_NO_TSS depends on X86_XEN default y -config X86_SYSENTER - bool - depends on !X86_NO_TSS - default y - config X86_NO_IDT bool depends on X86_XEN diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile index 5661a9bb68..d0ccf7b375 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile +++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile @@ -49,14 +49,12 @@ else vsyscall_note := vsyscall-note.o endif -VSYSCALL_TYPES-y := int80 -VSYSCALL_TYPES-$(CONFIG_X86_SYSENTER) += sysenter # vsyscall.o contains the vsyscall DSO images as __initdata. # We must build both images before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/vsyscall.o: $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.so) -targets += $(foreach F,$(VSYSCALL_TYPES-y),vsyscall-$F.o vsyscall-$F.so) -targets += $(vsyscall_note) vsyscall.lds +$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so +targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) +targets += vsyscall-note.o vsyscall.lds # The DSO images are built using a special linker script. quiet_cmd_syscall = SYSCALL $@ @@ -83,8 +81,7 @@ $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o SYSCFLAGS_vsyscall-syms.o = -r $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ - $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.o) \ - $(obj)/$(vsyscall_note) FORCE + $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE $(call if_changed,syscall) ifdef CONFIG_XEN diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c b/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c index 3c4a0f46eb..547eb452b3 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c @@ -64,10 +64,13 @@ void foo(void) OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); -#ifdef CONFIG_X86_SYSENTER +#ifndef CONFIG_X86_NO_TSS /* Offset from the sysenter stack to tss.esp0 */ - DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - + DEFINE(SYSENTER_stack_esp0, offsetof(struct tss_struct, esp0) - sizeof(struct tss_struct)); +#else + /* sysenter stack points directly to esp0 */ + DEFINE(SYSENTER_stack_esp0, 0); #endif DEFINE(PAGE_SIZE_asm, PAGE_SIZE); diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S index 072172cda7..8c5ace45ef 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S +++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S @@ -202,13 +202,12 @@ need_resched: jmp need_resched #endif -#ifdef CONFIG_X86_SYSENTER /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ # sysenter call handler stub ENTRY(sysenter_entry) - movl TSS_sysenter_esp0(%esp),%esp + movl SYSENTER_stack_esp0(%esp),%esp sysenter_past_esp: sti pushl $(__USER_DS) @@ -240,7 +239,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) - cli + DISABLE_INTERRUPTS movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work @@ -248,9 +247,23 @@ sysenter_past_esp: movl EIP(%esp), %edx movl OLDESP(%esp), %ecx xorl %ebp,%ebp +#ifdef CONFIG_XEN + __ENABLE_INTERRUPTS +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ + __TEST_PENDING + jnz 14f # process more events if necessary... + movl ESI(%esp), %esi + sysexit +14: __DISABLE_INTERRUPTS +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ + push %esp + call evtchn_do_upcall + add $4,%esp + jmp ret_from_intr +#else sti sysexit -#endif /* CONFIG_X86_SYSENTER */ +#endif /* !CONFIG_XEN */ # system call handler stub @@ -532,6 +545,11 @@ error_code: # So, on entry to the handler we detect whether we interrupted an # existing activation in its critical region -- if so, we pop the current # activation and restart the handler using the previous one. +# +# The sysexit critical region is slightly different. sysexit +# atomically removes the entire stack frame. If we interrupt in the +# critical region we know that the entire frame is present and correct +# so we can simply throw away the new one. ENTRY(hypervisor_callback) pushl %eax SAVE_ALL @@ -540,6 +558,11 @@ ENTRY(hypervisor_callback) jb 11f cmpl $ecrit,%eax jb critical_region_fixup + cmpl $sysexit_scrit,%eax + jb 11f + cmpl $sysexit_ecrit,%eax + ja 11f + addl $0x34,%esp # Remove cs...ebx from stack frame. 11: push %esp call evtchn_do_upcall add $4,%esp @@ -683,13 +706,13 @@ device_available_emulate: call math_state_restore jmp ret_from_exception -#ifdef CONFIG_X86_SYSENTER +#ifndef CONFIG_XEN /* * Debug traps and NMI can happen at the one SYSENTER instruction * that sets up the real kernel stack. Check here, since we can't * allow the wrong stack to be used. * - * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have + * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have * already pushed 3 words if it hits on the sysenter instruction: * eflags, cs and eip. * @@ -701,19 +724,19 @@ device_available_emulate: cmpw $__KERNEL_CS,4(%esp); \ jne ok; \ label: \ - movl TSS_sysenter_esp0+offset(%esp),%esp; \ + movl SYSENTER_stack_esp0+offset(%esp),%esp; \ pushfl; \ pushl $__KERNEL_CS; \ pushl $sysenter_past_esp -#endif /* CONFIG_X86_SYSENTER */ +#endif /* CONFIG_XEN */ KPROBE_ENTRY(debug) -#ifdef CONFIG_X86_SYSENTER +#ifndef CONFIG_XEN cmpl $sysenter_entry,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) debug_stack_correct: -#endif /* !CONFIG_X86_SYSENTER */ +#endif /* !CONFIG_XEN */ pushl $-1 # mark this as an int SAVE_ALL xorl %edx,%edx # error code 0 diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c index 47edb0524b..cf0e62c5ff 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c @@ -1205,7 +1205,6 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; * int assign_irq_vector(int irq) { - static int current_vector = FIRST_DEVICE_VECTOR; physdev_op_t op; BUG_ON(irq >= NR_IRQ_VECTORS); @@ -1216,13 +1215,12 @@ int assign_irq_vector(int irq) op.u.irq_op.irq = irq; if (HYPERVISOR_physdev_op(&op)) return -ENOSPC; - current_vector = op.u.irq_op.vector; - vector_irq[current_vector] = irq; + vector_irq[op.u.irq_op.vector] = irq; if (irq != AUTO_ASSIGN) - IO_APIC_VECTOR(irq) = current_vector; + IO_APIC_VECTOR(irq) = op.u.irq_op.vector; - return current_vector; + return op.u.irq_op.vector; } #ifndef CONFIG_XEN @@ -2485,6 +2483,12 @@ static int __init io_apic_bug_finalize(void) { if(sis_apic_bug == -1) sis_apic_bug = 0; + if (xen_start_info->flags & SIF_INITDOMAIN) { + dom0_op_t op = { .cmd = DOM0_PLATFORM_QUIRK }; + op.u.platform_quirk.quirk_id = sis_apic_bug ? + QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL; + HYPERVISOR_dom0_op(&op); + } return 0; } diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c index 99193bb47f..844c87e78c 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c @@ -20,11 +20,15 @@ #include <asm/pgtable.h> #include <asm/unistd.h> +#ifdef CONFIG_XEN +#include <xen/interface/callback.h> +#endif + extern asmlinkage void sysenter_entry(void); void enable_sep_cpu(void) { -#ifdef CONFIG_X86_SYSENTER +#ifndef CONFIG_X86_NO_TSS int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); @@ -54,14 +58,24 @@ int __init sysenter_setup(void) { syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); -#ifdef CONFIG_X86_SYSENTER +#ifdef CONFIG_XEN + if (boot_cpu_has(X86_FEATURE_SEP)) { + struct callback_register sysenter = { + .type = CALLBACKTYPE_sysenter, + .address = { __KERNEL_CS, (unsigned long)sysenter_entry }, + }; + + if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) + clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); + } +#endif + if (boot_cpu_has(X86_FEATURE_SEP)) { memcpy(syscall_page, &vsyscall_sysenter_start, &vsyscall_sysenter_end - &vsyscall_sysenter_start); return 0; } -#endif memcpy(syscall_page, &vsyscall_int80_start, diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S b/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S index 432aa46649..b403890fe3 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S @@ -7,11 +7,9 @@ vsyscall_int80_start: .incbin "arch/i386/kernel/vsyscall-int80.so" vsyscall_int80_end: -#ifdef CONFIG_X86_SYSENTER .globl vsyscall_sysenter_start, vsyscall_sysenter_end vsyscall_sysenter_start: .incbin "arch/i386/kernel/vsyscall-sysenter.so" vsyscall_sysenter_end: -#endif __FINIT diff --git a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c index b6109de24e..d8a11c78d3 100644 --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c @@ -306,14 +306,14 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) BUG_ON(rc); } if (HAVE_SHARED_KERNEL_PMD) - memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); } else { spin_lock_irqsave(&pgd_lock, flags); - memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); pgd_list_add(pgd); spin_unlock_irqrestore(&pgd_lock, flags); @@ -360,7 +360,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); if (!pmd) goto out_oom; - set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); } spin_lock_irqsave(&pgd_lock, flags); diff --git a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c index f5184a2c0c..bcfdc23e74 100644 --- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c @@ -35,8 +35,9 @@ static void xenoprof_stop(void); void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot); static int xenoprof_enabled = 0; -static int num_events = 0; +static unsigned int num_events = 0; static int is_primary = 0; +static int active_defined; /* sample buffers shared with Xen */ xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS]; @@ -106,7 +107,7 @@ static irqreturn_t xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs) { int head, tail, size; - xenoprof_buf_t * buf; + struct xenoprof_buf * buf; int cpu; cpu = smp_processor_id(); @@ -196,28 +197,49 @@ static int bind_virq(void) static int xenoprof_setup(void) { int ret; + int i; ret = bind_virq(); if (ret) return ret; if (is_primary) { - ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, - (unsigned long)NULL, - (unsigned long)NULL); + struct xenoprof_counter counter; + + /* Define dom0 as an active domain if not done yet */ + if (!active_defined) { + domid_t domid; + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL); + if (ret) + goto err; + domid = 0; + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); + if (ret) + goto err; + active_defined = 1; + } + + ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL); if (ret) goto err; + for (i=0; i<num_events; i++) { + counter.ind = i; + counter.count = (uint64_t)counter_config[i].count; + counter.enabled = (uint32_t)counter_config[i].enabled; + counter.event = (uint32_t)counter_config[i].event; + counter.kernel = (uint32_t)counter_config[i].kernel; + counter.user = (uint32_t)counter_config[i].user; + counter.unit_mask = (uint64_t)counter_config[i].unit_mask; + HYPERVISOR_xenoprof_op(XENOPROF_counter, + &counter); + } + ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL); - ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, - (unsigned long)&counter_config, - (unsigned long)num_events); if (ret) goto err; } - ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, - (unsigned long)NULL, - (unsigned long)NULL); + ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL); if (ret) goto err; @@ -233,17 +255,15 @@ static void xenoprof_shutdown(void) { xenoprof_enabled = 0; - HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, - (unsigned long)NULL, - (unsigned long)NULL); + HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL); if (is_primary) { - HYPERVISOR_xenoprof_op(XENOPROF_release_counters, - (unsigned long)NULL, - (unsigned long)NULL); + HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL); + active_defined = 0; } unbind_virq(); + } @@ -252,9 +272,8 @@ static int xenoprof_start(void) int ret = 0; if (is_primary) - ret = HYPERVISOR_xenoprof_op(XENOPROF_start, - (unsigned long)NULL, - (unsigned long)NULL); + ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL); + return ret; } @@ -262,20 +281,43 @@ static int xenoprof_start(void) static void xenoprof_stop(void) { if (is_primary) - HYPERVISOR_xenoprof_op(XENOPROF_stop, - (unsigned long)NULL, - (unsigned long)NULL); + HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL); } static int xenoprof_set_active(int * active_domains, - unsigned int adomains) + unsigned int adomains) { int ret = 0; - if (is_primary) - ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, - (unsigned long)active_domains, - (unsigned long)adomains); + int i; + int set_dom0 = 0; + domid_t domid; + + if (!is_primary) + return 0; + + if (adomains > MAX_OPROF_DOMAINS) + return -E2BIG; + + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL); + if (ret) + return ret; + + for (i=0; i<adomains; i++) { + domid = active_domains[i]; + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); + if (ret) + return (ret); + if (active_domains[i] == 0) + set_dom0 = 1; + } + /* dom0 must always be active but may not be in the list */ + if (!set_dom0) { + domid = 0; + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); + } + + active_defined = 1; return ret; } @@ -325,44 +367,48 @@ static int using_xenoprof; int __init oprofile_arch_init(struct oprofile_operations * ops) { - xenoprof_init_result_t result; - xenoprof_buf_t * buf; - int max_samples = 16; + struct xenoprof_init init; + struct xenoprof_buf * buf; int vm_size; int npages; + int ret; int i; - int ret = HYPERVISOR_xenoprof_op(XENOPROF_init, - (unsigned long)max_samples, - (unsigned long)&result); + init.max_samples = 16; + ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init); if (!ret) { pgprot_t prot = __pgprot(_KERNPG_TABLE); - num_events = result.num_events; - is_primary = result.is_primary; - nbuf = result.nbuf; + num_events = init.num_events; + is_primary = init.is_primary; + nbuf = init.nbuf; - npages = (result.bufsize * nbuf - 1) / PAGE_SIZE + 1; + /* just in case - make sure we do not overflow event list + (i.e. counter_config list) */ + if (num_events > OP_MAX_COUNTER) + num_events = OP_MAX_COUNTER; + + npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1; vm_size = npages * PAGE_SIZE; - shared_buffer = (char *) vm_map_xen_pages(result.buf_maddr, - vm_size, prot); + shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr, + vm_size, prot); if (!shared_buffer) { ret = -ENOMEM; goto out; } for (i=0; i< nbuf; i++) { - buf = (xenoprof_buf_t*) - &shared_buffer[i * result.bufsize]; + buf = (struct xenoprof_buf*) + &shared_buffer[i * init.bufsize]; BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); xenoprof_buf[buf->vcpu_id] = buf; } /* cpu_type is detected by Xen */ cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0; - strncpy(cpu_type, result.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1); + strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1); xenoprof_ops.cpu_type = cpu_type; init_driverfs(); @@ -371,6 +417,8 @@ int __init oprofile_arch_init(struct oprofile_operations * ops) for (i=0; i<NR_CPUS; i++) ovf_irq[i] = -1; + + active_defined = 0; } out: printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, " @@ -389,7 +437,5 @@ void __exit oprofile_arch_exit(void) shared_buffer = NULL; } if (is_primary) - HYPERVISOR_xenoprof_op(XENOPROF_shutdown, - (unsigned long)NULL, - (unsigned long)NULL); + HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL); } |