diff options
author | Ian Jackson <Ian.Jackson@eu.citrix.com> | 2011-01-17 17:24:21 +0000 |
---|---|---|
committer | Ian Jackson <Ian.Jackson@eu.citrix.com> | 2011-01-17 17:24:21 +0000 |
commit | 38da12ffee01c2d88ba6b6441142e58e59212af9 (patch) | |
tree | 6c693dbab0ff2873d052e92c3768494c4f336b75 | |
parent | 85b911622372484047c5fbb9dddf52022a265075 (diff) | |
parent | e7adac50b0bd072af734d245204d70c054844eec (diff) | |
download | xen-38da12ffee01c2d88ba6b6441142e58e59212af9.tar.gz xen-38da12ffee01c2d88ba6b6441142e58e59212af9.tar.bz2 xen-38da12ffee01c2d88ba6b6441142e58e59212af9.zip |
Merge
26 files changed, 387 insertions, 204 deletions
diff --git a/tools/firmware/hvmloader/util.c b/tools/firmware/hvmloader/util.c index fe561a4f99..c58ea1071a 100644 --- a/tools/firmware/hvmloader/util.c +++ b/tools/firmware/hvmloader/util.c @@ -425,10 +425,10 @@ static char *printnum(char *p, unsigned long num, int base) static void _doprint(void (*put)(char), const char *fmt, va_list ap) { - register char *str, c; + char *str, c; int lflag, zflag, nflag; char buffer[17]; - unsigned value; + unsigned long value; int i, slen, pad; for ( ; *fmt != '\0'; fmt++ ) @@ -457,29 +457,40 @@ static void _doprint(void (*put)(char), const char *fmt, va_list ap) lflag = 1; c = *++fmt; } - if ( (c == 'd') || (c == 'u') || (c == 'o') || (c == 'x') ) + if ( (c == 'd') || (c == 'u') || (c == 'o') || + (c == 'x') || (c == 'X') ) { if ( lflag ) - value = va_arg(ap, unsigned); + { + value = va_arg(ap, unsigned long); + if ( (c == 'd') && ((long)value < 0) ) + { + value = -value; + put('-'); + } + } else - value = (unsigned) va_arg(ap, unsigned int); - str = buffer; - printnum(str, value, - c == 'o' ? 8 : (c == 'x' ? 16 : 10)); - goto printn; - } - else if ( (c == 'O') || (c == 'D') || (c == 'X') ) - { - value = va_arg(ap, unsigned); + { + value = va_arg(ap, unsigned int); + if ( (c == 'd') && ((int)value < 0) ) + { + value = -(int)value; + put('-'); + } + } str = buffer; printnum(str, value, - c == 'O' ? 8 : (c == 'X' ? 16 : 10)); - printn: + c == 'o' ? 8 : ((c == 'x') || (c == 'X') ? 16 : 10)); slen = strlen(str); for ( i = pad - slen; i > 0; i-- ) put(zflag ? '0' : ' '); while ( *str ) - put(*str++); + { + char ch = *str++; + if ( (ch >= 'a') && (c == 'X') ) + ch += 'A'-'a'; + put(ch); + } } else if ( c == 's' ) { diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c index d3a2e194a1..dbac09467c 100644 --- a/tools/libxc/xc_hvm_build.c +++ b/tools/libxc/xc_hvm_build.c @@ -431,8 +431,9 @@ int xc_hvm_build(xc_interface *xch, /* xc_hvm_build_target_mem: * Create a domain for a pre-ballooned virtualized Linux, using * files/filenames. If target < memsize, domain is created with - * memsize pages marked populate-on-demand, and with a PoD cache size - * of target. If target == memsize, pages are populated normally. + * memsize pages marked populate-on-demand, + * calculating pod cache size based on target. + * If target == memsize, pages are populated normally. */ int xc_hvm_build_target_mem(xc_interface *xch, uint32_t domid, diff --git a/tools/libxc/xenctrl_osdep_ENOSYS.c b/tools/libxc/xenctrl_osdep_ENOSYS.c index 90ae348a58..4c156e999e 100644 --- a/tools/libxc/xenctrl_osdep_ENOSYS.c +++ b/tools/libxc/xenctrl_osdep_ENOSYS.c @@ -27,10 +27,10 @@ static int ENOSYS_privcmd_close(xc_interface *xch, xc_osdep_handle h) static int ENOSYS_privcmd_hypercall(xc_interface *xch, xc_osdep_handle h, privcmd_hypercall_t *hypercall) { - IPRINTF(xch, "ENOSYS_privcmd %p: hypercall: %02lld(%#llx,%#llx,%#llx,%#llx,%#llx,%#llx)\n", + IPRINTF(xch, "ENOSYS_privcmd %p: hypercall: %02lld(%#llx,%#llx,%#llx,%#llx,%#llx)\n", h, hypercall->op, hypercall->arg[0], hypercall->arg[1], hypercall->arg[2], - hypercall->arg[3], hypercall->arg[4], hypercall->arg[5]); + hypercall->arg[3], hypercall->arg[4]); return -ENOSYS; } diff --git a/tools/misc/xen-hptool.c b/tools/misc/xen-hptool.c index 374d88315d..24c3e956c0 100644 --- a/tools/misc/xen-hptool.c +++ b/tools/misc/xen-hptool.c @@ -2,6 +2,7 @@ #include <xc_private.h> #include <xc_core.h> #include <errno.h> +#include <unistd.h> #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) @@ -241,6 +242,20 @@ static int hp_mem_offline_func(int argc, char *argv[]) return ret; } +static int exec_cpu_hp_fn(int (*hp_fn)(xc_interface *, int), int cpu) +{ + int ret; + + for ( ; ; ) + { + ret = (*hp_fn)(xch, cpu); + if ( (ret >= 0) || (errno != EBUSY) ) + break; + usleep(100000); /* 100ms */ + } + + return ret; +} static int hp_cpu_online_func(int argc, char *argv[]) { @@ -254,7 +269,7 @@ static int hp_cpu_online_func(int argc, char *argv[]) cpu = atoi(argv[0]); printf("Prepare to online CPU %d\n", cpu); - ret = xc_cpu_online(xch, cpu); + ret = exec_cpu_hp_fn(xc_cpu_online, cpu); if (ret < 0) fprintf(stderr, "CPU %d online failed (error %d: %s)\n", cpu, errno, strerror(errno)); @@ -275,7 +290,7 @@ static int hp_cpu_offline_func(int argc, char *argv[]) } cpu = atoi(argv[0]); printf("Prepare to offline CPU %d\n", cpu); - ret = xc_cpu_offline(xch, cpu); + ret = exec_cpu_hp_fn(xc_cpu_offline, cpu); if (ret < 0) fprintf(stderr, "CPU %d offline failed (error %d: %s)\n", cpu, errno, strerror(errno)); diff --git a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c index 74c851de91..759a0d1cae 100644 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c @@ -377,18 +377,13 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, return -ENOENT; } - if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) { - printk(KERN_ERR ":MEM I/O resource 0x%lx @ 0x%lx busy\n", - mmio_addr, mmio_len); - return -EBUSY; - } + ret = pci_request_region(pdev, 1, DRV_NAME); + if (ret < 0) + return ret; - if (request_region(ioaddr, iolen, DRV_NAME) == NULL) { - printk(KERN_ERR DRV_NAME ":I/O resource 0x%lx @ 0x%lx busy\n", - iolen, ioaddr); - release_mem_region(mmio_addr, mmio_len); - return -EBUSY; - } + ret = pci_request_region(pdev, 0, DRV_NAME); + if (ret < 0) + goto mem_out; platform_mmio = mmio_addr; platform_mmiolen = mmio_len; @@ -424,8 +419,9 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, out: if (ret) { - release_mem_region(mmio_addr, mmio_len); - release_region(ioaddr, iolen); + pci_release_region(pdev, 0); +mem_out: + pci_release_region(pdev, 1); } return ret; diff --git a/xen/arch/ia64/xen/xensetup.c b/xen/arch/ia64/xen/xensetup.c index 9b3e725032..fe042e450a 100644 --- a/xen/arch/ia64/xen/xensetup.c +++ b/xen/arch/ia64/xen/xensetup.c @@ -606,7 +606,11 @@ printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus); if ( num_online_cpus() >= max_cpus ) break; if ( !cpu_online(i) ) - cpu_up(i); + { + int ret = cpu_up(i); + if ( ret != 0 ) + printk("Failed to bring up CPU %u (error %d)\n", i, ret); + } } local_irq_disable(); diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c index b4a633d544..5dfdeb431f 100644 --- a/xen/arch/x86/acpi/power.c +++ b/xen/arch/x86/acpi/power.c @@ -206,6 +206,7 @@ static int enter_state(u32 state) enable_cpu: cpufreq_add_cpu(0); microcode_resume_cpu(0); + rcu_barrier(); mtrr_aps_sync_begin(); enable_nonboot_cpus(); mtrr_aps_sync_end(); diff --git a/xen/arch/x86/acpi/suspend.c b/xen/arch/x86/acpi/suspend.c index c82ea96a82..6f2ff5b82b 100644 --- a/xen/arch/x86/acpi/suspend.c +++ b/xen/arch/x86/acpi/suspend.c @@ -24,8 +24,7 @@ static uint16_t saved_segs[4]; void save_rest_processor_state(void) { - if ( !is_idle_vcpu(current) ) - unlazy_fpu(current); + save_init_fpu(current); #if defined(CONFIG_X86_64) asm volatile ( diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index df659dfd5d..376a8e4d44 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1384,7 +1384,7 @@ static void __context_switch(void) memcpy(&p->arch.guest_context.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES); - unlazy_fpu(p); + save_init_fpu(p); p->arch.ctxt_switch_from(p); } diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 62c018fa44..1a89bfb575 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1994,11 +1994,20 @@ static enum hvm_copy_result __hvm_copy( int count, todo = size; /* + * XXX Disable for 4.1.0: PV-on-HVM drivers will do grant-table ops + * such as query_size. Grant-table code currently does copy_to/from_guest + * accesses under the big per-domain lock, which this test would disallow. + * The test is not needed until we implement sleeping-on-waitqueue when + * we access a paged-out frame, and that's post 4.1.0 now. + */ +#if 0 + /* * If the required guest memory is paged out, this function may sleep. * Hence we bail immediately if called from atomic context. */ if ( in_atomic() ) return HVMCOPY_unhandleable; +#endif while ( todo > 0 ) { diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c index 477efec973..0b052349e3 100644 --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -16,18 +16,101 @@ #include <asm/i387.h> #include <asm/asm_defns.h> -void init_fpu(void) +static bool_t __read_mostly cpu_has_xsaveopt; + +static void xsave(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x27" + : + : "a" (-1), "d" (-1), "D"(ptr) + : "memory" ); +} + +static void xsaveopt(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x37" + : + : "a" (-1), "d" (-1), "D"(ptr) + : "memory" ); +} + +static void xrstor(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x2f" + : + : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) ); +} + +static void load_mxcsr(unsigned long val) +{ + val &= 0xffbf; + asm volatile ( "ldmxcsr %0" : : "m" (val) ); +} + +static void init_fpu(void); +static void restore_fpu(struct vcpu *v); + +void setup_fpu(struct vcpu *v) +{ + ASSERT(!is_idle_vcpu(v)); + + /* Avoid recursion. */ + clts(); + + if ( v->fpu_dirtied ) + return; + + if ( cpu_has_xsave ) + { + /* + * XCR0 normally represents what guest OS set. In case of Xen itself, + * we set all supported feature mask before doing save/restore. + */ + set_xcr0(v->arch.xcr0_accum); + xrstor(v); + set_xcr0(v->arch.xcr0); + } + else if ( v->fpu_initialised ) + { + restore_fpu(v); + } + else + { + init_fpu(); + } + + v->fpu_initialised = 1; + v->fpu_dirtied = 1; +} + +static void init_fpu(void) { asm volatile ( "fninit" ); if ( cpu_has_xmm ) load_mxcsr(0x1f80); - current->fpu_initialised = 1; } void save_init_fpu(struct vcpu *v) { - unsigned long cr0 = read_cr0(); - char *fpu_ctxt = v->arch.guest_context.fpu_ctxt.x; + unsigned long cr0; + char *fpu_ctxt; + + if ( !v->fpu_dirtied ) + return; + + ASSERT(!is_idle_vcpu(v)); + + cr0 = read_cr0(); + fpu_ctxt = v->arch.guest_context.fpu_ctxt.x; /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */ if ( cr0 & X86_CR0_TS ) @@ -91,7 +174,7 @@ void save_init_fpu(struct vcpu *v) write_cr0(cr0|X86_CR0_TS); } -void restore_fpu(struct vcpu *v) +static void restore_fpu(struct vcpu *v) { char *fpu_ctxt = v->arch.guest_context.fpu_ctxt.x; @@ -138,6 +221,7 @@ void restore_fpu(struct vcpu *v) } #define XSTATE_CPUID 0xd +#define XSAVE_AREA_MIN_SIZE (512 + 64) /* FP/SSE + XSAVE.HEADER */ /* * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all @@ -152,32 +236,24 @@ u64 xfeature_mask; /* Cached xcr0 for fast read */ DEFINE_PER_CPU(uint64_t, xcr0); -bool_t __read_mostly cpu_has_xsaveopt; - void xsave_init(void) { u32 eax, ebx, ecx, edx; int cpu = smp_processor_id(); u32 min_size; - if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) { - printk(XENLOG_ERR "XSTATE_CPUID missing\n"); + if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) return; - } cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - printk("%s: cpu%d: cntxt_max_size: 0x%x and states: %08x:%08x\n", - __func__, cpu, ecx, edx, eax); - - if ( ((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE) || - ((eax & XSTATE_YMM) && !(eax & XSTATE_SSE)) ) - { - BUG(); - } + BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE); + BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE)); /* FP/SSE, XSAVE.HEADER, YMM */ - min_size = 512 + 64 + ((eax & XSTATE_YMM) ? XSTATE_YMM_SIZE : 0); + min_size = XSAVE_AREA_MIN_SIZE; + if ( eax & XSTATE_YMM ) + min_size += XSTATE_YMM_SIZE; BUG_ON(ecx < min_size); /* @@ -214,9 +290,11 @@ int xsave_alloc_save_area(struct vcpu *v) { void *save_area; - if ( !cpu_has_xsave ) + if ( !cpu_has_xsave || is_idle_vcpu(v) ) return 0; + BUG_ON(xsave_cntxt_size < XSAVE_AREA_MIN_SIZE); + /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */ save_area = _xmalloc(xsave_cntxt_size, 64); if ( save_area == NULL ) diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c index 812df80228..2733fc3b9e 100644 --- a/xen/arch/x86/platform_hypercall.c +++ b/xen/arch/x86/platform_hypercall.c @@ -55,11 +55,9 @@ static long cpu_frequency_change_helper(void *data) return cpu_frequency_change(this_cpu(freq)); } -static long cpu_down_helper(void *data) -{ - int cpu = (unsigned long)data; - return cpu_down(cpu); -} +/* from sysctl.c */ +long cpu_up_helper(void *data); +long cpu_down_helper(void *data); ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) { @@ -443,40 +441,43 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) case XENPF_cpu_online: { - int cpu; + int cpu = op->u.cpu_ol.cpuid; - cpu = op->u.cpu_ol.cpuid; - if (!cpu_present(cpu)) + if ( !cpu_present(cpu) ) { ret = -EINVAL; break; } - else if (cpu_online(cpu)) + + if ( cpu_online(cpu) ) { ret = 0; break; } - ret = cpu_up(cpu); + ret = continue_hypercall_on_cpu( + 0, cpu_up_helper, (void *)(unsigned long)cpu); break; } case XENPF_cpu_offline: { - int cpu; + int cpu = op->u.cpu_ol.cpuid; - cpu = op->u.cpu_ol.cpuid; - if (!cpu_present(cpu)) + if ( !cpu_present(cpu) ) { ret = -EINVAL; break; - } else if (!cpu_online(cpu)) + } + + if ( !cpu_online(cpu) ) { ret = 0; break; } + ret = continue_hypercall_on_cpu( - 0, cpu_down_helper, (void *)(unsigned long)cpu); + 0, cpu_down_helper, (void *)(unsigned long)cpu); break; } break; diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index d6f9e11f59..4bce2f4739 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1246,7 +1246,11 @@ void __init __start_xen(unsigned long mbi_p) numa_add_cpu(i); if ( (num_online_cpus() < max_cpus) && !cpu_online(i) ) - cpu_up(i); + { + int ret = cpu_up(i); + if ( ret != 0 ) + printk("Failed to bring up CPU %u (error %d)\n", i, ret); + } } printk("Brought up %ld CPUs\n", (long)num_online_cpus()); diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c index ac7b56a29b..042bde4c46 100644 --- a/xen/arch/x86/sysctl.c +++ b/xen/arch/x86/sysctl.c @@ -30,10 +30,30 @@ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -static long cpu_down_helper(void *data) +long cpu_up_helper(void *data) { int cpu = (unsigned long)data; - return cpu_down(cpu); + int ret = cpu_up(cpu); + if ( ret == -EBUSY ) + { + /* On EBUSY, flush RCU work and have one more go. */ + rcu_barrier(); + ret = cpu_up(cpu); + } + return ret; +} + +long cpu_down_helper(void *data) +{ + int cpu = (unsigned long)data; + int ret = cpu_down(cpu); + if ( ret == -EBUSY ) + { + /* On EBUSY, flush RCU work and have one more go. */ + rcu_barrier(); + ret = cpu_down(cpu); + } + return ret; } extern int __node_distance(int a, int b); @@ -41,7 +61,7 @@ extern int __node_distance(int a, int b); long arch_do_sysctl( struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) { - long ret = 0, status; + long ret = 0; switch ( sysctl->cmd ) { @@ -167,41 +187,20 @@ long arch_do_sysctl( { unsigned int cpu = sysctl->u.cpu_hotplug.cpu; - if (cpu_present(cpu)) { - status = cpu_online(cpu) ? XEN_CPU_HOTPLUG_STATUS_ONLINE : - XEN_CPU_HOTPLUG_STATUS_OFFLINE; - } else { - status = -EINVAL; - } - switch ( sysctl->u.cpu_hotplug.op ) { case XEN_SYSCTL_CPU_HOTPLUG_ONLINE: - ret = cpu_up(cpu); - /* - * In the case of a true hotplug, this CPU wasn't present - * before, so return the 'new' status for it. - */ - if (ret == 0 && status == -EINVAL) - status = XEN_CPU_HOTPLUG_STATUS_NEW; + ret = continue_hypercall_on_cpu( + 0, cpu_up_helper, (void *)(unsigned long)cpu); break; case XEN_SYSCTL_CPU_HOTPLUG_OFFLINE: ret = continue_hypercall_on_cpu( 0, cpu_down_helper, (void *)(unsigned long)cpu); break; - case XEN_SYSCTL_CPU_HOTPLUG_STATUS: - ret = 0; - break; default: ret = -EINVAL; break; } - - /* - * If the operation was successful, return the old status. - */ - if (ret >= 0) - ret = status; } break; diff --git a/xen/arch/x86/x86_64/mmconfig-shared.c b/xen/arch/x86/x86_64/mmconfig-shared.c index f1d790ec9e..f36ae75153 100644 --- a/xen/arch/x86/x86_64/mmconfig-shared.c +++ b/xen/arch/x86/x86_64/mmconfig-shared.c @@ -26,7 +26,7 @@ #include "mmconfig.h" static int __initdata known_bridge; -unsigned int __cpuinitdata pci_probe = PCI_PROBE_CONF1 | PCI_PROBE_MMCONF; +unsigned int pci_probe = PCI_PROBE_CONF1 | PCI_PROBE_MMCONF; static void __init parse_mmcfg(char *s) { diff --git a/xen/common/cpu.c b/xen/common/cpu.c index 2a248275a2..5cdfd0fb10 100644 --- a/xen/common/cpu.c +++ b/xen/common/cpu.c @@ -108,7 +108,6 @@ int cpu_down(unsigned int cpu) fail: notifier_rc = notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, hcpu, &nb); BUG_ON(notifier_rc != NOTIFY_DONE); - printk("Failed to take down CPU %u (error %d)\n", cpu, err); cpu_hotplug_done(); return err; } @@ -150,7 +149,6 @@ int cpu_up(unsigned int cpu) fail: notifier_rc = notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu, &nb); BUG_ON(notifier_rc != NOTIFY_DONE); - printk("Failed to bring up CPU %u (error %d)\n", cpu, err); cpu_hotplug_done(); return err; } diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c index b93c1db697..8f795d9a37 100644 --- a/xen/common/rcupdate.c +++ b/xen/common/rcupdate.c @@ -44,6 +44,7 @@ #include <xen/percpu.h> #include <xen/softirq.h> #include <xen/cpu.h> +#include <xen/stop_machine.h> /* Definition for rcupdate control block. */ struct rcu_ctrlblk rcu_ctrlblk = { @@ -60,6 +61,49 @@ static int qhimark = 10000; static int qlowmark = 100; static int rsinterval = 1000; +struct rcu_barrier_data { + struct rcu_head head; + atomic_t *cpu_count; +}; + +static void rcu_barrier_callback(struct rcu_head *head) +{ + struct rcu_barrier_data *data = container_of( + head, struct rcu_barrier_data, head); + atomic_inc(data->cpu_count); +} + +static int rcu_barrier_action(void *_cpu_count) +{ + struct rcu_barrier_data data = { .cpu_count = _cpu_count }; + + ASSERT(!local_irq_is_enabled()); + local_irq_enable(); + + /* + * When callback is executed, all previously-queued RCU work on this CPU + * is completed. When all CPUs have executed their callback, data.cpu_count + * will have been incremented to include every online CPU. + */ + call_rcu(&data.head, rcu_barrier_callback); + + while ( atomic_read(data.cpu_count) != cpus_weight(cpu_online_map) ) + { + process_pending_softirqs(); + cpu_relax(); + } + + local_irq_disable(); + + return 0; +} + +int rcu_barrier(void) +{ + atomic_t cpu_count = ATOMIC_INIT(0); + return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS); +} + static void force_quiescent_state(struct rcu_data *rdp, struct rcu_ctrlblk *rcp) { diff --git a/xen/common/stop_machine.c b/xen/common/stop_machine.c index 31d5c6fff7..9bf271452d 100644 --- a/xen/common/stop_machine.c +++ b/xen/common/stop_machine.c @@ -61,6 +61,10 @@ static void stopmachine_set_state(enum stopmachine_state state) atomic_set(&stopmachine_data.done, 0); smp_wmb(); stopmachine_data.state = state; +} + +static void stopmachine_wait_state(void) +{ while ( atomic_read(&stopmachine_data.done) != stopmachine_data.nr_cpus ) cpu_relax(); } @@ -101,16 +105,20 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) tasklet_schedule_on_cpu(&per_cpu(stopmachine_tasklet, i), i); stopmachine_set_state(STOPMACHINE_PREPARE); + stopmachine_wait_state(); local_irq_disable(); stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); + stopmachine_wait_state(); - if ( cpu == smp_processor_id() ) - stopmachine_data.fn_result = (*fn)(data); stopmachine_set_state(STOPMACHINE_INVOKE); + if ( (cpu == smp_processor_id()) || (cpu == NR_CPUS) ) + stopmachine_data.fn_result = (*fn)(data); + stopmachine_wait_state(); ret = stopmachine_data.fn_result; stopmachine_set_state(STOPMACHINE_EXIT); + stopmachine_wait_state(); local_irq_enable(); spin_unlock(&stopmachine_lock); @@ -140,7 +148,8 @@ static void stopmachine_action(unsigned long cpu) local_irq_disable(); break; case STOPMACHINE_INVOKE: - if ( stopmachine_data.fn_cpu == smp_processor_id() ) + if ( (stopmachine_data.fn_cpu == smp_processor_id()) || + (stopmachine_data.fn_cpu == NR_CPUS) ) stopmachine_data.fn_result = stopmachine_data.fn(stopmachine_data.fn_data); break; diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h index 5427952e83..eceeda8279 100644 --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -86,5 +86,6 @@ void __init platform_quirks_init(void); void vtd_ops_preamble_quirk(struct iommu* iommu); void vtd_ops_postamble_quirk(struct iommu* iommu); void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map); +void pci_vtd_quirk(struct pci_dev *pdev); #endif // _VTD_EXTERN_H_ diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c index b04203dcc7..b69d5e3cc0 100644 --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1773,9 +1773,13 @@ void iommu_set_pgd(struct domain *d) ASSERT( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled ); iommu_hap_pt_share = vtd_ept_share(); + if ( !iommu_hap_pt_share ) + goto out; + pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d))); hd->pgd_maddr = pagetable_get_paddr(pagetable_from_mfn(pgd_mfn)); +out: dprintk(XENLOG_INFO VTDPREFIX, "VT-d page table %s with EPT table\n", iommu_hap_pt_share ? "shares" : "not sharing"); @@ -1910,6 +1914,7 @@ static void __init setup_dom0_devices(struct domain *d) list_add(&pdev->domain_list, &d->arch.pdev_list); domain_context_mapping(d, pdev->bus, pdev->devfn); pci_enable_acs(pdev); + pci_vtd_quirk(pdev); } } spin_unlock(&pcidevs_lock); diff --git a/xen/drivers/passthrough/vtd/quirks.c b/xen/drivers/passthrough/vtd/quirks.c index 3b893e5f6e..f6a7ac0294 100644 --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -47,11 +47,13 @@ #define IS_CTG(id) (id == 0x2a408086) #define IS_ILK(id) (id == 0x00408086 || id == 0x00448086 || id== 0x00628086 || id == 0x006A8086) #define IS_CPT(id) (id == 0x01008086 || id == 0x01048086) +#define IS_SNB_GFX(id) (id == 0x01068086 || id == 0x01168086 || id == 0x01268086 || id == 0x01028086 || id == 0x01128086 || id == 0x01228086 || id == 0x010A8086) u32 ioh_id; u32 igd_id; bool_t rwbf_quirk; static int is_cantiga_b3; +static int is_snb_gfx; static u8 *igd_reg_va; /* @@ -92,6 +94,12 @@ static void cantiga_b3_errata_init(void) is_cantiga_b3 = 1; } +/* check for Sandybridge IGD device ID's */ +static void snb_errata_init(void) +{ + is_snb_gfx = IS_SNB_GFX(igd_id); +} + /* * QUIRK to workaround Cantiga IGD VT-d low power errata. * This errata impacts IGD assignment on Cantiga systems @@ -104,12 +112,15 @@ static void cantiga_b3_errata_init(void) /* * map IGD MMIO+0x2000 page to allow Xen access to IGD 3D register. */ -static void map_igd_reg(void) +static void *map_igd_reg(void) { u64 igd_mmio, igd_reg; - if ( !is_cantiga_b3 || igd_reg_va != NULL ) - return; + if ( !is_cantiga_b3 && !is_snb_gfx ) + return NULL; + + if ( igd_reg_va ) + return igd_reg_va; /* get IGD mmio address in PCI BAR */ igd_mmio = ((u64)pci_conf_read32(0, IGD_DEV, 0, 0x14) << 32) + @@ -125,6 +136,7 @@ static void map_igd_reg(void) #else igd_reg_va = ioremap_nocache(igd_reg, 0x100); #endif + return igd_reg_va; } /* @@ -138,6 +150,9 @@ static int cantiga_vtd_ops_preamble(struct iommu* iommu) if ( !is_igd_drhd(drhd) || !is_cantiga_b3 ) return 0; + if ( !map_igd_reg() ) + return 0; + /* * read IGD register at IGD MMIO + 0x20A4 to force IGD * to exit low power state. Since map_igd_reg() @@ -148,11 +163,64 @@ static int cantiga_vtd_ops_preamble(struct iommu* iommu) } /* + * Sandybridge RC6 power management inhibit state erratum. + * This can cause power high power consumption. + * Workaround is to prevent graphics get into RC6 + * state when doing VT-d IOTLB operations, do the VT-d + * IOTLB operation, and then re-enable RC6 state. + */ +static void snb_vtd_ops_preamble(struct iommu* iommu) +{ + struct intel_iommu *intel = iommu->intel; + struct acpi_drhd_unit *drhd = intel ? intel->drhd : NULL; + s_time_t start_time; + + if ( !is_igd_drhd(drhd) || !is_snb_gfx ) + return; + + if ( !map_igd_reg() ) + return; + + *((volatile u32 *)(igd_reg_va + 0x54)) = 0x000FFFFF; + *((volatile u32 *)(igd_reg_va + 0x700)) = 0; + + start_time = NOW(); + while ( (*((volatile u32 *)(igd_reg_va + 0x2AC)) & 0xF) != 0 ) + { + if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT ) + { + dprintk(XENLOG_INFO VTDPREFIX, + "snb_vtd_ops_preamble: failed to disable idle handshake\n"); + break; + } + cpu_relax(); + } + + *((volatile u32*)(igd_reg_va + 0x50)) = 0x10001; +} + +static void snb_vtd_ops_postamble(struct iommu* iommu) +{ + struct intel_iommu *intel = iommu->intel; + struct acpi_drhd_unit *drhd = intel ? intel->drhd : NULL; + + if ( !is_igd_drhd(drhd) || !is_snb_gfx ) + return; + + if ( !map_igd_reg() ) + return; + + *((volatile u32 *)(igd_reg_va + 0x54)) = 0xA; + *((volatile u32 *)(igd_reg_va + 0x50)) = 0x10000; +} + +/* * call before VT-d translation enable and IOTLB flush operations. */ void vtd_ops_preamble_quirk(struct iommu* iommu) { cantiga_vtd_ops_preamble(iommu); + snb_vtd_ops_preamble(iommu); } /* @@ -160,7 +228,7 @@ void vtd_ops_preamble_quirk(struct iommu* iommu) */ void vtd_ops_postamble_quirk(struct iommu* iommu) { - return; + snb_vtd_ops_postamble(iommu); } /* initialize platform identification flags */ @@ -179,6 +247,8 @@ void __init platform_quirks_init(void) /* initialize cantiga B3 identification */ cantiga_b3_errata_init(); + snb_errata_init(); + /* ioremap IGD MMIO+0x2000 page */ map_igd_reg(); } @@ -250,11 +320,14 @@ void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) id = pci_conf_read32(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 0); switch (id) { - case 0x00878086: + case 0x00878086: /* Kilmer Peak */ case 0x00898086: - case 0x00828086: + case 0x00828086: /* Taylor Peak */ case 0x00858086: - case 0x42388086: + case 0x008F8086: /* Rainbow Peak */ + case 0x00908086: + case 0x00918086: + case 0x42388086: /* Puma Peak */ case 0x422b8086: case 0x422c8086: map_me_phantom_function(domain, 22, map); @@ -262,6 +335,26 @@ void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) default: break; } + } +} +/* + * Mask reporting Intel VT-d faults to IOH core logic: + * - Some platform escalates VT-d faults to platform errors + * - This can cause system failure upon non-fatal VT-d faults + * - Potential security issue if malicious guest trigger VT-d faults + */ +void pci_vtd_quirk(struct pci_dev *pdev) +{ + int bus = pdev->bus; + int dev = PCI_SLOT(pdev->devfn); + int func = PCI_FUNC(pdev->devfn); + int id, val; + + id = pci_conf_read32(bus, dev, func, 0); + if ( id == 0x342e8086 || id == 0x3c288086 ) + { + val = pci_conf_read32(bus, dev, func, 0x1AC); + pci_conf_write32(bus, dev, func, 0x1AC, val | (1 << 31)); } } diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h index 4cb67d482c..9be8206b82 100644 --- a/xen/include/asm-x86/i387.h +++ b/xen/include/asm-x86/i387.h @@ -16,7 +16,6 @@ extern unsigned int xsave_cntxt_size; extern u64 xfeature_mask; -extern bool_t cpu_has_xsaveopt; void xsave_init(void); int xsave_alloc_save_area(struct vcpu *v); @@ -75,84 +74,7 @@ static inline uint64_t get_xcr0(void) return this_cpu(xcr0); } -static inline void xsave(struct vcpu *v) -{ - struct xsave_struct *ptr; - - ptr =(struct xsave_struct *)v->arch.xsave_area; - - asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27" - : - : "a" (-1), "d" (-1), "D"(ptr) - : "memory"); -} - -static inline void xsaveopt(struct vcpu *v) -{ - struct xsave_struct *ptr; - - ptr =(struct xsave_struct *)v->arch.xsave_area; - - asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x37" - : - : "a" (-1), "d" (-1), "D"(ptr) - : "memory"); -} - -static inline void xrstor(struct vcpu *v) -{ - struct xsave_struct *ptr; - - ptr =(struct xsave_struct *)v->arch.xsave_area; - - asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f" - : - : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr)); -} - -extern void init_fpu(void); +extern void setup_fpu(struct vcpu *v); extern void save_init_fpu(struct vcpu *v); -extern void restore_fpu(struct vcpu *v); - -#define unlazy_fpu(v) do { \ - if ( (v)->fpu_dirtied ) \ - save_init_fpu(v); \ -} while ( 0 ) - -#define load_mxcsr(val) do { \ - unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \ - __asm__ __volatile__ ( "ldmxcsr %0" : : "m" (__mxcsr) ); \ -} while ( 0 ) - -static inline void setup_fpu(struct vcpu *v) -{ - /* Avoid recursion. */ - clts(); - - if ( !v->fpu_dirtied ) - { - v->fpu_dirtied = 1; - if ( cpu_has_xsave ) - { - if ( !v->fpu_initialised ) - v->fpu_initialised = 1; - - /* XCR0 normally represents what guest OS set. In case of Xen - * itself, we set all supported feature mask before doing - * save/restore. - */ - set_xcr0(v->arch.xcr0_accum); - xrstor(v); - set_xcr0(v->arch.xcr0); - } - else - { - if ( v->fpu_initialised ) - restore_fpu(v); - else - init_fpu(); - } - } -} #endif /* __ASM_I386_I387_H */ diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index d4bbc57cae..ac7551701b 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -253,21 +253,12 @@ struct xen_sysctl_get_pmstat { typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); -/* - * Status codes. Must be greater than 0 to avoid confusing - * sysctl callers that see 0 as a plain successful return. - */ -#define XEN_CPU_HOTPLUG_STATUS_OFFLINE 1 -#define XEN_CPU_HOTPLUG_STATUS_ONLINE 2 -#define XEN_CPU_HOTPLUG_STATUS_NEW 3 - /* XEN_SYSCTL_cpu_hotplug */ struct xen_sysctl_cpu_hotplug { /* IN variables */ uint32_t cpu; /* Physical cpu. */ #define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 #define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 -#define XEN_SYSCTL_CPU_HOTPLUG_STATUS 2 uint32_t op; /* hotplug opcode */ }; typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h index bb4af9d6e0..09802bd622 100644 --- a/xen/include/xen/rcupdate.h +++ b/xen/include/xen/rcupdate.h @@ -197,4 +197,6 @@ void rcu_check_callbacks(int cpu); void fastcall call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); +int rcu_barrier(void); + #endif /* __XEN_RCUPDATE_H */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 5087781710..c5def0b03f 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -209,7 +209,7 @@ struct domain spinlock_t domain_lock; spinlock_t page_alloc_lock; /* protects all the following fields */ - struct page_list_head page_list; /* linked list, of size tot_pages */ + struct page_list_head page_list; /* linked list */ struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */ unsigned int tot_pages; /* number of pages currently possesed */ unsigned int max_pages; /* maximum value for tot_pages */ diff --git a/xen/include/xen/stop_machine.h b/xen/include/xen/stop_machine.h index 7d4506869f..c63da1b309 100644 --- a/xen/include/xen/stop_machine.h +++ b/xen/include/xen/stop_machine.h @@ -5,7 +5,7 @@ * stop_machine_run: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr for the @fn() - * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS). + * @cpu: the cpu to run @fn() on (or all, if @cpu == NR_CPUS). * * Description: This causes every other cpu to enter a safe point, with * each of which disables interrupts, and finally interrupts are disabled |