authorKeir Fraser <keir.fraser@citrix.com>2008-07-04 16:27:44 +0100
committerKeir Fraser <keir.fraser@citrix.com>2008-07-04 16:27:44 +0100
commit355b0469a8d017b80d9ce1078c90fe628c8b3bbe (patch)
tree9a24bfe6fe9a73ea9635cd06ed0880f63a56720a
parentb12217e1416a2f4ebabfa78b5d0a578d0d879d04 (diff)
x86: MCA support.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
-rw-r--r--xen/arch/x86/cpu/mcheck/Makefile3
-rw-r--r--xen/arch/x86/cpu/mcheck/amd_f10.c131
-rw-r--r--xen/arch/x86/cpu/mcheck/amd_k8.c324
-rw-r--r--xen/arch/x86/cpu/mcheck/amd_nonfatal.c303
-rw-r--r--xen/arch/x86/cpu/mcheck/k7.c7
-rw-r--r--xen/arch/x86/cpu/mcheck/mce.c554
-rw-r--r--xen/arch/x86/cpu/mcheck/mce.h26
-rw-r--r--xen/arch/x86/cpu/mcheck/non-fatal.c30
-rw-r--r--xen/arch/x86/cpu/mcheck/x86_mca.h72
-rw-r--r--xen/arch/x86/nmi.c4
-rw-r--r--xen/arch/x86/traps.c123
-rw-r--r--xen/arch/x86/x86_32/asm-offsets.c6
-rw-r--r--xen/arch/x86/x86_32/entry.S36
-rw-r--r--xen/arch/x86/x86_32/traps.c9
-rw-r--r--xen/arch/x86/x86_64/asm-offsets.c6
-rw-r--r--xen/arch/x86/x86_64/compat/entry.S35
-rw-r--r--xen/arch/x86/x86_64/compat/traps.c9
-rw-r--r--xen/arch/x86/x86_64/entry.S35
-rw-r--r--xen/arch/x86/x86_64/traps.c9
-rw-r--r--xen/common/domain.c4
-rw-r--r--xen/common/event_channel.c15
-rw-r--r--xen/include/Makefile1
-rw-r--r--xen/include/asm-x86/event.h7
-rw-r--r--xen/include/asm-x86/mm.h3
-rw-r--r--xen/include/asm-x86/traps.h50
-rw-r--r--xen/include/public/arch-x86/xen-mca.h279
-rw-r--r--xen/include/public/arch-x86/xen.h4
-rw-r--r--xen/include/xen/event.h3
-rw-r--r--xen/include/xen/sched.h15
29 files changed, 2017 insertions, 86 deletions
diff --git a/xen/arch/x86/cpu/mcheck/Makefile b/xen/arch/x86/cpu/mcheck/Makefile
index a5cbb02b7b..3ecc791402 100644
--- a/xen/arch/x86/cpu/mcheck/Makefile
+++ b/xen/arch/x86/cpu/mcheck/Makefile
@@ -1,4 +1,7 @@
+obj-y += amd_nonfatal.o
obj-y += k7.o
+obj-y += amd_k8.o
+obj-y += amd_f10.o
obj-y += mce.o
obj-y += non-fatal.o
obj-y += p4.o
diff --git a/xen/arch/x86/cpu/mcheck/amd_f10.c b/xen/arch/x86/cpu/mcheck/amd_f10.c
new file mode 100644
index 0000000000..9c26ef9fe8
--- /dev/null
+++ b/xen/arch/x86/cpu/mcheck/amd_f10.c
@@ -0,0 +1,131 @@
+/*
+ * MCA implementation for AMD Family10 CPUs
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* K8 common MCA documentation published at
+ *
+ * AMD64 Architecture Programmer's Manual Volume 2:
+ * System Programming
+ * Publication # 24593 Revision: 3.12
+ * Issue Date: September 2006
+ */
+
+/* Family10 MCA documentation published at
+ *
+ * BIOS and Kernel Developer's Guide
+ * For AMD Family 10h Processors
+ * Publication # 31116 Revision: 1.08
+ * Issue Date: June 10, 2007
+ */
+
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/config.h>
+#include <xen/smp.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+#include "x86_mca.h"
+
+
+static int amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
+{
+ struct mcinfo_extended mc_ext;
+
+ /* Family 0x10 introduced additional MSRs that belong to the
+ * northbridge bank (4). */
+ if (bank != 4)
+ return 0;
+
+ if (!(status & MCi_STATUS_VAL))
+ return 0;
+
+ if (!(status & MCi_STATUS_MISCV))
+ return 0;
+
+ memset(&mc_ext, 0, sizeof(mc_ext));
+ mc_ext.common.type = MC_TYPE_EXTENDED;
+ mc_ext.common.size = sizeof(mc_ext);
+ mc_ext.mc_msrs = 3;
+
+ mc_ext.mc_msr[0].reg = MSR_F10_MC4_MISC1;
+ mc_ext.mc_msr[1].reg = MSR_F10_MC4_MISC2;
+ mc_ext.mc_msr[2].reg = MSR_F10_MC4_MISC3;
+
+ rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value);
+ rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value);
+ rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value);
+
+ x86_mcinfo_add(mi, &mc_ext);
+ return 1;
+}
+
+
+extern void k8_machine_check(struct cpu_user_regs *regs, long error_code);
+
+/* AMD Family10 machine check */
+void amd_f10_mcheck_init(struct cpuinfo_x86 *c)
+{
+ uint64_t value;
+ uint32_t i;
+ int cpu_nr;
+
+ machine_check_vector = k8_machine_check;
+ mc_callback_bank_extended = amd_f10_handler;
+ cpu_nr = smp_processor_id();
+ wmb();
+
+ rdmsrl(MSR_IA32_MCG_CAP, value);
+ if (value & MCG_CTL_P) /* Control register present ? */
+ wrmsrl (MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
+ nr_mce_banks = value & MCG_CAP_COUNT;
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ switch (i) {
+ case 4: /* Northbridge */
+ /* Enable error reporting of all errors,
+ * enable error checking and
+ * disable sync flooding */
+ wrmsrl(MSR_IA32_MC4_CTL, 0x02c3c008ffffffffULL);
+ wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
+
+ /* XXX: We should write the value 0x1087821UL into
+ * register F3x180 here, which sits in
+ * the PCI extended configuration space.
+ * Since this is not possible here, we can only hope
+ * that Dom0 is doing that.
+ */
+ break;
+
+ default:
+ /* Enable error reporting of all errors */
+ wrmsrl(MSR_IA32_MC0_CTL + 4 * i, 0xffffffffffffffffULL);
+ wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
+ break;
+ }
+ }
+
+ set_in_cr4(X86_CR4_MCE);
+ printk("CPU%i: AMD Family10h machine check reporting enabled.\n", cpu_nr);
+}
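The bank loops above rely on the architectural MSR layout: each MCA bank i owns four consecutive MSRs (CTL, STATUS, ADDR, MISC), which is why the code computes MSR_IA32_MC0_STATUS + 4 * i. A minimal sketch of that addressing, with an illustrative helper name that is not part of the patch:

/* Each MCA bank owns four consecutive MSRs: CTL, STATUS, ADDR, MISC.
 * Bank i's register r therefore lives at MSR_IA32_MC0_CTL + 4*i + r. */
static inline uint32_t mca_bank_msr(unsigned int bank, unsigned int reg)
{
    /* reg: 0 = CTL, 1 = STATUS, 2 = ADDR, 3 = MISC */
    return MSR_IA32_MC0_CTL + 4 * bank + reg;
}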
diff --git a/xen/arch/x86/cpu/mcheck/amd_k8.c b/xen/arch/x86/cpu/mcheck/amd_k8.c
new file mode 100644
index 0000000000..55910f2c69
--- /dev/null
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c
@@ -0,0 +1,324 @@
+/*
+ * MCA implementation for AMD K8 CPUs
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* K8 common MCA documentation published at
+ *
+ * AMD64 Architecture Programmer's Manual Volume 2:
+ * System Programming
+ * Publication # 24593 Revision: 3.12
+ * Issue Date: September 2006
+ *
+ * URL:
+ * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf
+ */
+
+/* The related documentation for K8 Revisions A - E is:
+ *
+ * BIOS and Kernel Developer's Guide for
+ * AMD Athlon 64 and AMD Opteron Processors
+ * Publication # 26094 Revision: 3.30
+ * Issue Date: February 2006
+ *
+ * URL:
+ * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26094.PDF
+ */
+
+/* The related documentation for K8 Revisions F - G is:
+ *
+ * BIOS and Kernel Developer's Guide for
+ * AMD NPT Family 0Fh Processors
+ * Publication # 32559 Revision: 3.04
+ * Issue Date: December 2006
+ *
+ * URL:
+ * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/32559.pdf
+ */
+
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+#include <xen/softirq.h>
+
+#include <asm/processor.h>
+#include <asm/shared.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+#include "x86_mca.h"
+
+
+/* Machine Check Handler for AMD K8 family series */
+void k8_machine_check(struct cpu_user_regs *regs, long error_code)
+{
+ struct vcpu *vcpu = current;
+ struct domain *curdom;
+ struct mc_info *mc_data;
+ struct mcinfo_global mc_global;
+ struct mcinfo_bank mc_info;
+ uint64_t status, addrv, miscv, uc;
+ uint32_t i;
+ unsigned int cpu_nr;
+ uint32_t xen_impacted = 0;
+#define DOM_NORMAL 0
+#define DOM0_TRAP 1
+#define DOMU_TRAP 2
+#define DOMU_KILLED 4
+ uint32_t dom_state = DOM_NORMAL;
+
+ /* This handler runs through an interrupt gate, so IPIs from the
+ * polling service routine are deferred until we finish.
+ */
+
+ /* Disable interrupts for the _vcpu_. Otherwise it may be
+ * re-scheduled to another physical CPU, or the impacted process
+ * in the guest continues running with corrupted data. */
+ vcpu_schedule_lock_irq(vcpu);
+
+ mc_data = x86_mcinfo_getptr();
+ cpu_nr = smp_processor_id();
+ curdom = vcpu->domain;
+
+ memset(&mc_global, 0, sizeof(mc_global));
+ mc_global.common.type = MC_TYPE_GLOBAL;
+ mc_global.common.size = sizeof(mc_global);
+
+ mc_global.mc_domid = curdom->domain_id; /* impacted domain */
+ mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
+ BUG_ON(cpu_nr != vcpu->processor);
+ mc_global.mc_core_threadid = 0;
+ mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
+#if 0 /* TODO: on which socket is this physical core?
+ It's not clear to me how to figure this out. */
+ mc_global.mc_socketid = ???;
+#endif
+ mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
+ rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
+
+ /* Quick check, who is impacted */
+ xen_impacted = is_idle_domain(curdom);
+
+ /* Dom0 */
+ x86_mcinfo_clear(mc_data);
+ x86_mcinfo_add(mc_data, &mc_global);
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ struct domain *d;
+
+ rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
+
+ if (!(status & MCi_STATUS_VAL))
+ continue;
+
+ /* An error happened in this bank.
+ * This is expected to be an uncorrectable error,
+ * since correctable errors get polled.
+ */
+ uc = status & MCi_STATUS_UC;
+
+ memset(&mc_info, 0, sizeof(mc_info));
+ mc_info.common.type = MC_TYPE_BANK;
+ mc_info.common.size = sizeof(mc_info);
+ mc_info.mc_bank = i;
+ mc_info.mc_status = status;
+
+ addrv = 0;
+ if (status & MCi_STATUS_ADDRV) {
+ rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv);
+
+ d = maddr_get_owner(addrv);
+ if (d != NULL)
+ mc_info.mc_domid = d->domain_id;
+ }
+
+ miscv = 0;
+ if (status & MCi_STATUS_MISCV)
+ rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv);
+
+ mc_info.mc_addr = addrv;
+ mc_info.mc_misc = miscv;
+
+ x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */
+
+ if (mc_callback_bank_extended)
+ mc_callback_bank_extended(mc_data, i, status);
+
+ /* clear status */
+ wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+
+ status = mc_global.mc_gstatus;
+
+ /* clear MCIP or the cpu enters shutdown state
+ * in case another MCE occurs. */
+ status &= ~MCG_STATUS_MCIP;
+ wrmsrl(MSR_IA32_MCG_STATUS, status);
+ wmb();
+
+ /* For the details see the discussion "MCE/MCA concept" on xen-devel.
+ * The thread started here:
+ * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html
+ */
+
+ /* MCG_STATUS_RIPV:
+ * When this bit is not set, the instruction pointer pushed onto the
+ * stack to resume at is not valid. If Xen was interrupted, we panic
+ * anyway right below. Otherwise it is up to the guest to figure out
+ * whether the guest kernel or guest userland is affected, and to
+ * kill either itself or the affected process.
+ */
+
+ /* MCG_STATUS_EIPV:
+ * Evaluation of EIPV is the job of the guest.
+ */
+
+ if (xen_impacted) {
+ /* Now we are going to panic anyway. Allow interrupts, so that
+ * printk on serial console can work. */
+ vcpu_schedule_unlock_irq(vcpu);
+
+ /* That means a machine check exception
+ * occurred inside Xen. */
+ printk("Machine check exception occurred in Xen.\n");
+
+ /* If MCG_STATUS_EIPV indicates that the IP on the stack is related
+ * to the error, then it makes sense to print a stack trace.
+ * That can be useful for more detailed error analysis and/or
+ * error case studies to figure out whether we can clear
+ * xen_impacted and kill a DomU instead
+ * (i.e. if only a guest control structure is affected, but then
+ * we must ensure the bad pages are not re-used).
+ */
+ if (status & MCG_STATUS_EIPV) {
+ printk("MCE: Instruction Pointer is related to the error. "
+ "Therefore, print the execution state.\n");
+ show_execution_state(regs);
+ }
+ x86_mcinfo_dump(mc_data);
+ panic("End of MCE. Use mcelog to decode above error codes.\n");
+ }
+
+ /* If Dom0 registered a machine check handler, which is only possible
+ * with a PV MCA driver, then ... */
+ if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) {
+ dom_state = DOM0_TRAP;
+
+ /* ... deliver machine check trap to Dom0. */
+ send_guest_trap(dom0, 0, TRAP_machine_check);
+
+ /* Xen may tell Dom0 now to notify the DomU.
+ * But this will happen through a hypercall. */
+ } else
+ /* Dom0 did not register a machine check handler, but if DomU
+ * did so, then... */
+ if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) {
+ dom_state = DOMU_TRAP;
+
+ /* ... deliver machine check trap to DomU */
+ send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check);
+ } else {
+ /* Hmm... no one feels responsible for handling the error.
+ * So do a quick check whether a DomU is impacted or not.
+ */
+ if (curdom == dom0) {
+ /* Dom0 is impacted. Since no one can handle
+ * this error, panic! */
+ x86_mcinfo_dump(mc_data);
+ panic("MCE occurred in Dom0, which can't handle it\n");
+
+ /* UNREACHED */
+ } else {
+ dom_state = DOMU_KILLED;
+
+ /* Enable interrupts. This basically results in
+ * calling sti on the *physical* cpu. But after
+ * domain_crash() the vcpu pointer is invalid.
+ * Therefore, we must unlock the irqs before killing
+ * it. */
+ vcpu_schedule_unlock_irq(vcpu);
+
+ /* DomU is impacted. Kill it and continue. */
+ domain_crash(curdom);
+ }
+ }
+
+
+ switch (dom_state) {
+ case DOM0_TRAP:
+ case DOMU_TRAP:
+ /* Enable interrupts. */
+ vcpu_schedule_unlock_irq(vcpu);
+
+ /* guest softirqs and event callbacks are scheduled
+ * immediately after this handler exits. */
+ break;
+ case DOMU_KILLED:
+ /* Nothing to do here. */
+ break;
+ default:
+ BUG();
+ }
+}
+
+
+/* AMD K8 machine check */
+void amd_k8_mcheck_init(struct cpuinfo_x86 *c)
+{
+ uint64_t value;
+ uint32_t i;
+ int cpu_nr;
+
+ machine_check_vector = k8_machine_check;
+ cpu_nr = smp_processor_id();
+ wmb();
+
+ rdmsrl(MSR_IA32_MCG_CAP, value);
+ if (value & MCG_CTL_P) /* Control register present ? */
+ wrmsrl (MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
+ nr_mce_banks = value & MCG_CAP_COUNT;
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ switch (i) {
+ case 4: /* Northbridge */
+ /* Enable error reporting of all errors,
+ * enable error checking and
+ * disable sync flooding */
+ wrmsrl(MSR_IA32_MC4_CTL, 0x02c3c008ffffffffULL);
+ wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
+ break;
+
+ default:
+ /* Enable error reporting of all errors */
+ wrmsrl(MSR_IA32_MC0_CTL + 4 * i, 0xffffffffffffffffULL);
+ wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
+ break;
+ }
+ }
+
+ set_in_cr4(X86_CR4_MCE);
+ printk("CPU%i: AMD K8 machine check reporting enabled.\n", cpu_nr);
+}
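k8_machine_check() above consumes raw MCi_STATUS values; the field masks it tests (VAL, UC, ADDRV, MISCV) are defined in x86_mca.h further down in this patch. A hedged sketch of a fuller status decode using those masks, for illustration only (Xen itself defers decoding to mcelog in Dom0):

/* Illustrative decode of an MCi_STATUS value with the x86_mca.h masks.
 * Field layout per the AMD64 APM Vol. 2 referenced above. */
static void decode_mci_status(uint64_t status)
{
    if (!(status & MCi_STATUS_VAL))
        return;                                   /* no error logged */
    printk("MCA error code: %04"PRIx64", model-specific: %04"PRIx64"\n",
           status & MCi_STATUS_MCA,
           (status & MCi_STATUS_MSEC) >> 16);
    printk("uncorrected: %d, context corrupt: %d, overflow: %d\n",
           !!(status & MCi_STATUS_UC),
           !!(status & MCi_STATUS_PCC),
           !!(status & MCi_STATUS_OVER));
}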
diff --git a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c
new file mode 100644
index 0000000000..03827fac5f
--- /dev/null
+++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c
@@ -0,0 +1,303 @@
+/*
+ * MCA implementation for AMD CPUs
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* K8 common MCA documentation published at
+ *
+ * AMD64 Architecture Programmer's Manual Volume 2:
+ * System Programming
+ * Publication # 24593 Revision: 3.12
+ * Issue Date: September 2006
+ *
+ * URL:
+ * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf
+ */
+
+/* The related documentation for K8 Revisions A - E is:
+ *
+ * BIOS and Kernel Developer's Guide for
+ * AMD Athlon 64 and AMD Opteron Processors
+ * Publication # 26094 Revision: 3.30
+ * Issue Date: February 2006
+ *
+ * URL:
+ * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26094.PDF
+ */
+
+/* The related documentation for K8 Revisions F - G is:
+ *
+ * BIOS and Kernel Developer's Guide for
+ * AMD NPT Family 0Fh Processors
+ * Publication # 32559 Revision: 3.04
+ * Issue Date: December 2006
+ *
+ * URL:
+ * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/32559.pdf
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <xen/timer.h>
+#include <xen/event.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+#include "x86_mca.h"
+
+static struct timer mce_timer;
+
+#define MCE_PERIOD MILLISECS(15000)
+#define MCE_MIN MILLISECS(2000)
+#define MCE_MAX MILLISECS(30000)
+
+static s_time_t period = MCE_PERIOD;
+static int hw_threshold = 0;
+static int adjust = 0;
+
+/* The polling service routine:
+ * Collects information about correctable errors and notifies
+ * Dom0 via an event.
+ */
+void mce_amd_checkregs(void *info)
+{
+ struct vcpu *vcpu = current;
+ struct mc_info *mc_data;
+ struct mcinfo_global mc_global;
+ struct mcinfo_bank mc_info;
+ uint64_t status, addrv, miscv;
+ unsigned int i;
+ unsigned int event_enabled;
+ unsigned int cpu_nr;
+ int error_found;
+
+ /* We don't need a slot yet. Only allocate one on error. */
+ mc_data = NULL;
+
+ cpu_nr = smp_processor_id();
+ event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
+ error_found = 0;
+
+ memset(&mc_global, 0, sizeof(mc_global));
+ mc_global.common.type = MC_TYPE_GLOBAL;
+ mc_global.common.size = sizeof(mc_global);
+
+ mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
+ mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
+ BUG_ON(cpu_nr != vcpu->processor);
+ mc_global.mc_core_threadid = 0;
+ mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
+#if 0 /* TODO: on which socket is this physical core?
+ It's not clear to me how to figure this out. */
+ mc_global.mc_socketid = ???;
+#endif
+ mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
+ rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ struct domain *d;
+
+ rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
+
+ if (!(status & MCi_STATUS_VAL))
+ continue;
+
+ if (mc_data == NULL) {
+ /* Now we need a slot to fill in error telemetry. */
+ mc_data = x86_mcinfo_getptr();
+ BUG_ON(mc_data == NULL);
+ x86_mcinfo_clear(mc_data);
+ x86_mcinfo_add(mc_data, &mc_global);
+ }
+
+ memset(&mc_info, 0, sizeof(mc_info));
+ mc_info.common.type = MC_TYPE_BANK;
+ mc_info.common.size = sizeof(mc_info);
+ mc_info.mc_bank = i;
+ mc_info.mc_status = status;
+
+ /* Increase polling frequency */
+ error_found = 1;
+
+ addrv = 0;
+ if (status & MCi_STATUS_ADDRV) {
+ rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv);
+
+ d = maddr_get_owner(addrv);
+ if (d != NULL)
+ mc_info.mc_domid = d->domain_id;
+ }
+
+ miscv = 0;
+ if (status & MCi_STATUS_MISCV)
+ rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv);
+
+ mc_info.mc_addr = addrv;
+ mc_info.mc_misc = miscv;
+ x86_mcinfo_add(mc_data, &mc_info);
+
+ if (mc_callback_bank_extended)
+ mc_callback_bank_extended(mc_data, i, status);
+
+ /* clear status */
+ wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL);
+ wmb();
+ }
+
+ if (error_found > 0) {
+ /* If Dom0 enabled the VIRQ_MCA event, then ... */
+ if (event_enabled)
+ /* ... notify it. */
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ else
+ /* ... or dump it */
+ x86_mcinfo_dump(mc_data);
+ }
+
+ adjust += error_found;
+}
+
+/* Polling service routine invoker:
+ * Adjusts the poll frequency at runtime. No error means a low polling
+ * frequency, an error means a higher one.
+ * It uses the hw threshold register introduced with AMD K8 RevF to
+ * detect multiple correctable errors between two polls. In that case,
+ * the polling frequency is increased beyond normal.
+ */
+static void mce_amd_work_fn(void *data)
+{
+ on_each_cpu(mce_amd_checkregs, data, 1, 1);
+
+ if (adjust > 0) {
+ if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) {
+ /* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
+ printk("MCE: polling routine found correctable error. "
+ " Use mcelog to parse above error output.\n");
+ }
+ }
+
+ if (hw_threshold) {
+ uint64_t value;
+ uint32_t counter;
+
+ rdmsrl(MSR_IA32_MC4_MISC, value);
+ /* Only the error counter field is of interest.
+ * The bit field is described in the AMD K8 BKDG, chapter 6.4.5.5.
+ */
+ counter = (value & 0xFFF00000000ULL) >> 32U;
+
+ /* HW does not count *all* kinds of correctable errors.
+ * Thus it is possible that the polling routine finds a
+ * correctable error even though the HW reports nothing.
+ * However, the other way around is not possible (= BUG).
+ */
+ if (counter > 0) {
+ /* HW reported correctable errors, so the polling
+ * routine must have found at least one, too.
+ */
+ BUG_ON(adjust == 0);
+ /* subtract 1 so we don't double-count the error
+ * already seen by the polling service routine */
+ adjust += (counter - 1);
+
+ /* Restart counter */
+ /* No interrupt, reset counter value */
+ value &= ~(0x60FFF00000000ULL);
+ /* Counter enable */
+ value |= (1ULL << 51);
+ wrmsrl(MSR_IA32_MC4_MISC, value);
+ wmb();
+ }
+ }
+
+ if (adjust > 0) {
+ /* Increase polling frequency */
+ adjust++; /* adjust == 1 must have an effect */
+ period /= adjust;
+ } else {
+ /* Decrease polling frequency */
+ period *= 2;
+ }
+ if (period > MCE_MAX) {
+ /* limit: Poll at least every 30s */
+ period = MCE_MAX;
+ }
+ if (period < MCE_MIN) {
+ /* limit: Poll every 2s.
+ * When this limit is reached, an uncorrectable error
+ * is expected to happen if Dom0 does nothing.
+ */
+ period = MCE_MIN;
+ }
+
+ set_timer(&mce_timer, NOW() + period);
+ adjust = 0;
+}
+
+void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c)
+{
+ if (c->x86_vendor != X86_VENDOR_AMD)
+ return;
+
+ /* Assume we are on K8 or newer AMD CPU here */
+
+ /* The threshold bitfields in MSR_IA32_MC4_MISC have
+ * been introduced along with the SVME feature bit. */
+ if (cpu_has(c, X86_FEATURE_SVME)) {
+ uint64_t value;
+
+ /* hw threshold registers present */
+ hw_threshold = 1;
+ rdmsrl(MSR_IA32_MC4_MISC, value);
+
+ if (value & (1ULL << 61)) { /* Locked bit */
+ /* Locked by BIOS. Not available for use */
+ hw_threshold = 0;
+ }
+ if (!(value & (1ULL << 63))) { /* Valid bit */
+ /* No CtrP present */
+ hw_threshold = 0;
+ } else {
+ if (!(value & (1ULL << 62))) { /* Counter Bit */
+ /* No counter field present */
+ hw_threshold = 0;
+ }
+ }
+
+ if (hw_threshold) {
+ /* No interrupt, reset counter value */
+ value &= ~(0x60FFF00000000ULL);
+ /* Counter enable */
+ value |= (1ULL << 51);
+ wrmsrl(MSR_IA32_MC4_MISC, value);
+ /* serialize */
+ wmb();
+ printk(XENLOG_INFO "MCA: Use hw thresholding to adjust polling frequency\n");
+ }
+ }
+
+ init_timer(&mce_timer, mce_amd_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + period);
+
+ return;
+}
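The interval arithmetic in mce_amd_work_fn() is easy to miss among the MSR handling: errors shorten the period (divide by the number found plus one, since adjust is incremented once more before the division), quiet rounds double it, and the result is clamped to [MCE_MIN, MCE_MAX]. Condensed into a standalone sketch:

/* Sketch of the adaptive polling interval computed in mce_amd_work_fn():
 * errors shorten the period, quiet rounds double it, both clamped. */
static s_time_t next_poll_period(s_time_t period, int errors_found)
{
    if (errors_found > 0)
        period /= errors_found + 1;  /* the adjust++ above makes this n+1 */
    else
        period *= 2;                 /* back off while the machine is quiet */
    if (period > MCE_MAX)
        period = MCE_MAX;            /* poll at least every 30s */
    if (period < MCE_MIN)
        period = MCE_MIN;            /* but no more often than every 2s */
    return period;
}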
diff --git a/xen/arch/x86/cpu/mcheck/k7.c b/xen/arch/x86/cpu/mcheck/k7.c
index 456bb5ddde..59e60a14e8 100644
--- a/xen/arch/x86/cpu/mcheck/k7.c
+++ b/xen/arch/x86/cpu/mcheck/k7.c
@@ -66,8 +66,8 @@ static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_co
}
-/* AMD K7 machine check is Intel like */
-void amd_mcheck_init(struct cpuinfo_x86 *c)
+/* AMD K7 machine check */
+void amd_k7_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
@@ -75,7 +75,6 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
machine_check_vector = k7_machine_check;
wmb();
- printk (KERN_INFO "Intel machine check architecture supported.\n");
rdmsr (MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
@@ -90,6 +89,6 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
}
set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ printk (KERN_INFO "CPU%d: AMD K7 machine check reporting enabled.\n",
smp_processor_id());
}
diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index b5ce899246..6406c33ca1 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -8,83 +8,569 @@
#include <xen/kernel.h>
#include <xen/config.h>
#include <xen/smp.h>
+#include <xen/errno.h>
#include <asm/processor.h>
#include <asm/system.h>
#include "mce.h"
+#include "x86_mca.h"
int mce_disabled = 0;
-int nr_mce_banks;
+unsigned int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
+/* XXX For now a fixed array is used. Later this should be changed
+ * to a dynamically allocated array with the size calculated from the
+ * number of physical cpus present in the machine.
+ * The more physical cpus are available, the more entries are needed.
+ */
+#define MAX_MCINFO 10
+
+struct mc_machine_notify {
+ struct mc_info mc;
+ uint32_t fetch_idx;
+ uint32_t valid;
+};
+
+struct mc_machine {
+
+ /* Array structure used for collecting machine check error telemetry. */
+ struct mc_info mc[MAX_MCINFO];
+
+ /* We handle multiple machine check reports locklessly by
+ * iterating through the array using the producer/consumer concept.
+ */
+ /* Producer array index to fill with machine check error data.
+ * Index must be increased atomically. */
+ uint32_t error_idx;
+
+ /* Consumer array index to fetch machine check error data from.
+ * Index must be increased atomically. */
+ uint32_t fetch_idx;
+
+ /* Integer array holding indices into the mc array; it allows
+ * Dom0 to notify a DomU to re-fetch the same machine check error
+ * data. The notification and refetch also use their own
+ * producer/consumer mechanism, because Dom0 may decide not to
+ * report every error to the impacted DomU.
+ */
+ struct mc_machine_notify notify[MAX_MCINFO];
+
+ /* Array index to get fetch_idx from.
+ * Index must be increased atomically. */
+ uint32_t notifyproducer_idx;
+ uint32_t notifyconsumer_idx;
+};
+
+/* Global variable with machine check information. */
+struct mc_machine mc_data;
+
/* Handle unconfigured int18 (should never happen) */
-static fastcall void unexpected_machine_check(struct cpu_user_regs * regs, long error_code)
+static void unexpected_machine_check(struct cpu_user_regs *regs, long error_code)
{
- printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
+ printk(XENLOG_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+ smp_processor_id());
}
+
/* Call the installed machine check handler for this CPU setup. */
-void fastcall (*machine_check_vector)(struct cpu_user_regs *, long error_code) = unexpected_machine_check;
+void (*machine_check_vector)(struct cpu_user_regs *regs, long error_code) = unexpected_machine_check;
+
+/* Init machine check callback handler.
+ * It is used to collect additional information provided by newer
+ * CPU families/models without the need to duplicate the whole handler.
+ * This avoids having many handlers doing nearly the same thing, each
+ * with its own tweaks and bugs. */
+int (*mc_callback_bank_extended)(struct mc_info *, uint16_t, uint64_t) = NULL;
+
+
+static void amd_mcheck_init(struct cpuinfo_x86 *ci)
+{
+
+ switch (ci->x86) {
+ case 6:
+ amd_k7_mcheck_init(ci);
+ break;
+
+ case 0xf:
+ amd_k8_mcheck_init(ci);
+ break;
+
+ case 0x10:
+ amd_f10_mcheck_init(ci);
+ break;
+
+ default:
+ /* Assume that machine check support is available;
+ * the minimum support provided is that of the K8. */
+ amd_k8_mcheck_init(ci);
+ }
+}
/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
{
- if (mce_disabled==1)
+ if (mce_disabled == 1) {
+ printk(XENLOG_INFO "MCE support disabled by bootparam\n");
return;
+ }
+
+ if (!cpu_has(c, X86_FEATURE_MCE)) {
+ printk(XENLOG_INFO "CPU%i: No machine check support available\n",
+ smp_processor_id());
+ return;
+ }
+
+ memset(&mc_data, 0, sizeof(struct mc_machine));
switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- amd_mcheck_init(c);
- break;
+ case X86_VENDOR_AMD:
+ amd_mcheck_init(c);
+ break;
- case X86_VENDOR_INTEL:
+ case X86_VENDOR_INTEL:
#ifndef CONFIG_X86_64
- if (c->x86==5)
- intel_p5_mcheck_init(c);
- if (c->x86==6)
- intel_p6_mcheck_init(c);
+ if (c->x86==5)
+ intel_p5_mcheck_init(c);
+ if (c->x86==6)
+ intel_p6_mcheck_init(c);
#endif
- if (c->x86==15)
- intel_p4_mcheck_init(c);
- break;
+ if (c->x86==15)
+ intel_p4_mcheck_init(c);
+ break;
#ifndef CONFIG_X86_64
- case X86_VENDOR_CENTAUR:
- if (c->x86==5)
- winchip_mcheck_init(c);
- break;
+ case X86_VENDOR_CENTAUR:
+ if (c->x86==5)
+ winchip_mcheck_init(c);
+ break;
#endif
- default:
+ default:
+ break;
+ }
+}
+
+
+static void __init mcheck_disable(char *str)
+{
+ mce_disabled = 1;
+}
+
+static void __init mcheck_enable(char *str)
+{
+ mce_disabled = -1;
+}
+
+custom_param("nomce", mcheck_disable);
+custom_param("mce", mcheck_enable);
+
+
+#include <xen/guest_access.h>
+#include <asm/traps.h>
+
+struct mc_info *x86_mcinfo_getptr(void)
+{
+ struct mc_info *mi;
+ uint32_t entry, next;
+
+ for (;;) {
+ entry = mc_data.error_idx;
+ smp_rmb();
+ next = entry + 1;
+ if (cmpxchg(&mc_data.error_idx, entry, next) == entry)
break;
}
+
+ mi = &(mc_data.mc[(entry % MAX_MCINFO)]);
+ BUG_ON(mc_data.error_idx < mc_data.fetch_idx);
+
+ return mi;
+}
+
+static int x86_mcinfo_matches_guest(const struct mc_info *mi,
+ const struct domain *d, const struct vcpu *v)
+{
+ struct mcinfo_common *mic;
+ struct mcinfo_global *mig;
+
+ x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
+ mig = (struct mcinfo_global *)mic;
+ if (mig == NULL)
+ return 0;
+
+ if (d->domain_id != mig->mc_domid)
+ return 0;
+
+ if (v->vcpu_id != mig->mc_vcpuid)
+ return 0;
+
+ return 1;
}
-static unsigned long old_cr4 __initdata;
-void __init stop_mce(void)
+#define x86_mcinfo_mcdata(idx) (mc_data.mc[(idx % MAX_MCINFO)])
+
+static struct mc_info *x86_mcinfo_getfetchptr(uint32_t *fetch_idx,
+ const struct domain *d, const struct vcpu *v)
{
- old_cr4 = read_cr4();
- clear_in_cr4(X86_CR4_MCE);
+ struct mc_info *mi;
+
+ /* This function is called from the fetch hypercall with
+ * the mc_lock spinlock held. Thus, no need for locking here.
+ */
+ mi = &(x86_mcinfo_mcdata(mc_data.fetch_idx));
+ if ((d != dom0) && !x86_mcinfo_matches_guest(mi, d, v)) {
+ /* Bogus domU command detected. */
+ *fetch_idx = 0;
+ return NULL;
+ }
+
+ *fetch_idx = mc_data.fetch_idx;
+ mc_data.fetch_idx++;
+ BUG_ON(mc_data.fetch_idx > mc_data.error_idx);
+
+ return mi;
}
-void __init restart_mce(void)
+
+static void x86_mcinfo_marknotified(struct xen_mc_notifydomain *mc_notifydomain)
{
- if (old_cr4 & X86_CR4_MCE)
- set_in_cr4(X86_CR4_MCE);
+ struct mc_machine_notify *mn;
+ struct mcinfo_common *mic = NULL;
+ struct mcinfo_global *mig;
+ struct domain *d;
+ int i;
+
+ /* This function is called from the notifier hypercall with
+ * the mc_notify_lock spinlock held. Thus, no need for locking here.
+ */
+
+ /* First invalidate entries for guests that disappeared after
+ * notification (e.g. shutdown/crash). This step prevents the
+ * notification array from filling up with stale/leaked entries.
+ */
+ for (i = mc_data.notifyconsumer_idx; i < mc_data.notifyproducer_idx; i++) {
+ mn = &(mc_data.notify[(i % MAX_MCINFO)]);
+ x86_mcinfo_lookup(mic, &mn->mc, MC_TYPE_GLOBAL);
+ BUG_ON(mic == NULL);
+ mig = (struct mcinfo_global *)mic;
+ d = get_domain_by_id(mig->mc_domid);
+ if (d == NULL) {
+ /* Domain does not exist. */
+ mn->valid = 0;
+ }
+ if ((!mn->valid) && (i == mc_data.notifyconsumer_idx))
+ mc_data.notifyconsumer_idx++;
+ }
+
+ /* Now put in the error telemetry. Since all error data fetchable
+ * by domUs are uncorrectable errors, they are very important,
+ * so we dump them before overwriting them. When a guest takes
+ * that long, we can assume something bad already happened
+ * (crash, hang, etc.).
+ */
+ mn = &(mc_data.notify[(mc_data.notifyproducer_idx % MAX_MCINFO)]);
+
+ if (mn->valid) {
+ struct mcinfo_common *mic = NULL;
+ struct mcinfo_global *mig;
+
+ /* So as not to lose the information, we dump it. */
+ x86_mcinfo_lookup(mic, &mn->mc, MC_TYPE_GLOBAL);
+ BUG_ON(mic == NULL);
+ mig = (struct mcinfo_global *)mic;
+ printk(XENLOG_WARNING "Domain ID %u was notified by Dom0 to "
+ "fetch machine check error telemetry. But Domain ID "
+ "did not do that in time.\n",
+ mig->mc_domid);
+ x86_mcinfo_dump(&mn->mc);
+ }
+
+ memcpy(&mn->mc, &(x86_mcinfo_mcdata(mc_notifydomain->fetch_idx)),
+ sizeof(struct mc_info));
+ mn->fetch_idx = mc_notifydomain->fetch_idx;
+ mn->valid = 1;
+
+ mc_data.notifyproducer_idx++;
+
+ /* By design there can never be more notifications than machine
+ * check errors. If that ever happens, we hit a bug. */
+ BUG_ON(mc_data.notifyproducer_idx > mc_data.fetch_idx);
+ BUG_ON(mc_data.notifyconsumer_idx > mc_data.notifyproducer_idx);
}
-static void __init mcheck_disable(char *str)
+static struct mc_info *x86_mcinfo_getnotifiedptr(uint32_t *fetch_idx,
+ const struct domain *d, const struct vcpu *v)
{
- mce_disabled = 1;
+ struct mc_machine_notify *mn = NULL;
+ uint32_t i;
+ int found;
+
+ /* This function is called from the fetch hypercall with
+ * the mc_notify_lock spinlock held. Thus, no need for locking here.
+ */
+
+ /* The notifier data is filled in the order guests get notified, but
+ * guests may fetch them in a different order. That's why we need
+ * the game with valid/invalid entries. */
+ found = 0;
+ for (i = mc_data.notifyconsumer_idx; i < mc_data.notifyproducer_idx; i++) {
+ mn = &(mc_data.notify[(i % MAX_MCINFO)]);
+ if (!mn->valid) {
+ if (i == mc_data.notifyconsumer_idx)
+ mc_data.notifyconsumer_idx++;
+ continue;
+ }
+ if (x86_mcinfo_matches_guest(&mn->mc, d, v)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* This domain has never been notified. This must be
+ * a bogus domU command. */
+ *fetch_idx = 0;
+ return NULL;
+ }
+
+ BUG_ON(mn == NULL);
+ *fetch_idx = mn->fetch_idx;
+ mn->valid = 0;
+
+ BUG_ON(mc_data.notifyconsumer_idx > mc_data.notifyproducer_idx);
+ return &mn->mc;
}
-static void __init mcheck_enable(char *str)
+
+void x86_mcinfo_clear(struct mc_info *mi)
{
- mce_disabled = -1;
+ memset(mi, 0, sizeof(struct mc_info));
+ x86_mcinfo_nentries(mi) = 0;
}
-custom_param("nomce", mcheck_disable);
-custom_param("mce", mcheck_enable);
+
+int x86_mcinfo_add(struct mc_info *mi, void *mcinfo)
+{
+ int i;
+ unsigned long end1, end2;
+ struct mcinfo_common *mic, *mic_base, *mic_index;
+
+ mic = (struct mcinfo_common *)mcinfo;
+ mic_index = mic_base = x86_mcinfo_first(mi);
+
+ /* go to first free entry */
+ for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
+ mic_index = x86_mcinfo_next(mic_index);
+ }
+
+ /* check if there is enough size */
+ end1 = (unsigned long)((uint8_t *)mic_base + sizeof(struct mc_info));
+ end2 = (unsigned long)((uint8_t *)mic_index + mic->size);
+
+ if (end1 < end2)
+ return -ENOSPC; /* No space. Can't add entry. */
+
+ /* there's enough space. add entry. */
+ memcpy(mic_index, mic, mic->size);
+ x86_mcinfo_nentries(mi)++;
+
+ return 0;
+}
+
+
+/* Dump machine check information in a format
+ * mcelog can parse. This is used only when
+ * Dom0 does not take the notification. */
+void x86_mcinfo_dump(struct mc_info *mi)
+{
+ struct mcinfo_common *mic = NULL;
+ struct mcinfo_global *mc_global;
+ struct mcinfo_bank *mc_bank;
+
+ /* first print the global info */
+ x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
+ if (mic == NULL)
+ return;
+ mc_global = (struct mcinfo_global *)mic;
+ if (mc_global->mc_flags & MC_FLAG_UNCORRECTABLE) {
+ printk(XENLOG_WARNING
+ "CPU%d: Machine Check Exception: %16"PRIx64"\n",
+ mc_global->mc_coreid, mc_global->mc_gstatus);
+ } else {
+ printk(XENLOG_WARNING "MCE: The hardware reports a non "
+ "fatal, correctable incident occured on "
+ "CPU %d.\n",
+ mc_global->mc_coreid);
+ }
+
+ /* then the bank information */
+ x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK); /* finds the first entry */
+ do {
+ if (mic == NULL)
+ return;
+ if (mic->type != MC_TYPE_BANK)
+ continue;
+
+ mc_bank = (struct mcinfo_bank *)mic;
+
+ printk(XENLOG_WARNING "Bank %d: %16"PRIx64,
+ mc_bank->mc_bank,
+ mc_bank->mc_status);
+ if (mc_bank->mc_status & MCi_STATUS_MISCV)
+ printk("[%16"PRIx64"]", mc_bank->mc_misc);
+ if (mc_bank->mc_status & MCi_STATUS_ADDRV)
+ printk(" at %16"PRIx64, mc_bank->mc_addr);
+
+ printk("\n");
+ mic = x86_mcinfo_next(mic); /* next entry */
+ if ((mic == NULL) || (mic->size == 0))
+ break;
+ } while (1);
+}
+
+
+
+/* Machine Check Architecture Hypercall */
+long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
+{
+ long ret = 0;
+ struct xen_mc curop, *op = &curop;
+ struct vcpu *v = current;
+ struct domain *domU;
+ struct xen_mc_fetch *mc_fetch;
+ struct xen_mc_notifydomain *mc_notifydomain;
+ struct mc_info *mi;
+ uint32_t flags;
+ uint32_t fetch_idx;
+ uint16_t vcpuid;
+ /* Use a different lock for the notify hypercall in order to allow
+ * a DomU to fetch mc data while Dom0 notifies another DomU. */
+ static DEFINE_SPINLOCK(mc_lock);
+ static DEFINE_SPINLOCK(mc_notify_lock);
+
+ if ( copy_from_guest(op, u_xen_mc, 1) )
+ return -EFAULT;
+
+ if ( op->interface_version != XEN_MCA_INTERFACE_VERSION )
+ return -EACCES;
+
+ switch ( op->cmd ) {
+ case XEN_MC_fetch:
+ /* This hypercall is for any domain */
+ mc_fetch = &op->u.mc_fetch;
+
+ switch (mc_fetch->flags) {
+ case XEN_MC_CORRECTABLE:
+ /* But polling mode is Dom0 only, because
+ * correctable errors are reported to Dom0 only */
+ if ( !IS_PRIV(v->domain) )
+ return -EPERM;
+ break;
+
+ case XEN_MC_TRAP:
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ flags = XEN_MC_OK;
+ spin_lock(&mc_lock);
+
+ if ( IS_PRIV(v->domain) ) {
+ /* this must be Dom0. So a notify hypercall
+ * can't have happened before. */
+ mi = x86_mcinfo_getfetchptr(&fetch_idx, dom0, v);
+ } else {
+ /* Hypercall comes from an unprivileged domain */
+ domU = v->domain;
+ if (guest_has_trap_callback(dom0, 0, TRAP_machine_check)) {
+ /* Dom0 must have notified this DomU before
+ * via the notify hypercall. */
+ mi = x86_mcinfo_getnotifiedptr(&fetch_idx, domU, v);
+ } else {
+ /* Xen notified the DomU. */
+ mi = x86_mcinfo_getfetchptr(&fetch_idx, domU, v);
+ }
+ }
+
+ if (mi) {
+ memcpy(&mc_fetch->mc_info, mi,
+ sizeof(struct mc_info));
+ } else {
+ /* There is no data for a bogus DomU command. */
+ flags |= XEN_MC_NODATA;
+ memset(&mc_fetch->mc_info, 0, sizeof(struct mc_info));
+ }
+
+ mc_fetch->flags = flags;
+ mc_fetch->fetch_idx = fetch_idx;
+
+ if ( copy_to_guest(u_xen_mc, op, 1) )
+ ret = -EFAULT;
+
+ spin_unlock(&mc_lock);
+ break;
+
+ case XEN_MC_notifydomain:
+ /* This hypercall is for Dom0 only */
+ if ( !IS_PRIV(v->domain) )
+ return -EPERM;
+
+ spin_lock(&mc_notify_lock);
+
+ mc_notifydomain = &op->u.mc_notifydomain;
+ domU = get_domain_by_id(mc_notifydomain->mc_domid);
+ vcpuid = mc_notifydomain->mc_vcpuid;
+
+ if ((domU == NULL) || (domU == dom0)) {
+ /* It's not possible to notify a non-existent domain
+ * or the dom0. */
+ spin_unlock(&mc_notify_lock);
+ return -EACCES;
+ }
+
+ if (vcpuid >= MAX_VIRT_CPUS) {
+ /* It's not possible to notify a vcpu Xen can't
+ * assign to a domain. */
+ spin_unlock(&mc_notify_lock);
+ return -EACCES;
+ }
+
+ mc_notifydomain->flags = XEN_MC_OK;
+
+ mi = &(x86_mcinfo_mcdata(mc_notifydomain->fetch_idx));
+ if (!x86_mcinfo_matches_guest(mi, domU, domU->vcpu[vcpuid])) {
+ /* The error telemetry is not for the guest Dom0
+ * wants to notify. */
+ mc_notifydomain->flags |= XEN_MC_NOMATCH;
+ } else if ( guest_has_trap_callback(domU, vcpuid,
+ TRAP_machine_check) )
+ {
+ /* Send notification */
+ if ( send_guest_trap(domU, vcpuid, TRAP_machine_check) )
+ mc_notifydomain->flags |= XEN_MC_NOTDELIVERED;
+ } else
+ mc_notifydomain->flags |= XEN_MC_CANNOTHANDLE;
+
+#ifdef DEBUG
+ /* sanity check - these two flags are mutually exclusive */
+ if ((mc_notifydomain->flags & XEN_MC_CANNOTHANDLE) &&
+ (mc_notifydomain->flags & XEN_MC_NOTDELIVERED))
+ BUG();
+#endif
+
+ if ( copy_to_guest(u_xen_mc, op, 1) )
+ ret = -EFAULT;
+
+ if (ret == 0) {
+ x86_mcinfo_marknotified(mc_notifydomain);
+ }
+
+ spin_unlock(&mc_notify_lock);
+ break;
+ }
+
+ return ret;
+}
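do_mca() above implements both sub-commands of the new hypercall, which is wired into slot 48 of the hypercall tables further down. A hypothetical guest-side caller of XEN_MC_fetch, under the assumption that the guest kernel provides a HYPERVISOR_mca() stub for the new hypercall number (the stub name is an assumption, not part of this patch; the struct and flag names come from the new public/arch-x86/xen-mca.h):

/* Hypothetical guest-side fetch of MCE telemetry via XEN_MC_fetch.
 * HYPERVISOR_mca() is an assumed guest hypercall stub. */
static int fetch_mc_telemetry(struct mc_info *out)
{
    struct xen_mc mc;

    memset(&mc, 0, sizeof(mc));
    mc.cmd = XEN_MC_fetch;
    mc.interface_version = XEN_MCA_INTERFACE_VERSION;
    mc.u.mc_fetch.flags = XEN_MC_CORRECTABLE;  /* polling data; Dom0 only */

    if (HYPERVISOR_mca(&mc))
        return -1;                             /* hypercall failed */
    if (mc.u.mc_fetch.flags & XEN_MC_NODATA)
        return 0;                              /* nothing to fetch */
    memcpy(out, &mc.u.mc_fetch.mc_info, sizeof(*out));
    return 1;                                  /* telemetry copied to *out */
}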
diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h
index 0f1d38f734..20f3491147 100644
--- a/xen/arch/x86/cpu/mcheck/mce.h
+++ b/xen/arch/x86/cpu/mcheck/mce.h
@@ -1,14 +1,30 @@
#include <xen/init.h>
+#include <asm/traps.h>
-void amd_mcheck_init(struct cpuinfo_x86 *c);
+/* Init functions */
+void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
+void amd_k7_mcheck_init(struct cpuinfo_x86 *c);
+void amd_k8_mcheck_init(struct cpuinfo_x86 *c);
+void amd_f10_mcheck_init(struct cpuinfo_x86 *c);
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);
-/* Call the installed machine check handler for this CPU setup. */
-extern fastcall void (*machine_check_vector)(struct cpu_user_regs *, long error_code);
+/* Function pointer used in the handlers to collect additional information
+ * provided by newer CPU families/models without the need to duplicate
+ * the whole handler, which would result in various handlers each with
+ * its own tweaks and bugs */
+extern int (*mc_callback_bank_extended)(struct mc_info *mi,
+ uint16_t bank, uint64_t status);
-extern int mce_disabled __initdata;
-extern int nr_mce_banks;
+/* Helper functions used for collecting error telemetry */
+struct mc_info *x86_mcinfo_getptr(void);
+void x86_mcinfo_clear(struct mc_info *mi);
+int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
+void x86_mcinfo_dump(struct mc_info *mi);
+
+/* Global variables */
+extern int mce_disabled __initdata;
+extern unsigned int nr_mce_banks;
diff --git a/xen/arch/x86/cpu/mcheck/non-fatal.c b/xen/arch/x86/cpu/mcheck/non-fatal.c
index 42ea833166..4984eed757 100644
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c
@@ -68,19 +68,29 @@ static int __init init_nonfatal_mce_checker(void)
if (!cpu_has(c, X86_FEATURE_MCA))
return -ENODEV;
- /* Some Athlons misbehave when we frob bank 0 */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 6)
- firstbank = 1;
- else
- firstbank = 0;
-
/*
* Check for non-fatal errors every MCE_RATE s
*/
- init_timer(&mce_timer, mce_work_fn, NULL, 0);
- set_timer(&mce_timer, NOW() + MCE_PERIOD);
- printk(KERN_INFO "Machine check exception polling timer started.\n");
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (c->x86 == 6) { /* K7 */
+ firstbank = 1;
+ init_timer(&mce_timer, mce_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ break;
+ }
+
+ /* Assume we are on K8 or newer AMD CPU here */
+ amd_nonfatal_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_INTEL:
+ init_timer(&mce_timer, mce_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ break;
+ }
+
+ printk(KERN_INFO "MCA: Machine check polling timer started.\n");
return 0;
}
__initcall(init_nonfatal_mce_checker);
diff --git a/xen/arch/x86/cpu/mcheck/x86_mca.h b/xen/arch/x86/cpu/mcheck/x86_mca.h
new file mode 100644
index 0000000000..b20808e451
--- /dev/null
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -0,0 +1,72 @@
+/*
+ * MCA implementation for AMD K7/K8 CPUs
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* The MCA/MCE MSRs should not be used anywhere else.
+ * They are cpu family/model specific and are only for use
+ * by the machine check handling code.
+ * So we define them here rather than in <asm/msr.h>.
+ */
+
+
+/* Bitfield of the MSR_IA32_MCG_CAP register */
+#define MCG_CAP_COUNT 0x00000000000000ffULL
+#define MCG_CTL_P 0x0000000000000100ULL
+/* Bits 9-63 are reserved */
+
+/* Bitfield of the MSR_IA32_MCG_STATUS register */
+#define MCG_STATUS_RIPV 0x0000000000000001ULL
+#define MCG_STATUS_EIPV 0x0000000000000002ULL
+#define MCG_STATUS_MCIP 0x0000000000000004ULL
+/* Bits 3-63 are reserved */
+
+/* Bitfield of MSR_K8_MCi_STATUS registers */
+/* MCA error code */
+#define MCi_STATUS_MCA 0x000000000000ffffULL
+/* model-specific error code */
+#define MCi_STATUS_MSEC 0x00000000ffff0000ULL
+/* Other information */
+#define MCi_STATUS_OTHER 0x01ffffff00000000ULL
+/* processor context corrupt */
+#define MCi_STATUS_PCC 0x0200000000000000ULL
+/* MSR_K8_MCi_ADDR register valid */
+#define MCi_STATUS_ADDRV 0x0400000000000000ULL
+/* MSR_K8_MCi_MISC register valid */
+#define MCi_STATUS_MISCV 0x0800000000000000ULL
+/* error condition enabled */
+#define MCi_STATUS_EN 0x1000000000000000ULL
+/* uncorrected error */
+#define MCi_STATUS_UC 0x2000000000000000ULL
+/* status register overflow */
+#define MCi_STATUS_OVER 0x4000000000000000ULL
+/* valid */
+#define MCi_STATUS_VAL 0x8000000000000000ULL
+
+/* Bitfield of the MCi_STATUS_OTHER field */
+/* reserved bits */
+#define MCi_STATUS_OTHER_RESERVED1 0x00001fff00000000ULL
+/* uncorrectable ECC error */
+#define MCi_STATUS_OTEHR_UC_ECC 0x0000200000000000ULL
+/* correctable ECC error */
+#define MCi_STATUS_OTHER_C_ECC 0x0000400000000000ULL
+/* ECC syndrome of an ECC error */
+#define MCi_STATUS_OTHER_ECC_SYNDROME 0x007f800000000000ULL
+/* reserved bits */
+#define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL
+
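The OTHER-field masks above pack several sub-fields into the upper half of MCi_STATUS; for instance, the ECC syndrome mask 0x007f800000000000 covers bits 47-54. A one-line extraction sketch, with the shift derived from that mask:

/* Sketch: extract the ECC syndrome (bits 47-54) from an MCi_STATUS
 * value, using the mask defined above. */
static inline uint8_t mci_ecc_syndrome(uint64_t status)
{
    return (status & MCi_STATUS_OTHER_ECC_SYNDROME) >> 47;
}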
diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
index 7b69e71264..580707e947 100644
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -457,10 +457,10 @@ static void do_nmi_stats(unsigned char key)
if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) )
return;
- if ( v->nmi_pending || v->nmi_masked )
+ if ( v->nmi_pending || (v->trap_priority >= VCPU_TRAP_NMI) )
printk("dom0 vpu0: NMI %s%s\n",
v->nmi_pending ? "pending " : "",
- v->nmi_masked ? "masked " : "");
+ (v->trap_priority >= VCPU_TRAP_NMI) ? "masked " : "");
else
printk("dom0 vcpu0: NMI neither pending nor masked\n");
}
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 89032bb91d..1b65a9bb9d 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -61,6 +61,7 @@
#include <asm/msr.h>
#include <asm/shared.h>
#include <asm/x86_emulate.h>
+#include <asm/traps.h>
#include <asm/hvm/vpt.h>
#include <public/arch-x86/cpuid.h>
@@ -486,6 +487,20 @@ static unsigned int check_guest_io_breakpoint(struct vcpu *v,
}
/*
+ * Called from asm to set up the MCE trapbounce info.
+ * Returns 0 if no callback is set up, else 1.
+ */
+asmlinkage int set_guest_machinecheck_trapbounce(void)
+{
+ struct vcpu *v = current;
+ struct trap_bounce *tb = &v->arch.trap_bounce;
+
+ do_guest_trap(TRAP_machine_check, guest_cpu_user_regs(), 0);
+ tb->flags &= ~TBF_EXCEPTION; /* not needed for MCE delivery path */
+ return !null_trap_bounce(v, tb);
+}
+
+/*
* Called from asm to set up the NMI trapbounce info.
* Returns 0 if no callback is set up, else 1.
*/
@@ -904,8 +919,6 @@ asmlinkage void do_int3(struct cpu_user_regs *regs)
asmlinkage void do_machine_check(struct cpu_user_regs *regs)
{
- extern fastcall void (*machine_check_vector)(
- struct cpu_user_regs *, long error_code);
machine_check_vector(regs, regs->error_code);
}
@@ -2678,25 +2691,51 @@ asmlinkage void do_general_protection(struct cpu_user_regs *regs)
panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
}
+static DEFINE_PER_CPU(struct softirq_trap, softirq_trap);
+
static void nmi_mce_softirq(void)
{
- /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
- vcpu_kick(dom0->vcpu[0]);
+ int cpu = smp_processor_id();
+ struct softirq_trap *st = &per_cpu(softirq_trap, cpu);
+ cpumask_t affinity;
+
+ BUG_ON(st == NULL);
+ BUG_ON(st->vcpu == NULL);
+
+ /* Set the tmp value unconditionally, so that
+ * the check in the iret hypercall works. */
+ st->vcpu->cpu_affinity_tmp = st->vcpu->cpu_affinity;
+
+ if ((cpu != st->processor)
+ || (st->processor != st->vcpu->processor))
+ {
+ /* We are on a different physical cpu.
+ * Make sure to wake up the vcpu on the
+ * specified processor.
+ */
+ cpus_clear(affinity);
+ cpu_set(st->processor, affinity);
+ vcpu_set_affinity(st->vcpu, &affinity);
+
+ /* Affinity is restored in the iret hypercall. */
+ }
+
+ /* Only used to defer wakeup of domain/vcpu to
+ * a safe (non-NMI/MCE) context.
+ */
+ vcpu_kick(st->vcpu);
}
static void nmi_dom0_report(unsigned int reason_idx)
{
- struct domain *d;
- struct vcpu *v;
+ struct domain *d = dom0;
- if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) )
+ if ( (d == NULL) || (d->vcpu[0] == NULL) )
return;
set_bit(reason_idx, nmi_reason(d));
- /* Not safe to wake a vcpu here, or even to schedule a tasklet! */
- if ( !test_and_set_bool(v->nmi_pending) )
- raise_softirq(NMI_MCE_SOFTIRQ);
+ send_guest_trap(d, 0, TRAP_nmi);
}
asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
@@ -3010,6 +3049,70 @@ long unregister_guest_nmi_callback(void)
return 0;
}
+int guest_has_trap_callback(struct domain *d, uint16_t vcpuid, unsigned int trap_nr)
+{
+ struct vcpu *v;
+ struct trap_info *t;
+
+ BUG_ON(d == NULL);
+ BUG_ON(vcpuid >= MAX_VIRT_CPUS);
+
+ /* Sanity check - XXX should be more fine-grained. */
+ BUG_ON(trap_nr > TRAP_syscall);
+
+ v = d->vcpu[vcpuid];
+ t = &v->arch.guest_context.trap_ctxt[trap_nr];
+
+ return (t->address != 0);
+}
+
+
+int send_guest_trap(struct domain *d, uint16_t vcpuid, unsigned int trap_nr)
+{
+ struct vcpu *v;
+ struct softirq_trap *st;
+
+ BUG_ON(d == NULL);
+ BUG_ON(vcpuid >= MAX_VIRT_CPUS);
+ v = d->vcpu[vcpuid];
+
+ switch (trap_nr) {
+ case TRAP_nmi:
+ if ( !test_and_set_bool(v->nmi_pending) ) {
+ st = &per_cpu(softirq_trap, smp_processor_id());
+ st->domain = dom0;
+ st->vcpu = dom0->vcpu[0];
+ st->processor = st->vcpu->processor;
+
+ /* not safe to wake up a vcpu here */
+ raise_softirq(NMI_MCE_SOFTIRQ);
+ return 0;
+ }
+ break;
+
+ case TRAP_machine_check:
+
+ /* We are called by the machine check (exception or polling) handlers
+ * on the physical CPU that reported a machine check error. */
+
+ if ( !test_and_set_bool(v->mce_pending) ) {
+ st = &per_cpu(softirq_trap, smp_processor_id());
+ st->domain = d;
+ st->vcpu = v;
+ st->processor = v->processor;
+
+ /* not safe to wake up a vcpu here */
+ raise_softirq(NMI_MCE_SOFTIRQ);
+ return 0;
+ }
+ break;
+ }
+
+ /* delivery failed */
+ return -EIO;
+}
+
+
long do_set_trap_table(XEN_GUEST_HANDLE(const_trap_info_t) traps)
{
struct trap_info cur;
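send_guest_trap() above stores its context in the per-cpu softirq_trap before raising NMI_MCE_SOFTIRQ; the structure itself is declared in asm-x86/traps.h, whose hunk is not shown in this excerpt. A sketch of its likely shape, inferred from the fields accessed in nmi_mce_softirq() and send_guest_trap():

/* Inferred shape of struct softirq_trap (declared in asm-x86/traps.h);
 * the fields match the accesses in the handlers above. */
struct softirq_trap {
    struct domain *domain;    /* domain to deliver the trap to */
    struct vcpu *vcpu;        /* vcpu to kick from the softirq */
    unsigned int processor;   /* physical cpu that saw the error */
};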
diff --git a/xen/arch/x86/x86_32/asm-offsets.c b/xen/arch/x86/x86_32/asm-offsets.c
index ad8d1a2e6f..0ed2a0a26f 100644
--- a/xen/arch/x86/x86_32/asm-offsets.c
+++ b/xen/arch/x86/x86_32/asm-offsets.c
@@ -67,7 +67,11 @@ void __dummy__(void)
arch.guest_context.kernel_sp);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
- OFFSET(VCPU_nmi_masked, struct vcpu, nmi_masked);
+ OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
+ OFFSET(VCPU_old_trap_priority, struct vcpu, old_trap_priority);
+ OFFSET(VCPU_trap_priority, struct vcpu, trap_priority);
+ DEFINE(VCPU_TRAP_NMI, VCPU_TRAP_NMI);
+ DEFINE(VCPU_TRAP_MCE, VCPU_TRAP_MCE);
DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
BLANK();
diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S
index 02409f949a..8a08617981 100644
--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -229,6 +229,8 @@ test_all_events:
shl $IRQSTAT_shift,%eax
test %ecx,irq_stat(%eax,1)
jnz process_softirqs
+ testb $1,VCPU_mce_pending(%ebx)
+ jnz process_mce
testb $1,VCPU_nmi_pending(%ebx)
jnz process_nmi
test_guest_events:
@@ -255,15 +257,35 @@ process_softirqs:
jmp test_all_events
ALIGN
+/* %ebx: struct vcpu */
+process_mce:
+ cmpw $VCPU_TRAP_MCE,VCPU_trap_priority(%ebx)
+ jae test_guest_events
+ sti
+ movb $0,VCPU_mce_pending(%ebx)
+ call set_guest_machinecheck_trapbounce
+ test %eax,%eax
+ jz test_all_events
+ movw VCPU_trap_priority(%ebx),%dx # save priority for the
+ movw %dx,VCPU_old_trap_priority(%ebx) # iret hypercall
+ movw $VCPU_TRAP_MCE,VCPU_trap_priority(%ebx)
+ jmp process_trap
+
+ ALIGN
+/* %ebx: struct vcpu */
process_nmi:
- testb $1,VCPU_nmi_masked(%ebx)
- jnz test_guest_events
+ cmpw $VCPU_TRAP_NMI,VCPU_trap_priority(%ebx)
+ jae test_guest_events
sti
movb $0,VCPU_nmi_pending(%ebx)
call set_guest_nmi_trapbounce
test %eax,%eax
jz test_all_events
- movb $1,VCPU_nmi_masked(%ebx)
+ movw VCPU_trap_priority(%ebx),%dx # save priority for the
+ movw %dx,VCPU_old_trap_priority(%ebx) # iret hypercall
+ movw $VCPU_TRAP_NMI,VCPU_trap_priority(%ebx)
+ /* FALLTHROUGH */
+process_trap:
leal VCPU_trap_bounce(%ebx),%edx
call create_bounce_frame
jmp test_all_events
@@ -681,6 +703,10 @@ ENTRY(hypercall_table)
.long do_sysctl /* 35 */
.long do_domctl
.long do_kexec_op
+ .rept __HYPERVISOR_arch_0-((.-hypercall_table)/4)
+ .long do_ni_hypercall
+ .endr
+ .long do_mca /* 48 */
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -724,6 +750,10 @@ ENTRY(hypercall_args_table)
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
.byte 2 /* do_kexec_op */
+ .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
+ .byte 0 /* do_ni_hypercall */
+ .endr
+ .byte 1 /* do_mca */ /* 48 */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
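The process_mce/process_nmi paths above replace the old boolean nmi_masked with a priority comparison: a pending trap is bounced to the guest only if it outranks the one currently being serviced, and the current priority is saved for do_iret to restore. A C rendering of that gate (a sketch; the real test is the cmpw/jae pair above, and the concrete VCPU_TRAP_* ordering is presumably defined in the sched.h hunk not shown here):

/* Sketch of the delivery gate implemented by the cmpw/jae pairs above:
 * deliver a pending trap only if it outranks the one in progress. */
static int may_deliver_trap(const struct vcpu *v, uint16_t new_priority)
{
    return v->trap_priority < new_priority;
}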
diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c
index a09d15b523..f99050350c 100644
--- a/xen/arch/x86/x86_32/traps.c
+++ b/xen/arch/x86/x86_32/traps.c
@@ -255,8 +255,13 @@ unsigned long do_iret(void)
goto exit_and_crash;
}
- /* No longer in NMI context. */
- v->nmi_masked = 0;
+ /* Restore affinity. */
+ if ((v->trap_priority >= VCPU_TRAP_NMI)
+ && !cpus_equal(v->cpu_affinity_tmp, v->cpu_affinity))
+ vcpu_set_affinity(v, &v->cpu_affinity_tmp);
+
+ /* Restore previous trap priority */
+ v->trap_priority = v->old_trap_priority;
/* Restore upcall mask from supplied EFLAGS.IF. */
vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 54a27b2ae1..ca00490756 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -92,7 +92,11 @@ void __dummy__(void)
OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
- OFFSET(VCPU_nmi_masked, struct vcpu, nmi_masked);
+ OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
+ OFFSET(VCPU_old_trap_priority, struct vcpu, old_trap_priority);
+ OFFSET(VCPU_trap_priority, struct vcpu, trap_priority);
+ DEFINE(VCPU_TRAP_NMI, VCPU_TRAP_NMI);
+ DEFINE(VCPU_TRAP_MCE, VCPU_TRAP_MCE);
DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
DEFINE(_VGCF_syscall_disables_events, _VGCF_syscall_disables_events);
BLANK();
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 0251580e79..085babedc4 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -101,6 +101,8 @@ ENTRY(compat_test_all_events)
leaq irq_stat(%rip),%rcx
testl $~0,(%rcx,%rax,1)
jnz compat_process_softirqs
+ testb $1,VCPU_mce_pending(%rbx)
+ jnz compat_process_mce
testb $1,VCPU_nmi_pending(%rbx)
jnz compat_process_nmi
compat_test_guest_events:
@@ -129,15 +131,34 @@ compat_process_softirqs:
ALIGN
/* %rbx: struct vcpu */
+compat_process_mce:
+ cmpw $VCPU_TRAP_MCE,VCPU_trap_priority(%rbx)
+ jae compat_test_guest_events
+ sti
+ movb $0,VCPU_mce_pending(%rbx)
+ call set_guest_machinecheck_trapbounce
+ testl %eax,%eax
+ jz compat_test_all_events
+ movw VCPU_trap_priority(%rbx),%dx # save priority for the
+ movw %dx,VCPU_old_trap_priority(%rbx) # iret hypercall
+ movw $VCPU_TRAP_MCE,VCPU_trap_priority(%rbx)
+ jmp compat_process_trap
+
+ ALIGN
+/* %rbx: struct vcpu */
compat_process_nmi:
- testb $1,VCPU_nmi_masked(%rbx)
- jnz compat_test_guest_events
+ cmpw $VCPU_TRAP_NMI,VCPU_trap_priority(%rbx)
+ jae compat_test_guest_events
sti
movb $0,VCPU_nmi_pending(%rbx)
call set_guest_nmi_trapbounce
testl %eax,%eax
jz compat_test_all_events
- movb $1,VCPU_nmi_masked(%rbx)
+ movw VCPU_trap_priority(%rbx),%dx # save priority for the
+ movw %dx,VCPU_old_trap_priority(%rbx) # iret hypercall
+ movw $VCPU_TRAP_NMI,VCPU_trap_priority(%rbx)
+ /* FALLTHROUGH */
+compat_process_trap:
leaq VCPU_trap_bounce(%rbx),%rdx
call compat_create_bounce_frame
jmp compat_test_all_events
@@ -386,6 +407,10 @@ ENTRY(compat_hypercall_table)
.quad do_sysctl /* 35 */
.quad do_domctl
.quad compat_kexec_op
+ .rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
+ .quad compat_ni_hypercall
+ .endr
+ .quad do_mca /* 48 */
.rept NR_hypercalls-((.-compat_hypercall_table)/8)
.quad compat_ni_hypercall
.endr
@@ -429,6 +454,10 @@ ENTRY(compat_hypercall_args_table)
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
.byte 2 /* compat_kexec_op */
+ .rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
+ .byte 0 /* compat_ni_hypercall */
+ .endr
+ .byte 1 /* do_mca */ /* 48 */
.rept NR_hypercalls-(.-compat_hypercall_args_table)
.byte 0 /* compat_ni_hypercall */
.endr
diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c
index 65c1e90147..9d381ab2f6 100644
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -121,8 +121,13 @@ unsigned int compat_iret(void)
else
regs->_esp += 16;
- /* No longer in NMI context. */
- v->nmi_masked = 0;
+ /* Restore affinity. */
+ if ((v->trap_priority >= VCPU_TRAP_NMI)
+ && !cpus_equal(v->cpu_affinity_tmp, v->cpu_affinity))
+ vcpu_set_affinity(v, &v->cpu_affinity_tmp);
+
+ /* Restore previous trap priority. */
+ v->trap_priority = v->old_trap_priority;
/* Restore upcall mask from supplied EFLAGS.IF. */
vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 6c14d233c2..d6491ce2ed 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -205,6 +205,8 @@ test_all_events:
leaq irq_stat(%rip),%rcx
testl $~0,(%rcx,%rax,1)
jnz process_softirqs
+ testb $1,VCPU_mce_pending(%rbx)
+ jnz process_mce
testb $1,VCPU_nmi_pending(%rbx)
jnz process_nmi
test_guest_events:
@@ -231,15 +233,34 @@ process_softirqs:
ALIGN
/* %rbx: struct vcpu */
+process_mce:
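+        # Deliver the MCE only if this vcpu is not already servicing a
+        # trap of equal or higher priority.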
+ cmpw $VCPU_TRAP_MCE,VCPU_trap_priority(%rbx)
+ jae test_guest_events
+ sti
+ movb $0,VCPU_mce_pending(%rbx)
+ call set_guest_machinecheck_trapbounce
+ test %eax,%eax
+ jz test_all_events
+ movw VCPU_trap_priority(%rbx),%dx # save priority for the
+ movw %dx,VCPU_old_trap_priority(%rbx) # iret hypercall
+ movw $VCPU_TRAP_MCE,VCPU_trap_priority(%rbx)
+ jmp process_trap
+
+ ALIGN
+/* %rbx: struct vcpu */
process_nmi:
- testb $1,VCPU_nmi_masked(%rbx)
- jnz test_guest_events
+ cmpw $VCPU_TRAP_NMI,VCPU_trap_priority(%rbx)
+ jae test_guest_events
sti
movb $0,VCPU_nmi_pending(%rbx)
call set_guest_nmi_trapbounce
test %eax,%eax
jz test_all_events
- movb $1,VCPU_nmi_masked(%rbx)
+ movw VCPU_trap_priority(%rbx),%dx # save priority for the
+ movw %dx,VCPU_old_trap_priority(%rbx) # iret hypercall
+ movw $VCPU_TRAP_NMI,VCPU_trap_priority(%rbx)
+ /* FALLTHROUGH */
+process_trap:
leaq VCPU_trap_bounce(%rbx),%rdx
call create_bounce_frame
jmp test_all_events
@@ -671,6 +692,10 @@ ENTRY(hypercall_table)
.quad do_sysctl /* 35 */
.quad do_domctl
.quad do_kexec_op
+ .rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
+ .quad do_ni_hypercall
+ .endr
+ .quad do_mca /* 48 */
.rept NR_hypercalls-((.-hypercall_table)/8)
.quad do_ni_hypercall
.endr
@@ -715,6 +740,10 @@ ENTRY(hypercall_args_table)
.byte 1 /* do_domctl */
.byte 2 /* do_kexec */
.byte 1 /* do_xsm_op */
+ .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
+ .byte 0 /* do_ni_hypercall */
+ .endr
+ .byte 1 /* do_mca */ /* 48 */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index 14769786ec..698a9b8691 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -288,8 +288,13 @@ unsigned long do_iret(void)
regs->rcx = iret_saved.rcx;
}
- /* No longer in NMI context. */
- v->nmi_masked = 0;
+ /* Restore affinity. */
+ if ((v->trap_priority >= VCPU_TRAP_NMI)
+ && !cpus_equal(v->cpu_affinity_tmp, v->cpu_affinity))
+ vcpu_set_affinity(v, &v->cpu_affinity_tmp);
+
+ /* Restore previous trap priority. */
+ v->trap_priority = v->old_trap_priority;
/* Restore upcall mask from supplied EFLAGS.IF. */
vcpu_info(v, evtchn_upcall_mask) = !(iret_saved.rflags & EF_IE);
diff --git a/xen/common/domain.c b/xen/common/domain.c
index ab59987cfe..b420b4b219 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -654,7 +654,9 @@ void vcpu_reset(struct vcpu *v)
v->is_polling = 0;
v->is_initialised = 0;
v->nmi_pending = 0;
- v->nmi_masked = 0;
+ v->mce_pending = 0;
+ v->old_trap_priority = VCPU_TRAP_NONE;
+ v->trap_priority = VCPU_TRAP_NONE;
clear_bit(_VPF_blocked, &v->pause_flags);
domain_unlock(v->domain);
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index eb06b352cf..84cc455fdb 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -587,6 +587,21 @@ void send_guest_vcpu_virq(struct vcpu *v, int virq)
evtchn_set_pending(v, port);
}
+int guest_enabled_event(struct vcpu *v, int virq)
+{
+ int port;
+
+ if ( unlikely(v == NULL) )
+ return 0;
+
+ port = v->virq_to_evtchn[virq];
+ if ( port == 0 )
+ return 0;
+
+ /* virq is in use */
+ return 1;
+}
+
void send_guest_global_virq(struct domain *d, int virq)
{
int port;
diff --git a/xen/include/Makefile b/xen/include/Makefile
index 64f72865ae..64ae8c92f0 100644
--- a/xen/include/Makefile
+++ b/xen/include/Makefile
@@ -20,6 +20,7 @@ headers-y := \
compat/xen.h \
compat/xencomm.h \
compat/xenoprof.h
+headers-$(CONFIG_X86) += compat/arch-x86/xen-mca.h
headers-$(CONFIG_X86) += compat/arch-x86/xen.h
headers-$(CONFIG_X86) += compat/arch-x86/xen-$(compat-arch-y).h
headers-y += compat/arch-$(compat-arch-y).h compat/xlat.h
diff --git a/xen/include/asm-x86/event.h b/xen/include/asm-x86/event.h
index b1323089b1..02a900f20d 100644
--- a/xen/include/asm-x86/event.h
+++ b/xen/include/asm-x86/event.h
@@ -69,7 +69,12 @@ static inline void local_event_delivery_enable(void)
/* No arch specific virq definition now. Default to global. */
static inline int arch_virq_is_global(int virq)
{
- return 1;
+ switch (virq) {
+ case VIRQ_MCA:
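+        /* Machine check events are delivered as a global (Dom0) virq. */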
+ return 1;
+ default:
+ return 1;
+ }
}
#endif
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 74ac964b80..6e903a1809 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -141,6 +141,9 @@ static inline u32 pickle_domptr(struct domain *domain)
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+#define maddr_get_owner(ma) (page_get_owner(maddr_to_page((ma))))
+#define vaddr_get_owner(va) (page_get_owner(virt_to_page((va))))
+
#define XENSHARE_writable 0
#define XENSHARE_readonly 1
extern void share_xen_page_with_guest(
diff --git a/xen/include/asm-x86/traps.h b/xen/include/asm-x86/traps.h
new file mode 100644
index 0000000000..2d055301f2
--- /dev/null
+++ b/xen/include/asm-x86/traps.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2007, 2008 Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef ASM_TRAP_H
+#define ASM_TRAP_H
+
+struct softirq_trap {
+ struct domain *domain; /* domain to inject trap */
+ struct vcpu *vcpu; /* vcpu to inject trap */
+ int processor; /* physical cpu to inject trap */
+};
+
+struct cpu_user_regs;
+
+extern void (*machine_check_vector)(struct cpu_user_regs *regs, long error_code);
+
+/**
+ * guest_has_trap_callback
+ *
+ * returns true (non-zero) if the guest has registered a trap handler
+ */
+extern int guest_has_trap_callback(struct domain *d, uint16_t vcpuid,
+ unsigned int trap_nr);
+
+/**
+ * send_guest_trap
+ *
+ * delivers a trap to the guest, analogous to send_guest_global_virq;
+ * returns 0 on successful delivery
+ */
+extern int send_guest_trap(struct domain *d, uint16_t vcpuid,
+ unsigned int trap_nr);
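+
+/* Illustrative sketch (not part of this interface): a caller such as the
+ * machine check code might combine the two as follows, where
+ * TRAP_machine_check is the machine check vector number (18):
+ *
+ *     if ( guest_has_trap_callback(d, vcpuid, TRAP_machine_check) )
+ *         rc = send_guest_trap(d, vcpuid, TRAP_machine_check);
+ */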
+
+#endif /* ASM_TRAP_H */
diff --git a/xen/include/public/arch-x86/xen-mca.h b/xen/include/public/arch-x86/xen-mca.h
new file mode 100644
index 0000000000..103d41fd3d
--- /dev/null
+++ b/xen/include/public/arch-x86/xen-mca.h
@@ -0,0 +1,279 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must-haves:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ * (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ * (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice-to-haves:
+ * 7. Dom0 wants Xen to deactivate a physical CPU.
+ *    This is better done as a separate task (physical CPU hotplugging),
+ *    and the hypercall(s) should be sysctls.
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a defective page
+ *    producing correctable errors.
+ * 9. Offlining a physical page:
+ *    Xen frees a certain physical page and never re-uses it.
+ * 10. Test facility: Allow Dom0 to write values into machine check MSRs
+ *    and tell Xen to trigger a machine check.
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+#define XEN_MCA_INTERFACE_VERSION 0x03000001
+
+/* IN: Dom0 calls hypercall from MC event handler. */
+#define XEN_MC_CORRECTABLE 0x0
+/* IN: Dom0/DomU calls hypercall from MC trap handler. */
+#define XEN_MC_TRAP 0x1
+/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. */
+
+/* OUT: All is OK. */
+#define XEN_MC_OK 0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED 0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA 0x2
+/* OUT: Between the notification and this hypercall another
+ * (most likely correctable) error happened. The fetched data
+ * does not match the original machine check data. */
+#define XEN_MC_NOMATCH 0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecture:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: correctable errors are reported via VIRQ_MCA.
+ * Dom0 and DomU: uncorrectable errors are reported via NMI handlers.
+ */
+#define MC_TYPE_GLOBAL 0
+#define MC_TYPE_BANK 1
+#define MC_TYPE_EXTENDED 2
+
+struct mcinfo_common {
+ uint16_t type; /* structure type */
+ uint16_t size; /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE (1 << 0)
+#define MC_FLAG_UNCORRECTABLE (1 << 1)
+
+/* contains global x86 mc information */
+struct mcinfo_global {
+ struct mcinfo_common common;
+
+ /* domain running at the time of the error (most likely the affected one) */
+ uint16_t mc_domid;
+ uint32_t mc_socketid; /* physical socket of the physical core */
+ uint16_t mc_coreid; /* physical impacted core */
+ uint16_t mc_core_threadid; /* core thread of physical core */
+ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+ uint64_t mc_gstatus; /* global status */
+ uint32_t mc_flags;
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+ struct mcinfo_common common;
+
+ uint16_t mc_bank; /* bank nr */
+ uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr; only
+ * filled in on Dom0 and only if mc_addr is valid.
+ * Never valid on DomU. */
+ uint64_t mc_status; /* bank status */
+ uint64_t mc_addr; /* bank address, only valid if the
+ * ADDRV bit is set in mc_status */
+ uint64_t mc_misc;
+};
+
+
+struct mcinfo_msr {
+ uint64_t reg; /* MSR */
+ uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+ struct mcinfo_common common;
+
+ /* You can fill in up to five registers.
+ * If you need more, use this structure
+ * multiple times. */
+
+ uint32_t mc_msrs; /* Number of MSRs with valid values. */
+ struct mcinfo_msr mc_msr[5];
+};
+
+#define MCINFO_HYPERCALLSIZE 1024
+#define MCINFO_MAXSIZE 768
+
+struct mc_info {
+ /* Number of mcinfo_* entries in mi_data */
+ uint32_t mi_nentries;
+
+ uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)];
+};
+typedef struct mc_info mc_info_t;
+
+
+
+/*
+ * OSes should use these instead of writing their own lookup functions,
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ * uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi) \
+ (_mi)->mi_nentries
+/* Prototype:
+ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi) \
+ (struct mcinfo_common *)((_mi)->mi_data)
+/* Prototype:
+ * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic) \
+ (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)
+
+/* Prototype:
+ * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+#define x86_mcinfo_lookup(_ret, _mi, _type) \
+ do { \
+ uint32_t found, i; \
+ struct mcinfo_common *_mic; \
+ \
+ found = 0; \
+ (_ret) = NULL; \
+ if (_mi == NULL) break; \
+ _mic = x86_mcinfo_first(_mi); \
+ for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \
+ if (_mic->type == (_type)) { \
+ found = 1; \
+ break; \
+ } \
+ _mic = x86_mcinfo_next(_mic); \
+ } \
+ (_ret) = found ? _mic : NULL; \
+ } while (0)
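+
+/* Illustrative example (not part of the interface): locating the bank
+ * entry in a fetched record. "mi" is assumed to point at a struct
+ * mc_info filled in by the XEN_MC_fetch hypercall below:
+ *
+ *     struct mcinfo_common *mic;
+ *     struct mcinfo_bank *bank = NULL;
+ *
+ *     x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK);
+ *     if (mic != NULL)
+ *         bank = (struct mcinfo_bank *)mic;
+ */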
+
+
+/* Usecase 1
+ * Register machine check trap callback handler
+ * (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note: this hypercall is special, because it is used by both Dom0 and DomU.
+ */
+#define XEN_MC_fetch 1
+struct xen_mc_fetch {
+ /* IN/OUT variables. */
+ uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */
+
+ /* OUT variables. */
+ uint32_t fetch_idx; /* only useful for Dom0 for the notify hypercall */
+ struct mc_info mc_info;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain 2
+struct xen_mc_notifydomain {
+ /* IN variables. */
+ uint16_t mc_domid; /* The unprivileged domain to notify. */
+ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify.
+ * Usually the echoed value from the fetch hypercall. */
+ uint32_t fetch_idx; /* echoed value from the fetch hypercall. */
+
+ /* IN/OUT variables. */
+ uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
+
+
+struct xen_mc {
+ uint32_t cmd;
+ uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+ union {
+ struct xen_mc_fetch mc_fetch;
+ struct xen_mc_notifydomain mc_notifydomain;
+ uint8_t pad[MCINFO_HYPERCALLSIZE];
+ } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_t);
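+
+/* Illustrative sketch of Usecase 3 from a guest kernel. HYPERVISOR_mca()
+ * is a hypothetical wrapper around __HYPERVISOR_mca, as provided by the
+ * guest's hypercall glue and assumed to return 0 on success;
+ * handle_mc_info() is likewise a hypothetical guest-side parser:
+ *
+ *     struct xen_mc mc;
+ *
+ *     memset(&mc, 0, sizeof(mc));
+ *     mc.cmd = XEN_MC_fetch;
+ *     mc.interface_version = XEN_MCA_INTERFACE_VERSION;
+ *     mc.u.mc_fetch.flags = XEN_MC_CORRECTABLE;
+ *     if (HYPERVISOR_mca(&mc) == 0 &&
+ *         !(mc.u.mc_fetch.flags & (XEN_MC_FETCHFAILED | XEN_MC_NODATA)))
+ *         handle_mc_info(&mc.u.mc_fetch.mc_info);
+ */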
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h
index 5f7579aab1..084348fbc2 100644
--- a/xen/include/public/arch-x86/xen.h
+++ b/xen/include/public/arch-x86/xen.h
@@ -76,6 +76,10 @@ typedef unsigned long xen_pfn_t;
/* Maximum number of virtual CPUs in multi-processor guests. */
#define MAX_VIRT_CPUS 32
+
+/* Machine check support */
+#include "xen-mca.h"
+
#ifndef __ASSEMBLY__
typedef unsigned long xen_ulong_t;
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 67b83c50a6..ed3fd0fd20 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -50,6 +50,9 @@ int alloc_unbound_xen_event_channel(
void free_xen_event_channel(
struct vcpu *local_vcpu, int port);
+/* Query if event channel is in use by the guest */
+int guest_enabled_event(struct vcpu *v, int virq);
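+/* For example, the machine check code may test
+ * guest_enabled_event(v, VIRQ_MCA) before raising VIRQ_MCA for Dom0. */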
+
/* Notify remote end of a Xen-attached event channel.*/
void notify_via_xen_event_channel(int lport);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 7fa80d300c..9d38805bf3 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -112,10 +112,21 @@ struct vcpu
bool_t is_initialised;
/* Currently running on a CPU? */
bool_t is_running;
+ /* MCE callback pending for this VCPU? */
+ bool_t mce_pending;
/* NMI callback pending for this VCPU? */
bool_t nmi_pending;
- /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
- bool_t nmi_masked;
+
+ /* Higher-priority traps may interrupt lower-priority traps;
+ * lower-priority traps wait until higher-priority traps have
+ * finished. Note: this concept is known as the "system priority
+ * level" (spl) in the UNIX world. */
+ uint16_t old_trap_priority;
+ uint16_t trap_priority;
+#define VCPU_TRAP_NONE 0
+#define VCPU_TRAP_NMI 1
+#define VCPU_TRAP_MCE 2
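+/* Delivery rule applied by the entry code (sketch): a pending trap of
+ * priority P is bounced to the guest only if trap_priority < P; on
+ * delivery, trap_priority is saved to old_trap_priority and set to P.
+ * The iret hypercall restores trap_priority from old_trap_priority. */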
+
/* Require shutdown to be deferred for some asynchronous operation? */
bool_t defer_shutdown;
/* VCPU is paused following shutdown request (d->is_shutting_down)? */