about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Keir Fraser <keir@xen.org> 2011-06-23 11:48:18 +0100
committer Keir Fraser <keir@xen.org> 2011-06-23 11:48:18 +0100
commit 01e18d73eef64413a34a59b14f7c223bb7d6d99f (patch)
tree 502c69eacac5643e64dcce42e04188bcc007405e
parent bc2cd7056b4989dfd93d8feca8bd1f9c1de17158 (diff)
download xen-01e18d73eef64413a34a59b14f7c223bb7d6d99f.tar.gz
xen-01e18d73eef64413a34a59b14f7c223bb7d6d99f.tar.bz2
xen-01e18d73eef64413a34a59b14f7c223bb7d6d99f.zip
kexec: Backport fixes from xen-unstable
KEXEC: prevent panic on the kexec path when talking to the DMAR hardware Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> xen-unstable changeset: 23547:b5955b9fc26c xen-unstable date: Thu Jun 16 16:11:13 2011 +0100 KEXEC: correctly revert x2apic state when kexecing Introduce the boolean variable 'kexecing' which indicates to functions whether we are on the kexec path or not. This is used by disable_local_APIC() to try and revert the APIC mode back to how it was found on boot. We also need some fudging of the x2apic_enabled variable. It is used in multiple places over the codebase to mean multiple things, including: What did the user specify on the command line? Did the BIOS boot me in x2apic mode? Is the BSP Local APIC in x2apic mode? What mode is my Local APIC in? Therefore, set it up to prevent a protection fault when disabling the IOAPICs. (In this case, it is used in the "What mode is my Local APIC in?" case, so the processor doesn't suffer a protection fault because of trying to use x2apic MSRs when it should be using xapic MMIO) Finally, make sure that interrupts are disabled when jumping into the purgatory code. It would be bad to service interrupts in the Xen context when the next kernel is booting. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> xen-unstable changeset: 23542:23c068b10923 xen-unstable date: Wed Jun 15 16:16:41 2011 +0100 IOMMU: add crash_shutdown iommu_op The kdump kernel has problems booting with interrupt/dma remapping enabled, so we need a new iommu_ops called crash_shutdown which is basically suspend but doesn't need to bother saving state. Make sure that crash_shutdown is called on the kexec path. 
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> xen-unstable changeset: 23541:c6307ddd3ab1 xen-unstable date: Wed Jun 15 16:10:11 2011 +0100 IOMMU VTD BUG: disable Extended Interrupt Mode when disabling Interrupt Remapping Experimental evidence shows that Extended Interrupt Mode remains in effect even after Interrupt Remapping is disabled in each DMAR Global Command Register. A consequence of this is that when we switch from x2apic mode back to xapic mode, and disable interrupt remapping for the kdump kernel, interrupts passing through the IO APICs are in x2apic format as opposed to xapic. This causes a triple fault in the kexec kernel. As EIM is explicitly set up each time Interrupt Remapping is enabled, it is safe for us to clobber this when tearing down. Also, change the header definition of IRTA_REG_EIME_SHIFT. It caused verbose and error-prone code, and was only used in 1 place before. We now have IRTA_EIME which is the specific bit in the register. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> xen-unstable changeset: 23540:96f53d2b966e xen-unstable date: Wed Jun 15 16:07:45 2011 +0100 IOMMU VTD BUG: disable Extended Interrupt Mode when disabling Interrupt Remapping Experimental evidence shows that Extended Interrupt Mode remains in effect even after Interrupt Remapping is disabled in each DMAR Global Command Register. A consequence of this is that when we switch from x2apic mode back to xapic mode, and disable interrupt remapping for the kdump kernel, interrupts passing through the IO APICs are in x2apic format as opposed to xapic. This causes a triple fault in the kexec kernel. As EIM is explicitly set up each time Interrupt Remapping is enabled, it is safe for us to clobber this when tearing down. Also, change the header definition of IRTA_REG_EIME_SHIFT. It caused verbose and error-prone code, and was only used in 1 place before. We now have IRTA_EIME which is the specific bit in the register. 
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> xen-unstable changeset: 23515:337520d94cba xen-unstable date: Tue Jun 14 13:04:09 2011 +0100 x86/apic: record local APIC state on boot Xen does not store the boot local APIC state which leads to problems when shutting down for a kexec jump. This patch records the boot state so we can return to the boot state when kexecing. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Keir Fraser <keir@xen.org> Acked-by: Jan Beulich <jbeulich@novell.com> xen-unstable changeset: 23514:d04608ad70f8 xen-unstable date: Tue Jun 14 13:02:00 2011 +0100 x86/kexec: nmi_shootdown_cpus() should leave irqs disabled Signed-off-by: Keir Fraser <keir@xen.org> xen-unstable changeset: 23513:595a0c0804a9 xen-unstable date: Tue Jun 14 12:49:41 2011 +0100
-rw-r--r-- xen/arch/x86/apic.c 90
-rw-r--r-- xen/arch/x86/crash.c 13
-rw-r--r-- xen/arch/x86/genapic/probe.c 2
-rw-r--r-- xen/arch/x86/machine_kexec.c 5
-rw-r--r-- xen/common/kexec.c 6
-rw-r--r-- xen/drivers/passthrough/amd/pci_amd_iommu.c 1
-rw-r--r-- xen/drivers/passthrough/iommu.c 8
-rw-r--r-- xen/drivers/passthrough/vtd/dmar.h 11
-rw-r--r-- xen/drivers/passthrough/vtd/intremap.c 24
-rw-r--r-- xen/drivers/passthrough/vtd/iommu.c 20
-rw-r--r-- xen/drivers/passthrough/vtd/iommu.h 2
-rw-r--r-- xen/include/asm-x86/apic.h 10
-rw-r--r-- xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 1
-rw-r--r-- xen/include/xen/iommu.h 2
-rw-r--r-- xen/include/xen/kexec.h 2
15 files changed, 189 insertions, 8 deletions
diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index 587042c4fb..9bbf1540ae 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -37,6 +37,7 @@
#include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */
#include <mach_apic.h>
#include <io_ports.h>
+#include <xen/kexec.h>
static bool_t tdt_enabled __read_mostly;
static bool_t tdt_enable __initdata = 1;
@@ -74,6 +75,12 @@ int apic_verbosity;
static bool_t __initdata opt_x2apic = 1;
boolean_param("x2apic", opt_x2apic);
+/*
+ * Bootstrap processor local APIC boot mode - so we can undo our changes
+ * to the APIC state.
+ */
+static enum apic_mode apic_boot_mode = APIC_MODE_INVALID;
+
bool_t __read_mostly x2apic_enabled = 0;
bool_t __read_mostly directed_eoi_enabled = 0;
@@ -354,6 +361,33 @@ void disable_local_APIC(void)
wrmsrl(MSR_IA32_APICBASE, msr_content &
~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD));
}
+
+ if ( kexecing )
+ {
+ uint64_t msr_content;
+ rdmsrl(MSR_IA32_APICBASE, msr_content);
+ msr_content &= ~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD);
+ wrmsrl(MSR_IA32_APICBASE, msr_content);
+
+ switch ( apic_boot_mode )
+ {
+ case APIC_MODE_DISABLED:
+ break; /* Nothing to do - we did this above */
+ case APIC_MODE_XAPIC:
+ msr_content |= MSR_IA32_APICBASE_ENABLE;
+ wrmsrl(MSR_IA32_APICBASE, msr_content);
+ break;
+ case APIC_MODE_X2APIC:
+ msr_content |= (MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD);
+ wrmsrl(MSR_IA32_APICBASE, msr_content);
+ break;
+ default:
+ printk("Default case when reverting #%d lapic to boot state\n",
+ smp_processor_id());
+ break;
+ }
+ }
+
}
extern int ioapic_ack_new;
@@ -1482,6 +1516,62 @@ int __init APIC_init_uniprocessor (void)
return 0;
}
+static const char * __init apic_mode_to_str(const enum apic_mode mode)
+{
+ switch ( mode )
+ {
+ case APIC_MODE_INVALID:
+ return "invalid";
+ case APIC_MODE_DISABLED:
+ return "disabled";
+ case APIC_MODE_XAPIC:
+ return "xapic";
+ case APIC_MODE_X2APIC:
+ return "x2apic";
+ default:
+ return "unrecognised";
+ }
+}
+
+/* Needs to be called during startup. It records the state in which the BIOS
+ * left the local APIC, so we can restore it upon kexec.
+ */
+void __init record_boot_APIC_mode(void)
+{
+ /* Sanity check - we should only ever run once, but could possibly
+ * be called several times */
+ if ( APIC_MODE_INVALID != apic_boot_mode )
+ return;
+
+ apic_boot_mode = current_local_apic_mode();
+
+ apic_printk(APIC_DEBUG, "APIC boot state is '%s'\n",
+ apic_mode_to_str(apic_boot_mode));
+}
+
+/* Look at the bits in MSR_IA32_APICBASE and work out which
+ * APIC mode we are in */
+enum apic_mode current_local_apic_mode(void)
+{
+ u64 msr_contents;
+
+ rdmsrl(MSR_IA32_APICBASE, msr_contents);
+
+ /* Reading EXTD bit from the MSR is only valid if CPUID
+ * says so, else reserved */
+ if ( cpu_has(&current_cpu_data, X86_FEATURE_X2APIC)
+ && (msr_contents & MSR_IA32_APICBASE_EXTD) )
+ return APIC_MODE_X2APIC;
+
+ /* EN bit should always be valid as long as we can read the MSR
+ */
+ if ( msr_contents & MSR_IA32_APICBASE_ENABLE )
+ return APIC_MODE_XAPIC;
+
+ return APIC_MODE_DISABLED;
+}
+
+
void check_for_unexpected_msi(unsigned int vector)
{
unsigned long v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index 1564a01b12..ab18abb4d1 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -27,6 +27,7 @@
#include <asm/hvm/support.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
+#include <xen/iommu.h>
static atomic_t waiting_for_crash_ipi;
static unsigned int crashing_cpu;
@@ -77,10 +78,18 @@ static void nmi_shootdown_cpus(void)
msecs--;
}
+ /* Crash shutdown any IOMMU functionality as the crashdump kernel is not
+ * happy when booting if interrupt/dma remapping is still enabled */
+ iommu_crash_shutdown();
+
__stop_this_cpu();
- disable_IO_APIC();
- local_irq_enable();
+ /* This is a bit of a hack due to the problems with the x2apic_enabled
+ * variable, but we can't do any better without a significant refactoring
+ * of the APIC code */
+ x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
+
+ disable_IO_APIC();
}
void machine_crash_shutdown(void)
diff --git a/xen/arch/x86/genapic/probe.c b/xen/arch/x86/genapic/probe.c
index 20e72922db..9883f8aa7e 100644
--- a/xen/arch/x86/genapic/probe.c
+++ b/xen/arch/x86/genapic/probe.c
@@ -61,6 +61,8 @@ void __init generic_apic_probe(void)
{
int i, changed;
+ record_boot_APIC_mode();
+
check_x2apic_preenabled();
cmdline_apic = changed = (genapic != NULL);
diff --git a/xen/arch/x86/machine_kexec.c b/xen/arch/x86/machine_kexec.c
index 9e5e7abda5..592d82aebc 100644
--- a/xen/arch/x86/machine_kexec.c
+++ b/xen/arch/x86/machine_kexec.c
@@ -99,6 +99,11 @@ void machine_kexec(xen_kexec_image_t *image)
if ( hpet_broadcast_is_available() )
hpet_disable_legacy_broadcast();
+ /* We are about to permanently jump out of the Xen context into the kexec
+ * purgatory code. We really don't want to be still servicing interrupts.
+ */
+ local_irq_disable();
+
/*
* compat_machine_kexec() returns to idle pagetables, which requires us
* to be running on a static GDT mapping (idle pagetables have no GDT
diff --git a/xen/common/kexec.c b/xen/common/kexec.c
index f889e5bb22..bddf23c535 100644
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -29,6 +29,8 @@
#include <compat/kexec.h>
#endif
+bool_t kexecing = FALSE;
+
static DEFINE_PER_CPU_READ_MOSTLY(void *, crash_notes);
static Elf_Note *xen_crash_note;
@@ -220,6 +222,8 @@ void kexec_crash(void)
if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
return;
+ kexecing = TRUE;
+
kexec_common_shutdown();
kexec_crash_save_cpu();
machine_crash_shutdown();
@@ -232,6 +236,8 @@ static long kexec_reboot(void *_image)
{
xen_kexec_image_t *image = _image;
+ kexecing = TRUE;
+
kexec_common_shutdown();
machine_reboot_kexec(image);
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 26db9d5c3c..c7d2ad9f22 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -455,4 +455,5 @@ const struct iommu_ops amd_iommu_ops = {
.read_msi_from_ire = amd_iommu_read_msi_from_ire,
.suspend = amd_iommu_suspend,
.resume = amd_iommu_resume,
+ .crash_shutdown = amd_iommu_suspend,
};
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 822aa5ecf0..793aa8225a 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -422,6 +422,14 @@ void iommu_suspend()
ops->suspend();
}
+void iommu_crash_shutdown(void)
+{
+ const struct iommu_ops *ops = iommu_get_ops();
+ if ( iommu_enabled )
+ ops->crash_shutdown();
+ iommu_enabled = 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/drivers/passthrough/vtd/dmar.h b/xen/drivers/passthrough/vtd/dmar.h
index e0c0fff854..2ed90c4ec9 100644
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -23,6 +23,7 @@
#include <xen/list.h>
#include <xen/iommu.h>
+#include <xen/kexec.h>
extern u8 dmar_host_address_width;
@@ -102,9 +103,13 @@ do { \
sts = op(iommu->reg, offset); \
if ( cond ) \
break; \
- if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT ) \
- panic("%s:%d:%s: DMAR hardware is malfunctional\n", \
- __FILE__, __LINE__, __func__); \
+ if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT ) { \
+ if ( !kexecing ) \
+ panic("%s:%d:%s: DMAR hardware is malfunctional\n",\
+ __FILE__, __LINE__, __func__); \
+ else \
+ break; \
+ } \
cpu_relax(); \
} \
} while (0)
diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
index d91700fcee..9c17aaa2d3 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -772,8 +772,7 @@ int enable_intremap(struct iommu *iommu, int eim)
#ifdef CONFIG_X86
/* set extended interrupt mode bit */
- ir_ctrl->iremap_maddr |=
- eim ? (1 << IRTA_REG_EIME_SHIFT) : 0;
+ ir_ctrl->iremap_maddr |= eim ? IRTA_EIME : 0;
#endif
spin_lock_irqsave(&iommu->register_lock, flags);
@@ -808,6 +807,7 @@ int enable_intremap(struct iommu *iommu, int eim)
void disable_intremap(struct iommu *iommu)
{
u32 sts;
+ u64 irta;
unsigned long flags;
if ( !ecap_intr_remap(iommu->ecap) )
@@ -822,6 +822,26 @@ void disable_intremap(struct iommu *iommu)
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
!(sts & DMA_GSTS_IRES), sts);
+
+ /* If we are disabling Interrupt Remapping, make sure we don't stay in
+ * Extended Interrupt Mode, as this is unaffected by the Interrupt
+ * Remapping flag in each DMAR Global Control Register.
+ * Specifically, local apics in xapic mode do not like interrupts delivered
+ * in x2apic mode. Any code turning interrupt remapping back on will set
+ * EIME back correctly.
+ */
+ if ( !ecap_eim(iommu->ecap) )
+ goto out;
+
+ /* Can't read the register unless ecap says we can */
+ irta = dmar_readl(iommu->reg, DMAR_IRTA_REG);
+ if ( !(irta & IRTA_EIME) )
+ goto out;
+
+ dmar_writel(iommu->reg, DMAR_IRTA_REG, irta & ~IRTA_EIME);
+ IOMMU_WAIT_OP(iommu, DMAR_IRTA_REG, dmar_readl,
+ !(irta & IRTA_EIME), irta);
+
out:
spin_unlock_irqrestore(&iommu->register_lock, flags);
}
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index e8c7f668d3..939ec4a15d 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2238,6 +2238,25 @@ static void vtd_suspend(void)
}
}
+static void vtd_crash_shutdown(void)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+
+ if ( !iommu_enabled )
+ return;
+
+ iommu_flush_all();
+
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ iommu_disable_translation(iommu);
+ disable_intremap(drhd->iommu);
+ disable_qinval(drhd->iommu);
+ }
+}
+
static void vtd_resume(void)
{
struct acpi_drhd_unit *drhd;
@@ -2288,6 +2307,7 @@ const struct iommu_ops intel_iommu_ops = {
.read_msi_from_ire = msi_msg_read_remap_rte,
.suspend = vtd_suspend,
.resume = vtd_resume,
+ .crash_shutdown = vtd_crash_shutdown,
};
/*
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index 6e689a5889..cb6c631ed3 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -467,7 +467,7 @@ struct qinval_entry {
#define IEC_GLOBAL_INVL 0
#define IEC_INDEX_INVL 1
-#define IRTA_REG_EIME_SHIFT 11
+#define IRTA_EIME (((u64)1) << 11)
/* 2^(IRTA_REG_TABLE_SIZE + 1) = IREMAP_ENTRY_NR */
#define IRTA_REG_TABLE_SIZE ( IREMAP_PAGE_ORDER + 7 )
diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
index b2c46a9ecc..83311dfff8 100644
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -21,6 +21,14 @@
#define IO_APIC_REDIR_DEST_LOGICAL 0x00800
#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000
+/* Possible APIC states */
+enum apic_mode {
+ APIC_MODE_INVALID, /* Not set yet */
+ APIC_MODE_DISABLED, /* If uniprocessor, or MP in uniprocessor mode */
+ APIC_MODE_XAPIC, /* xAPIC mode - default upon chipset reset */
+ APIC_MODE_X2APIC /* x2APIC mode - common for large MP machines */
+};
+
extern int apic_verbosity;
extern bool_t x2apic_enabled;
extern bool_t directed_eoi_enabled;
@@ -206,6 +214,8 @@ extern void disable_APIC_timer(void);
extern void enable_APIC_timer(void);
extern int lapic_suspend(void);
extern int lapic_resume(void);
+extern void record_boot_APIC_mode(void);
+extern enum apic_mode current_local_apic_mode(void);
extern int check_nmi_watchdog (void);
extern void enable_NMI_through_LVT0 (void * dummy);
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 0bd92403fd..52ddc721e6 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -91,6 +91,7 @@ unsigned int amd_iommu_read_ioapic_from_ire(
/* power management support */
void amd_iommu_resume(void);
void amd_iommu_suspend(void);
+void amd_iommu_crash_shutdown(void);
static inline u32 get_field_from_reg_u32(u32 reg_value, u32 mask, u32 shift)
{
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 4b93c31e38..76925b28f1 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -133,6 +133,7 @@ struct iommu_ops {
unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
void (*suspend)(void);
void (*resume)(void);
+ void (*crash_shutdown)(void);
};
void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
@@ -142,6 +143,7 @@ unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
void iommu_suspend(void);
void iommu_resume(void);
+void iommu_crash_shutdown(void);
void iommu_set_dom0_mapping(struct domain *d);
diff --git a/xen/include/xen/kexec.h b/xen/include/xen/kexec.h
index 9b51004c38..1f8027e1af 100644
--- a/xen/include/xen/kexec.h
+++ b/xen/include/xen/kexec.h
@@ -12,6 +12,8 @@ typedef struct xen_kexec_reserve {
extern xen_kexec_reserve_t kexec_crash_area;
+extern bool_t kexecing;
+
void set_kexec_crash_area_size(u64 system_ram);
/* We have space for 4 images to support atomic update