/******************************************************************************
 * crash.c
 *
 * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16
 *
 * Xen port written by:
 * - Simon 'Horms' Horman
 * - Magnus Damm
 */

#include <asm/atomic.h>
#include <asm/elf.h>
#include <asm/percpu.h>
#include <xen/types.h>
#include <xen/irq.h>
#include <asm/nmi.h>
#include <xen/string.h>
#include <xen/elf.h>
#include <xen/elfcore.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/perfc.h>
#include <xen/kexec.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <public/xen.h>
#include <asm/shared.h>
#include <asm/hvm/hvm.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <xen/iommu.h>
#include <asm/hpet.h>

static cpumask_t waiting_to_crash;
static unsigned int crashing_cpu;
static DEFINE_PER_CPU_READ_MOSTLY(bool_t, crash_save_done);

/* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */
void __attribute__((noreturn)) do_nmi_crash(struct cpu_user_regs *regs)
{
    int cpu = smp_processor_id();

    /* nmi_shootdown_cpus() should ensure that this assertion is correct. */
    ASSERT(cpu != crashing_cpu);

    /* Save crash information and shut down CPU.  Attempt only once. */
    if ( !this_cpu(crash_save_done) )
    {
        /* Disable the interrupt stack table for the MCE handler.  This
         * prevents race conditions between clearing MCIP and receiving a
         * new MCE, during which the exception frame would be clobbered
         * and the MCE handler fall into an infinite loop.  We are soon
         * going to disable the NMI watchdog, so the loop would not be
         * caught.
         *
         * We do not need to change the NMI IST, as the nmi_crash
         * handler is immune to corrupt exception frames, by virtue of
         * being designed never to return.
         *
         * This update is safe from a security point of view, as this
         * pcpu is never going to try to sysret back to a PV vcpu.
         */
        set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);

        kexec_crash_save_cpu();
        __stop_this_cpu();

        this_cpu(crash_save_done) = 1;
        cpumask_clear_cpu(cpu, &waiting_to_crash);
    }

    /* Poor man's self_nmi().  __stop_this_cpu() has reverted the LAPIC
     * back to its boot state, so we are unable to rely on the regular
     * apic_* functions, due to 'x2apic_enabled' being possibly wrong.
     * (The likely scenario is that we have reverted from x2apic mode to
     * xapic, at which point #GPFs will occur if we use the apic_*
     * functions)
     *
     * The ICR and APIC ID of the LAPIC are still valid even during
     * software disable (Intel SDM Vol 3, 10.4.7.2).  As a result, we
     * can deliberately queue up another NMI at the LAPIC which will not
     * be delivered as the hardware NMI latch is currently in effect.
     * This means that if NMIs become unlatched (e.g. following a
     * non-fatal MCE), the LAPIC will force us back here rather than
     * wandering back into regular Xen code.
     */
    switch ( current_local_apic_mode() )
    {
        u32 apic_id;

    case APIC_MODE_X2APIC:
        apic_id = apic_rdmsr(APIC_ID);

        apic_wrmsr(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL
                   | ((u64)apic_id << 32));
        break;

    case APIC_MODE_XAPIC:
        apic_id = GET_xAPIC_ID(apic_mem_read(APIC_ID));

        while ( apic_mem_read(APIC_ICR) & APIC_ICR_BUSY )
            cpu_relax();

        apic_mem_write(APIC_ICR2, apic_id << 24);
        apic_mem_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL);
        break;

    default:
        break;
    }

    for ( ; ; )
        halt();
}

static void nmi_shootdown_cpus(void)
{
    unsigned long msecs;
    int i, cpu = smp_processor_id();

    local_irq_disable();

    crashing_cpu = cpu;
    local_irq_count(crashing_cpu) = 0;

    cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));

    /* Change NMI trap handlers.  Non-crashing pcpus get nmi_crash which
     * invokes do_nmi_crash (above), which causes them to write state and
     * fall into a loop.  The crashing pcpu gets the nop handler to
     * cause it to return to this function ASAP.
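     *
     * (nmi_crash is the assembly entry stub which hands off to
     * do_nmi_crash() above and never returns; trap_nop simply returns
     * from the exception, so a stray NMI on the crashing pcpu is a
     * no-op.)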
     */
    for ( i = 0; i < nr_cpu_ids; i++ )
    {
        if ( idt_tables[i] == NULL )
            continue;

        if ( i == cpu )
        {
            /*
             * Disable the interrupt stack tables for this cpu's MCE and NMI
             * handlers, and alter the NMI handler to have no operation.
             * Disabling the stack tables prevents stack corruption race
             * conditions, while changing the handler helps prevent cascading
             * faults; we are certainly going to crash by this point.
             *
             * This update is safe from a security point of view, as this pcpu
             * is never going to try to sysret back to a PV vcpu.
             */
            _set_gate_lower(&idt_tables[i][TRAP_nmi], 14, 0, &trap_nop);
            set_ist(&idt_tables[i][TRAP_machine_check], IST_NONE);
        }
        else
        {
            /* Do not update stack table for other pcpus. */
            _update_gate_addr_lower(&idt_tables[i][TRAP_nmi], &nmi_crash);
        }
    }

    /* Ensure the new callback function is set before sending out the NMI. */
    wmb();

    smp_send_nmi_allbutself();

    msecs = 1000; /* Wait at most a second for the other cpus to stop */
    while ( !cpumask_empty(&waiting_to_crash) && msecs )
    {
        mdelay(1);
        msecs--;
    }

    /* Leave a hint of how well we did trying to shoot down the other cpus */
    if ( cpumask_empty(&waiting_to_crash) )
        printk("Shot down all CPUs\n");
    else
    {
        cpulist_scnprintf(keyhandler_scratch, sizeof keyhandler_scratch,
                          &waiting_to_crash);
        printk("Failed to shoot down CPUs {%s}\n", keyhandler_scratch);
    }

    /* Crash shutdown any IOMMU functionality as the crashdump kernel is not
     * happy when booting if interrupt/dma remapping is still enabled. */
    iommu_crash_shutdown();

    __stop_this_cpu();

    /* This is a bit of a hack due to the problems with the x2apic_enabled
     * variable, but we can't do any better without a significant refactoring
     * of the APIC code. */
    x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);

    disable_IO_APIC();
    hpet_disable();
}

void machine_crash_shutdown(void)
{
    crash_xen_info_t *info;

    nmi_shootdown_cpus();

    info = kexec_crash_save_info();
    info->xen_phys_start = xen_phys_start;
    info->dom0_pfn_to_mfn_frame_list_list =
        arch_get_pfn_to_mfn_frame_list_list(dom0);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */