aboutsummaryrefslogtreecommitdiffstats
path: root/linux-2.6-xen-sparse/arch/x86_64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'linux-2.6-xen-sparse/arch/x86_64/kernel')
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile74
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile10
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c197
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c77
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/crash.c190
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c774
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c302
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S1325
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/genapic-xen.c143
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c161
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S203
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c162
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c57
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c2269
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/ioport-xen.c99
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c197
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/ldt-xen.c282
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/machine_kexec.c279
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c1011
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/pci-swiotlb-xen.c55
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c829
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c1677
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c361
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c600
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c1175
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/vsyscall-xen.c239
-rw-r--r--linux-2.6-xen-sparse/arch/x86_64/kernel/xen_entry.S40
27 files changed, 0 insertions, 12788 deletions
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
deleted file mode 100644
index fca2eae4d6..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
+++ /dev/null
@@ -1,74 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head.o head64.o init_task.o vmlinux.lds
-EXTRA_AFLAGS := -traditional
-obj-y := process.o signal.o entry.o traps.o irq.o \
- ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
- x8664_ksyms.o i387.o syscall.o vsyscall.o \
- setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- pci-dma.o pci-nommu.o alternative.o
-
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_X86_MCE) += mce.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
-obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
-obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
-obj-$(CONFIG_ACPI) += acpi/
-obj-$(CONFIG_X86_MSR) += msr.o
-obj-$(CONFIG_MICROCODE) += microcode.o
-obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
-obj-$(CONFIG_X86_XEN_GENAPIC) += genapic.o genapic_xen.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \
- genapic.o genapic_cluster.o genapic_flat.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
-obj-$(CONFIG_ACPI_SLEEP) += suspend.o
-obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
-obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
-obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
-obj-$(CONFIG_X86_VSMP) += vsmp.o
-obj-$(CONFIG_K8_NB) += k8.o
-obj-$(CONFIG_AUDIT) += audit.o
-
-obj-$(CONFIG_MODULES) += module.o
-
-obj-y += topology.o
-obj-y += intel_cacheinfo.o
-
-CFLAGS_vsyscall.o := $(PROFILING) -g0
-
-bootflag-y += ../../i386/kernel/bootflag.o
-cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
-topology-y += ../../i386/kernel/topology.o
-microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
-intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
-quirks-y += ../../i386/kernel/quirks.o
-i8237-y += ../../i386/kernel/i8237.o
-msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
-alternative-y += ../../i386/kernel/alternative.o
-
-ifdef CONFIG_XEN
-time-y += ../../i386/kernel/time-xen.o
-pci-dma-y += ../../i386/kernel/pci-dma-xen.o
-microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o
-quirks-y := ../../i386/kernel/quirks-xen.o
-
-n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
-
-include $(srctree)/scripts/Makefile.xen
-
-obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
-obj-y := $(call cherrypickxen, $(obj-y))
-extra-y := $(call cherrypickxen, $(extra-y))
-%/head-xen.o %/head-xen.s: EXTRA_AFLAGS :=
-endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile b/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
deleted file mode 100644
index 57b7fe1c11..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-obj-y := boot.o
-boot-y := ../../../i386/kernel/acpi/boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += processor.o
-processor-y := ../../../i386/kernel/acpi/processor.o ../../../i386/kernel/acpi/cstate.o
-endif
-
-boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
deleted file mode 100644
index d5f44ac710..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Local APIC handling, local APIC timers
- *
- * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively.
- * Maciej W. Rozycki : Various updates and fixes.
- * Mikael Pettersson : Power Management for UP-APIC.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/module.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-#include <asm/hpet.h>
-#include <asm/idle.h>
-
-int apic_verbosity;
-
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk("unexpected IRQ trap at vector %02x\n", irq);
- /*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- * But don't ack when the APIC is disabled. -AK
- */
- if (!disable_apic)
- ack_APIC_irq();
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
-void smp_local_timer_interrupt(struct pt_regs *regs)
-{
- profile_tick(CPU_PROFILING, regs);
-#ifndef CONFIG_XEN
-#ifdef CONFIG_SMP
- update_process_times(user_mode(regs));
-#endif
-#endif
- /*
- * We take the 'long' return path, and there every subsystem
- * grabs the appropriate locks (kernel lock/ irq lock).
- *
- * we might want to decouple profiling from the 'long path',
- * and do the profiling totally in assembly.
- *
- * Currently this isn't too much of an issue (performance wise),
- * we can take more than 100K local irqs per second on a 100 MHz P5.
- */
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- * interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
- /*
- * the NMI deadlock-detector uses this.
- */
- add_pda(apic_timer_irqs, 1);
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- */
- ack_APIC_irq();
- /*
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- exit_idle();
- irq_enter();
- smp_local_timer_interrupt(regs);
- irq_exit();
-}
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-asmlinkage void smp_spurious_interrupt(void)
-{
- unsigned int v;
- exit_idle();
- irq_enter();
- /*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
- */
- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
- ack_APIC_irq();
-
-#if 0
- static unsigned long last_warning;
- static unsigned long skipped;
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- if (time_before(last_warning+30*HZ,jiffies)) {
- printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
- smp_processor_id(), skipped);
- last_warning = jiffies;
- skipped = 0;
- } else {
- skipped++;
- }
-#endif
- irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-
-asmlinkage void smp_error_interrupt(void)
-{
- unsigned int v, v1;
-
- exit_idle();
- irq_enter();
- /* First tickle the hardware, only then report what went on. -- REW */
- v = apic_read(APIC_ESR);
- apic_write(APIC_ESR, 0);
- v1 = apic_read(APIC_ESR);
- ack_APIC_irq();
- atomic_inc(&irq_err_count);
-
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
- smp_processor_id(), v , v1);
- irq_exit();
-}
-
-int disable_apic;
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor (void)
-{
-#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config)
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
-#endif
-
- return 1;
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c
deleted file mode 100644
index 85f73c1f02..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <asm/pda.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
-#include <asm/ia32.h>
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-int main(void)
-{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(thread);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- ENTRY(status);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- ENTRY(kernelstack);
- ENTRY(oldrsp);
- ENTRY(pcurrent);
- ENTRY(irqcount);
- ENTRY(cpunumber);
- ENTRY(irqstackptr);
- ENTRY(data_offset);
- BLANK();
-#undef ENTRY
-#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
- ENTRY(eax);
- ENTRY(ebx);
- ENTRY(ecx);
- ENTRY(edx);
- ENTRY(esi);
- ENTRY(edi);
- ENTRY(ebp);
- ENTRY(esp);
- ENTRY(eip);
- BLANK();
-#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe32, uc.uc_mcontext));
- BLANK();
-#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
-#ifndef CONFIG_X86_NO_TSS
- BLANK();
- DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
-#endif
- BLANK();
- DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
- return 0;
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/crash.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/crash.c
deleted file mode 100644
index 178fb47375..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/crash.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Architecture specific (x86_64) functions for kexec based crash dumps.
- *
- * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- *
- * Copyright (C) IBM Corporation, 2004. All rights reserved.
- *
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/irq.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-
-#include <asm/processor.h>
-#include <asm/hardirq.h>
-#include <asm/nmi.h>
-#include <asm/hw_irq.h>
-#include <asm/mach_apic.h>
-
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
-
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type,
- void *data, size_t data_len)
-{
- struct elf_note note;
-
- note.n_namesz = strlen(name) + 1;
- note.n_descsz = data_len;
- note.n_type = type;
- memcpy(buf, &note, sizeof(note));
- buf += (sizeof(note) +3)/4;
- memcpy(buf, name, note.n_namesz);
- buf += (note.n_namesz + 3)/4;
- memcpy(buf, data, note.n_descsz);
- buf += (note.n_descsz + 3)/4;
-
- return buf;
-}
-
-static void final_note(u32 *buf)
-{
- struct elf_note note;
-
- note.n_namesz = 0;
- note.n_descsz = 0;
- note.n_type = 0;
- memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
-{
- struct elf_prstatus prstatus;
- u32 *buf;
-
- if ((cpu < 0) || (cpu >= NR_CPUS))
- return;
-
- /* Using ELF notes here is opportunistic.
- * I need a well defined structure format
- * for the data I pass, and I need tags
- * on the data to indicate what information I have
- * squirrelled away. ELF notes happen to provide
- * all of that that no need to invent something new.
- */
-
- buf = (u32*)per_cpu_ptr(crash_notes, cpu);
-
- if (!buf)
- return;
-
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.pr_pid = current->pid;
- elf_core_copy_regs(&prstatus.pr_reg, regs);
- buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
- sizeof(prstatus));
- final_note(buf);
-}
-
-static void crash_save_self(struct pt_regs *regs)
-{
- int cpu;
-
- cpu = smp_processor_id();
- crash_save_this_cpu(regs, cpu);
-}
-
-#ifndef CONFIG_XEN
-#ifdef CONFIG_SMP
-static atomic_t waiting_for_crash_ipi;
-
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
-{
- /*
- * Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return 1;
- local_irq_disable();
-
- crash_save_this_cpu(regs, cpu);
- disable_local_APIC();
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- for(;;)
- halt();
-
- return 1;
-}
-
-static void smp_send_nmi_allbutself(void)
-{
- send_IPI_allbutself(NMI_VECTOR);
-}
-
-/*
- * This code is a best effort heuristic to get the
- * other cpus to stop executing. So races with
- * cpu hotplug shouldn't matter.
- */
-
-static void nmi_shootdown_cpus(void)
-{
- unsigned long msecs;
-
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- set_nmi_callback(crash_nmi_callback);
-
- /*
- * Ensure the new callback function is set before sending
- * out the NMI
- */
- wmb();
-
- smp_send_nmi_allbutself();
-
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
- /* Leave the nmi callback set */
- disable_local_APIC();
-}
-#else
-static void nmi_shootdown_cpus(void)
-{
- /* There are no cpus to shootdown */
-}
-#endif
-#endif /* CONFIG_XEN */
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
- /*
- * This function is only called after the system
- * has panicked or is otherwise in a critical state.
- * The minimum amount of code to allow a kexec'd kernel
- * to run successfully needs to happen here.
- *
- * In practice this means shooting down the other cpus in
- * an SMP system.
- */
- /* The kernel is broken so disable interrupts */
- local_irq_disable();
-
- /* Make a note of crashing cpu. Will be used in NMI callback.*/
- crashing_cpu = smp_processor_id();
-
-#ifndef CONFIG_XEN
- nmi_shootdown_cpus();
-
- if(cpu_has_apic)
- disable_local_APIC();
-
-#if defined(CONFIG_X86_IO_APIC)
- disable_IO_APIC();
-#endif
-#endif /* CONFIG_XEN */
- crash_save_self(regs);
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
deleted file mode 100644
index 29c0bb2b91..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
+++ /dev/null
@@ -1,774 +0,0 @@
-/*
- * Handle the memory map.
- * The functions here do the job until bootmem takes over.
- *
- * Getting sanitize_e820_map() in sync with i386 version by applying change:
- * - Provisions for empty E820 memory regions (reported by certain BIOSes).
- * Alex Achenbach <xela@slit.de>, December 2002.
- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/kexec.h>
-#include <linux/module.h>
-
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/bootsetup.h>
-#include <asm/sections.h>
-#include <xen/interface/memory.h>
-
-/*
- * PFN of last memory page.
- */
-unsigned long end_pfn;
-EXPORT_SYMBOL(end_pfn);
-
-/*
- * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
- * The direct mapping extends to end_pfn_map, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long end_pfn_map;
-
-/*
- * Last pfn which the user wants to use.
- */
-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
-
-extern struct resource code_resource, data_resource;
-
-/* Check for some hardcoded bad areas that early boot is not allowed to touch */
-static inline int bad_addr(unsigned long *addrp, unsigned long size)
-{
- unsigned long addr = *addrp, last = addr + size;
-
-#ifndef CONFIG_XEN
- /* various gunk below that needed for SMP startup */
- if (addr < 0x8000) {
- *addrp = 0x8000;
- return 1;
- }
-
- /* direct mapping tables of the kernel */
- if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
- *addrp = table_end << PAGE_SHIFT;
- return 1;
- }
-
- /* initrd */
-#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
- addr < INITRD_START+INITRD_SIZE) {
- *addrp = INITRD_START + INITRD_SIZE;
- return 1;
- }
-#endif
- /* kernel code + 640k memory hole (later should not be needed, but
- be paranoid for now) */
- if (last >= 640*1024 && addr < 1024*1024) {
- *addrp = 1024*1024;
- return 1;
- }
- if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
- *addrp = __pa_symbol(&_end);
- return 1;
- }
-
- if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
- *addrp = ebda_addr + ebda_size;
- return 1;
- }
-
- /* XXX ramdisk image here? */
-#else
- if (last < (table_end<<PAGE_SHIFT)) {
- *addrp = table_end << PAGE_SHIFT;
- return 1;
- }
-#endif
- return 0;
-}
-
-#ifndef CONFIG_XEN
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int __meminit
-e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- if (type && ei->type != type)
- continue;
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
- return 1;
- }
- return 0;
-}
-#endif
-
-/*
- * This function checks if the entire range <start,end> is mapped with type.
- *
- * Note: this function only works correct if the e820 table is sorted and
- * not-overlapping, which is the case
- */
-int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
-{
- int i;
-
-#ifndef CONFIG_XEN
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
-#else
- extern struct e820map machine_e820;
-
- if (!is_initial_xendomain())
- return 0;
- for (i = 0; i < machine_e820.nr_map; i++) {
- const struct e820entry *ei = &machine_e820.map[i];
-#endif
-
- if (type && ei->type != type)
- continue;
- /* is the region (part) in overlap with the current region ?*/
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
-
- /* if the region is at the beginning of <start,end> we move
- * start to the end of the region since it's ok until there
- */
- if (ei->addr <= start)
- start = ei->addr + ei->size;
- /* if start is now at or beyond end, we're done, full coverage */
- if (start >= end)
- return 1; /* we're done */
- }
- return 0;
-}
-
-/*
- * Find a free area in a specific range.
- */
-unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long addr = ei->addr, last;
- if (ei->type != E820_RAM)
- continue;
- if (addr < start)
- addr = start;
- if (addr > ei->addr + ei->size)
- continue;
- while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
- ;
- last = addr + size;
- if (last > ei->addr + ei->size)
- continue;
- if (last > end)
- continue;
- return addr;
- }
- return -1UL;
-}
-
-/*
- * Free bootmem based on the e820 table for a node.
- */
-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long last, addr;
-
- if (ei->type != E820_RAM ||
- ei->addr+ei->size <= start ||
- ei->addr >= end)
- continue;
-
- addr = round_up(ei->addr, PAGE_SIZE);
- if (addr < start)
- addr = start;
-
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (last >= end)
- last = end;
-
- if (last > addr && last-addr >= PAGE_SIZE)
- free_bootmem_node(pgdat, addr, last-addr);
- }
-}
-
-/*
- * Find the highest page frame number we have available
- */
-unsigned long __init e820_end_of_ram(void)
-{
- int i;
- unsigned long end_pfn = 0;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long start, end;
-
- start = round_up(ei->addr, PAGE_SIZE);
- end = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (start >= end)
- continue;
- if (ei->type == E820_RAM) {
- if (end > end_pfn<<PAGE_SHIFT)
- end_pfn = end>>PAGE_SHIFT;
- } else {
- if (end > end_pfn_map<<PAGE_SHIFT)
- end_pfn_map = end>>PAGE_SHIFT;
- }
- }
-
- if (end_pfn > end_pfn_map)
- end_pfn_map = end_pfn;
- if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
- end_pfn_map = MAXMEM>>PAGE_SHIFT;
- if (end_pfn > end_user_pfn)
- end_pfn = end_user_pfn;
- if (end_pfn > end_pfn_map)
- end_pfn = end_pfn_map;
-
- return end_pfn;
-}
-
-/*
- * Compute how much memory is missing in a range.
- * Unlike the other functions in this file the arguments are in page numbers.
- */
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long ram = 0;
- unsigned long start = start_pfn << PAGE_SHIFT;
- unsigned long end = end_pfn << PAGE_SHIFT;
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long last, addr;
-
- if (ei->type != E820_RAM ||
- ei->addr+ei->size <= start ||
- ei->addr >= end)
- continue;
-
- addr = round_up(ei->addr, PAGE_SIZE);
- if (addr < start)
- addr = start;
-
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (last >= end)
- last = end;
-
- if (last > addr)
- ram += last - addr;
- }
- return ((end - start) - ram) >> PAGE_SHIFT;
-}
-
-/*
- * Mark e820 reserved areas as busy for the resource manager.
- */
-void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
-{
- int i;
- for (i = 0; i < nr_map; i++) {
- struct resource *res;
- res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
- res->start = e820[i].addr;
- res->end = res->start + e820[i].size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- if (e820[i].type == E820_RAM) {
- /*
- * We don't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
- */
-#ifndef CONFIG_XEN
- request_resource(res, &code_resource);
- request_resource(res, &data_resource);
-#endif
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end)
- request_resource(res, &crashk_res);
-#ifdef CONFIG_XEN
- xen_machine_kexec_register_resources(res);
-#endif
-#endif
- }
- }
-}
-
-/*
- * Add a memory region to the kernel e820 map.
- */
-void __init add_memory_region(unsigned long start, unsigned long size, int type)
-{
- int x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
-}
-
-void __init e820_print_map(char *who)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- printk(" %s: %016Lx - %016Lx ", who,
- (unsigned long long) e820.map[i].addr,
- (unsigned long long) (e820.map[i].addr + e820.map[i].size));
- switch (e820.map[i].type) {
- case E820_RAM: printk("(usable)\n");
- break;
- case E820_RESERVED:
- printk("(reserved)\n");
- break;
- case E820_ACPI:
- printk("(ACPI data)\n");
- break;
- case E820_NVS:
- printk("(ACPI NVS)\n");
- break;
- default: printk("type %u\n", e820.map[i].type);
- break;
- }
- }
-}
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
-{
- struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
- };
- static struct change_member change_point_list[2*E820MAX] __initdata;
- static struct change_member *change_point[2*E820MAX] __initdata;
- static struct e820entry *overlap_list[E820MAX] __initdata;
- static struct e820entry new_bios[E820MAX] __initdata;
- struct change_member *change_tmp;
- unsigned long current_type, last_type;
- unsigned long long last_addr;
- int chgidx, still_changing;
- int overlap_entries;
- int new_bios_entry;
- int old_nr, new_nr, chg_nr;
- int i;
-
- /*
- Visually we're performing the following (1,2,3,4 = memory types)...
-
- Sample memory map (w/overlaps):
- ____22__________________
- ______________________4_
- ____1111________________
- _44_____________________
- 11111111________________
- ____________________33__
- ___________44___________
- __________33333_________
- ______________22________
- ___________________2222_
- _________111111111______
- _____________________11_
- _________________4______
-
- Sanitized equivalent (no overlap):
- 1_______________________
- _44_____________________
- ___1____________________
- ____22__________________
- ______11________________
- _________1______________
- __________3_____________
- ___________44___________
- _____________33_________
- _______________2________
- ________________1_______
- _________________4______
- ___________________2____
- ____________________33__
- ______________________4_
- */
-
- /* if there's only one memory region, don't bother */
- if (*pnr_map < 2)
- return -1;
-
- old_nr = *pnr_map;
-
- /* bail out if we find any unreasonable addresses in bios map */
- for (i=0; i<old_nr; i++)
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
- return -1;
-
- /* create pointers for initial change-point information (for sorting) */
- for (i=0; i < 2*old_nr; i++)
- change_point[i] = &change_point_list[i];
-
- /* record all known change-points (starting and ending addresses),
- omitting those that are for empty memory regions */
- chgidx = 0;
- for (i=0; i < old_nr; i++) {
- if (biosmap[i].size != 0) {
- change_point[chgidx]->addr = biosmap[i].addr;
- change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
- change_point[chgidx++]->pbios = &biosmap[i];
- }
- }
- chg_nr = chgidx;
-
- /* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i=1; i < chg_nr; i++) {
- /* if <current_addr> > <last_addr>, swap */
- /* or, if current=<start_addr> & last=<end_addr>, swap */
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
- ((change_point[i]->addr == change_point[i-1]->addr) &&
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
- )
- {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing=1;
- }
- }
- }
-
- /* create a new bios memory map, removing overlaps */
- overlap_entries=0; /* number of entries in the overlap table */
- new_bios_entry=0; /* index for creating new bios map entries */
- last_type = 0; /* start with undefined memory type */
- last_addr = 0; /* start with 0 as last starting address */
- /* loop through change-points, determining affect on the new bios map */
- for (chgidx=0; chgidx < chg_nr; chgidx++)
- {
- /* keep track of all overlapping bios entries */
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
- {
- /* add map entry to overlap list (> 1 entry implies an overlap) */
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
- }
- else
- {
- /* remove entry from list (order independent, so swap with last) */
- for (i=0; i<overlap_entries; i++)
- {
- if (overlap_list[i] == change_point[chgidx]->pbios)
- overlap_list[i] = overlap_list[overlap_entries-1];
- }
- overlap_entries--;
- }
- /* if there are overlapping entries, decide which "type" to use */
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
- current_type = 0;
- for (i=0; i<overlap_entries; i++)
- if (overlap_list[i]->type > current_type)
- current_type = overlap_list[i]->type;
- /* continue building up new bios map based on this information */
- if (current_type != last_type) {
- if (last_type != 0) {
- new_bios[new_bios_entry].size =
- change_point[chgidx]->addr - last_addr;
- /* move forward only if the new size was non-zero */
- if (new_bios[new_bios_entry].size != 0)
- if (++new_bios_entry >= E820MAX)
- break; /* no more space left for new bios entries */
- }
- if (current_type != 0) {
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
- new_bios[new_bios_entry].type = current_type;
- last_addr=change_point[chgidx]->addr;
- }
- last_type = current_type;
- }
- }
- new_nr = new_bios_entry; /* retain count for new bios entries */
-
- /* copy new bios mapping into original location */
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
- *pnr_map = new_nr;
-
- return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory. If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
- */
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
-{
-#ifndef CONFIG_XEN
- /* Only one memory region (or negative)? Ignore it */
- if (nr_map < 2)
- return -1;
-#else
- BUG_ON(nr_map < 1);
-#endif
-
- do {
- unsigned long start = biosmap->addr;
- unsigned long size = biosmap->size;
- unsigned long end = start + size;
- unsigned long type = biosmap->type;
-
- /* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
- return -1;
-
-#ifndef CONFIG_XEN
- /*
- * Some BIOSes claim RAM in the 640k - 1M region.
- * Not right. Fix it up.
- *
- * This should be removed on Hammer which is supposed to not
- * have non e820 covered ISA mappings there, but I had some strange
- * problems so it stays for now. -AK
- */
- if (type == E820_RAM) {
- if (start < 0x100000ULL && end > 0xA0000ULL) {
- if (start < 0xA0000ULL)
- add_memory_region(start, 0xA0000ULL-start, type);
- if (end <= 0x100000ULL)
- continue;
- start = 0x100000ULL;
- size = end - start;
- }
- }
-#endif
-
- add_memory_region(start, size, type);
- } while (biosmap++,--nr_map);
- return 0;
-}
-
-#ifndef CONFIG_XEN
-void __init setup_memory_region(void)
-{
- char *who = "BIOS-e820";
-
- /*
- * Try to copy the BIOS-supplied E820-map.
- *
- * Otherwise fake a memory map; one section from 0k->640k,
- * the next section from 1mb->appropriate_mem_k
- */
- sanitize_e820_map(E820_MAP, &E820_MAP_NR);
- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
- unsigned long mem_size;
-
- /* compare results from other methods and take the greater */
- if (ALT_MEM_K < EXT_MEM_K) {
- mem_size = EXT_MEM_K;
- who = "BIOS-88";
- } else {
- mem_size = ALT_MEM_K;
- who = "BIOS-e801";
- }
-
- e820.nr_map = 0;
- add_memory_region(0, LOWMEMSIZE(), E820_RAM);
- add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
- }
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- e820_print_map(who);
-}
-
-#else /* CONFIG_XEN */
-
-void __init setup_memory_region(void)
-{
- int rc;
- struct xen_memory_map memmap;
- /*
- * This is rather large for a stack variable but this early in
- * the boot process we know we have plenty slack space.
- */
- struct e820entry map[E820MAX];
-
- memmap.nr_entries = E820MAX;
- set_xen_guest_handle(memmap.buffer, map);
-
- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
- if ( rc == -ENOSYS ) {
- memmap.nr_entries = 1;
- map[0].addr = 0ULL;
- map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
- /* 8MB slack (to balance backend allocations). */
- map[0].size += 8 << 20;
- map[0].type = E820_RAM;
- rc = 0;
- }
- BUG_ON(rc);
-
- sanitize_e820_map(map, (char *)&memmap.nr_entries);
-
- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
-
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- e820_print_map("Xen");
-}
-#endif
-
-void __init parse_memopt(char *p, char **from)
-{
- int i;
- unsigned long current_end;
- unsigned long end;
-
- end_user_pfn = memparse(p, from);
- end_user_pfn >>= PAGE_SHIFT;
-
- end = end_user_pfn<<PAGE_SHIFT;
- i = e820.nr_map-1;
- current_end = e820.map[i].addr + e820.map[i].size;
-
- if (current_end < end) {
- /*
- * The e820 map ends before our requested size so
- * extend the final entry to the requested address.
- */
- if (e820.map[i].type == E820_RAM)
- e820.map[i].size = end - e820.map[i].addr;
- else
- add_memory_region(current_end, end - current_end, E820_RAM);
- }
-}
-
-void __init parse_memmapopt(char *p, char **from)
-{
- unsigned long long start_at, mem_size;
-
- mem_size = memparse(p, from);
- p = *from;
- if (*p == '@') {
- start_at = memparse(p+1, from);
- add_memory_region(start_at, mem_size, E820_RAM);
- } else if (*p == '#') {
- start_at = memparse(p+1, from);
- add_memory_region(start_at, mem_size, E820_ACPI);
- } else if (*p == '$') {
- start_at = memparse(p+1, from);
- add_memory_region(start_at, mem_size, E820_RESERVED);
- } else {
- end_user_pfn = (mem_size >> PAGE_SHIFT);
- }
- p = *from;
-}
-
-unsigned long pci_mem_start = 0xaeedbabe;
-EXPORT_SYMBOL(pci_mem_start);
-
-/*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space. We pass this space to PCI to assign MMIO resources
- * for hotplug or unconfigured devices in.
- * Hopefully the BIOS let enough space left.
- */
-__init void e820_setup_gap(struct e820entry *e820, int nr_map)
-{
- unsigned long gapstart, gapsize, round;
- unsigned long last;
- int i;
- int found = 0;
-
- last = 0x100000000ull;
- gapstart = 0x10000000;
- gapsize = 0x400000;
- i = nr_map;
- while (--i >= 0) {
- unsigned long long start = e820[i].addr;
- unsigned long long end = start + e820[i].size;
-
- /*
- * Since "last" is at most 4GB, we know we'll
- * fit in 32 bits if this condition is true
- */
- if (last > end) {
- unsigned long gap = last - end;
-
- if (gap > gapsize) {
- gapsize = gap;
- gapstart = end;
- found = 1;
- }
- }
- if (start < last)
- last = start;
- }
-
- if (!found) {
- gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
- KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
- }
-
- /*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
- */
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c
deleted file mode 100644
index 090f670a7e..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c
+++ /dev/null
@@ -1,302 +0,0 @@
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/screen_info.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/fcntl.h>
-
-/* Simple VGA output */
-
-#ifdef __i386__
-#include <asm/setup.h>
-#define VGABASE (__ISA_IO_base + 0xb8000)
-#else
-#include <asm/bootsetup.h>
-#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
-#endif
-
-#ifndef CONFIG_XEN
-static int max_ypos = 25, max_xpos = 80;
-static int current_ypos = 25, current_xpos = 0;
-
-static void early_vga_write(struct console *con, const char *str, unsigned n)
-{
- char c;
- int i, k, j;
-
- while ((c = *str++) != '\0' && n-- > 0) {
- if (current_ypos >= max_ypos) {
- /* scroll 1 line up */
- for (k = 1, j = 0; k < max_ypos; k++, j++) {
- for (i = 0; i < max_xpos; i++) {
- writew(readw(VGABASE+2*(max_xpos*k+i)),
- VGABASE + 2*(max_xpos*j + i));
- }
- }
- for (i = 0; i < max_xpos; i++)
- writew(0x720, VGABASE + 2*(max_xpos*j + i));
- current_ypos = max_ypos-1;
- }
- if (c == '\n') {
- current_xpos = 0;
- current_ypos++;
- } else if (c != '\r') {
- writew(((0x7 << 8) | (unsigned short) c),
- VGABASE + 2*(max_xpos*current_ypos +
- current_xpos++));
- if (current_xpos >= max_xpos) {
- current_xpos = 0;
- current_ypos++;
- }
- }
- }
-}
-
-static struct console early_vga_console = {
- .name = "earlyvga",
- .write = early_vga_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
-
-static int early_serial_base = 0x3f8; /* ttyS0 */
-
-#define XMTRDY 0x20
-
-#define DLAB 0x80
-
-#define TXR 0 /* Transmit register (WRITE) */
-#define RXR 0 /* Receive register (READ) */
-#define IER 1 /* Interrupt Enable */
-#define IIR 2 /* Interrupt ID */
-#define FCR 2 /* FIFO control */
-#define LCR 3 /* Line control */
-#define MCR 4 /* Modem control */
-#define LSR 5 /* Line Status */
-#define MSR 6 /* Modem Status */
-#define DLL 0 /* Divisor Latch Low */
-#define DLH 1 /* Divisor latch High */
-
-static int early_serial_putc(unsigned char ch)
-{
- unsigned timeout = 0xffff;
- while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
- cpu_relax();
- outb(ch, early_serial_base + TXR);
- return timeout ? 0 : -1;
-}
-
-static void early_serial_write(struct console *con, const char *s, unsigned n)
-{
- while (*s && n-- > 0) {
- early_serial_putc(*s);
- if (*s == '\n')
- early_serial_putc('\r');
- s++;
- }
-}
-
-#define DEFAULT_BAUD 9600
-
-static __init void early_serial_init(char *s)
-{
- unsigned char c;
- unsigned divisor;
- unsigned baud = DEFAULT_BAUD;
- char *e;
-
- if (*s == ',')
- ++s;
-
- if (*s) {
- unsigned port;
- if (!strncmp(s,"0x",2)) {
- early_serial_base = simple_strtoul(s, &e, 16);
- } else {
- static int bases[] = { 0x3f8, 0x2f8 };
-
- if (!strncmp(s,"ttyS",4))
- s += 4;
- port = simple_strtoul(s, &e, 10);
- if (port > 1 || s == e)
- port = 0;
- early_serial_base = bases[port];
- }
- s += strcspn(s, ",");
- if (*s == ',')
- s++;
- }
-
- outb(0x3, early_serial_base + LCR); /* 8n1 */
- outb(0, early_serial_base + IER); /* no interrupt */
- outb(0, early_serial_base + FCR); /* no fifo */
- outb(0x3, early_serial_base + MCR); /* DTR + RTS */
-
- if (*s) {
- baud = simple_strtoul(s, &e, 0);
- if (baud == 0 || s == e)
- baud = DEFAULT_BAUD;
- }
-
- divisor = 115200 / baud;
- c = inb(early_serial_base + LCR);
- outb(c | DLAB, early_serial_base + LCR);
- outb(divisor & 0xff, early_serial_base + DLL);
- outb((divisor >> 8) & 0xff, early_serial_base + DLH);
- outb(c & ~DLAB, early_serial_base + LCR);
-}
-
-#else /* CONFIG_XEN */
-
-static void
-early_serial_write(struct console *con, const char *s, unsigned count)
-{
- int n;
-
- while (count > 0) {
- n = HYPERVISOR_console_io(CONSOLEIO_write, count, (char *)s);
- if (n <= 0)
- break;
- count -= n;
- s += n;
- }
-}
-
-static __init void early_serial_init(char *s)
-{
-}
-
-/*
- * No early VGA console on Xen, as we do not have convenient ISA-space
- * mappings. Someone should fix this for domain 0. For now, use fake serial.
- */
-#define early_vga_console early_serial_console
-
-#endif
-
-static struct console early_serial_console = {
- .name = "earlyser",
- .write = early_serial_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/* Console interface to a host file on AMD's SimNow! */
-
-static int simnow_fd;
-
-enum {
- MAGIC1 = 0xBACCD00A,
- MAGIC2 = 0xCA110000,
- XOPEN = 5,
- XWRITE = 4,
-};
-
-static noinline long simnow(long cmd, long a, long b, long c)
-{
- long ret;
- asm volatile("cpuid" :
- "=a" (ret) :
- "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
- return ret;
-}
-
-void __init simnow_init(char *str)
-{
- char *fn = "klog";
- if (*str == '=')
- fn = ++str;
- /* error ignored */
- simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
-}
-
-static void simnow_write(struct console *con, const char *s, unsigned n)
-{
- simnow(XWRITE, simnow_fd, (unsigned long)s, n);
-}
-
-static struct console simnow_console = {
- .name = "simnow",
- .write = simnow_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/* Direct interface for emergencies */
-struct console *early_console = &early_vga_console;
-static int early_console_initialized = 0;
-
-void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- va_start(ap,fmt);
- n = vscnprintf(buf,512,fmt,ap);
- early_console->write(early_console,buf,n);
- va_end(ap);
-}
-
-static int __initdata keep_early;
-
-int __init setup_early_printk(char *opt)
-{
- char *space;
- char buf[256];
-
- if (early_console_initialized)
- return 1;
-
- strlcpy(buf,opt,sizeof(buf));
- space = strchr(buf, ' ');
- if (space)
- *space = 0;
-
- if (strstr(buf,"keep"))
- keep_early = 1;
-
- if (!strncmp(buf, "serial", 6)) {
- early_serial_init(buf + 6);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "ttyS", 4)) {
- early_serial_init(buf);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "vga", 3)
-#ifndef CONFIG_XEN
- && SCREEN_INFO.orig_video_isVGA == 1) {
- max_xpos = SCREEN_INFO.orig_video_cols;
- max_ypos = SCREEN_INFO.orig_video_lines;
- current_ypos = SCREEN_INFO.orig_y;
-#else
- || !strncmp(buf, "xen", 3)) {
-#endif
- early_console = &early_vga_console;
- } else if (!strncmp(buf, "simnow", 6)) {
- simnow_init(buf + 6);
- early_console = &simnow_console;
- keep_early = 1;
- }
- early_console_initialized = 1;
- register_console(early_console);
- return 0;
-}
-
-void __init disable_early_printk(void)
-{
- if (!early_console_initialized || !early_console)
- return;
- if (!keep_early) {
- printk("disabling early console\n");
- unregister_console(early_console);
- early_console_initialized = 0;
- } else {
- printk("keeping early console\n");
- }
-}
-
-__setup("earlyprintk=", setup_early_printk);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
deleted file mode 100644
index f9e16e3f78..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
+++ /dev/null
@@ -1,1325 +0,0 @@
-/*
- * linux/arch/x86_64/entry.S
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- *
- * $Id$
- *
- * Jun Nakajima <jun.nakajima@intel.com>
- * Asit Mallick <asit.k.mallick@intel.com>
- * Modified for Xen
- */
-
-/*
- * entry.S contains the system-call and fault low-level handling routines.
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after an interrupt and after each system call.
- *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
- * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
- * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers upto R11.
- * - full stack frame: Like partial stack frame, but all register saved.
- *
- * TODO:
- * - schedule it carefully for the final hardware.
- */
-
-#define ASSEMBLY 1
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/smp.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/dwarf2.h>
-#include <asm/calling.h>
-#include <asm/asm-offsets.h>
-#include <asm/msr.h>
-#include <asm/unistd.h>
-#include <asm/thread_info.h>
-#include <asm/hw_irq.h>
-#include <asm/page.h>
-#include <asm/irqflags.h>
-#include <asm/errno.h>
-#include <xen/interface/arch-x86_64.h>
-#include <xen/interface/features.h>
-
-#include "irq_vectors.h"
-
-#include "xen_entry.S"
-
- .code64
-
-#ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
-#endif
-
-
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
-#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
- jnc 1f
- TRACE_IRQS_ON
-1:
-#endif
-.endm
-
-NMI_MASK = 0x80000000
-
-/*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
- /* %rsp:at FRAMEEND */
- .macro FIXUP_TOP_OF_STACK tmp
- movq $__USER_CS,CS(%rsp)
- movq $-1,RCX(%rsp)
- .endm
-
- .macro RESTORE_TOP_OF_STACK tmp,offset=0
- .endm
-
- .macro FAKE_STACK_FRAME child_rip
- /* push in order ss, rsp, eflags, cs, rip */
- xorl %eax, %eax
- pushq %rax /* ss */
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET ss,0*/
- pushq %rax /* rsp */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rsp,0
- pushq $(1<<9) /* eflags - interrupts on */
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET rflags,0*/
- pushq $__KERNEL_CS /* cs */
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET cs,0*/
- pushq \child_rip /* rip */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip,0
- pushq %rax /* orig rax */
- CFI_ADJUST_CFA_OFFSET 8
- .endm
-
- .macro UNFAKE_STACK_FRAME
- addq $8*6, %rsp
- CFI_ADJUST_CFA_OFFSET -(6*8)
- .endm
-
- .macro CFI_DEFAULT_STACK start=1,adj=0
- .if \start
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET)
- .else
- CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
- .endif
- .if \adj == 0
- CFI_REL_OFFSET r15,R15
- CFI_REL_OFFSET r14,R14
- CFI_REL_OFFSET r13,R13
- CFI_REL_OFFSET r12,R12
- CFI_REL_OFFSET rbp,RBP
- CFI_REL_OFFSET rbx,RBX
- .endif
- CFI_REL_OFFSET r11,R11
- CFI_REL_OFFSET r10,R10
- CFI_REL_OFFSET r9,R9
- CFI_REL_OFFSET r8,R8
- CFI_REL_OFFSET rax,RAX
- CFI_REL_OFFSET rcx,RCX
- CFI_REL_OFFSET rdx,RDX
- CFI_REL_OFFSET rsi,RSI
- CFI_REL_OFFSET rdi,RDI
- CFI_REL_OFFSET rip,RIP
- /*CFI_REL_OFFSET cs,CS*/
- /*CFI_REL_OFFSET rflags,EFLAGS*/
- CFI_REL_OFFSET rsp,RSP
- /*CFI_REL_OFFSET ss,SS*/
- .endm
-
- /*
- * Must be consistent with the definition in arch-x86/xen-x86_64.h:
- * struct iret_context {
- * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
- * };
- * with rax, r11, and rcx being taken care of in the hypercall stub.
- */
- .macro HYPERVISOR_IRET flag
- testb $3,1*8(%rsp)
- jnz 2f
- testl $NMI_MASK,2*8(%rsp)
- jnz 2f
-
- cmpb $0,(xen_features+XENFEAT_supervisor_mode_kernel)(%rip)
- jne 1f
-
- /* Direct iret to kernel space. Correct CS and SS. */
- orl $3,1*8(%rsp)
- orl $3,4*8(%rsp)
-1: iretq
-
-2: /* Slow iret via hypervisor. */
- andl $~NMI_MASK, 2*8(%rsp)
- pushq $\flag
- jmp hypercall_page + (__HYPERVISOR_iret * 32)
- .endm
-
-/*
- * A newly forked process directly context switches into this.
- */
-/* rdi: prev */
-ENTRY(ret_from_fork)
- CFI_DEFAULT_STACK
- call schedule_tail
- GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
- jnz rff_trace
-rff_action:
- RESTORE_REST
- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
- je int_ret_from_sys_call
- testl $_TIF_IA32,threadinfo_flags(%rcx)
- jnz int_ret_from_sys_call
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
- jmp ret_from_sys_call
-rff_trace:
- movq %rsp,%rdi
- call syscall_trace_leave
- GET_THREAD_INFO(%rcx)
- jmp rff_action
- CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * initial frame state for interrupts and exceptions
- */
- .macro _frame ref
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,SS+8-\ref
- /*CFI_REL_OFFSET ss,SS-\ref*/
- CFI_REL_OFFSET rsp,RSP-\ref
- /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
- /*CFI_REL_OFFSET cs,CS-\ref*/
- CFI_REL_OFFSET rip,RIP-\ref
- .endm
-
-/*
- * System call entry. Upto 6 arguments in registers are supported.
- *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.
- */
-
-/*
- * Register setup:
- * rax system call number
- * rdi arg0
- * rcx return address for syscall/sysret, C arg3
- * rsi arg1
- * rdx arg2
- * r10 arg3 (--> moved to rcx for C)
- * r8 arg4
- * r9 arg5
- * r11 eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
- *
- * Interrupts are off on entry.
- * Only called from user space.
- *
- * XXX if we had a free scratch register we could save the RSP into the stack frame
- * and report it properly in ps. Unfortunately we haven't.
- *
- * When user can change the frames always force IRET. That is because
- * it deals with uncanonical addresses better. SYSRET has trouble
- * with them due to bugs in both AMD and Intel CPUs.
- */
-
-ENTRY(system_call)
- _frame (RIP-0x10)
- SAVE_ARGS -8,0
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
- /*
- * No need to follow this irqs off/on section - it's straight
- * and short:
- */
- XEN_UNBLOCK_EVENTS(%r11)
- GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
- CFI_REMEMBER_STATE
- jnz tracesys
- cmpq $__NR_syscall_max,%rax
- ja badsys
- movq %r10,%rcx
- call *sys_call_table(,%rax,8) # XXX: rip relative
- movq %rax,RAX-ARGOFFSET(%rsp)
-/*
- * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
- */
- .globl ret_from_sys_call
-ret_from_sys_call:
- movl $_TIF_ALLWORK_MASK,%edi
- /* edi: flagmask */
-sysret_check:
- GET_THREAD_INFO(%rcx)
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- movl threadinfo_flags(%rcx),%edx
- andl %edi,%edx
- CFI_REMEMBER_STATE
- jnz sysret_careful
- /*
- * sysretq will re-enable interrupts:
- */
- TRACE_IRQS_ON
- XEN_UNBLOCK_EVENTS(%rsi)
- RESTORE_ARGS 0,8,0
- HYPERVISOR_IRET VGCF_IN_SYSCALL
-
- /* Handle reschedules */
- /* edx: work, edi: workmask */
-sysret_careful:
- CFI_RESTORE_STATE
- bt $TIF_NEED_RESCHED,%edx
- jnc sysret_signal
- TRACE_IRQS_ON
- XEN_UNBLOCK_EVENTS(%rsi)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- jmp sysret_check
-
- /* Handle a signal */
-sysret_signal:
- TRACE_IRQS_ON
-/* sti */
- XEN_UNBLOCK_EVENTS(%rsi)
- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
- jz 1f
-
- /* Really a signal */
- /* edx: work flags (arg3) */
- leaq do_notify_resume(%rip),%rax
- leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
- xorl %esi,%esi # oldset -> arg2
- call ptregscall_common
-1: movl $_TIF_NEED_RESCHED,%edi
- /* Use IRET because user could have changed frame. This
- works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- jmp int_with_check
-
-badsys:
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp ret_from_sys_call
-
- /* Do syscall tracing */
-tracesys:
- CFI_RESTORE_STATE
- SAVE_REST
- movq $-ENOSYS,RAX(%rsp)
- FIXUP_TOP_OF_STACK %rdi
- movq %rsp,%rdi
- call syscall_trace_enter
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
- cmpq $__NR_syscall_max,%rax
- ja 1f
- movq %r10,%rcx /* fixup for C */
- call *sys_call_table(,%rax,8)
-1: movq %rax,RAX-ARGOFFSET(%rsp)
- /* Use IRET because user could have changed frame */
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(system_call)
-
-/*
- * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
- */
-ENTRY(int_ret_from_sys_call)
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,SS+8-ARGOFFSET
- /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
- /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
- CFI_REL_OFFSET rdx,RDX-ARGOFFSET
- CFI_REL_OFFSET rcx,RCX-ARGOFFSET
- CFI_REL_OFFSET rax,RAX-ARGOFFSET
- CFI_REL_OFFSET rdi,RDI-ARGOFFSET
- CFI_REL_OFFSET rsi,RSI-ARGOFFSET
- CFI_REL_OFFSET r8,R8-ARGOFFSET
- CFI_REL_OFFSET r9,R9-ARGOFFSET
- CFI_REL_OFFSET r10,R10-ARGOFFSET
- CFI_REL_OFFSET r11,R11-ARGOFFSET
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- testb $3,CS-ARGOFFSET(%rsp)
- jnz 1f
- /* Need to set the proper %ss (not NULL) for ring 3 iretq */
- movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
- jmp retint_restore_args # retrun from ring3 kernel
-1:
- movl $_TIF_ALLWORK_MASK,%edi
- /* edi: mask to check */
-int_with_check:
- GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%edx
- andl %edi,%edx
- jnz int_careful
- andl $~TS_COMPAT,threadinfo_status(%rcx)
- jmp retint_restore_args
-
- /* Either reschedule or signal or syscall exit tracking needed. */
- /* First do a reschedule test. */
- /* edx: work, edi: workmask */
-int_careful:
- bt $TIF_NEED_RESCHED,%edx
- jnc int_very_careful
- TRACE_IRQS_ON
-/* sti */
- XEN_UNBLOCK_EVENTS(%rsi)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- jmp int_with_check
-
- /* handle signals and tracing -- both require a full stack frame */
-int_very_careful:
- TRACE_IRQS_ON
-/* sti */
- XEN_UNBLOCK_EVENTS(%rsi)
- SAVE_REST
- /* Check for syscall exit trace */
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
- jz int_signal
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- leaq 8(%rsp),%rdi # &ptregs -> arg1
- call syscall_trace_leave
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- jmp int_restore_rest
-
-int_signal:
- testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
- jz 1f
- movq %rsp,%rdi # &ptregs -> arg1
- xorl %esi,%esi # oldset -> arg2
- call do_notify_resume
-1: movl $_TIF_NEED_RESCHED,%edi
-int_restore_rest:
- RESTORE_REST
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- jmp int_with_check
- CFI_ENDPROC
-END(int_ret_from_sys_call)
-
-/*
- * Certain special system calls that need to save a complete full stack frame.
- */
-
- .macro PTREGSCALL label,func,arg
- .globl \label
-\label:
- leaq \func(%rip),%rax
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
- jmp ptregscall_common
-END(\label)
- .endm
-
- CFI_STARTPROC
-
- PTREGSCALL stub_clone, sys_clone, %r8
- PTREGSCALL stub_fork, sys_fork, %rdi
- PTREGSCALL stub_vfork, sys_vfork, %rdi
- PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
- PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
- PTREGSCALL stub_iopl, sys_iopl, %rsi
-
-ENTRY(ptregscall_common)
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
- FIXUP_TOP_OF_STACK %r11
- call *%rax
- RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
- CFI_ENDPROC
-END(ptregscall_common)
-
-ENTRY(stub_execve)
- CFI_STARTPROC
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call sys_execve
- RESTORE_TOP_OF_STACK %r11
- movq %rax,RAX(%rsp)
- RESTORE_REST
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(stub_execve)
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
- CFI_STARTPROC
- addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET -8
- SAVE_REST
- movq %rsp,%rdi
- FIXUP_TOP_OF_STACK %r11
- call sys_rt_sigreturn
- movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
- RESTORE_REST
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(stub_rt_sigreturn)
-
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME _frame (RIP-0x10); \
- CFI_REL_OFFSET rcx,0; \
- CFI_REL_OFFSET r11,8
-
-/* initial frame state for exceptions with error code (and interrupts with
- vector already pushed) */
-#define XCPT_FRAME _frame (RIP-0x18); \
- CFI_REL_OFFSET rcx,0; \
- CFI_REL_OFFSET r11,8
-
-/*
- * Interrupt exit.
- *
- */
-
-retint_check:
- CFI_DEFAULT_STACK adj=1
- movl threadinfo_flags(%rcx),%edx
- andl %edi,%edx
- CFI_REMEMBER_STATE
- jnz retint_careful
-retint_restore_args:
- movl EFLAGS-REST_SKIP(%rsp), %eax
- shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
- XEN_GET_VCPU_INFO(%rsi)
- andb evtchn_upcall_mask(%rsi),%al
- andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
- jnz restore_all_enable_events # != 0 => enable event delivery
- XEN_PUT_VCPU_INFO(%rsi)
-
- RESTORE_ARGS 0,8,0
- HYPERVISOR_IRET 0
-
- /* edi: workmask, edx: work */
-retint_careful:
- CFI_RESTORE_STATE
- bt $TIF_NEED_RESCHED,%edx
- jnc retint_signal
- TRACE_IRQS_ON
- XEN_UNBLOCK_EVENTS(%rsi)
-/* sti */
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- GET_THREAD_INFO(%rcx)
- XEN_BLOCK_EVENTS(%rsi)
-/* cli */
- TRACE_IRQS_OFF
- jmp retint_check
-
-retint_signal:
- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
- jz retint_restore_args
- TRACE_IRQS_ON
- XEN_UNBLOCK_EVENTS(%rsi)
- SAVE_REST
- movq $-1,ORIG_RAX(%rsp)
- xorl %esi,%esi # oldset
- movq %rsp,%rdi # &pt_regs
- call do_notify_resume
- RESTORE_REST
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- movl $_TIF_NEED_RESCHED,%edi
- GET_THREAD_INFO(%rcx)
- jmp retint_check
-
-#ifdef CONFIG_PREEMPT
- /* Returning to kernel space. Check if we need preemption */
- /* rcx: threadinfo. interrupts off. */
- .p2align
-retint_kernel:
- cmpl $0,threadinfo_preempt_count(%rcx)
- jnz retint_restore_args
- bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
- jnc retint_restore_args
- bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
- jnc retint_restore_args
- call preempt_schedule_irq
- jmp retint_kernel /* check again */
-#endif
-
- CFI_ENDPROC
-END(retint_check)
-
-#ifndef CONFIG_XEN
-/*
- * APIC interrupts.
- */
- .macro apicinterrupt num,func
- INTR_FRAME
- pushq $~(\num)
- CFI_ADJUST_CFA_OFFSET 8
- interrupt \func
- jmp error_entry
- CFI_ENDPROC
- .endm
-
-ENTRY(thermal_interrupt)
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
-END(thermal_interrupt)
-
-ENTRY(threshold_interrupt)
- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
-END(threshold_interrupt)
-
-#ifdef CONFIG_SMP
-ENTRY(reschedule_interrupt)
- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-END(reschedule_interrupt)
-
- .macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
-END(invalidate_interrupt\num)
- .endm
-
- INVALIDATE_ENTRY 0
- INVALIDATE_ENTRY 1
- INVALIDATE_ENTRY 2
- INVALIDATE_ENTRY 3
- INVALIDATE_ENTRY 4
- INVALIDATE_ENTRY 5
- INVALIDATE_ENTRY 6
- INVALIDATE_ENTRY 7
-
-ENTRY(call_function_interrupt)
- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-ENTRY(apic_timer_interrupt)
- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
-END(apic_timer_interrupt)
-
-ENTRY(error_interrupt)
- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
-END(error_interrupt)
-
-ENTRY(spurious_interrupt)
- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
-END(spurious_interrupt)
-#endif
-#endif /* !CONFIG_XEN */
-
-/*
- * Exception entry points.
- */
- .macro zeroentry sym
- INTR_FRAME
- movq (%rsp),%rcx
- CFI_RESTORE rcx
- movq 8(%rsp),%r11
- CFI_RESTORE r11
- addq $0x10,%rsp /* skip rcx and r11 */
- CFI_ADJUST_CFA_OFFSET -0x10
- pushq $0 /* push error code/oldrax */
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rax /* push real oldrax to the rdi slot */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
- CFI_ENDPROC
- .endm
-
- .macro errorentry sym
- XCPT_FRAME
- movq (%rsp),%rcx
- CFI_RESTORE rcx
- movq 8(%rsp),%r11
- CFI_RESTORE r11
- addq $0x10,%rsp /* rsp points to the error code */
- CFI_ADJUST_CFA_OFFSET -0x10
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
- CFI_ENDPROC
- .endm
-
-#if 0 /* not XEN */
- /* error code is on the stack already */
- /* handle NMI like exceptions that can happen everywhere */
- .macro paranoidentry sym, ist=0, irqtrace=1
- movq (%rsp),%rcx
- movq 8(%rsp),%r11
- addq $0x10,%rsp /* skip rcx and r11 */
- SAVE_ALL
- cld
-#if 0 /* not XEN */
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
- rdmsr
- testl %edx,%edx
- js 1f
- swapgs
- xorl %ebx,%ebx
-1:
-#endif
- .if \ist
- movq %gs:pda_data_offset, %rbp
- .endif
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi
- movq $-1,ORIG_RAX(%rsp)
- .if \ist
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- call \sym
- .if \ist
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
-/* cli */
- XEN_BLOCK_EVENTS(%rsi)
- .if \irqtrace
- TRACE_IRQS_OFF
- .endif
- .endm
-
- /*
- * "Paranoid" exit path from exception stack.
- * Paranoid because this is used by NMIs and cannot take
- * any kernel state for granted.
- * We don't do kernel preemption checks here, because only
- * NMI should be common and it does not enable IRQs and
- * cannot get reschedule ticks.
- *
- * "trace" is 0 for the NMI handler only, because irq-tracing
- * is fundamentally NMI-unsafe. (we cannot change the soft and
- * hard flags at once, atomically)
- */
- .macro paranoidexit trace=1
- /* ebx: no swapgs flag */
-paranoid_exit\trace:
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore\trace
- testl $3,CS(%rsp)
- jnz paranoid_userspace\trace
-paranoid_swapgs\trace:
- TRACE_IRQS_IRETQ 0
- swapgs
-paranoid_restore\trace:
- RESTORE_ALL 8
- iretq
-paranoid_userspace\trace:
- GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs\trace
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule\trace
- movl %ebx,%edx /* arg3: thread flags */
- .if \trace
- TRACE_IRQS_ON
- .endif
- sti
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- cli
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
-paranoid_schedule\trace:
- .if \trace
- TRACE_IRQS_ON
- .endif
- sti
- call schedule
- cli
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
- CFI_ENDPROC
- .endm
-#endif
-
-/*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
-ENTRY(error_entry)
- _frame RDI
- CFI_REL_OFFSET rax,0
- /* rdi slot contains rax, oldrax contains error code */
- cld
- subq $14*8,%rsp
- CFI_ADJUST_CFA_OFFSET (14*8)
- movq %rsi,13*8(%rsp)
- CFI_REL_OFFSET rsi,RSI
- movq 14*8(%rsp),%rsi /* load rax from rdi slot */
- CFI_REGISTER rax,rsi
- movq %rdx,12*8(%rsp)
- CFI_REL_OFFSET rdx,RDX
- movq %rcx,11*8(%rsp)
- CFI_REL_OFFSET rcx,RCX
- movq %rsi,10*8(%rsp) /* store rax */
- CFI_REL_OFFSET rax,RAX
- movq %r8, 9*8(%rsp)
- CFI_REL_OFFSET r8,R8
- movq %r9, 8*8(%rsp)
- CFI_REL_OFFSET r9,R9
- movq %r10,7*8(%rsp)
- CFI_REL_OFFSET r10,R10
- movq %r11,6*8(%rsp)
- CFI_REL_OFFSET r11,R11
- movq %rbx,5*8(%rsp)
- CFI_REL_OFFSET rbx,RBX
- movq %rbp,4*8(%rsp)
- CFI_REL_OFFSET rbp,RBP
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12,R12
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13,R13
- movq %r14,1*8(%rsp)
- CFI_REL_OFFSET r14,R14
- movq %r15,(%rsp)
- CFI_REL_OFFSET r15,R15
-#if 0
- cmpl $__KERNEL_CS,CS(%rsp)
- CFI_REMEMBER_STATE
- je error_kernelspace
-#endif
-error_call_handler:
- movq %rdi, RDI(%rsp)
- CFI_REL_OFFSET rdi,RDI
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi # get error code
- movq $-1,ORIG_RAX(%rsp)
- call *%rax
-error_exit:
- RESTORE_REST
-/* cli */
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- testb $3,CS-ARGOFFSET(%rsp)
- jz retint_kernel
- movl threadinfo_flags(%rcx),%edx
- movl $_TIF_WORK_MASK,%edi
- andl %edi,%edx
- jnz retint_careful
- /*
- * The iret might restore flags:
- */
- TRACE_IRQS_IRETQ
- jmp retint_restore_args
-
-#if 0
- /*
- * We need to re-write the logic here because we don't do iretq to
- * to return to user mode. It's still possible that we get trap/fault
- * in the kernel (when accessing buffers pointed to by system calls,
- * for example).
- *
- */
- CFI_RESTORE_STATE
-error_kernelspace:
- incl %ebx
- /* There are two places in the kernel that can potentially fault with
- usergs. Handle them here. The exception handlers after
- iret run with kernel gs again, so don't set the user space flag.
- B stepping K8s sometimes report an truncated RIP for IRET
- exceptions returning to compat mode. Check for these here too. */
- leaq iret_label(%rip),%rbp
- cmpq %rbp,RIP(%rsp)
- je error_swapgs
- movl %ebp,%ebp /* zero extend */
- cmpq %rbp,RIP(%rsp)
- je error_swapgs
- cmpq $gs_change,RIP(%rsp)
- je error_swapgs
- jmp error_sti
-#endif
- CFI_ENDPROC
-END(error_entry)
-
-ENTRY(hypervisor_callback)
- zeroentry do_hypervisor_callback
-END(hypervisor_callback)
-
-/*
- * Copied from arch/xen/i386/kernel/entry.S
- */
-# A note on the "critical region" in our callback handler.
-# We want to avoid stacking callback handlers due to events occurring
-# during handling of the last event. To do this, we keep events disabled
-# until we've done all processing. HOWEVER, we must enable events before
-# popping the stack frame (can't be done atomically) and so it would still
-# be possible to get enough handler activations to overflow the stack.
-# Although unlikely, bugs of that kind are hard to track down, so we'd
-# like to avoid the possibility.
-# So, on entry to the handler we detect whether we interrupted an
-# existing activation in its critical region -- if so, we pop the current
-# activation and restart the handler using the previous one.
-ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
- CFI_STARTPROC
-# Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
-# see the correct pointer to the pt_regs
- movq %rdi, %rsp # we don't return, adjust the stack frame
- CFI_ENDPROC
- CFI_DEFAULT_STACK
-11: incl %gs:pda_irqcount
- movq %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- cmovzq %gs:pda_irqstackptr,%rsp
- pushq %rbp # backlink for old unwinder
- call evtchn_do_upcall
- popq %rsp
- CFI_DEF_CFA_REGISTER rsp
- decl %gs:pda_irqcount
- jmp error_exit
- CFI_ENDPROC
-END(do_hypervisor_callback)
-
-#ifdef CONFIG_X86_LOCAL_APIC
-KPROBE_ENTRY(nmi)
- zeroentry do_nmi_callback
-ENTRY(do_nmi_callback)
- CFI_STARTPROC
- addq $8, %rsp
- CFI_ENDPROC
- CFI_DEFAULT_STACK
- call do_nmi
- orl $NMI_MASK,EFLAGS(%rsp)
- RESTORE_REST
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- jmp retint_restore_args
- CFI_ENDPROC
- .previous .text
-END(nmi)
-#endif
-
- ALIGN
-restore_all_enable_events:
- CFI_DEFAULT_STACK adj=1
- TRACE_IRQS_ON
- XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
-
-scrit: /**** START OF CRITICAL REGION ****/
- XEN_TEST_PENDING(%rsi)
- CFI_REMEMBER_STATE
- jnz 14f # process more events if necessary...
- XEN_PUT_VCPU_INFO(%rsi)
- RESTORE_ARGS 0,8,0
- HYPERVISOR_IRET 0
-
- CFI_RESTORE_STATE
-14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
- XEN_PUT_VCPU_INFO(%rsi)
- SAVE_REST
- movq %rsp,%rdi # set the argument again
- jmp 11b
- CFI_ENDPROC
-ecrit: /**** END OF CRITICAL REGION ****/
-# At this point, unlike on x86-32, we don't do the fixup to simplify the
-# code and the stack frame is more complex on x86-64.
-# When the kernel is interrupted in the critical section, the kernel
-# will do IRET in that case, and everything will be restored at that point,
-# i.e. it just resumes from the next instruction interrupted with the same context.
-
-# Hypervisor uses this for application faults while it executes.
-# We get here for two reasons:
-# 1. Fault while reloading DS, ES, FS or GS
-# 2. Fault while executing IRET
-# Category 1 we do not need to fix up as Xen has already reloaded all segment
-# registers that could be reloaded and zeroed the others.
-# Category 2 we fix up by killing the current process. We cannot use the
-# normal Linux return path in this case because if we use the IRET hypercall
-# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
-# We distinguish between categories by comparing each saved segment register
-# with its current contents: any discrepancy means we in category 1.
-ENTRY(failsafe_callback)
- _frame (RIP-0x30)
- CFI_REL_OFFSET rcx, 0
- CFI_REL_OFFSET r11, 8
- movw %ds,%cx
- cmpw %cx,0x10(%rsp)
- CFI_REMEMBER_STATE
- jne 1f
- movw %es,%cx
- cmpw %cx,0x18(%rsp)
- jne 1f
- movw %fs,%cx
- cmpw %cx,0x20(%rsp)
- jne 1f
- movw %gs,%cx
- cmpw %cx,0x28(%rsp)
- jne 1f
- /* All segments match their saved values => Category 2 (Bad IRET). */
- movq (%rsp),%rcx
- CFI_RESTORE rcx
- movq 8(%rsp),%r11
- CFI_RESTORE r11
- addq $0x30,%rsp
- CFI_ADJUST_CFA_OFFSET -0x30
- movq $11,%rdi /* SIGSEGV */
- jmp do_exit
- CFI_RESTORE_STATE
-1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
- movq (%rsp),%rcx
- CFI_RESTORE rcx
- movq 8(%rsp),%r11
- CFI_RESTORE r11
- addq $0x30,%rsp
- CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- SAVE_ALL
- jmp error_exit
- CFI_ENDPROC
-#if 0
- .section __ex_table,"a"
- .align 8
- .quad gs_change,bad_gs
- .previous
- .section .fixup,"ax"
- /* running with kernelgs */
-bad_gs:
-/* swapgs */ /* switch back to user gs */
- xorl %eax,%eax
- movl %eax,%gs
- jmp 2b
- .previous
-#endif
-
-/*
- * Create a kernel thread.
- *
- * C extern interface:
- * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- *
- * asm input arguments:
- * rdi: fn, rsi: arg, rdx: flags
- */
-ENTRY(kernel_thread)
- CFI_STARTPROC
- FAKE_STACK_FRAME $child_rip
- SAVE_ALL
-
- # rdi: flags, rsi: usp, rdx: will be &pt_regs
- movq %rdx,%rdi
- orq kernel_thread_flags(%rip),%rdi
- movq $-1, %rsi
- movq %rsp, %rdx
-
- xorl %r8d,%r8d
- xorl %r9d,%r9d
-
- # clone now
- call do_fork
- movq %rax,RAX(%rsp)
- xorl %edi,%edi
-
- /*
- * It isn't worth to check for reschedule here,
- * so internally to the x86_64 port you can rely on kernel_thread()
- * not to reschedule the child before returning, this avoids the need
- * of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
- */
- RESTORE_ALL
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-ENDPROC(kernel_thread)
-
-child_rip:
- pushq $0 # fake return address
- CFI_STARTPROC
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- */
- movq %rdi, %rax
- movq %rsi, %rdi
- call *%rax
- # exit
- xorl %edi, %edi
- call do_exit
- CFI_ENDPROC
-ENDPROC(child_rip)
-
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- * extern long execve(char *name, char **argv, char **envp)
- *
- * asm input arguments:
- * rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
- *
- * do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
- */
-ENTRY(execve)
- CFI_STARTPROC
- FAKE_STACK_FRAME $0
- SAVE_ALL
- call sys_execve
- movq %rax, RAX(%rsp)
- RESTORE_REST
- testq %rax,%rax
- jne 1f
- jmp int_ret_from_sys_call
-1: RESTORE_ARGS
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-ENDPROC(execve)
-
-KPROBE_ENTRY(page_fault)
- errorentry do_page_fault
-END(page_fault)
- .previous .text
-
-ENTRY(coprocessor_error)
- zeroentry do_coprocessor_error
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
- zeroentry do_simd_coprocessor_error
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
- zeroentry math_state_restore
-END(device_not_available)
-
- /* runs on exception stack */
-KPROBE_ENTRY(debug)
-/* INTR_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8 */
- zeroentry do_debug
-/* paranoidexit
- CFI_ENDPROC */
-END(debug)
- .previous .text
-
-#if 0
- /* runs on exception stack */
-KPROBE_ENTRY(nmi)
- INTR_FRAME
- pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_nmi, 0, 0
-#ifdef CONFIG_TRACE_IRQFLAGS
- paranoidexit 0
-#else
- jmp paranoid_exit1
- CFI_ENDPROC
-#endif
-END(nmi)
- .previous .text
-#endif
-
-KPROBE_ENTRY(int3)
-/* INTR_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8 */
- zeroentry do_int3
-/* jmp paranoid_exit1
- CFI_ENDPROC */
-END(int3)
- .previous .text
-
-ENTRY(overflow)
- zeroentry do_overflow
-END(overflow)
-
-ENTRY(bounds)
- zeroentry do_bounds
-END(bounds)
-
-ENTRY(invalid_op)
- zeroentry do_invalid_op
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
- zeroentry do_coprocessor_segment_overrun
-END(coprocessor_segment_overrun)
-
-ENTRY(reserved)
- zeroentry do_reserved
-END(reserved)
-
-#if 0
- /* runs on exception stack */
-ENTRY(double_fault)
- XCPT_FRAME
- paranoidentry do_double_fault
- jmp paranoid_exit1
- CFI_ENDPROC
-END(double_fault)
-#endif
-
-ENTRY(invalid_TSS)
- errorentry do_invalid_TSS
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
- errorentry do_segment_not_present
-END(segment_not_present)
-
- /* runs on exception stack */
-ENTRY(stack_segment)
-/* XCPT_FRAME
- paranoidentry do_stack_segment */
- errorentry do_stack_segment
-/* jmp paranoid_exit1
- CFI_ENDPROC */
-END(stack_segment)
-
-KPROBE_ENTRY(general_protection)
- errorentry do_general_protection
-END(general_protection)
- .previous .text
-
-ENTRY(alignment_check)
- errorentry do_alignment_check
-END(alignment_check)
-
-ENTRY(divide_error)
- zeroentry do_divide_error
-END(divide_error)
-
-ENTRY(spurious_interrupt_bug)
- zeroentry do_spurious_interrupt_bug
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_X86_MCE
- /* runs on exception stack */
-ENTRY(machine_check)
- INTR_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_machine_check
- jmp paranoid_exit1
- CFI_ENDPROC
-END(machine_check)
-#endif
-
-/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(call_softirq)
- CFI_STARTPROC
- push %rbp
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbp,0
- mov %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- incl %gs:pda_irqcount
- cmove %gs:pda_irqstackptr,%rsp
- push %rbp # backlink for old unwinder
- call __do_softirq
- leaveq
- CFI_DEF_CFA_REGISTER rsp
- CFI_ADJUST_CFA_OFFSET -8
- decl %gs:pda_irqcount
- ret
- CFI_ENDPROC
-ENDPROC(call_softirq)
-
-#ifdef CONFIG_STACK_UNWIND
-ENTRY(arch_unwind_init_running)
- CFI_STARTPROC
- movq %r15, R15(%rdi)
- movq %r14, R14(%rdi)
- xchgq %rsi, %rdx
- movq %r13, R13(%rdi)
- movq %r12, R12(%rdi)
- xorl %eax, %eax
- movq %rbp, RBP(%rdi)
- movq %rbx, RBX(%rdi)
- movq (%rsp), %rcx
- movq %rax, R11(%rdi)
- movq %rax, R10(%rdi)
- movq %rax, R9(%rdi)
- movq %rax, R8(%rdi)
- movq %rax, RAX(%rdi)
- movq %rax, RCX(%rdi)
- movq %rax, RDX(%rdi)
- movq %rax, RSI(%rdi)
- movq %rax, RDI(%rdi)
- movq %rax, ORIG_RAX(%rdi)
- movq %rcx, RIP(%rdi)
- leaq 8(%rsp), %rcx
- movq $__KERNEL_CS, CS(%rdi)
- movq %rax, EFLAGS(%rdi)
- movq %rcx, RSP(%rdi)
- movq $__KERNEL_DS, SS(%rdi)
- jmpq *%rdx
- CFI_ENDPROC
-ENDPROC(arch_unwind_init_running)
-#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic-xen.c
deleted file mode 100644
index c04ae589b0..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic-xen.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Generic APIC sub-arch probe layer.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/ipi.h>
-
-#if defined(CONFIG_ACPI)
-#include <acpi/acpi_bus.h>
-#endif
-
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-extern struct genapic apic_cluster;
-extern struct genapic apic_flat;
-extern struct genapic apic_physflat;
-
-#ifndef CONFIG_XEN
-struct genapic *genapic = &apic_flat;
-#else
-extern struct genapic apic_xen;
-struct genapic *genapic = &apic_xen;
-#endif
-
-
-/*
- * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
- */
-void __init clustered_apic_check(void)
-{
-#ifndef CONFIG_XEN
- long i;
- u8 clusters, max_cluster;
- u8 id;
- u8 cluster_cnt[NUM_APIC_CLUSTERS];
- int max_apic = 0;
-
-#if defined(CONFIG_ACPI)
- /*
- * Some x86_64 machines use physical APIC mode regardless of how many
- * procs/clusters are present (x86_64 ES7000 is an example).
- */
- if (acpi_fadt.revision > FADT2_REVISION_ID)
- if (acpi_fadt.force_apic_physical_destination_mode) {
- genapic = &apic_cluster;
- goto print;
- }
-#endif
-
- memset(cluster_cnt, 0, sizeof(cluster_cnt));
- for (i = 0; i < NR_CPUS; i++) {
- id = bios_cpu_apicid[i];
- if (id == BAD_APICID)
- continue;
- if (id > max_apic)
- max_apic = id;
- cluster_cnt[APIC_CLUSTERID(id)]++;
- }
-
- /* Don't use clustered mode on AMD platforms. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- genapic = &apic_physflat;
-#ifndef CONFIG_HOTPLUG_CPU
- /* In the CPU hotplug case we cannot use broadcast mode
- because that opens a race when a CPU is removed.
- Stay at physflat mode in this case.
- It is bad to do this unconditionally though. Once
- we have ACPI platform support for CPU hotplug
- we should detect hotplug capablity from ACPI tables and
- only do this when really needed. -AK */
- if (max_apic <= 8)
- genapic = &apic_flat;
-#endif
- goto print;
- }
-
- clusters = 0;
- max_cluster = 0;
-
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (cluster_cnt[i] > 0) {
- ++clusters;
- if (cluster_cnt[i] > max_cluster)
- max_cluster = cluster_cnt[i];
- }
- }
-
- /*
- * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
- * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
- * else physical mode.
- * (We don't use lowest priority delivery + HW APIC IRQ steering, so
- * can ignore the clustered logical case and go straight to physical.)
- */
- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
-#ifdef CONFIG_HOTPLUG_CPU
- /* Don't use APIC shortcuts in CPU hotplug to avoid races */
- genapic = &apic_physflat;
-#else
- genapic = &apic_flat;
-#endif
- } else
- genapic = &apic_cluster;
-
-print:
-#else
- /* hardcode to xen apic functions */
- genapic = &apic_xen;
-#endif
- printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
-}
-
-/* Same for both flat and clustered. */
-
-#ifdef CONFIG_XEN
-extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
-#endif
-
-void send_IPI_self(int vector)
-{
-#ifndef CONFIG_XEN
- __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
-#else
- xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
-#endif
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c
deleted file mode 100644
index 1171aad778..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Xen APIC subarch code. Maximum 8 CPUs, logical delivery.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- *
- * Hacked to pieces for Xen by Chris Wright.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-#include <asm/smp.h>
-#include <asm/ipi.h>
-#else
-#include <asm/apic.h>
-#include <asm/apicdef.h>
-#include <asm/genapic.h>
-#endif
-#include <xen/evtchn.h>
-
-DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
-
-static inline void __send_IPI_one(unsigned int cpu, int vector)
-{
- int irq = per_cpu(ipi_to_irq, cpu)[vector];
- BUG_ON(irq < 0);
- notify_remote_via_irq(irq);
-}
-
-void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
-{
- int cpu;
-
- switch (shortcut) {
- case APIC_DEST_SELF:
- __send_IPI_one(smp_processor_id(), vector);
- break;
- case APIC_DEST_ALLBUT:
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
- if (cpu == smp_processor_id())
- continue;
- if (cpu_isset(cpu, cpu_online_map)) {
- __send_IPI_one(cpu, vector);
- }
- }
- break;
- case APIC_DEST_ALLINC:
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
- if (cpu_isset(cpu, cpu_online_map)) {
- __send_IPI_one(cpu, vector);
- }
- }
- break;
- default:
- printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
- vector);
- break;
- }
-}
-
-static cpumask_t xen_target_cpus(void)
-{
- return cpu_online_map;
-}
-
-/*
- * Set up the logical destination ID.
- * Do nothing, not called now.
- */
-static void xen_init_apic_ldr(void)
-{
- Dprintk("%s\n", __FUNCTION__);
- return;
-}
-
-static void xen_send_IPI_allbutself(int vector)
-{
- /*
- * if there are no other CPUs in the system then
- * we get an APIC send error if we try to broadcast.
- * thus we have to avoid sending IPIs in this case.
- */
- Dprintk("%s\n", __FUNCTION__);
- if (num_online_cpus() > 1)
- xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
-}
-
-static void xen_send_IPI_all(int vector)
-{
- Dprintk("%s\n", __FUNCTION__);
- xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
-}
-
-static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
-{
- unsigned long mask = cpus_addr(cpumask)[0];
- unsigned int cpu;
- unsigned long flags;
-
- Dprintk("%s\n", __FUNCTION__);
- local_irq_save(flags);
- WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
-
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
- if (cpu_isset(cpu, cpumask)) {
- __send_IPI_one(cpu, vector);
- }
- }
- local_irq_restore(flags);
-}
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-static int xen_apic_id_registered(void)
-{
- /* better be set */
- Dprintk("%s\n", __FUNCTION__);
- return physid_isset(smp_processor_id(), phys_cpu_present_map);
-}
-#endif
-
-static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
-{
- Dprintk("%s\n", __FUNCTION__);
- return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
-}
-
-static unsigned int phys_pkg_id(int index_msb)
-{
- u32 ebx;
-
- Dprintk("%s\n", __FUNCTION__);
- ebx = cpuid_ebx(1);
- return ((ebx >> 24) & 0xFF) >> index_msb;
-}
-
-struct genapic apic_xen = {
- .name = "xen",
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- .int_delivery_mode = dest_LowestPrio,
-#endif
- .int_dest_mode = (APIC_DEST_LOGICAL != 0),
- .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
- .target_cpus = xen_target_cpus,
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- .apic_id_registered = xen_apic_id_registered,
-#endif
- .init_apic_ldr = xen_init_apic_ldr,
- .send_IPI_all = xen_send_IPI_all,
- .send_IPI_allbutself = xen_send_IPI_allbutself,
- .send_IPI_mask = xen_send_IPI_mask,
- .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
- .phys_pkg_id = phys_pkg_id,
-};
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
deleted file mode 100644
index 0ab1074f01..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
- *
- * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
- * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
- *
- * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
- *
- * Jun Nakajima <jun.nakajima@intel.com>
- * Modified for Xen
- */
-
-
-#include <linux/linkage.h>
-#include <linux/threads.h>
-#include <linux/init.h>
-#include <linux/elfnote.h>
-#include <asm/desc.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-#include <asm/cache.h>
-#include <asm/dwarf2.h>
-#include <xen/interface/elfnote.h>
-
- .section .bootstrap.text, "ax", @progbits
- .code64
-#define VIRT_ENTRY_OFFSET 0x0
-.org VIRT_ENTRY_OFFSET
- .globl startup_64
-startup_64:
-ENTRY(_start)
- movq $(init_thread_union+THREAD_SIZE-8),%rsp
-
- /* rsi is pointer to startup info structure.
- pass it to C */
- movq %rsi,%rdi
- pushq $0 # fake return address
- jmp x86_64_start_kernel
-
-ENTRY(stext)
-ENTRY(_stext)
-
- $page = 0
-#define NEXT_PAGE(name) \
- $page = $page + 1; \
- .org $page * 0x1000; \
- phys_##name = $page * 0x1000 + __PHYSICAL_START; \
-ENTRY(name)
-
-NEXT_PAGE(init_level4_pgt)
- /* This gets initialized in x86_64_start_kernel */
- .fill 512,8,0
-
- /*
- * We update two pgd entries to make kernel and user pgd consistent
- * at pgd_populate(). It can be used for kernel modules. So we place
- * this page here for those cases to avoid memory corruption.
- * We also use this page to establish the initiali mapping for
- * vsyscall area.
- */
-NEXT_PAGE(init_level4_user_pgt)
- .fill 512,8,0
-
-NEXT_PAGE(level3_kernel_pgt)
- .fill 512,8,0
-
- /*
- * This is used for vsyscall area mapping as we have a different
- * level4 page table for user.
- */
-NEXT_PAGE(level3_user_pgt)
- .fill 512,8,0
-
-NEXT_PAGE(level2_kernel_pgt)
- .fill 512,8,0
-
-NEXT_PAGE(hypercall_page)
- CFI_STARTPROC
- .rept 0x1000 / 0x20
- .skip 1 /* push %rcx */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rcx,0
- .skip 2 /* push %r11 */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rcx,0
- .skip 5 /* mov $#,%eax */
- .skip 2 /* syscall */
- .skip 2 /* pop %r11 */
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE r11
- .skip 1 /* pop %rcx */
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rcx
- .align 0x20,0 /* ret */
- .endr
- CFI_ENDPROC
-
-#undef NEXT_PAGE
-
- .data
-
- .align 16
- .globl cpu_gdt_descr
-cpu_gdt_descr:
- .word gdt_end-cpu_gdt_table-1
-gdt:
- .quad cpu_gdt_table
-#ifdef CONFIG_SMP
- .rept NR_CPUS-1
- .word 0
- .quad 0
- .endr
-#endif
-
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-
- .section .data.page_aligned, "aw"
- .align PAGE_SIZE
-
-/* The TLS descriptors are currently at a different place compared to i386.
- Hopefully nobody expects them at a fixed place (Wine?) */
-
-ENTRY(cpu_gdt_table)
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0 /* unused */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x00cffa000000ffff /* __USER32_CS */
- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
- .quad 0x00affa000000ffff /* __USER_CS */
- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
- .quad 0,0 /* TSS */
- .quad 0,0 /* LDT */
- .quad 0,0,0 /* three TLS descriptors */
- .quad 0 /* unused */
-gdt_end:
- /* asm/segment.h:GDT_ENTRIES must match this */
- /* This should be a multiple of the cache line size */
- /* GDTs of other CPUs are now dynamically allocated */
-
- /* zero the remaining page */
- .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
-
- .section .bss.page_aligned, "aw", @nobits
- .align PAGE_SIZE
-ENTRY(empty_zero_page)
- .skip PAGE_SIZE
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-/*
- * __xen_guest information
- */
-.macro utoh value
- .if (\value) < 0 || (\value) >= 0x10
- utoh (((\value)>>4)&0x0fffffffffffffff)
- .endif
- .if ((\value) & 0xf) < 10
- .byte '0' + ((\value) & 0xf)
- .else
- .byte 'A' + ((\value) & 0xf) - 10
- .endif
-.endm
-
-.section __xen_guest
- .ascii "GUEST_OS=linux,GUEST_VER=2.6"
- .ascii ",XEN_VER=xen-3.0"
- .ascii ",VIRT_BASE=0x"
- utoh __START_KERNEL_map
- .ascii ",ELF_PADDR_OFFSET=0x"
- utoh __START_KERNEL_map
- .ascii ",VIRT_ENTRY=0x"
- utoh (__START_KERNEL_map + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
- .ascii ",HYPERCALL_PAGE=0x"
- utoh (phys_hypercall_page >> PAGE_SHIFT)
- .ascii ",FEATURES=writable_page_tables"
- .ascii "|writable_descriptor_tables"
- .ascii "|auto_translated_physmap"
- .ascii "|supervisor_mode_kernel"
- .ascii ",LOADER=generic"
- .byte 0
-#endif /* CONFIG_XEN_COMPAT <= 0x030002 */
-
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux")
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6")
- ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
- ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, __START_KERNEL_map)
-#if CONFIG_XEN_COMPAT <= 0x030002
- ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, __START_KERNEL_map)
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0)
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, startup_64)
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT)
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
- ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
- ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
deleted file mode 100644
index a9dab81cd0..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/head64.c -- prepare to run common code
- *
- * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- *
- * Jun Nakajima <jun.nakajima@intel.com>
- * Modified for Xen.
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/percpu.h>
-#include <linux/module.h>
-
-#include <asm/processor.h>
-#include <asm/proto.h>
-#include <asm/smp.h>
-#include <asm/bootsetup.h>
-#include <asm/setup.h>
-#include <asm/desc.h>
-#include <asm/pgtable.h>
-#include <asm/sections.h>
-
-unsigned long start_pfn;
-
-/* Don't add a printk in there. printk relies on the PDA which is not initialized
- yet. */
-#if 0
-static void __init clear_bss(void)
-{
- memset(__bss_start, 0,
- (unsigned long) __bss_stop - (unsigned long) __bss_start);
-}
-#endif
-
-#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR 0x90020
-#define OLD_CL_MAGIC 0xA33F
-#define OLD_CL_BASE_ADDR 0x90000
-#define OLD_CL_OFFSET 0x90022
-
-extern char saved_command_line[];
-
-static void __init copy_bootdata(char *real_mode_data)
-{
-#ifndef CONFIG_XEN
- int new_data;
- char * command_line;
-
- memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
- if (!new_data) {
- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
- printk("so old bootloader that it does not support commandline?!\n");
- return;
- }
- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
- printk("old bootloader convention, maybe loadlin?\n");
- }
- command_line = (char *) ((u64)(new_data));
- memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
-#else
- int max_cmdline;
-
- if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
- max_cmdline = COMMAND_LINE_SIZE;
- memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
- saved_command_line[max_cmdline-1] = '\0';
-#endif
- printk("Bootdata ok (command line is %s)\n", saved_command_line);
-}
-
-static void __init setup_boot_cpu_data(void)
-{
- unsigned int dummy, eax;
-
- /* get vendor info */
- cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
- (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
- (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
- (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
-
- /* get cpu type */
- cpuid(1, &eax, &dummy, &dummy,
- (unsigned int *) &boot_cpu_data.x86_capability);
- boot_cpu_data.x86 = (eax >> 8) & 0xf;
- boot_cpu_data.x86_model = (eax >> 4) & 0xf;
- boot_cpu_data.x86_mask = eax & 0xf;
-}
-
-#include <xen/interface/memory.h>
-unsigned long *machine_to_phys_mapping;
-EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
-
-void __init x86_64_start_kernel(char * real_mode_data)
-{
- struct xen_machphys_mapping mapping;
- unsigned long machine_to_phys_nr_ents;
- char *s;
- int i;
-
- setup_xen_features();
-
- xen_start_info = (struct start_info *)real_mode_data;
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- phys_to_machine_mapping =
- (unsigned long *)xen_start_info->mfn_list;
- start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
- xen_start_info->nr_pt_frames;
-
- machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
- if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
- machine_to_phys_mapping = (unsigned long *)mapping.v_start;
- machine_to_phys_nr_ents = mapping.max_mfn + 1;
- }
- while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
- machine_to_phys_order++;
-
-#if 0
- for (i = 0; i < 256; i++)
- set_intr_gate(i, early_idt_handler);
- asm volatile("lidt %0" :: "m" (idt_descr));
-#endif
-
- /*
- * This must be called really, really early:
- */
- lockdep_init();
-
- for (i = 0; i < NR_CPUS; i++)
- cpu_pda(i) = &boot_cpu_pda[i];
-
- pda_init(0);
- copy_bootdata(real_mode_data);
-#ifdef CONFIG_SMP
- cpu_set(0, cpu_online_map);
-#endif
- s = strstr(saved_command_line, "earlyprintk=");
- if (s != NULL)
- setup_early_printk(strchr(s, '=') + 1);
-#ifdef CONFIG_NUMA
- s = strstr(saved_command_line, "numa=");
- if (s != NULL)
- numa_setup(s+5);
-#endif
-#ifdef CONFIG_X86_IO_APIC
- if (strstr(saved_command_line, "disableapic"))
- disable_apic = 1;
-#endif
- /* You need early console to see that */
- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
- panic("Kernel too big for kernel mapping\n");
-
- setup_boot_cpu_data();
- start_kernel();
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c
deleted file mode 100644
index e0bd1e50e7..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-
-#ifndef CONFIG_X86_NO_TSS
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
-
-/* Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-#endif
-
-#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
deleted file mode 100644
index c1f5172dc6..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
+++ /dev/null
@@ -1,2269 +0,0 @@
-/*
- * Intel IO-APIC support for multi-Pentium hosts.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- * Many thanks to Stig Venaas for trying out countless experimental
- * patches and reporting/debugging problems patiently!
- *
- * (c) 1999, Multiple IO-APIC support, developed by
- * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- * further tested and cleaned up by Zach Brown <zab@redhat.com>
- * and Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively
- * Paul Diefenbaugh : Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/smp_lock.h>
-#include <linux/mc146818rtc.h>
-#include <linux/acpi.h>
-#include <linux/sysdev.h>
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/mach_apic.h>
-#include <asm/acpi.h>
-#include <asm/dma.h>
-#include <asm/nmi.h>
-
-#define __apicdebuginit __init
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-int disable_timer_pin_1 __initdata;
-
-#ifndef CONFIG_XEN
-int timer_over_8254 __initdata = 0;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-#endif
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-static struct irq_pin_list {
- short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
-
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector) \
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
-#else
-#define vector_to_irq(vector) (vector)
-#endif
-
-#ifdef CONFIG_XEN
-
-#include <xen/interface/xen.h>
-#include <xen/interface/physdev.h>
-
-/* Fake i8259 */
-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
-#define disable_8259A_irq(_irq) ((void)0)
-#define i8259A_irq_pending(_irq) (0)
-
-unsigned long io_apic_irqs;
-
-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
-{
- struct physdev_apic apic_op;
- int ret;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
- if (ret)
- return ret;
- return apic_op.value;
-}
-
-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct physdev_apic apic_op;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- apic_op.value = value;
- HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
-}
-
-#define io_apic_read(a,r) xen_io_apic_read(a,r)
-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
-
-#define clear_IO_APIC() ((void)0)
-
-#else
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
- unsigned long flags;
- unsigned int dest;
- cpumask_t tmp;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- cpus_and(mask, tmp, CPU_MASK_ALL);
-
- dest = cpu_mask_to_apicid(mask);
-
- /*
- * Only the high 8 bits are valid.
- */
- dest = SET_APIC_LOGICAL_ID(dest);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __DO_ACTION(1, = dest, )
- set_irq_info(irq, mask);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-#endif
-
-#endif /* !CONFIG_XEN */
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- BUG_ON(irq >= NR_IRQS);
- while (entry->next)
- entry = irq_2_pin + entry->next;
-
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: ran out of irq_2_pin entries!");
- }
- entry->apic = apic;
- entry->pin = pin;
-}
-
-#ifndef CONFIG_XEN
-#define __DO_ACTION(R, ACTION, FINAL) \
- \
-{ \
- int pin; \
- struct irq_pin_list *entry = irq_2_pin + irq; \
- \
- BUG_ON(irq >= NR_IRQS); \
- for (;;) { \
- unsigned int reg; \
- pin = entry->pin; \
- if (pin == -1) \
- break; \
- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
- reg ACTION; \
- io_apic_modify(entry->apic, reg); \
- if (!entry->next) \
- break; \
- entry = irq_2_pin + entry->next; \
- } \
- FINAL; \
-}
-
-#define DO_ACTION(name,R,ACTION, FINAL) \
- \
- static void name##_IO_APIC_irq (unsigned int irq) \
- __DO_ACTION(R, ACTION, FINAL)
-
-DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
- /* mask = 1 */
-DO_ACTION( __unmask, 0, &= 0xfffeffff, )
- /* mask = 0 */
-
-static void mask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (entry.delivery_mode == dest_SMI)
- return;
- /*
- * Disable it in the IO-APIC irq-routing table:
- */
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC (void)
-{
- int apic, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- clear_IO_APIC_pin(apic, pin);
-}
-
-#endif /* !CONFIG_XEN */
-
-static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
-
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-int skip_ioapic_setup;
-int ioapic_force;
-
-/* dummy parsing: see setup.c */
-
-static int __init disable_ioapic_setup(char *str)
-{
- skip_ioapic_setup = 1;
- return 1;
-}
-
-static int __init enable_ioapic_setup(char *str)
-{
- ioapic_force = 1;
- skip_ioapic_setup = 0;
- return 1;
-}
-
-__setup("noapic", disable_ioapic_setup);
-__setup("apic", enable_ioapic_setup);
-
-#ifndef CONFIG_XEN
-static int __init setup_disable_8254_timer(char *s)
-{
- timer_over_8254 = -1;
- return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
- timer_over_8254 = 2;
- return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-#endif /* !CONFIG_XEN */
-
-#include <asm/pci-direct.h>
-#include <linux/pci_ids.h>
-#include <linux/pci.h>
-
-
-#ifdef CONFIG_ACPI
-
-static int nvidia_hpet_detected __initdata;
-
-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
-{
- nvidia_hpet_detected = 1;
- return 0;
-}
-#endif
-
-/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
- off. Check for an Nvidia or VIA PCI bridge and turn it off.
- Use pci direct infrastructure because this runs before the PCI subsystem.
-
- Can be overwritten with "apic"
-
- And another hack to disable the IOMMU on VIA chipsets.
-
- ... and others. Really should move this somewhere else.
-
- Kludge-O-Rama. */
-void __init check_ioapic(void)
-{
- int num,slot,func;
- /* Poor man's PCI discovery */
- for (num = 0; num < 32; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class;
- u32 vendor;
- u8 type;
- class = read_pci_config(num,slot,func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
- continue;
-
- vendor = read_pci_config(num, slot, func,
- PCI_VENDOR_ID);
- vendor &= 0xffff;
- switch (vendor) {
- case PCI_VENDOR_ID_VIA:
-#ifdef CONFIG_IOMMU
- if ((end_pfn > MAX_DMA32_PFN ||
- force_iommu) &&
- !iommu_aperture_allowed) {
- printk(KERN_INFO
- "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
- iommu_aperture_disabled = 1;
- }
-#endif
- return;
- case PCI_VENDOR_ID_NVIDIA:
-#ifdef CONFIG_ACPI
- /*
- * All timer overrides on Nvidia are
- * wrong unless HPET is enabled.
- */
- nvidia_hpet_detected = 0;
- acpi_table_parse(ACPI_HPET,
- nvidia_hpet_check);
- if (nvidia_hpet_detected == 0) {
- acpi_skip_timer_override = 1;
- printk(KERN_INFO "Nvidia board "
- "detected. Ignoring ACPI "
- "timer override.\n");
- }
-#endif
- /* RED-PEN skip them on mptables too? */
- return;
- case PCI_VENDOR_ID_ATI:
-
- /* This should be actually default, but
- for 2.6.16 let's do it for ATI only where
- it's really needed. */
-#ifndef CONFIG_XEN
- if (timer_over_8254 == 1) {
- timer_over_8254 = 0;
- printk(KERN_INFO
- "ATI board detected. Disabling timer routing over 8254.\n");
- }
-#endif
- return;
- }
-
-
- /* No multi-function device? */
- type = read_pci_config_byte(num,slot,func,
- PCI_HEADER_TYPE);
- if (!(type & 0x80))
- break;
- }
- }
- }
-}
-
-static int __init ioapic_pirq_setup(char *str)
-{
- int i, max;
- int ints[MAX_PIRQS+1];
-
- get_options(str, ARRAY_SIZE(ints), ints);
-
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- pirqs_enabled = 1;
- apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
- max = MAX_PIRQS;
- if (ints[0] < MAX_PIRQS)
- max = ints[0];
-
- for (i = 0; i < max; i++) {
- apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
- }
- return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_irqtype == type &&
- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
- mp_irqs[i].mpc_dstirq == pin)
- return i;
-
- return -1;
-}
-
-#ifndef CONFIG_XEN
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
-
- return mp_irqs[i].mpc_dstirq;
- }
- return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
- break;
- }
- if (i < mp_irq_entries) {
- int apic;
- for(apic = 0; apic < nr_ioapics; apic++) {
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
- return apic;
- }
- }
-
- return -1;
-}
-#endif
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if (mp_bus_id_to_pci_bus[bus] == -1) {
- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
- break;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
- !mp_irqs[i].mpc_irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- BUG_ON(best_guess >= NR_IRQS);
- return best_guess;
-}
-
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
- if (irq < 16) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
-}
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value. If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
-#define default_EISA_polarity(idx) (0)
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx) (1)
-#define default_PCI_polarity(idx) (1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx) (1)
-#define default_MCA_polarity(idx) (0)
-
-static int __init MPBIOS_polarity(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int polarity;
-
- /*
- * Determine IRQ line polarity (high active or low active):
- */
- switch (mp_irqs[idx].mpc_irqflag & 3)
- {
- case 0: /* conforms, ie. bus-type dependent polarity */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- polarity = default_ISA_polarity(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- polarity = default_EISA_polarity(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- polarity = default_PCI_polarity(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- polarity = default_MCA_polarity(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- break;
- }
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int trigger;
-
- /*
- * Determine IRQ trigger mode (edge or level sensitive):
- */
- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
- {
- case 0: /* conforms, ie. bus-type dependent */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- trigger = default_ISA_trigger(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- trigger = default_PCI_trigger(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- trigger = default_MCA_trigger(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
- break;
- }
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 0;
- break;
- }
- }
- return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
-static int next_irq = 16;
-
-/*
- * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
- * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
- * from ACPI, which can reach 800 in large boxen.
- *
- * Compact the sparse GSI space into a sequential IRQ series and reuse
- * vectors if possible.
- */
-int gsi_irq_sharing(int gsi)
-{
- int i, tries, vector;
-
- BUG_ON(gsi >= NR_IRQ_VECTORS);
-
- if (platform_legacy_irq(gsi))
- return gsi;
-
- if (gsi_2_irq[gsi] != 0xFF)
- return (int)gsi_2_irq[gsi];
-
- tries = NR_IRQS;
- try_again:
- vector = assign_irq_vector(gsi);
-
- /*
- * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
- * use of vector and if found, return that IRQ. However, we never want
- * to share legacy IRQs, which usually have a different trigger mode
- * than PCI.
- */
- for (i = 0; i < NR_IRQS; i++)
- if (IO_APIC_VECTOR(i) == vector)
- break;
- if (platform_legacy_irq(i)) {
- if (--tries >= 0) {
- IO_APIC_VECTOR(i) = 0;
- goto try_again;
- }
- panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
- }
- if (i < NR_IRQS) {
- gsi_2_irq[gsi] = i;
- printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
- gsi, vector, i);
- return i;
- }
-
- i = next_irq++;
- BUG_ON(i >= NR_IRQS);
- gsi_2_irq[gsi] = i;
- IO_APIC_VECTOR(i) = vector;
- printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
- gsi, vector, i);
- return i;
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
-{
- int irq, i;
- int bus = mp_irqs[idx].mpc_srcbus;
-
- /*
- * Debugging check, we are in big trouble if this message pops up!
- */
- if (mp_irqs[idx].mpc_dstirq != pin)
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- case MP_BUS_EISA:
- case MP_BUS_MCA:
- {
- irq = mp_irqs[idx].mpc_srcbusirq;
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
- irq = gsi_irq_sharing(irq);
- break;
- }
- default:
- {
- printk(KERN_ERR "unknown bus type %d.\n",bus);
- irq = 0;
- break;
- }
- }
- BUG_ON(irq >= NR_IRQS);
-
- /*
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
- */
- if ((pin >= 16) && (pin <= 23)) {
- if (pirq_entries[pin-16] != -1) {
- if (!pirq_entries[pin-16]) {
- apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
- } else {
- irq = pirq_entries[pin-16];
- apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
- pin-16, irq);
- }
- }
- }
- BUG_ON(irq >= NR_IRQS);
- return irq;
-}
-
-static inline int IO_APIC_irq_trigger(int irq)
-{
- int apic, idx, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- idx = find_irq_entry(apic,pin,mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
- return irq_trigger(idx);
- }
- }
- /*
- * nonexistent IRQs are edge default
- */
- return 0;
-}
-
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
-
-int assign_irq_vector(int irq)
-{
- unsigned long flags;
- int vector;
- struct physdev_irq irq_op;
-
- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
-
- spin_lock_irqsave(&vector_lock, flags);
-
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
- spin_unlock_irqrestore(&vector_lock, flags);
- return IO_APIC_VECTOR(irq);
- }
-
- irq_op.irq = irq;
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- spin_unlock_irqrestore(&vector_lock, flags);
- return -ENOSPC;
- }
-
- vector = irq_op.vector;
- vector_irq[vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = vector;
-
- spin_unlock_irqrestore(&vector_lock, flags);
-
- return vector;
-}
-
-extern void (*interrupt[NR_IRQS])(void);
-#ifndef CONFIG_XEN
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
-
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
-{
- unsigned idx;
-
- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
-
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[idx].chip = &ioapic_level_type;
- else
- irq_desc[idx].chip = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[idx]);
-}
-#else
-#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
-#endif /* !CONFIG_XEN */
-
-static void __init setup_IO_APIC_irqs(void)
-{
- struct IO_APIC_route_entry entry;
- int apic, pin, idx, irq, first_notcon = 1, vector;
- unsigned long flags;
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-
- idx = find_irq_entry(apic,pin,mp_INT);
- if (idx == -1) {
- if (first_notcon) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
- first_notcon = 0;
- } else
- apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
- continue;
- }
-
- entry.trigger = irq_trigger(idx);
- entry.polarity = irq_polarity(idx);
-
- if (irq_trigger(idx)) {
- entry.trigger = 1;
- entry.mask = 1;
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- }
-
- irq = pin_2_irq(idx, apic, pin);
- add_pin_to_irq(irq, apic, pin);
-
- if (/* !apic && */ !IO_APIC_IRQ(irq))
- continue;
-
- if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq);
- entry.vector = vector;
-
- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
- if (!apic && (irq < 16))
- disable_8259A_irq(irq);
- }
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(irq, TARGET_CPUS);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- }
- }
-
- if (!first_notcon)
- apic_printk(APIC_VERBOSE," not connected.\n");
-}
-
-#ifndef CONFIG_XEN
-/*
- * Set up the 8259A-master output pin as broadcast to all
- * CPUs.
- */
-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- memset(&entry,0,sizeof(entry));
-
- disable_8259A_irq(0);
-
- /* mask LVT0 */
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* unmask IRQ now */
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
-
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we have a 8259A-master in AEOI mode ...
- */
- irq_desc[0].chip = &ioapic_edge_type;
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- enable_8259A_irq(0);
-}
-
-void __init UNEXPECTED_IO_APIC(void)
-{
-}
-
-void __apicdebuginit print_IO_APIC(void)
-{
- int apic, i;
- union IO_APIC_reg_00 reg_00;
- union IO_APIC_reg_01 reg_01;
- union IO_APIC_reg_02 reg_02;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (i = 0; i < nr_ioapics; i++)
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
-
- /*
- * We are a bit conservative about what we expect. We have to
- * know about every hardware change ASAP.
- */
- printk(KERN_INFO "testing the IO APIC.......................\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- reg_01.raw = io_apic_read(apic, 1);
- if (reg_01.bits.version >= 0x10)
- reg_02.raw = io_apic_read(apic, 2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk("\n");
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
- (reg_01.bits.entries != 0x2E) &&
- (reg_01.bits.entries != 0x3F) &&
- (reg_01.bits.entries != 0x03)
- )
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
- )
- UNEXPECTED_IO_APIC();
- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
-
- if (reg_01.bits.version >= 0x10) {
- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
- }
-
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
- " Stat Dest Deli Vect: \n");
-
- for (i = 0; i <= reg_01.bits.entries; i++) {
- struct IO_APIC_route_entry entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk(KERN_DEBUG " %02x %03X %02X ",
- i,
- entry.dest.logical.logical_dest,
- entry.dest.physical.physical_dest
- );
-
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
- if (use_pci_vector())
- printk(KERN_INFO "Using vector-based indexing\n");
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
- continue;
- if (use_pci_vector() && !platform_legacy_irq(i))
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
- else
- printk(KERN_DEBUG "IRQ%d ", i);
- for (;;) {
- printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- printk("\n");
- }
-
- printk(KERN_INFO ".................................... done.\n");
-
- return;
-}
-
-#if 0
-
-static __apicdebuginit void print_APIC_bitfield (int base)
-{
- unsigned int v;
- int i, j;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
- for (i = 0; i < 8; i++) {
- v = apic_read(base + i*0x10);
- for (j = 0; j < 32; j++) {
- if (v & (1<<j))
- printk("1");
- else
- printk("0");
- }
- printk("\n");
- }
-}
-
-void __apicdebuginit print_local_APIC(void * dummy)
-{
- unsigned int v, ver, maxlvt;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
- smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
- v = apic_read(APIC_LVR);
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
- ver = GET_APIC_VERSION(v);
- maxlvt = get_maxlvt();
-
- v = apic_read(APIC_TASKPRI);
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-
- v = apic_read(APIC_EOI);
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
- v = apic_read(APIC_RRR);
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
- v = apic_read(APIC_LDR);
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
- v = apic_read(APIC_DFR);
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
- v = apic_read(APIC_SPIV);
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
- printk(KERN_DEBUG "... APIC ISR field:\n");
- print_APIC_bitfield(APIC_ISR);
- printk(KERN_DEBUG "... APIC TMR field:\n");
- print_APIC_bitfield(APIC_TMR);
- printk(KERN_DEBUG "... APIC IRR field:\n");
- print_APIC_bitfield(APIC_IRR);
-
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
-
- v = apic_read(APIC_LVTT);
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
- if (maxlvt > 3) { /* PC is LVT#4. */
- v = apic_read(APIC_LVTPC);
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
- }
- v = apic_read(APIC_LVT0);
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
- v = apic_read(APIC_LVT1);
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
- if (maxlvt > 2) { /* ERR is LVT#3. */
- v = apic_read(APIC_LVTERR);
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
- }
-
- v = apic_read(APIC_TMICT);
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
- v = apic_read(APIC_TMCCT);
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
- v = apic_read(APIC_TDCR);
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
- printk("\n");
-}
-
-void print_all_local_APICs (void)
-{
- on_each_cpu(print_local_APIC, NULL, 1, 1);
-}
-
-void __apicdebuginit print_PIC(void)
-{
- unsigned int v;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "\nprinting PIC contents\n");
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- v = inb(0xa1) << 8 | inb(0x21);
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
-
- v = inb(0xa0) << 8 | inb(0x20);
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
-
- outb(0x0b,0xa0);
- outb(0x0b,0x20);
- v = inb(0xa0) << 8 | inb(0x20);
- outb(0x0a,0xa0);
- outb(0x0a,0x20);
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
-
- v = inb(0x4d1) << 8 | inb(0x4d0);
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-#endif /* 0 */
-
-#else
-void __init print_IO_APIC(void) { }
-#endif /* !CONFIG_XEN */
-
-static void __init enable_IO_APIC(void)
-{
- union IO_APIC_reg_01 reg_01;
-#ifndef CONFIG_XEN
- int i8259_apic, i8259_pin;
-#endif
- int i, apic;
- unsigned long flags;
-
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
- if (!pirqs_enabled)
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
- }
-#ifndef CONFIG_XEN
- for(apic = 0; apic < nr_ioapics; apic++) {
- int pin;
- /* See if any of the pins is in ExtINT mode */
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
-
- /* If the interrupt line is enabled and in ExtInt mode
- * I have found the pin where the i8259 is connected.
- */
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
- ioapic_i8259.apic = apic;
- ioapic_i8259.pin = pin;
- goto found_i8259;
- }
- }
- }
- found_i8259:
- /* Look to see what if the MP table has reported the ExtINT */
- i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
- i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
- /* Trust the MP table if nothing is setup in the hardware */
- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
- ioapic_i8259.pin = i8259_pin;
- ioapic_i8259.apic = i8259_apic;
- }
- /* Complain if the MP table and the hardware disagree */
- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
- {
- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
- }
-#endif
-
- /*
- * Do not trust the IO-APIC being empty at bootup
- */
- clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
- /*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
-#ifndef CONFIG_XEN
- /*
- * If the i8259 is routed through an IOAPIC
- * Put that IOAPIC in virtual wire mode
- * so legacy interrupts can be delivered.
- */
- if (ioapic_i8259.pin != -1) {
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- memset(&entry, 0, sizeof(entry));
- entry.mask = 0; /* Enabled */
- entry.trigger = 0; /* Edge */
- entry.irr = 0;
- entry.polarity = 0; /* High */
- entry.delivery_status = 0;
- entry.dest_mode = 0; /* Physical */
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
- entry.vector = 0;
- entry.dest.physical.physical_dest =
- GET_APIC_ID(apic_read(APIC_ID));
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
- *(((int *)&entry)+1));
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
- *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
- }
-
- disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-#endif
-}
-
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
- */
-
-#ifndef CONFIG_XEN
-static void __init setup_ioapic_ids_from_mpc (void)
-{
- union IO_APIC_reg_00 reg_00;
- int apic;
- int i;
- unsigned char old_id;
- unsigned long flags;
-
- /*
- * Set the IOAPIC ID to the value stored in the MPC table.
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- /* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- old_id = mp_ioapics[apic].mpc_apicid;
-
-
- printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
-
-
- /*
- * We need to adjust the IRQ routing table
- * if the ID changed.
- */
- if (old_id != mp_ioapics[apic].mpc_apicid)
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_dstapic == old_id)
- mp_irqs[i].mpc_dstapic
- = mp_ioapics[apic].mpc_apicid;
-
- /*
- * Read the right value from the MPC table and
- * write it into the ID register.
- */
- apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
- mp_ioapics[apic].mpc_apicid);
-
- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, reg_00.raw);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /*
- * Sanity check
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
- printk("could not set ID!\n");
- else
- apic_printk(APIC_VERBOSE," ok.\n");
- }
-}
-#else
-static void __init setup_ioapic_ids_from_mpc(void) { }
-#endif
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- * - timer IRQ defaults to IO-APIC IRQ
- * - if this function detects that timer IRQs are defunct, then we fall
- * back to ISA timer IRQs
- */
-#ifndef CONFIG_XEN
-static int __init timer_irq_works(void)
-{
- unsigned long t1 = jiffies;
-
- local_irq_enable();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
-
- /*
- * Expect a few ticks at least, to be sure some possible
- * glue logic does not lock up after one or two first
- * ticks in a non-ExtINT mode. Also the local APIC
- * might have cached one ExtINT interrupt. Finally, at
- * least one tick may be lost due to delays.
- */
-
- /* jiffies wrap? */
- if (jiffies - t1 > 4)
- return 1;
- return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
-{
- int was_pending = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
- disable_8259A_irq(irq);
- if (i8259A_irq_pending(irq))
- was_pending = 1;
- }
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return was_pending;
-}
-
-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
-{
- move_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
-}
-
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
-{
- unmask_IO_APIC_irq(irq);
-
- return 0; /* don't check for pending */
-}
-
-static void end_level_ioapic_irq (unsigned int irq)
-{
- move_irq(irq);
- ack_APIC_irq();
-}
-
-#ifdef CONFIG_PCI_MSI
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_edge_ioapic_irq(irq);
-}
-
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_level_ioapic_irq (irq);
-}
-
-static void end_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- unmask_IO_APIC_irq(irq);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_vector (unsigned int vector,
- cpumask_t cpu_mask)
-{
- int irq = vector_to_irq(vector);
-
- set_native_irq_info(vector, cpu_mask);
- set_ioapic_affinity_irq(irq, cpu_mask);
-}
-#endif // CONFIG_SMP
-#endif // CONFIG_PCI_MSI
-
-static int ioapic_retrigger(unsigned int irq)
-{
- send_IPI_self(IO_APIC_VECTOR(irq));
-
- return 1;
-}
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
- .typename = "IO-APIC-edge",
- .startup = startup_edge_ioapic,
- .shutdown = shutdown_edge_ioapic,
- .enable = enable_edge_ioapic,
- .disable = disable_edge_ioapic,
- .ack = ack_edge_ioapic,
- .end = end_edge_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
-#endif
- .retrigger = ioapic_retrigger,
-};
-
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
- .typename = "IO-APIC-level",
- .startup = startup_level_ioapic,
- .shutdown = shutdown_level_ioapic,
- .enable = enable_level_ioapic,
- .disable = disable_level_ioapic,
- .ack = mask_and_ack_level_ioapic,
- .end = end_level_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
-#endif
- .retrigger = ioapic_retrigger,
-};
-#endif /* !CONFIG_XEN */
-
-static inline void init_IO_APIC_traps(void)
-{
- int irq;
-
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- for (irq = 0; irq < NR_IRQS ; irq++) {
- int tmp = irq;
- if (use_pci_vector()) {
- if (!platform_legacy_irq(tmp))
- if ((tmp = vector_to_irq(tmp)) == -1)
- continue;
- }
- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
- /*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
- */
- if (irq < 16)
- make_8259A_irq(irq);
-#ifndef CONFIG_XEN
- else
- /* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_type;
-#endif
- }
- }
-}
-
-#ifndef CONFIG_XEN
-static void enable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void disable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq (unsigned int irq)
-{
- ack_APIC_irq();
-}
-
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
-
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- .typename = "local-APIC-edge",
- .startup = NULL, /* startup_irq() not used for IRQ0 */
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- .enable = enable_lapic_irq,
- .disable = disable_lapic_irq,
- .ack = ack_lapic_irq,
- .end = end_lapic_irq,
-};
-
-static void setup_nmi (void)
-{
- /*
- * Dirty trick to enable the NMI watchdog ...
- * We put the 8259A master into AEOI mode and
- * unmask on all local APICs LVT0 as NMI.
- *
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
- * is from Maciej W. Rozycki - so we do not have to EOI from
- * the NMI handler or the timer interrupt.
- */
- printk(KERN_INFO "activating NMI Watchdog ...");
-
- enable_NMI_through_LVT0(NULL);
-
- printk(" done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic. ICR does
- * not support the ExtINT mode, unfortunately. We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA. --macro
- */
-static inline void unlock_ExtINT_logic(void)
-{
- int apic, pin, i;
- struct IO_APIC_route_entry entry0, entry1;
- unsigned char save_control, save_freq_select;
- unsigned long flags;
-
- pin = find_isa_irq_pin(8, mp_INT);
- apic = find_isa_irq_apic(8, mp_INT);
- if (pin == -1)
- return;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- clear_IO_APIC_pin(apic, pin);
-
- memset(&entry1, 0, sizeof(entry1));
-
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest.physical.physical_dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
- entry1.vector = 0;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- save_control = CMOS_READ(RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
- RTC_FREQ_SELECT);
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
- i = 100;
- while (i-- > 0) {
- mdelay(10);
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
- i -= 10;
- }
-
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(apic, pin);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-int timer_uses_ioapic_pin_0;
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for modern platforms only.
- */
-static inline void check_timer(void)
-{
- int apic1, pin1, apic2, pin2;
- int vector;
-
- /*
- * get/set the timer IRQ vector:
- */
- disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
-
- /*
- * Subtle, code in do_timer_interrupt() expects an AEOI
- * mode for the 8259A whenever interrupts are routed
- * through I/O APICs. Also IRQ0 has to be enabled in
- * the 8259A which implies the virtual wire has to be
- * disabled in the local APIC.
- */
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
- if (timer_over_8254 > 0)
- enable_8259A_irq(0);
-
- pin1 = find_isa_irq_pin(0, mp_INT);
- apic1 = find_isa_irq_apic(0, mp_INT);
- pin2 = ioapic_i8259.pin;
- apic2 = ioapic_i8259.apic;
-
- if (pin1 == 0)
- timer_uses_ioapic_pin_0 = 1;
-
- apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
- vector, apic1, pin1, apic2, pin2);
-
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- unmask_IO_APIC_irq(0);
- if (!no_timer_check && timer_irq_works()) {
- nmi_watchdog_default();
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- setup_nmi();
- enable_8259A_irq(0);
- }
- if (disable_timer_pin_1 > 0)
- clear_IO_APIC_pin(0, pin1);
- return;
- }
- clear_IO_APIC_pin(apic1, pin1);
- apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
- "connected to IO-APIC\n");
- }
-
- apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
- "through the 8259A ... ");
- if (pin2 != -1) {
- apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
- apic2, pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
- setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
- if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
- nmi_watchdog_default();
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- }
- return;
- }
- /*
- * Cleanup, just in case ...
- */
- clear_IO_APIC_pin(apic2, pin2);
- }
- apic_printk(APIC_VERBOSE," failed.\n");
-
- if (nmi_watchdog == NMI_IO_APIC) {
- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
- nmi_watchdog = 0;
- }
-
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
-
- disable_8259A_irq(0);
- irq_desc[0].chip = &lapic_irq_type;
- apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
- enable_8259A_irq(0);
-
- if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
- return;
- }
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
- apic_printk(APIC_VERBOSE," failed.\n");
-
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
-
- init_8259A(0);
- make_8259A_irq(0);
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
- unlock_ExtINT_logic();
-
- if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
- return;
- }
- apic_printk(APIC_VERBOSE," failed :(.\n");
- panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
-}
-#else
-#define check_timer() ((void)0)
-int timer_uses_ioapic_pin_0 = 0;
-#endif /* !CONFIG_XEN */
-
-static int __init notimercheck(char *s)
-{
- no_timer_check = 1;
- return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- *
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- * Linux doesn't really care, as it's not actually used
- * for any interrupt handling anyway.
- */
-#define PIC_IRQS (1<<2)
-
-void __init setup_IO_APIC(void)
-{
- enable_IO_APIC();
-
- if (acpi_ioapic)
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
- else
- io_apic_irqs = ~PIC_IRQS;
-
- apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
- /*
- * Set up the IO-APIC IRQ routing table.
- */
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
-#ifndef CONFIG_XEN
- sync_Arb_IDs();
-#endif /* !CONFIG_XEN */
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
-}
-
-struct sysfs_ioapic_data {
- struct sys_device dev;
- struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- unsigned long flags;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- unsigned long flags;
- union IO_APIC_reg_00 reg_00;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
- io_apic_write(dev->id, 0, reg_00.raw);
- }
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
- set_kset_name("ioapic"),
- .suspend = ioapic_suspend,
- .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
- struct sys_device * dev;
- int i, size, error = 0;
-
- error = sysdev_class_register(&ioapic_sysdev_class);
- if (error)
- return error;
-
- for (i = 0; i < nr_ioapics; i++ ) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
- * sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
- if (!mp_ioapic_data[i]) {
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- memset(mp_ioapic_data[i], 0, size);
- dev = &mp_ioapic_data[i]->dev;
- dev->id = i;
- dev->cls = &ioapic_sysdev_class;
- error = sysdev_register(dev);
- if (error) {
- kfree(mp_ioapic_data[i]);
- mp_ioapic_data[i] = NULL;
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- }
-
- return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/* --------------------------------------------------------------------------
- ACPI-based IOAPIC Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-#define IO_APIC_MAX_ID 0xFE
-
-int __init io_apic_get_version (int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.bits.version;
-}
-
-
-int __init io_apic_get_redir_entries (int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
- return -EINVAL;
- }
-
- /*
- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
- * Note that we mask (disable) IRQs now -- these get enabled when the
- * corresponding device driver registers for this IRQ.
- */
-
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.trigger = edge_level;
- entry.polarity = active_high_low;
- entry.mask = 1; /* Disabled (masked) */
-
- irq = gsi_irq_sharing(irq);
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
-
- entry.vector = assign_irq_vector(irq);
-
- apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
- "IRQ %d Mode:%i Active:%i)\n", ioapic,
- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
- edge_level, active_high_low);
-
- ioapic_register_intr(irq, entry.vector, edge_level);
-
- if (!ioapic && (irq < 16))
- disable_8259A_irq(irq);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-
-#ifndef CONFIG_XEN
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
- int pin, ioapic, irq, irq_entry;
-
- if (skip_ioapic_setup == 1)
- return;
-
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
- }
-
- }
-}
-#endif
-#endif /* !CONFIG_XEN */
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/ioport-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/ioport-xen.c
deleted file mode 100644
index a57442c698..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/ioport-xen.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/ioport.c
- *
- * This contains the io-permission bitmap code - written by obz, with changes
- * by Linus.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
-#include <xen/interface/physdev.h>
-
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
-{
- int i;
-
- if (new_value)
- for (i = base; i < base + extent; i++)
- __set_bit(i, bitmap);
- else
- for (i = base; i < base + extent; i++)
- clear_bit(i, bitmap);
-}
-
-/*
- * this changes the io permissions bitmap in the current task.
- */
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
- struct thread_struct * t = &current->thread;
- unsigned long *bitmap;
- struct physdev_set_iobitmap set_iobitmap;
-
- if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
- return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- /*
- * If it's the first ioperm() call in this thread's lifetime, set the
- * IO bitmap up. ioperm() is much less timing critical than clone(),
- * this is why we delay this operation until now:
- */
- if (!t->io_bitmap_ptr) {
- bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!bitmap)
- return -ENOMEM;
-
- memset(bitmap, 0xff, IO_BITMAP_BYTES);
- t->io_bitmap_ptr = bitmap;
-
- set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
- set_iobitmap.nr_ports = IO_BITMAP_BITS;
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &set_iobitmap);
- }
-
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
-
- return 0;
-}
-
-/*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
- *
- */
-
-asmlinkage long sys_iopl(unsigned int new_iopl, struct pt_regs *regs)
-{
- unsigned int old_iopl = current->thread.iopl;
- struct physdev_set_iopl set_iopl;
-
- if (new_iopl > 3)
- return -EINVAL;
-
- /* Need "raw I/O" privileges for direct port access. */
- if ((new_iopl > old_iopl) && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- /* Change our version of the privilege levels. */
- current->thread.iopl = new_iopl;
-
- /* Force the change at ring 0. */
- set_iopl.iopl = (new_iopl == 0) ? 1 : new_iopl;
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-
- return 0;
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c
deleted file mode 100644
index 02cd6e2807..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/irq.c
- *
- * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the lowest level x86_64-specific interrupt
- * entry and irq statistics code. All the remaining irq logic is
- * done by the generic kernel/irq/ code and in the
- * x86_64-specific irq controller code. (e.g. i8259.c and
- * io_apic.c.)
- */
-
-#include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <asm/uaccess.h>
-#include <asm/io_apic.h>
-#include <asm/idle.h>
-
-atomic_t irq_err_count;
-#ifdef CONFIG_X86_IO_APIC
-#ifdef APIC_MISMATCH_DEBUG
-atomic_t irq_mis_count;
-#endif
-#endif
-
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-/*
- * Probabilistic stack overflow check:
- *
- * Only check the stack in process context, because everything else
- * runs on the big interrupt stacks. Checking reliably is too expensive,
- * so we just check from interrupts.
- */
-static inline void stack_overflow_check(struct pt_regs *regs)
-{
- u64 curbase = (u64) current->thread_info;
- static unsigned long warned = -60*HZ;
-
- if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
- regs->rsp < curbase + sizeof(struct thread_info) + 128 &&
- time_after(jiffies, warned + 60*HZ)) {
- printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
- current->comm, curbase, regs->rsp);
- show_stack(NULL,NULL);
- warned = jiffies;
- }
-}
-#endif
-
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
- int i = *(loff_t *) v, j;
- struct irqaction * action;
- unsigned long flags;
-
- if (i == 0) {
- seq_printf(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d",j);
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
- if (!action)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_printf(p, " %14s", irq_desc[i].chip->typename);
-
- seq_printf(p, " %s", action->name);
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS) {
- seq_printf(p, "NMI: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
- seq_putc(p, '\n');
-#ifdef CONFIG_X86_LOCAL_APIC
- seq_printf(p, "LOC: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
- seq_putc(p, '\n');
-#endif
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#ifdef CONFIG_X86_IO_APIC
-#ifdef APIC_MISMATCH_DEBUG
- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-#endif
- }
- return 0;
-}
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
-{
- /* high bit used in ret_from_ code */
- unsigned irq = ~regs->orig_rax;
-
- if (unlikely(irq >= NR_IRQS)) {
- printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
- __FUNCTION__, irq);
- BUG();
- }
-
- exit_idle();
- irq_enter();
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
- stack_overflow_check(regs);
-#endif
- __do_IRQ(irq, regs);
- irq_exit();
-
- return 1;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
-{
- unsigned int irq;
- static int warned;
-
- for (irq = 0; irq < NR_IRQS; irq++) {
- cpumask_t mask;
- if (irq == 2)
- continue;
-
- cpus_and(mask, irq_desc[irq].affinity, map);
- if (any_online_cpu(mask) == NR_CPUS) {
- printk("Breaking affinity for irq %i\n", irq);
- mask = map;
- }
- if (irq_desc[irq].chip->set_affinity)
- irq_desc[irq].chip->set_affinity(irq, mask);
- else if (irq_desc[irq].action && !(warned++))
- printk("Cannot set affinity for irq %i\n", irq);
- }
-
- /* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-}
-#endif
-
-extern void call_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
- __u32 pending;
- unsigned long flags;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
- pending = local_softirq_pending();
- /* Switch to interrupt stack */
- if (pending) {
- call_softirq();
- WARN_ON_ONCE(softirq_count());
- }
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(do_softirq);
-
-#ifndef CONFIG_X86_LOCAL_APIC
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/ldt-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/ldt-xen.c
deleted file mode 100644
index 6efbb64f80..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/ldt-xen.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/ldt.c
- *
- * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2002 Andi Kleen
- *
- * This handles calls from both 32bit and 64bit mode.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/pgalloc.h>
-
-#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
-static void flush_ldt(void *null)
-{
- if (current->active_mm)
- load_LDT(&current->active_mm->context);
-}
-#endif
-
-static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
-{
- void *oldldt;
- void *newldt;
- unsigned oldsize;
-
- if (mincount <= (unsigned)pc->size)
- return 0;
- oldsize = pc->size;
- mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
- else
- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
-
- if (oldsize)
- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
- oldldt = pc->ldt;
- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
- wmb();
- pc->ldt = newldt;
- wmb();
- pc->size = mincount;
- wmb();
- if (reload) {
-#ifdef CONFIG_SMP
- cpumask_t mask;
-
- preempt_disable();
-#endif
- make_pages_readonly(
- pc->ldt,
- (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
- XENFEAT_writable_descriptor_tables);
- load_LDT(pc);
-#ifdef CONFIG_SMP
- mask = cpumask_of_cpu(smp_processor_id());
- if (!cpus_equal(current->mm->cpu_vm_mask, mask))
- smp_call_function(flush_ldt, NULL, 1, 1);
- preempt_enable();
-#endif
- }
- if (oldsize) {
- make_pages_writable(
- oldldt,
- (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE,
- XENFEAT_writable_descriptor_tables);
- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(oldldt);
- else
- kfree(oldldt);
- }
- return 0;
-}
-
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
-{
- int err = alloc_ldt(new, old->size, 0);
- if (err < 0)
- return err;
- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
- make_pages_readonly(
- new->ldt,
- (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
- XENFEAT_writable_descriptor_tables);
- return 0;
-}
-
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- struct mm_struct * old_mm;
- int retval = 0;
-
- memset(&mm->context, 0, sizeof(mm->context));
- init_MUTEX(&mm->context.sem);
- old_mm = current->mm;
- if (old_mm && old_mm->context.size > 0) {
- down(&old_mm->context.sem);
- retval = copy_ldt(&mm->context, &old_mm->context);
- up(&old_mm->context.sem);
- }
- if (retval == 0) {
- spin_lock(&mm_unpinned_lock);
- list_add(&mm->context.unpinned, &mm_unpinned);
- spin_unlock(&mm_unpinned_lock);
- }
- return retval;
-}
-
-/*
- *
- * Don't touch the LDT register - we're already in the next thread.
- */
-void destroy_context(struct mm_struct *mm)
-{
- if (mm->context.size) {
- if (mm == current->active_mm)
- clear_LDT();
- make_pages_writable(
- mm->context.ldt,
- (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE,
- XENFEAT_writable_descriptor_tables);
- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(mm->context.ldt);
- else
- kfree(mm->context.ldt);
- mm->context.size = 0;
- }
- if (!mm->context.pinned) {
- spin_lock(&mm_unpinned_lock);
- list_del(&mm->context.unpinned);
- spin_unlock(&mm_unpinned_lock);
- }
-}
-
-static int read_ldt(void __user * ptr, unsigned long bytecount)
-{
- int err;
- unsigned long size;
- struct mm_struct * mm = current->mm;
-
- if (!mm->context.size)
- return 0;
- if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
- bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
-
- down(&mm->context.sem);
- size = mm->context.size*LDT_ENTRY_SIZE;
- if (size > bytecount)
- size = bytecount;
-
- err = 0;
- if (copy_to_user(ptr, mm->context.ldt, size))
- err = -EFAULT;
- up(&mm->context.sem);
- if (err < 0)
- goto error_return;
- if (size != bytecount) {
- /* zero-fill the rest */
- if (clear_user(ptr+size, bytecount-size) != 0) {
- err = -EFAULT;
- goto error_return;
- }
- }
- return bytecount;
-error_return:
- return err;
-}
-
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
-{
- /* Arbitrary number */
- /* x86-64 default LDT is all zeros */
- if (bytecount > 128)
- bytecount = 128;
- if (clear_user(ptr, bytecount))
- return -EFAULT;
- return bytecount;
-}
-
-static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
-{
- struct task_struct *me = current;
- struct mm_struct * mm = me->mm;
- __u32 entry_1, entry_2, *lp;
- unsigned long mach_lp;
- int error;
- struct user_desc ldt_info;
-
- error = -EINVAL;
-
- if (bytecount != sizeof(ldt_info))
- goto out;
- error = -EFAULT;
- if (copy_from_user(&ldt_info, ptr, bytecount))
- goto out;
-
- error = -EINVAL;
- if (ldt_info.entry_number >= LDT_ENTRIES)
- goto out;
- if (ldt_info.contents == 3) {
- if (oldmode)
- goto out;
- if (ldt_info.seg_not_present == 0)
- goto out;
- }
-
- down(&mm->context.sem);
- if (ldt_info.entry_number >= (unsigned)mm->context.size) {
- error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
- if (error < 0)
- goto out_unlock;
- }
-
- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
- mach_lp = arbitrary_virt_to_machine(lp);
-
- /* Allow LDTs to be cleared by the user. */
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode || LDT_empty(&ldt_info)) {
- entry_1 = 0;
- entry_2 = 0;
- goto install;
- }
- }
-
- entry_1 = LDT_entry_a(&ldt_info);
- entry_2 = LDT_entry_b(&ldt_info);
- if (oldmode)
- entry_2 &= ~(1 << 20);
-
- /* Install the new entry ... */
-install:
- error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32)));
-
-out_unlock:
- up(&mm->context.sem);
-out:
- return error;
-}
-
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-{
- int ret = -ENOSYS;
-
- switch (func) {
- case 0:
- ret = read_ldt(ptr, bytecount);
- break;
- case 1:
- ret = write_ldt(ptr, bytecount, 1);
- break;
- case 2:
- ret = read_default_ldt(ptr, bytecount);
- break;
- case 0x11:
- ret = write_ldt(ptr, bytecount, 0);
- break;
- }
- return ret;
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/machine_kexec.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/machine_kexec.c
deleted file mode 100644
index f1d9583880..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/machine_kexec.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * machine_kexec.c - handle transition of Linux booting another kernel
- * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/kexec.h>
-#include <linux/string.h>
-#include <linux/reboot.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/io.h>
-
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u64 kexec_pgd[512] PAGE_ALIGNED;
-static u64 kexec_pud0[512] PAGE_ALIGNED;
-static u64 kexec_pmd0[512] PAGE_ALIGNED;
-static u64 kexec_pte0[512] PAGE_ALIGNED;
-static u64 kexec_pud1[512] PAGE_ALIGNED;
-static u64 kexec_pmd1[512] PAGE_ALIGNED;
-static u64 kexec_pte1[512] PAGE_ALIGNED;
-
-#ifdef CONFIG_XEN
-
-/* In the case of Xen, override hypervisor functions to be able to create
- * a regular identity mapping page table...
- */
-
-#include <xen/interface/kexec.h>
-#include <xen/interface/memory.h>
-
-#define x__pmd(x) ((pmd_t) { (x) } )
-#define x__pud(x) ((pud_t) { (x) } )
-#define x__pgd(x) ((pgd_t) { (x) } )
-
-#define x_pmd_val(x) ((x).pmd)
-#define x_pud_val(x) ((x).pud)
-#define x_pgd_val(x) ((x).pgd)
-
-static inline void x_set_pmd(pmd_t *dst, pmd_t val)
-{
- x_pmd_val(*dst) = x_pmd_val(val);
-}
-
-static inline void x_set_pud(pud_t *dst, pud_t val)
-{
- x_pud_val(*dst) = phys_to_machine(x_pud_val(val));
-}
-
-static inline void x_pud_clear (pud_t *pud)
-{
- x_pud_val(*pud) = 0;
-}
-
-static inline void x_set_pgd(pgd_t *dst, pgd_t val)
-{
- x_pgd_val(*dst) = phys_to_machine(x_pgd_val(val));
-}
-
-static inline void x_pgd_clear (pgd_t * pgd)
-{
- x_pgd_val(*pgd) = 0;
-}
-
-#define X__PAGE_KERNEL_LARGE_EXEC \
- _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_PSE
-#define X_KERNPG_TABLE _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY
-
-#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
-
-#if PAGES_NR > KEXEC_XEN_NO_PAGES
-#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
-#endif
-
-#if PA_CONTROL_PAGE != 0
-#error PA_CONTROL_PAGE is non zero - Xen support will break
-#endif
-
-void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
-{
- void *control_page;
- void *table_page;
-
- memset(xki->page_list, 0, sizeof(xki->page_list));
-
- control_page = page_address(image->control_code_page) + PAGE_SIZE;
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
-
- table_page = page_address(image->control_code_page);
-
- xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
- xki->page_list[PA_TABLE_PAGE] = __ma(table_page);
-
- xki->page_list[PA_PGD] = __ma(kexec_pgd);
- xki->page_list[PA_PUD_0] = __ma(kexec_pud0);
- xki->page_list[PA_PUD_1] = __ma(kexec_pud1);
- xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
- xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
- xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
- xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
-}
-
-#else /* CONFIG_XEN */
-
-#define x__pmd(x) __pmd(x)
-#define x__pud(x) __pud(x)
-#define x__pgd(x) __pgd(x)
-
-#define x_set_pmd(x, y) set_pmd(x, y)
-#define x_set_pud(x, y) set_pud(x, y)
-#define x_set_pgd(x, y) set_pgd(x, y)
-
-#define x_pud_clear(x) pud_clear(x)
-#define x_pgd_clear(x) pgd_clear(x)
-
-#define X__PAGE_KERNEL_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
-#define X_KERNPG_TABLE _KERNPG_TABLE
-
-#endif /* CONFIG_XEN */
-
-static void init_level2_page(pmd_t *level2p, unsigned long addr)
-{
- unsigned long end_addr;
-
- addr &= PAGE_MASK;
- end_addr = addr + PUD_SIZE;
- while (addr < end_addr) {
- x_set_pmd(level2p++, x__pmd(addr | X__PAGE_KERNEL_LARGE_EXEC));
- addr += PMD_SIZE;
- }
-}
-
-static int init_level3_page(struct kimage *image, pud_t *level3p,
- unsigned long addr, unsigned long last_addr)
-{
- unsigned long end_addr;
- int result;
-
- result = 0;
- addr &= PAGE_MASK;
- end_addr = addr + PGDIR_SIZE;
- while ((addr < last_addr) && (addr < end_addr)) {
- struct page *page;
- pmd_t *level2p;
-
- page = kimage_alloc_control_pages(image, 0);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- level2p = (pmd_t *)page_address(page);
- init_level2_page(level2p, addr);
- x_set_pud(level3p++, x__pud(__pa(level2p) | X_KERNPG_TABLE));
- addr += PUD_SIZE;
- }
- /* clear the unused entries */
- while (addr < end_addr) {
- x_pud_clear(level3p++);
- addr += PUD_SIZE;
- }
-out:
- return result;
-}
-
-
-static int init_level4_page(struct kimage *image, pgd_t *level4p,
- unsigned long addr, unsigned long last_addr)
-{
- unsigned long end_addr;
- int result;
-
- result = 0;
- addr &= PAGE_MASK;
- end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
- while ((addr < last_addr) && (addr < end_addr)) {
- struct page *page;
- pud_t *level3p;
-
- page = kimage_alloc_control_pages(image, 0);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- level3p = (pud_t *)page_address(page);
- result = init_level3_page(image, level3p, addr, last_addr);
- if (result) {
- goto out;
- }
- x_set_pgd(level4p++, x__pgd(__pa(level3p) | X_KERNPG_TABLE));
- addr += PGDIR_SIZE;
- }
- /* clear the unused entries */
- while (addr < end_addr) {
- x_pgd_clear(level4p++);
- addr += PGDIR_SIZE;
- }
-out:
- return result;
-}
-
-
-static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
-{
- pgd_t *level4p;
- unsigned long x_end_pfn = end_pfn;
-
-#ifdef CONFIG_XEN
- x_end_pfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
-#endif
-
- level4p = (pgd_t *)__va(start_pgtable);
- return init_level4_page(image, level4p, 0, x_end_pfn << PAGE_SHIFT);
-}
-
-int machine_kexec_prepare(struct kimage *image)
-{
- unsigned long start_pgtable;
- int result;
-
- /* Calculate the offsets */
- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-
- /* Setup the identity mapped 64bit page table */
- result = init_pgtable(image, start_pgtable);
- if (result)
- return result;
-
- return 0;
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
- return;
-}
-
-#ifndef CONFIG_XEN
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-NORET_TYPE void machine_kexec(struct kimage *image)
-{
- unsigned long page_list[PAGES_NR];
- void *control_page;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
- control_page = page_address(image->control_code_page) + PAGE_SIZE;
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
-
- page_list[PA_CONTROL_PAGE] = __pa(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
- page_list[PA_PGD] = __pa(kexec_pgd);
- page_list[VA_PGD] = (unsigned long)kexec_pgd;
- page_list[PA_PUD_0] = __pa(kexec_pud0);
- page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
- page_list[PA_PMD_0] = __pa(kexec_pmd0);
- page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PTE_0] = __pa(kexec_pte0);
- page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PUD_1] = __pa(kexec_pud1);
- page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
- page_list[PA_PMD_1] = __pa(kexec_pmd1);
- page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
- page_list[PA_PTE_1] = __pa(kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
- page_list[PA_TABLE_PAGE] =
- (unsigned long)__pa(page_address(image->control_code_page));
-
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
-}
-#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
deleted file mode 100644
index 5ae1c68bd4..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * Intel Multiprocessor Specification 1.1 and 1.4
- * compliant MP-table parsing routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Erich Boleyn : MP v1.4 and additional changes.
- * Alan Cox : Added EBDA scanning
- * Ingo Molnar : various cleanups and rewrites
- * Maciej W. Rozycki: Bits for default MP configurations
- * Paul Diefenbaugh: Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-
-/* Have we found an MP table */
-int smp_found_config;
-unsigned int __initdata maxcpus = NR_CPUS;
-
-int acpi_found_madt;
-
-/*
- * Various Linux-internal data structures created from the
- * MP-table.
- */
-unsigned char apic_version [MAX_APICS];
-unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-
-static int mp_current_pci_id = 0;
-/* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* MP IRQ source entries */
-int mp_irq_entries;
-
-int nr_ioapics;
-int pic_mode;
-unsigned long mp_lapic_addr = 0;
-
-
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_id = -1U;
-/* Internal processor count */
-unsigned int num_processors __initdata = 0;
-
-unsigned disabled_cpus __initdata;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
-
-/* ACPI MADT entry parsing functions */
-#ifdef CONFIG_ACPI
-extern struct acpi_boot_flags acpi_boot;
-#ifdef CONFIG_X86_LOCAL_APIC
-extern int acpi_parse_lapic (acpi_table_entry_header *header);
-extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
-extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
-#endif /*CONFIG_X86_LOCAL_APIC*/
-#ifdef CONFIG_X86_IO_APIC
-extern int acpi_parse_ioapic (acpi_table_entry_header *header);
-#endif /*CONFIG_X86_IO_APIC*/
-#endif /*CONFIG_ACPI*/
-
-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-
-/*
- * Intel MP BIOS table parsing routines:
- */
-
-/*
- * Checksum an MP configuration block.
- */
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
-
-#ifndef CONFIG_XEN
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
-{
- int cpu;
- unsigned char ver;
- cpumask_t tmp_map;
-
- if (!(m->mpc_cpuflag & CPU_ENABLED)) {
- disabled_cpus++;
- return;
- }
-
- printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
- m->mpc_apicid,
- (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
- m->mpc_apicver);
-
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- Dprintk(" Bootup CPU\n");
- boot_cpu_id = m->mpc_apicid;
- }
- if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
- return;
- }
-
- num_processors++;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
-
-#if MAX_APICS < 255
- if ((int)m->mpc_apicid > MAX_APICS) {
- printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
- m->mpc_apicid, MAX_APICS);
- return;
- }
-#endif
- ver = m->mpc_apicver;
-
- physid_set(m->mpc_apicid, phys_cpu_present_map);
- /*
- * Validate version
- */
- if (ver == 0x0) {
- printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
- ver = 0x10;
- }
- apic_version[m->mpc_apicid] = ver;
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- /*
- * bios_cpu_apicid is required to have processors listed
- * in same order as logical cpu numbers. Hence the first
- * entry is BSP, and so on.
- */
- cpu = 0;
- }
- bios_cpu_apicid[cpu] = m->mpc_apicid;
- x86_cpu_to_apicid[cpu] = m->mpc_apicid;
-
- cpu_set(cpu, cpu_possible_map);
- cpu_set(cpu, cpu_present_map);
-}
-#else
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
-{
- num_processors++;
-}
-#endif /* CONFIG_XEN */
-
-static void __init MP_bus_info (struct mpc_config_bus *m)
-{
- char str[7];
-
- memcpy(str, m->mpc_bustype, 6);
- str[6] = 0;
- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
-
- if (strncmp(str, "ISA", 3) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
- } else if (strncmp(str, "EISA", 4) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
- } else if (strncmp(str, "PCI", 3) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
- mp_current_pci_id++;
- } else if (strncmp(str, "MCA", 3) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
- } else {
- printk(KERN_ERR "Unknown bustype %s\n", str);
- }
-}
-
-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
-{
- if (!(m->mpc_flags & MPC_APIC_USABLE))
- return;
-
- printk("I/O APIC #%d Version %d at 0x%X.\n",
- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
- MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
- }
- if (!m->mpc_apicaddr) {
- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
- " found in MP table, skipping!\n");
- return;
- }
- mp_ioapics[nr_ioapics] = *m;
- nr_ioapics++;
-}
-
-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
-{
- mp_irqs [mp_irq_entries] = *m;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries >= MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
-
-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
-{
- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
- /*
- * Well it seems all SMP boards in existence
- * use ExtINT/LVT1 == LINT0 and
- * NMI/LVT2 == LINT1 - the following check
- * will show us if this assumptions is false.
- * Until then we do not have to add baggage.
- */
- if ((m->mpc_irqtype == mp_ExtINT) &&
- (m->mpc_destapiclint != 0))
- BUG();
- if ((m->mpc_irqtype == mp_NMI) &&
- (m->mpc_destapiclint != 1))
- BUG();
-}
-
-/*
- * Read/parse the MPC
- */
-
-static int __init smp_read_mpc(struct mp_config_table *mpc)
-{
- char str[16];
- int count=sizeof(*mpc);
- unsigned char *mpt=((unsigned char *)mpc)+count;
-
- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- printk("SMP mptable: bad signature [%c%c%c%c]!\n",
- mpc->mpc_signature[0],
- mpc->mpc_signature[1],
- mpc->mpc_signature[2],
- mpc->mpc_signature[3]);
- return 0;
- }
- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- printk("SMP mptable: checksum error!\n");
- return 0;
- }
- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
- mpc->mpc_spec);
- return 0;
- }
- if (!mpc->mpc_lapic) {
- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
- return 0;
- }
- memcpy(str,mpc->mpc_oem,8);
- str[8]=0;
- printk(KERN_INFO "OEM ID: %s ",str);
-
- memcpy(str,mpc->mpc_productid,12);
- str[12]=0;
- printk("Product ID: %s ",str);
-
- printk("APIC at: 0x%X\n",mpc->mpc_lapic);
-
- /* save the local APIC address, it might be non-default */
- if (!acpi_lapic)
- mp_lapic_addr = mpc->mpc_lapic;
-
- /*
- * Now process the configuration blocks.
- */
- while (count < mpc->mpc_length) {
- switch(*mpt) {
- case MP_PROCESSOR:
- {
- struct mpc_config_processor *m=
- (struct mpc_config_processor *)mpt;
- if (!acpi_lapic)
- MP_processor_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_BUS:
- {
- struct mpc_config_bus *m=
- (struct mpc_config_bus *)mpt;
- MP_bus_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_IOAPIC:
- {
- struct mpc_config_ioapic *m=
- (struct mpc_config_ioapic *)mpt;
- MP_ioapic_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_INTSRC:
- {
- struct mpc_config_intsrc *m=
- (struct mpc_config_intsrc *)mpt;
-
- MP_intsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_LINTSRC:
- {
- struct mpc_config_lintsrc *m=
- (struct mpc_config_lintsrc *)mpt;
- MP_lintsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- }
- }
- clustered_apic_check();
- if (!num_processors)
- printk(KERN_ERR "SMP mptable: no processors registered!\n");
- return num_processors;
-}
-
-static int __init ELCR_trigger(unsigned int irq)
-{
- unsigned int port;
-
- port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
-}
-
-static void __init construct_default_ioirq_mptable(int mpc_default_type)
-{
- struct mpc_config_intsrc intsrc;
- int i;
- int ELCR_fallback = 0;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* conforming */
- intsrc.mpc_srcbus = 0;
- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-
- intsrc.mpc_irqtype = mp_INT;
-
- /*
- * If true, we have an ISA/PCI system with no IRQ entries
- * in the MP table. To prevent the PCI interrupts from being set up
- * incorrectly, we try to use the ELCR. The sanity check to see if
- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
- * never be level sensitive, so we simply see if the ELCR agrees.
- * If it does, we assume it's valid.
- */
- if (mpc_default_type == 5) {
- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
-
- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
- else {
- printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
- ELCR_fallback = 1;
- }
- }
-
- for (i = 0; i < 16; i++) {
- switch (mpc_default_type) {
- case 2:
- if (i == 0 || i == 13)
- continue; /* IRQ0 & IRQ13 not connected */
- /* fall through */
- default:
- if (i == 2)
- continue; /* IRQ2 is never connected */
- }
-
- if (ELCR_fallback) {
- /*
- * If the ELCR indicates a level-sensitive interrupt, we
- * copy that information over to the MP table in the
- * irqflag field (level sensitive, active high polarity).
- */
- if (ELCR_trigger(i))
- intsrc.mpc_irqflag = 13;
- else
- intsrc.mpc_irqflag = 0;
- }
-
- intsrc.mpc_srcbusirq = i;
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
- }
-
- intsrc.mpc_irqtype = mp_ExtINT;
- intsrc.mpc_srcbusirq = 0;
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
-}
-
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
-{
- struct mpc_config_processor processor;
- struct mpc_config_bus bus;
- struct mpc_config_ioapic ioapic;
- struct mpc_config_lintsrc lintsrc;
- int linttypes[2] = { mp_ExtINT, mp_NMI };
- int i;
-
- /*
- * local APIC has default address
- */
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /*
- * 2 CPUs, numbered 0 & 1.
- */
- processor.mpc_type = MP_PROCESSOR;
- /* Either an integrated APIC or a discrete 82489DX. */
- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- processor.mpc_cpuflag = CPU_ENABLED;
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) |
- boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
- for (i = 0; i < 2; i++) {
- processor.mpc_apicid = i;
- MP_processor_info(&processor);
- }
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- switch (mpc_default_type) {
- default:
- printk(KERN_ERR "???\nUnknown standard configuration %d\n",
- mpc_default_type);
- /* fall through */
- case 1:
- case 5:
- memcpy(bus.mpc_bustype, "ISA ", 6);
- break;
- case 2:
- case 6:
- case 3:
- memcpy(bus.mpc_bustype, "EISA ", 6);
- break;
- case 4:
- case 7:
- memcpy(bus.mpc_bustype, "MCA ", 6);
- }
- MP_bus_info(&bus);
- if (mpc_default_type > 4) {
- bus.mpc_busid = 1;
- memcpy(bus.mpc_bustype, "PCI ", 6);
- MP_bus_info(&bus);
- }
-
- ioapic.mpc_type = MP_IOAPIC;
- ioapic.mpc_apicid = 2;
- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- ioapic.mpc_flags = MPC_APIC_USABLE;
- ioapic.mpc_apicaddr = 0xFEC00000;
- MP_ioapic_info(&ioapic);
-
- /*
- * We set up most of the low 16 IO-APIC pins according to MPS rules.
- */
- construct_default_ioirq_mptable(mpc_default_type);
-
- lintsrc.mpc_type = MP_LINTSRC;
- lintsrc.mpc_irqflag = 0; /* conforming */
- lintsrc.mpc_srcbusid = 0;
- lintsrc.mpc_srcbusirq = 0;
- lintsrc.mpc_destapic = MP_APIC_ALL;
- for (i = 0; i < 2; i++) {
- lintsrc.mpc_irqtype = linttypes[i];
- lintsrc.mpc_destapiclint = i;
- MP_lintsrc_info(&lintsrc);
- }
-}
-
-static struct intel_mp_floating *mpf_found;
-
-/*
- * Scan the memory blocks for an SMP configuration block.
- */
-void __init get_smp_config (void)
-{
- struct intel_mp_floating *mpf = mpf_found;
-
- /*
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
- * processors, where MPS only supports physical.
- */
- if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
- return;
- }
- else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
-
- printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
- if (mpf->mpf_feature2 & (1<<7)) {
- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
- pic_mode = 1;
- } else {
- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
- pic_mode = 0;
- }
-
- /*
- * Now see if we need to read further.
- */
- if (mpf->mpf_feature1 != 0) {
-
- printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
- construct_default_ISA_mptable(mpf->mpf_feature1);
-
- } else if (mpf->mpf_physptr) {
-
- /*
- * Read the physical hardware table. Anything here will
- * override the defaults.
- */
- if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
- smp_found_config = 0;
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
- return;
- }
- /*
- * If there are no explicit MP IRQ entries, then we are
- * broken. We set up most of the low 16 IO-APIC pins to
- * ISA defaults and hope it will work.
- */
- if (!mp_irq_entries) {
- struct mpc_config_bus bus;
-
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- memcpy(bus.mpc_bustype, "ISA ", 6);
- MP_bus_info(&bus);
-
- construct_default_ioirq_mptable(0);
- }
-
- } else
- BUG();
-
- printk(KERN_INFO "Processors: %d\n", num_processors);
- /*
- * Only use the first configuration found.
- */
-}
-
-static int __init smp_scan_config (unsigned long base, unsigned long length)
-{
- extern void __bad_mpf_size(void);
- unsigned int *bp = isa_bus_to_virt(base);
- struct intel_mp_floating *mpf;
-
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
- if (sizeof(*mpf) != 16)
- __bad_mpf_size();
-
- while (length > 0) {
- mpf = (struct intel_mp_floating *)bp;
- if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
- !mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4)) ) {
-
- smp_found_config = 1;
- mpf_found = mpf;
- return 1;
- }
- bp += 4;
- length -= 16;
- }
- return 0;
-}
-
-void __init find_intel_smp (void)
-{
- unsigned int address;
-
- /*
- * FIXME: Linux assumes you have 640K of base ram..
- * this continues the error...
- *
- * 1) Scan the bottom 1K for a signature
- * 2) Scan the top 1K of base RAM
- * 3) Scan the 64K of bios
- */
- if (smp_scan_config(0x0,0x400) ||
- smp_scan_config(639*0x400,0x400) ||
- smp_scan_config(0xF0000,0x10000))
- return;
- /*
- * If it is an SMP machine we should know now, unless the
- * configuration is in an EISA/MCA bus machine with an
- * extended bios data area.
- *
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E, calculate and scan it here.
- *
- * NOTE! There are Linux loaders that will corrupt the EBDA
- * area, and as such this kind of SMP config may be less
- * trustworthy, simply because the SMP table may have been
- * stomped on during early boot. These loaders are buggy and
- * should be fixed.
- */
-
- address = *(unsigned short *)phys_to_virt(0x40E);
- address <<= 4;
- if (smp_scan_config(address, 0x1000))
- return;
-
- /* If we have come this far, we did not find an MP table */
- printk(KERN_INFO "No mptable found.\n");
-}
-
-/*
- * - Intel MP Configuration Table
- */
-void __init find_smp_config (void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- find_intel_smp();
-#endif
-}
-
-
-/* --------------------------------------------------------------------------
- ACPI-based MP Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-void __init mp_register_lapic_address (
- u64 address)
-{
-#ifndef CONFIG_XEN
- mp_lapic_addr = (unsigned long) address;
-
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-
- if (boot_cpu_id == -1U)
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-
- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
-#endif
-}
-
-
-void __cpuinit mp_register_lapic (
- u8 id,
- u8 enabled)
-{
- struct mpc_config_processor processor;
- int boot_cpu = 0;
-
- if (id >= MAX_APICS) {
- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
- id, MAX_APICS);
- return;
- }
-
- if (id == boot_cpu_physical_apicid)
- boot_cpu = 1;
-
-#ifndef CONFIG_XEN
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicid = id;
- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
-#endif
-
- MP_processor_info(&processor);
-}
-
-#ifdef CONFIG_X86_IO_APIC
-
-#define MP_ISA_BUS 0
-#define MP_MAX_IOAPIC_PIN 127
-
-static struct mp_ioapic_routing {
- int apic_id;
- int gsi_start;
- int gsi_end;
- u32 pin_programmed[4];
-} mp_ioapic_routing[MAX_IO_APICS];
-
-
-static int mp_find_ioapic (
- int gsi)
-{
- int i = 0;
-
- /* Find the IOAPIC that manages this GSI. */
- for (i = 0; i < nr_ioapics; i++) {
- if ((gsi >= mp_ioapic_routing[i].gsi_start)
- && (gsi <= mp_ioapic_routing[i].gsi_end))
- return i;
- }
-
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
-
- return -1;
-}
-
-
-void __init mp_register_ioapic (
- u8 id,
- u32 address,
- u32 gsi_base)
-{
- int idx = 0;
-
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
- }
- if (!address) {
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
- " found in MADT table, skipping!\n");
- return;
- }
-
- idx = nr_ioapics++;
-
- mp_ioapics[idx].mpc_type = MP_IOAPIC;
- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
- mp_ioapics[idx].mpc_apicaddr = address;
-
-#ifndef CONFIG_XEN
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-#endif
- mp_ioapics[idx].mpc_apicid = id;
- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
-
- /*
- * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
- * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
- */
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
- mp_ioapic_routing[idx].gsi_start = gsi_base;
- mp_ioapic_routing[idx].gsi_end = gsi_base +
- io_apic_get_redir_entries(idx);
-
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].gsi_start,
- mp_ioapic_routing[idx].gsi_end);
-
- return;
-}
-
-
-void __init mp_override_legacy_irq (
- u8 bus_irq,
- u8 polarity,
- u8 trigger,
- u32 gsi)
-{
- struct mpc_config_intsrc intsrc;
- int ioapic = -1;
- int pin = -1;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
- /*
- * TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
- * increase of timer interrupts!
- */
- if ((bus_irq == 0) && (trigger == 3))
- trigger = 1;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_irqflag = (trigger << 2) | polarity;
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
- intsrc.mpc_dstirq = pin; /* INTIN# */
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
-
- return;
-}
-
-
-void __init mp_config_acpi_legacy_irqs (void)
-{
- struct mpc_config_intsrc intsrc;
- int i = 0;
- int ioapic = -1;
-
- /*
- * Fabricate the legacy ISA bus (bus #31).
- */
- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
-
- /*
- * Locate the IOAPIC that manages the ISA IRQs (0-15).
- */
- ioapic = mp_find_ioapic(0);
- if (ioapic < 0)
- return;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* Conforming */
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
-
- /*
- * Use the default configuration for the IRQs 0-15. Unless
- * overridden by (MADT) interrupt source override entries.
- */
- for (i = 0; i < 16; i++) {
- int idx;
-
- for (idx = 0; idx < mp_irq_entries; idx++) {
- struct mpc_config_intsrc *irq = mp_irqs + idx;
-
- /* Do we already have a mapping for this ISA IRQ? */
- if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
- break;
-
- /* Do we already have a mapping for this IOAPIC pin */
- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
- (irq->mpc_dstirq == i))
- break;
- }
-
- if (idx != mp_irq_entries) {
- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
- continue; /* IRQ already used */
- }
-
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_srcbusirq = i; /* Identity mapped */
- intsrc.mpc_dstirq = i;
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
- intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
-
- return;
-}
-
-#define MAX_GSI_NUM 4096
-
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
-{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
- static int pci_irq = 16;
- /*
- * Mapping between Global System Interrupts, which
- * represent all possible interrupts, to the IRQs
- * assigned to actual devices.
- */
- static int gsi_to_irq[MAX_GSI_NUM];
-
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
- return gsi;
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_fadt.sci_int == gsi)
- return gsi;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0) {
- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
- return gsi;
- }
-
- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
- ioapic_pin);
- return gsi;
- }
- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- return gsi_to_irq[gsi];
- }
-
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-
- if (triggering == ACPI_LEVEL_SENSITIVE) {
- /*
- * For PCI devices assign IRQs in order, avoiding gaps
- * due to unused I/O APIC pins.
- */
- int irq = gsi;
- if (gsi < MAX_GSI_NUM) {
- /*
- * Retain the VIA chipset work-around (gsi > 15), but
- * avoid a problem where the 8254 timer (IRQ0) is setup
- * via an override (so it's not on pin 0 of the ioapic),
- * and at the same time, the pin 0 interrupt is a PCI
- * type. The gsi > 15 test could cause these two pins
- * to be shared as IRQ0, and they are not shareable.
- * So test for this condition, and if necessary, avoid
- * the pin collision.
- */
- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
- gsi = pci_irq++;
- /*
- * Don't assign IRQ used by ACPI SCI
- */
- if (gsi == acpi_fadt.sci_int)
- gsi = pci_irq++;
- gsi_to_irq[irq] = gsi;
- } else {
- printk(KERN_ERR "GSI %u is too high\n", gsi);
- return gsi;
- }
- }
-
- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- return gsi;
-}
-
-#endif /*CONFIG_X86_IO_APIC*/
-#endif /*CONFIG_ACPI*/
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-swiotlb-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-swiotlb-xen.c
deleted file mode 100644
index eed643c114..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-swiotlb-xen.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Glue code to lib/swiotlb.c */
-
-#include <linux/pci.h>
-#include <linux/cache.h>
-#include <linux/module.h>
-#include <asm/dma-mapping.h>
-#include <asm/proto.h>
-#include <asm/swiotlb.h>
-#include <asm/dma.h>
-
-#if 0
-int swiotlb __read_mostly;
-EXPORT_SYMBOL(swiotlb);
-#endif
-
-struct dma_mapping_ops swiotlb_dma_ops = {
-#if 0
- .mapping_error = swiotlb_dma_mapping_error,
- .alloc_coherent = swiotlb_alloc_coherent,
- .free_coherent = swiotlb_free_coherent,
- .map_single = swiotlb_map_single,
- .unmap_single = swiotlb_unmap_single,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
- .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .map_sg = swiotlb_map_sg,
- .unmap_sg = swiotlb_unmap_sg,
- .dma_supported = NULL,
-#endif
-};
-
-void pci_swiotlb_init(void)
-{
-#if 0
- /* don't initialize swiotlb if iommu=off (no_iommu=1) */
- if (!iommu_detected && !no_iommu && end_pfn > MAX_DMA32_PFN)
- swiotlb = 1;
- if (swiotlb_force)
- swiotlb = 1;
- if (swiotlb) {
- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
- swiotlb_init();
- dma_ops = &swiotlb_dma_ops;
- }
-#else
- swiotlb_init();
- if (swiotlb) {
- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
- dma_ops = &swiotlb_dma_ops;
- }
-#endif
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
deleted file mode 100644
index 41aa70ce63..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
+++ /dev/null
@@ -1,829 +0,0 @@
-/*
- * linux/arch/x86-64/kernel/process.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * X86-64 port
- * Andi Kleen.
- *
- * CPU hotplug support - ashok.raj@intel.com
- *
- * Jun Nakajima <jun.nakajima@intel.com>
- * Modified for Xen
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
- */
-
-#include <stdarg.h>
-
-#include <linux/cpu.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/elfcore.h>
-#include <linux/smp.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/module.h>
-#include <linux/a.out.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ptrace.h>
-#include <linux/utsname.h>
-#include <linux/random.h>
-#include <linux/notifier.h>
-#include <linux/kprobes.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/mmu_context.h>
-#include <asm/pda.h>
-#include <asm/prctl.h>
-#include <asm/kdebug.h>
-#include <xen/interface/platform.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/vcpu.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/hardirq.h>
-#include <asm/ia32.h>
-#include <asm/idle.h>
-
-#include <xen/cpu_hotplug.h>
-
-asmlinkage extern void ret_from_fork(void);
-
-unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-
-unsigned long boot_option_idle_override = 0;
-EXPORT_SYMBOL(boot_option_idle_override);
-
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL(idle_notifier_unregister);
-
-enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
-static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
-
-void enter_idle(void)
-{
- __get_cpu_var(idle_state) = CPU_IDLE;
- atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
-}
-
-static void __exit_idle(void)
-{
- __get_cpu_var(idle_state) = CPU_NOT_IDLE;
- atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
-}
-
-/* Called from interrupts to signify idle end */
-void exit_idle(void)
-{
- if (current->pid | read_pda(irqcount))
- return;
- __exit_idle();
-}
-
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
- local_irq_enable();
-
- asm volatile(
- "2:"
- "testl %0,%1;"
- "rep; nop;"
- "je 2b;"
- : :
- "i" (_TIF_NEED_RESCHED),
- "m" (current_thread_info()->flags));
-}
-
-static void xen_idle(void)
-{
- local_irq_disable();
-
- if (need_resched())
- local_irq_enable();
- else {
- current_thread_info()->status &= ~TS_POLLING;
- smp_mb__after_clear_bit();
- safe_halt();
- current_thread_info()->status |= TS_POLLING;
- }
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static inline void play_dead(void)
-{
- idle_task_exit();
- local_irq_disable();
- cpu_clear(smp_processor_id(), cpu_initialized);
- preempt_enable_no_resched();
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
- cpu_bringup();
-}
-#else
-static inline void play_dead(void)
-{
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle (void)
-{
- current_thread_info()->status |= TS_POLLING;
- /* endless idle loop with no priority at all */
- while (1) {
- while (!need_resched()) {
- void (*idle)(void);
-
- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
- rmb();
- idle = xen_idle; /* no alternatives */
- if (cpu_is_offline(smp_processor_id()))
- play_dead();
- enter_idle();
- idle();
- __exit_idle();
- }
-
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- }
-}
-
-void cpu_idle_wait(void)
-{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) &&
- !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- } while (!cpus_empty(map));
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
-}
-
-static int __init idle_setup (char *str)
-{
- if (!strncmp(str, "poll", 4)) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- }
-
- boot_option_idle_override = 1;
- return 1;
-}
-
-__setup("idle=", idle_setup);
-
-/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
-{
- unsigned long fs, gs, shadowgs;
- unsigned int fsindex,gsindex;
- unsigned int ds,cs,es;
-
- printk("\n");
- print_modules();
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- system_utsname.release,
- (int)strcspn(system_utsname.version, " "),
- system_utsname.version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
- printk_address(regs->rip);
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
- regs->eflags);
- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
- regs->rax, regs->rbx, regs->rcx);
- printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
- regs->rdx, regs->rsi, regs->rdi);
- printk("RBP: %016lx R08: %016lx R09: %016lx\n",
- regs->rbp, regs->r8, regs->r9);
- printk("R10: %016lx R11: %016lx R12: %016lx\n",
- regs->r10, regs->r11, regs->r12);
- printk("R13: %016lx R14: %016lx R15: %016lx\n",
- regs->r13, regs->r14, regs->r15);
-
- asm("mov %%ds,%0" : "=r" (ds));
- asm("mov %%cs,%0" : "=r" (cs));
- asm("mov %%es,%0" : "=r" (es));
- asm("mov %%fs,%0" : "=r" (fsindex));
- asm("mov %%gs,%0" : "=r" (gsindex));
-
- rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_GS_BASE, gs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
-
- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs,fsindex,gs,gsindex,shadowgs);
- printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es);
-
-}
-
-void show_regs(struct pt_regs *regs)
-{
- printk("CPU %d:", smp_processor_id());
- __show_regs(regs);
- show_trace(NULL, regs, (void *)(regs + 1));
-}
-
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
-{
- struct task_struct *me = current;
- struct thread_struct *t = &me->thread;
-
- if (me->thread.io_bitmap_ptr) {
-#ifndef CONFIG_X86_NO_TSS
- struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
-#endif
-#ifdef CONFIG_XEN
- struct physdev_set_iobitmap iobmp_op;
- memset(&iobmp_op, 0, sizeof(iobmp_op));
-#endif
-
- kfree(t->io_bitmap_ptr);
- t->io_bitmap_ptr = NULL;
- /*
- * Careful, clear this in the TSS too:
- */
-#ifndef CONFIG_X86_NO_TSS
- memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
- put_cpu();
-#endif
-#ifdef CONFIG_XEN
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobmp_op);
-#endif
- t->io_bitmap_max = 0;
- }
-}
-
-void load_gs_index(unsigned gs)
-{
- HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs);
-}
-
-void flush_thread(void)
-{
- struct task_struct *tsk = current;
- struct thread_info *t = current_thread_info();
-
- if (t->flags & _TIF_ABI_PENDING) {
- t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
- if (t->flags & _TIF_IA32)
- current_thread_info()->status |= TS_COMPAT;
- }
-
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- /*
- * Forget coprocessor state..
- */
- clear_fpu(tsk);
- clear_used_math();
-}
-
-void release_thread(struct task_struct *dead_task)
-{
- if (dead_task->mm) {
- if (dead_task->mm->context.size) {
- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt,
- dead_task->mm->context.size);
- BUG();
- }
- }
-}
-
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
- struct user_desc ud = {
- .base_addr = addr,
- .limit = 0xfffff,
- .seg_32bit = 1,
- .limit_in_pages = 1,
- .useable = 1,
- };
- struct n_desc_struct *desc = (void *)t->thread.tls_array;
- desc += tls;
- desc->a = LDT_entry_a(&ud);
- desc->b = LDT_entry_b(&ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
- struct desc_struct *desc = (void *)t->thread.tls_array;
- desc += tls;
- return desc->base0 |
- (((u32)desc->base1) << 16) |
- (((u32)desc->base2) << 24);
-}
-
-/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
- */
-void prepare_to_copy(struct task_struct *tsk)
-{
- unlazy_fpu(tsk);
-}
-
-int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
- unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
-{
- int err;
- struct pt_regs * childregs;
- struct task_struct *me = current;
-
- childregs = ((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(p))) - 1;
- *childregs = *regs;
-
- childregs->rax = 0;
- childregs->rsp = rsp;
- if (rsp == ~0UL)
- childregs->rsp = (unsigned long)childregs;
-
- p->thread.rsp = (unsigned long) childregs;
- p->thread.rsp0 = (unsigned long) (childregs+1);
- p->thread.userrsp = me->thread.userrsp;
-
- set_tsk_thread_flag(p, TIF_FORK);
-
- p->thread.fs = me->thread.fs;
- p->thread.gs = me->thread.gs;
-
- asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
- asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
- asm("mov %%es,%0" : "=m" (p->thread.es));
- asm("mov %%ds,%0" : "=m" (p->thread.ds));
-
- if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!p->thread.io_bitmap_ptr) {
- p->thread.io_bitmap_max = 0;
- return -ENOMEM;
- }
- memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES);
- }
-
- /*
- * Set a new TLS for the child thread?
- */
- if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
- err = ia32_child_tls(p, childregs);
- else
-#endif
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
- if (err)
- goto out;
- }
- p->thread.iopl = current->thread.iopl;
-
- err = 0;
-out:
- if (err && p->thread.io_bitmap_ptr) {
- kfree(p->thread.io_bitmap_ptr);
- p->thread.io_bitmap_max = 0;
- }
- return err;
-}
-
-static inline void __save_init_fpu( struct task_struct *tsk )
-{
- asm volatile( "rex64 ; fxsave %0 ; fnclex"
- : "=m" (tsk->thread.i387.fxsave));
- tsk->thread_info->status &= ~TS_USEDFPU;
-}
-
-/*
- * switch_to(x,y) should switch tasks from x to y.
- *
- * This could still be optimized:
- * - fold all the options into a flag word and test it with a single test.
- * - could test fs/gs bitsliced
- *
- * Kprobes not supported here. Set the probe on schedule instead.
- */
-__kprobes struct task_struct *
-__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-{
- struct thread_struct *prev = &prev_p->thread,
- *next = &next_p->thread;
- int cpu = smp_processor_id();
-#ifndef CONFIG_X86_NO_TSS
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-#endif
- struct physdev_set_iopl iopl_op;
- struct physdev_set_iobitmap iobmp_op;
- multicall_entry_t _mcl[8], *mcl = _mcl;
-
- /*
- * This is basically '__unlazy_fpu', except that we queue a
- * multicall to indicate FPU task switch, rather than
- * synchronously trapping to Xen.
- * The AMD workaround requires it to be after DS reload, or
- * after DS has been cleared, which we do in __prepare_arch_switch.
- */
- if (prev_p->thread_info->status & TS_USEDFPU) {
- __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = 1;
- mcl++;
- }
-
- /*
- * Reload esp0, LDT and the page table pointer:
- */
- mcl->op = __HYPERVISOR_stack_switch;
- mcl->args[0] = __KERNEL_DS;
- mcl->args[1] = next->rsp0;
- mcl++;
-
- /*
- * Load the per-thread Thread-Local Storage descriptor.
- * This is load_TLS(next, cpu) with multicalls.
- */
-#define C(i) do { \
- if (unlikely(next->tls_array[i] != prev->tls_array[i])) { \
- mcl->op = __HYPERVISOR_update_descriptor; \
- mcl->args[0] = virt_to_machine( \
- &cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]); \
- mcl->args[1] = next->tls_array[i]; \
- mcl++; \
- } \
-} while (0)
- C(0); C(1); C(2);
-#undef C
-
- if (unlikely(prev->iopl != next->iopl)) {
- iopl_op.iopl = (next->iopl == 0) ? 1 : next->iopl;
- mcl->op = __HYPERVISOR_physdev_op;
- mcl->args[0] = PHYSDEVOP_set_iopl;
- mcl->args[1] = (unsigned long)&iopl_op;
- mcl++;
- }
-
- if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
- set_xen_guest_handle(iobmp_op.bitmap,
- (char *)next->io_bitmap_ptr);
- iobmp_op.nr_ports = next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
- mcl->op = __HYPERVISOR_physdev_op;
- mcl->args[0] = PHYSDEVOP_set_iobitmap;
- mcl->args[1] = (unsigned long)&iobmp_op;
- mcl++;
- }
-
- (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
- /*
- * Switch DS and ES.
- * This won't pick up thread selector changes, but I guess that is ok.
- */
- if (unlikely(next->es))
- loadsegment(es, next->es);
-
- if (unlikely(next->ds))
- loadsegment(ds, next->ds);
-
- /*
- * Switch FS and GS.
- */
- if (unlikely(next->fsindex))
- loadsegment(fs, next->fsindex);
-
- if (next->fs)
- HYPERVISOR_set_segment_base(SEGBASE_FS, next->fs);
-
- if (unlikely(next->gsindex))
- load_gs_index(next->gsindex);
-
- if (next->gs)
- HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs);
-
- /*
- * Switch the PDA context.
- */
- prev->userrsp = read_pda(oldrsp);
- write_pda(oldrsp, next->userrsp);
- write_pda(pcurrent, next_p);
- write_pda(kernelstack,
- task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
-
- /*
- * Now maybe reload the debug registers
- */
- if (unlikely(next->debugreg7)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
- return prev_p;
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage
-long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs regs)
-{
- long error;
- char * filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, &regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- }
- putname(filename);
- return error;
-}
-
-void set_personality_64bit(void)
-{
- /* inherit personality from parent */
-
- /* Make sure to be in 64bit mode */
- clear_thread_flag(TIF_IA32);
-
- /* TBD: overwrites user setup. Should have two bits.
- But 64bit processes have always behaved this way,
- so it's not too bad. The main problem is just that
- 32bit childs are affected again. */
- current->personality &= ~READ_IMPLIES_EXEC;
-}
-
-asmlinkage long sys_fork(struct pt_regs *regs)
-{
- return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
-}
-
-asmlinkage long
-sys_clone(unsigned long clone_flags, unsigned long newsp,
- void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
-{
- if (!newsp)
- newsp = regs->rsp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage long sys_vfork(struct pt_regs *regs)
-{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
- NULL, NULL);
-}
-
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long stack;
- u64 fp,rip;
- int count = 0;
-
- if (!p || p == current || p->state==TASK_RUNNING)
- return 0;
- stack = (unsigned long)task_stack_page(p);
- if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
- return 0;
- fp = *(u64 *)(p->thread.rsp);
- do {
- if (fp < (unsigned long)stack ||
- fp > (unsigned long)stack+THREAD_SIZE)
- return 0;
- rip = *(u64 *)(fp+8);
- if (!in_sched_functions(rip))
- return rip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
- return 0;
-}
-
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
- int ret = 0;
- int doit = task == current;
- int cpu;
-
- switch (code) {
- case ARCH_SET_GS:
- if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
- cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
- }
- task->thread.gsindex = GS_TLS_SEL;
- task->thread.gs = 0;
- } else {
- task->thread.gsindex = 0;
- task->thread.gs = addr;
- if (doit) {
- load_gs_index(0);
- ret = HYPERVISOR_set_segment_base(
- SEGBASE_GS_USER, addr);
- }
- }
- put_cpu();
- break;
- case ARCH_SET_FS:
- /* Not strictly needed for fs, but do it for symmetry
- with gs */
- if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
- cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
- }
- task->thread.fsindex = FS_TLS_SEL;
- task->thread.fs = 0;
- } else {
- task->thread.fsindex = 0;
- task->thread.fs = addr;
- if (doit) {
- /* set the selector to 0 to not confuse
- __switch_to */
- asm volatile("movl %0,%%fs" :: "r" (0));
- ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
- addr);
- }
- }
- put_cpu();
- break;
- case ARCH_GET_FS: {
- unsigned long base;
- if (task->thread.fsindex == FS_TLS_SEL)
- base = read_32bit_tls(task, FS_TLS);
- else if (doit)
- rdmsrl(MSR_FS_BASE, base);
- else
- base = task->thread.fs;
- ret = put_user(base, (unsigned long __user *)addr);
- break;
- }
- case ARCH_GET_GS: {
- unsigned long base;
- unsigned gsindex;
- if (task->thread.gsindex == GS_TLS_SEL)
- base = read_32bit_tls(task, GS_TLS);
- else if (doit) {
- asm("movl %%gs,%0" : "=r" (gsindex));
- if (gsindex)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- else
- base = task->thread.gs;
- }
- else
- base = task->thread.gs;
- ret = put_user(base, (unsigned long __user *)addr);
- break;
- }
-
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-long sys_arch_prctl(int code, unsigned long addr)
-{
- return do_arch_prctl(current, code, addr);
-}
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
- struct pt_regs *pp, ptregs;
-
- pp = task_pt_regs(tsk);
-
- ptregs = *pp;
- ptregs.cs &= 0xffff;
- ptregs.ss &= 0xffff;
-
- elf_core_copy_regs(regs, &ptregs);
-
- boot_option_idle_override = 1;
- return 1;
-}
-
-unsigned long arch_align_stack(unsigned long sp)
-{
- if (randomize_va_space)
- sp -= get_random_int() % 8192;
- return sp & ~0xf;
-}
-
-#ifndef CONFIG_SMP
-void _restore_vcpu(void)
-{
-}
-#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
deleted file mode 100644
index 74e8fd6363..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
+++ /dev/null
@@ -1,1677 +0,0 @@
-/*
- * linux/arch/x86-64/kernel/setup.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Nov 2001 Dave Jones <davej@suse.de>
- * Forked from i386 setup code.
- */
-
-/*
- * This file handles the architecture-dependent parts of initialization
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/screen_info.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/initrd.h>
-#include <linux/highmem.h>
-#include <linux/bootmem.h>
-#include <linux/module.h>
-#include <asm/processor.h>
-#include <linux/console.h>
-#include <linux/seq_file.h>
-#include <linux/crash_dump.h>
-#include <linux/root_dev.h>
-#include <linux/pci.h>
-#include <linux/acpi.h>
-#include <linux/kallsyms.h>
-#include <linux/edd.h>
-#include <linux/mmzone.h>
-#include <linux/kexec.h>
-#include <linux/cpufreq.h>
-#include <linux/dmi.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-
-#include <asm/mtrr.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/msr.h>
-#include <asm/desc.h>
-#include <video/edid.h>
-#include <asm/e820.h>
-#include <asm/dma.h>
-#include <asm/mpspec.h>
-#include <asm/mmu_context.h>
-#include <asm/bootsetup.h>
-#include <asm/proto.h>
-#include <asm/setup.h>
-#include <asm/mach_apic.h>
-#include <asm/numa.h>
-#include <asm/sections.h>
-#include <asm/dmi.h>
-#ifdef CONFIG_XEN
-#include <linux/percpu.h>
-#include <xen/interface/physdev.h>
-#include "setup_arch_pre.h"
-#include <asm/hypervisor.h>
-#include <xen/interface/nmi.h>
-#include <xen/features.h>
-#include <xen/xencons.h>
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-#include <asm/mach-xen/setup_arch_post.h>
-#include <xen/interface/memory.h>
-
-#ifdef CONFIG_XEN
-#include <xen/interface/kexec.h>
-#endif
-
-extern unsigned long start_pfn;
-extern struct edid_info edid_info;
-
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
-
-extern char hypercall_page[PAGE_SIZE];
-EXPORT_SYMBOL(hypercall_page);
-
-static int xen_panic_event(struct notifier_block *, unsigned long, void *);
-static struct notifier_block xen_panic_block = {
- xen_panic_event, NULL, 0 /* try to go last */
-};
-
-unsigned long *phys_to_machine_mapping;
-unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[512];
-
-EXPORT_SYMBOL(phys_to_machine_mapping);
-
-DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
-DEFINE_PER_CPU(int, nr_multicall_ents);
-
-/* Raw start-of-day parameters from the hypervisor. */
-start_info_t *xen_start_info;
-EXPORT_SYMBOL(xen_start_info);
-#endif
-
-/*
- * Machine setup..
- */
-
-struct cpuinfo_x86 boot_cpu_data __read_mostly;
-EXPORT_SYMBOL(boot_cpu_data);
-
-unsigned long mmu_cr4_features;
-
-int acpi_disabled;
-EXPORT_SYMBOL(acpi_disabled);
-#ifdef CONFIG_ACPI
-extern int __initdata acpi_ht;
-extern acpi_interrupt_flags acpi_sci_flags;
-int __initdata acpi_force = 0;
-#endif
-
-int acpi_numa __initdata;
-
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
-
-unsigned long saved_video_mode;
-
-/*
- * Early DMI memory
- */
-int dmi_alloc_index;
-char dmi_alloc_data[DMI_MAX_DATA];
-
-/*
- * Setup options
- */
-struct screen_info screen_info;
-EXPORT_SYMBOL(screen_info);
-struct sys_desc_table_struct {
- unsigned short length;
- unsigned char table[0];
-};
-
-struct edid_info edid_info;
-EXPORT_SYMBOL_GPL(edid_info);
-struct e820map e820;
-#ifdef CONFIG_XEN
-struct e820map machine_e820;
-#endif
-
-extern int root_mountflags;
-
-char command_line[COMMAND_LINE_SIZE];
-
-struct resource standard_io_resources[] = {
- { .name = "dma1", .start = 0x00, .end = 0x1f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "pic1", .start = 0x20, .end = 0x21,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "timer0", .start = 0x40, .end = 0x43,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "timer1", .start = 0x50, .end = 0x53,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "keyboard", .start = 0x60, .end = 0x6f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "dma page reg", .start = 0x80, .end = 0x8f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "pic2", .start = 0xa0, .end = 0xa1,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "dma2", .start = 0xc0, .end = 0xdf,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "fpu", .start = 0xf0, .end = 0xff,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO }
-};
-
-#define STANDARD_IO_RESOURCES \
- (sizeof standard_io_resources / sizeof standard_io_resources[0])
-
-#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
-
-struct resource data_resource = {
- .name = "Kernel data",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_RAM,
-};
-struct resource code_resource = {
- .name = "Kernel code",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_RAM,
-};
-
-#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
-
-static struct resource system_rom_resource = {
- .name = "System ROM",
- .start = 0xf0000,
- .end = 0xfffff,
- .flags = IORESOURCE_ROM,
-};
-
-static struct resource extension_rom_resource = {
- .name = "Extension ROM",
- .start = 0xe0000,
- .end = 0xeffff,
- .flags = IORESOURCE_ROM,
-};
-
-static struct resource adapter_rom_resources[] = {
- { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
- .flags = IORESOURCE_ROM },
- { .name = "Adapter ROM", .start = 0, .end = 0,
- .flags = IORESOURCE_ROM },
- { .name = "Adapter ROM", .start = 0, .end = 0,
- .flags = IORESOURCE_ROM },
- { .name = "Adapter ROM", .start = 0, .end = 0,
- .flags = IORESOURCE_ROM },
- { .name = "Adapter ROM", .start = 0, .end = 0,
- .flags = IORESOURCE_ROM },
- { .name = "Adapter ROM", .start = 0, .end = 0,
- .flags = IORESOURCE_ROM }
-};
-
-#define ADAPTER_ROM_RESOURCES \
- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
-
-static struct resource video_rom_resource = {
- .name = "Video ROM",
- .start = 0xc0000,
- .end = 0xc7fff,
- .flags = IORESOURCE_ROM,
-};
-
-static struct resource video_ram_resource = {
- .name = "Video RAM area",
- .start = 0xa0000,
- .end = 0xbffff,
- .flags = IORESOURCE_RAM,
-};
-
-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
-
-static int __init romchecksum(unsigned char *rom, unsigned long length)
-{
- unsigned char *p, sum = 0;
-
- for (p = rom; p < rom + length; p++)
- sum += *p;
- return sum == 0;
-}
-
-static void __init probe_roms(void)
-{
- unsigned long start, length, upper;
- unsigned char *rom;
- int i;
-
-#ifdef CONFIG_XEN
- /* Nothing to do if not running in dom0. */
- if (!is_initial_xendomain())
- return;
-#endif
-
- /* video rom */
- upper = adapter_rom_resources[0].start;
- for (start = video_rom_resource.start; start < upper; start += 2048) {
- rom = isa_bus_to_virt(start);
- if (!romsignature(rom))
- continue;
-
- video_rom_resource.start = start;
-
- /* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
-
- /* if checksum okay, trust length byte */
- if (length && romchecksum(rom, length))
- video_rom_resource.end = start + length - 1;
-
- request_resource(&iomem_resource, &video_rom_resource);
- break;
- }
-
- start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
- if (start < upper)
- start = upper;
-
- /* system rom */
- request_resource(&iomem_resource, &system_rom_resource);
- upper = system_rom_resource.start;
-
- /* check for extension rom (ignore length byte!) */
- rom = isa_bus_to_virt(extension_rom_resource.start);
- if (romsignature(rom)) {
- length = extension_rom_resource.end - extension_rom_resource.start + 1;
- if (romchecksum(rom, length)) {
- request_resource(&iomem_resource, &extension_rom_resource);
- upper = extension_rom_resource.start;
- }
- }
-
- /* check for adapter roms on 2k boundaries */
- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
- rom = isa_bus_to_virt(start);
- if (!romsignature(rom))
- continue;
-
- /* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
-
- /* but accept any length that fits if checksum okay */
- if (!length || start + length > upper || !romchecksum(rom, length))
- continue;
-
- adapter_rom_resources[i].start = start;
- adapter_rom_resources[i].end = start + length - 1;
- request_resource(&iomem_resource, &adapter_rom_resources[i]);
-
- start = adapter_rom_resources[i++].end & ~2047UL;
- }
-}
-
-/* Check for full argument with no trailing characters */
-static int fullarg(char *p, char *arg)
-{
- int l = strlen(arg);
- return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
-}
-
-static __init void parse_cmdline_early (char ** cmdline_p)
-{
- char c = ' ', *to = command_line, *from = COMMAND_LINE;
- int len = 0;
- int userdef = 0;
-
- for (;;) {
- if (c != ' ')
- goto next_char;
-
-#ifdef CONFIG_SMP
- /*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
- else if (!memcmp(from, "maxcpus=", 8)) {
- extern unsigned int maxcpus;
-
- maxcpus = simple_strtoul(from + 8, NULL, 0);
- }
-#endif
-#ifdef CONFIG_ACPI
- /* "acpi=off" disables both ACPI table parsing and interpreter init */
- if (fullarg(from,"acpi=off"))
- disable_acpi();
-
- if (fullarg(from, "acpi=force")) {
- /* add later when we do DMI horrors: */
- acpi_force = 1;
- acpi_disabled = 0;
- }
-
- /* acpi=ht just means: do ACPI MADT parsing
- at bootup, but don't enable the full ACPI interpreter */
- if (fullarg(from, "acpi=ht")) {
- if (!acpi_force)
- disable_acpi();
- acpi_ht = 1;
- }
- else if (fullarg(from, "pci=noacpi"))
- acpi_disable_pci();
- else if (fullarg(from, "acpi=noirq"))
- acpi_noirq_set();
-
- else if (fullarg(from, "acpi_sci=edge"))
- acpi_sci_flags.trigger = 1;
- else if (fullarg(from, "acpi_sci=level"))
- acpi_sci_flags.trigger = 3;
- else if (fullarg(from, "acpi_sci=high"))
- acpi_sci_flags.polarity = 1;
- else if (fullarg(from, "acpi_sci=low"))
- acpi_sci_flags.polarity = 3;
-
- /* acpi=strict disables out-of-spec workarounds */
- else if (fullarg(from, "acpi=strict")) {
- acpi_strict = 1;
- }
-#ifdef CONFIG_X86_IO_APIC
- else if (fullarg(from, "acpi_skip_timer_override"))
- acpi_skip_timer_override = 1;
-#endif
-#endif
-
-#ifndef CONFIG_XEN
- if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- disable_apic = 1;
- }
-
- if (fullarg(from, "noapic"))
- skip_ioapic_setup = 1;
-
- if (fullarg(from,"apic")) {
- skip_ioapic_setup = 0;
- ioapic_force = 1;
- }
-#endif
-
- if (!memcmp(from, "mem=", 4))
- parse_memopt(from+4, &from);
-
- if (!memcmp(from, "memmap=", 7)) {
- /* exactmap option is for used defined memory */
- if (!memcmp(from+7, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
- /* If we are doing a crash dump, we
- * still need to know the real mem
- * size before original memory map is
- * reset.
- */
- saved_max_pfn = e820_end_of_ram();
-#endif
- from += 8+7;
- end_pfn_map = 0;
- e820.nr_map = 0;
- userdef = 1;
- }
- else {
- parse_memmapopt(from+7, &from);
- userdef = 1;
- }
- }
-
-#ifdef CONFIG_NUMA
- if (!memcmp(from, "numa=", 5))
- numa_setup(from+5);
-#endif
-
- if (!memcmp(from,"iommu=",6)) {
- iommu_setup(from+6);
- }
-
- if (fullarg(from,"oops=panic"))
- panic_on_oops = 1;
-
- if (!memcmp(from, "noexec=", 7))
- nonx_setup(from + 7);
-
-#ifdef CONFIG_KEXEC
- /* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
- else if (!memcmp(from, "crashkernel=", 12)) {
-#ifndef CONFIG_XEN
- unsigned long size, base;
- size = memparse(from+12, &from);
- if (*from == '@') {
- base = memparse(from+1, &from);
- /* FIXME: Do I want a sanity check
- * to validate the memory range?
- */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
-#else
- printk("Ignoring crashkernel command line, "
- "parameter will be supplied by xen\n");
-#endif
- }
-#endif
-
-#ifdef CONFIG_PROC_VMCORE
- /* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel. This option will be passed
- * by kexec loader to the capture kernel.
- */
- else if(!memcmp(from, "elfcorehdr=", 11))
- elfcorehdr_addr = memparse(from+11, &from);
-#endif
-
-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
- else if (!memcmp(from, "additional_cpus=", 16))
- setup_additional_cpus(from+16);
-#endif
-
- next_char:
- c = *(from++);
- if (!c)
- break;
- if (COMMAND_LINE_SIZE <= ++len)
- break;
- *(to++) = c;
- }
- if (userdef) {
- printk(KERN_INFO "user-defined physical RAM map:\n");
- e820_print_map("user");
- }
- *to = '\0';
- *cmdline_p = command_line;
-}
-
-#ifndef CONFIG_NUMA
-static void __init
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long bootmap_size, bootmap;
-
- bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
- if (bootmap == -1L)
- panic("Cannot find bootmem map of size %ld\n",bootmap_size);
- bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
-#ifdef CONFIG_XEN
- e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
-#else
- e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
-#endif
- reserve_bootmem(bootmap, bootmap_size);
-}
-#endif
-
-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
-struct edd edd;
-#ifdef CONFIG_EDD_MODULE
-EXPORT_SYMBOL(edd);
-#endif
-/**
- * copy_edd() - Copy the BIOS EDD information
- * from boot_params into a safe place.
- *
- */
-static inline void copy_edd(void)
-{
- memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
- memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
- edd.mbr_signature_nr = EDD_MBR_SIG_NR;
- edd.edd_info_nr = EDD_NR;
-}
-#else
-static inline void copy_edd(void)
-{
-}
-#endif
-
-#ifndef CONFIG_XEN
-#define EBDA_ADDR_POINTER 0x40E
-
-unsigned __initdata ebda_addr;
-unsigned __initdata ebda_size;
-
-static void discover_ebda(void)
-{
- /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E
- */
- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
- ebda_addr <<= 4;
-
- ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
-
- /* Round EBDA up to pages */
- if (ebda_size == 0)
- ebda_size = 1;
- ebda_size <<= 10;
- ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
- if (ebda_size > 64*1024)
- ebda_size = 64*1024;
-}
-#else
-#define discover_ebda() ((void)0)
-#endif
-
-void __init setup_arch(char **cmdline_p)
-{
-#ifdef CONFIG_XEN
- /* Register a call for panic conditions. */
- atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
-
- ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
- screen_info = SCREEN_INFO;
-
- if (is_initial_xendomain()) {
- /* This is drawn from a dump from vgacon:startup in
- * standard Linux. */
- screen_info.orig_video_mode = 3;
- screen_info.orig_video_isVGA = 1;
- screen_info.orig_video_lines = 25;
- screen_info.orig_video_cols = 80;
- screen_info.orig_video_ega_bx = 3;
- screen_info.orig_video_points = 16;
- screen_info.orig_y = screen_info.orig_video_lines - 1;
- if (xen_start_info->console.dom0.info_size >=
- sizeof(struct dom0_vga_console_info)) {
- const struct dom0_vga_console_info *info =
- (struct dom0_vga_console_info *)(
- (char *)xen_start_info +
- xen_start_info->console.dom0.info_off);
- dom0_init_screen_info(info);
- }
- xen_start_info->console.domU.mfn = 0;
- xen_start_info->console.domU.evtchn = 0;
- } else
- screen_info.orig_video_isVGA = 0;
-
- edid_info = EDID_INFO;
- saved_video_mode = SAVED_VIDEO_MODE;
- bootloader_type = LOADER_TYPE;
-
-#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-
-
-#endif
-
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_writable_pagetables);
-
- ARCH_SETUP
-#else
- ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
- screen_info = SCREEN_INFO;
- edid_info = EDID_INFO;
- saved_video_mode = SAVED_VIDEO_MODE;
- bootloader_type = LOADER_TYPE;
-
-#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-#endif
-#endif /* !CONFIG_XEN */
- setup_memory_region();
- copy_edd();
-
- if (!MOUNT_ROOT_RDONLY)
- root_mountflags &= ~MS_RDONLY;
- init_mm.start_code = (unsigned long) &_text;
- init_mm.end_code = (unsigned long) &_etext;
- init_mm.end_data = (unsigned long) &_edata;
- init_mm.brk = (unsigned long) &_end;
-
- code_resource.start = virt_to_phys(&_text);
- code_resource.end = virt_to_phys(&_etext)-1;
- data_resource.start = virt_to_phys(&_etext);
- data_resource.end = virt_to_phys(&_edata)-1;
-
- parse_cmdline_early(cmdline_p);
-
- early_identify_cpu(&boot_cpu_data);
-
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- end_pfn = e820_end_of_ram();
- num_physpages = end_pfn; /* for pfn_valid */
-
- check_efer();
-
- discover_ebda();
-
- init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
-
- if (is_initial_xendomain())
- dmi_scan_machine();
-
-#ifdef CONFIG_ACPI_NUMA
- /*
- * Parse SRAT to discover nodes.
- */
- acpi_numa_init();
-#endif
-
-#ifdef CONFIG_NUMA
- numa_initmem_init(0, end_pfn);
-#else
- contig_initmem_init(0, end_pfn);
-#endif
-
-#ifdef CONFIG_XEN
- /*
- * Reserve kernel, physmap, start info, initial page tables, and
- * direct mapping.
- */
- reserve_bootmem_generic(__pa_symbol(&_text),
- (table_end << PAGE_SHIFT) - __pa_symbol(&_text));
-#else
- /* Reserve direct mapping */
- reserve_bootmem_generic(table_start << PAGE_SHIFT,
- (table_end - table_start) << PAGE_SHIFT);
-
- /* reserve kernel */
- reserve_bootmem_generic(__pa_symbol(&_text),
- __pa_symbol(&_end) - __pa_symbol(&_text));
-
- /*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
- */
- reserve_bootmem_generic(0, PAGE_SIZE);
-
- /* reserve ebda region */
- if (ebda_addr)
- reserve_bootmem_generic(ebda_addr, ebda_size);
-
-#ifdef CONFIG_SMP
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
-
- /* Reserve SMP trampoline */
- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
-#endif
-#endif
-
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
-#endif
-#ifdef CONFIG_XEN
-#ifdef CONFIG_BLK_DEV_INITRD
- if (xen_start_info->mod_start) {
- if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
- /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/
- initrd_start = INITRD_START + PAGE_OFFSET;
- initrd_end = initrd_start+INITRD_SIZE;
- initrd_below_start_ok = 1;
- } else {
- printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- (unsigned long)(INITRD_START + INITRD_SIZE),
- (unsigned long)(end_pfn << PAGE_SHIFT));
- initrd_start = 0;
- }
- }
-#endif
-#else /* CONFIG_XEN */
-#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START) {
- if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
- reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
- initrd_start =
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
- initrd_end = initrd_start+INITRD_SIZE;
- }
- else {
- printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- (unsigned long)(INITRD_START + INITRD_SIZE),
- (unsigned long)(end_pfn << PAGE_SHIFT));
- initrd_start = 0;
- }
- }
-#endif
-#endif /* !CONFIG_XEN */
-#ifdef CONFIG_KEXEC
-#ifdef CONFIG_XEN
- xen_machine_kexec_setup_resources();
-#else
- if (crashk_res.start != crashk_res.end) {
- reserve_bootmem_generic(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
- }
-#endif
-#endif
-
- paging_init();
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
-#endif
-#ifdef CONFIG_XEN
- {
- int i, j, k, fpp;
- unsigned long p2m_pages;
-
- p2m_pages = end_pfn;
- if (xen_start_info->nr_pages > end_pfn) {
- /*
- * the end_pfn was shrunk (probably by mem= or highmem=
- * kernel parameter); shrink reservation with the HV
- */
- struct xen_memory_reservation reservation = {
- .address_bits = 0,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- unsigned int difference;
- int ret;
-
- difference = xen_start_info->nr_pages - end_pfn;
-
- set_xen_guest_handle(reservation.extent_start,
- ((unsigned long *)xen_start_info->mfn_list) + end_pfn);
- reservation.nr_extents = difference;
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- BUG_ON (ret != difference);
- }
- else if (end_pfn > xen_start_info->nr_pages)
- p2m_pages = xen_start_info->nr_pages;
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* Make sure we have a large enough P->M table. */
- phys_to_machine_mapping = alloc_bootmem_pages(
- end_pfn * sizeof(unsigned long));
- memset(phys_to_machine_mapping, ~0,
- end_pfn * sizeof(unsigned long));
- memcpy(phys_to_machine_mapping,
- (unsigned long *)xen_start_info->mfn_list,
- p2m_pages * sizeof(unsigned long));
- free_bootmem(
- __pa(xen_start_info->mfn_list),
- PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
- sizeof(unsigned long))));
-
- /*
- * Initialise the list of the frames that specify the
- * list of frames that make up the p2m table. Used by
- * save/restore.
- */
- pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
-
- fpp = PAGE_SIZE/sizeof(unsigned long);
- for (i=0, j=0, k=-1; i< end_pfn; i+=fpp, j++) {
- if ((j % fpp) == 0) {
- k++;
- BUG_ON(k>=fpp);
- pfn_to_mfn_frame_list[k] =
- alloc_bootmem_pages(PAGE_SIZE);
- pfn_to_mfn_frame_list_list[k] =
- virt_to_mfn(pfn_to_mfn_frame_list[k]);
- j=0;
- }
- pfn_to_mfn_frame_list[k][j] =
- virt_to_mfn(&phys_to_machine_mapping[i]);
- }
- HYPERVISOR_shared_info->arch.max_pfn = end_pfn;
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- virt_to_mfn(pfn_to_mfn_frame_list_list);
- }
-
- }
-
- if (!is_initial_xendomain()) {
- acpi_disabled = 1;
-#ifdef CONFIG_ACPI
- acpi_ht = 0;
-#endif
- }
-#endif
-
-#ifndef CONFIG_XEN
- check_ioapic();
-#endif
-
- zap_low_mappings(0);
-
- /*
- * set this early, so we dont allocate cpu0
- * if MADT list doesnt list BSP first
- * mpparse.c/MP_processor_info() allocates logical cpu numbers.
- */
- cpu_set(0, cpu_present_map);
-#ifdef CONFIG_ACPI
- /*
- * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
- * Call this early for SRAT node setup.
- */
- acpi_boot_table_init();
-
- /*
- * Read APIC and some other early information from ACPI tables.
- */
- acpi_boot_init();
-#endif
-
- init_cpu_to_node();
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * get boot-time SMP configuration:
- */
- if (smp_found_config)
- get_smp_config();
-#ifndef CONFIG_XEN
- init_apic_mappings();
-#endif
-#endif
-#if defined(CONFIG_XEN) && defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
- prefill_possible_map();
-#endif
-
- /*
- * Request address space for all standard RAM and ROM resources
- * and also for regions reported as reserved by the e820.
- */
- probe_roms();
-#ifdef CONFIG_XEN
- if (is_initial_xendomain()) {
- struct xen_memory_map memmap;
-
- memmap.nr_entries = E820MAX;
- set_xen_guest_handle(memmap.buffer, machine_e820.map);
-
- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
- BUG();
- machine_e820.nr_map = memmap.nr_entries;
-
- e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
- }
-#else
- e820_reserve_resources(e820.map, e820.nr_map);
-#endif
-
- request_resource(&iomem_resource, &video_ram_resource);
-
- {
- unsigned i;
- /* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
- request_resource(&ioport_resource, &standard_io_resources[i]);
- }
-
-#ifdef CONFIG_XEN
- if (is_initial_xendomain())
- e820_setup_gap(machine_e820.map, machine_e820.nr_map);
-#else
- e820_setup_gap(e820.map, e820.nr_map);
-#endif
-
-#ifdef CONFIG_XEN
- {
- struct physdev_set_iopl set_iopl;
-
- set_iopl.iopl = 1;
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-
- if (is_initial_xendomain()) {
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
-#endif
-#endif
- } else {
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
-#endif
- }
- }
- xencons_early_setup();
-#else /* CONFIG_XEN */
-
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
-#endif
-#endif
-
-#endif /* !CONFIG_XEN */
-}
-
-#ifdef CONFIG_XEN
-static int
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- HYPERVISOR_shutdown(SHUTDOWN_crash);
- /* we're never actually going to get here... */
- return NOTIFY_DONE;
-}
-#endif /* !CONFIG_XEN */
-
-
-static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
-{
- unsigned int *v;
-
- if (c->extended_cpuid_level < 0x80000004)
- return 0;
-
- v = (unsigned int *) c->x86_model_id;
- cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
- cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
- cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
- c->x86_model_id[48] = 0;
- return 1;
-}
-
-
-static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
-{
- unsigned int n, dummy, eax, ebx, ecx, edx;
-
- n = c->extended_cpuid_level;
-
- if (n >= 0x80000005) {
- cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
- c->x86_cache_size=(ecx>>24)+(edx>>24);
- /* On K8 L1 TLB is inclusive, so don't count it */
- c->x86_tlbsize = 0;
- }
-
- if (n >= 0x80000006) {
- cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
- ecx = cpuid_ecx(0x80000006);
- c->x86_cache_size = ecx >> 16;
- c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
-
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- c->x86_cache_size, ecx & 0xFF);
- }
-
- if (n >= 0x80000007)
- cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
- if (n >= 0x80000008) {
- cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
-}
-
-#ifdef CONFIG_NUMA
-static int nearby_node(int apicid)
-{
- int i;
- for (i = apicid - 1; i >= 0; i--) {
- int node = apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- int node = apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- return first_node(node_online_map); /* Shouldn't happen */
-}
-#endif
-
-/*
- * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
- * Assumes number of cores is a power of two.
- */
-static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- unsigned bits;
-#ifdef CONFIG_NUMA
- int cpu = smp_processor_id();
- int node = 0;
- unsigned apicid = hard_smp_processor_id();
-#endif
- unsigned ecx = cpuid_ecx(0x80000008);
-
- c->x86_max_cores = (ecx & 0xff) + 1;
-
- /* CPU telling us the core id bits shift? */
- bits = (ecx >> 12) & 0xF;
-
- /* Otherwise recompute */
- if (bits == 0) {
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- }
-
- /* Low order bits define the core id (index of core in socket) */
- c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
- /* Convert the APIC ID into the socket ID */
- c->phys_proc_id = phys_pkg_id(bits);
-
-#ifdef CONFIG_NUMA
- node = c->phys_proc_id;
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
- if (!node_online(node)) {
- /* Two possibilities here:
- - The CPU is missing memory and no node was created.
- In that case try picking one from a nearby CPU
- - The APIC IDs differ from the HyperTransport node IDs
- which the K8 northbridge parsing fills in.
- Assume they are all increased by a constant offset,
- but in the same order as the HT nodeids.
- If that doesn't result in a usable node fall back to the
- path for the previous case. */
- int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
- if (ht_nodeid >= 0 &&
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = apicid_to_node[ht_nodeid];
- /* Pick a nearby node */
- if (!node_online(node))
- node = nearby_node(apicid);
- }
- numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
-#endif
-#endif
-}
-
-static void __init init_amd(struct cpuinfo_x86 *c)
-{
- unsigned level;
-
-#ifdef CONFIG_SMP
- unsigned long value;
-
- /*
- * Disable TLB flush filter by setting HWCR.FFDIS on K8
- * bit 6 of msr C001_0015
- *
- * Errata 63 for SH-B3 steppings
- * Errata 122 for all steppings (F+ have it disabled by default)
- */
- if (c->x86 == 15) {
- rdmsrl(MSR_K8_HWCR, value);
- value |= 1 << 6;
- wrmsrl(MSR_K8_HWCR, value);
- }
-#endif
-
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, &c->x86_capability);
-
- /* On C+ stepping K8 rep microcode works well for copy/memset */
- level = cpuid_eax(1);
- if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
- set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
-
- /* Enable workaround for FXSAVE leak */
- if (c->x86 >= 6)
- set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
-
- level = get_model_name(c);
- if (!level) {
- switch (c->x86) {
- case 15:
- /* Should distinguish Models here, but this is only
- a fallback anyways. */
- strcpy(c->x86_model_id, "Hammer");
- break;
- }
- }
- display_cacheinfo(c);
-
- /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
- if (c->x86_power & (1<<8))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
-
- /* Multi core CPU? */
- if (c->extended_cpuid_level >= 0x80000008)
- amd_detect_cmp(c);
-
- /* Fix cpuid4 emulation for more */
- num_cache_leaves = 3;
-}
-
-static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- u32 eax, ebx, ecx, edx;
- int index_msb, core_bits;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
-
-
- if (!cpu_has(c, X86_FEATURE_HT))
- return;
- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
- goto out;
-
- smp_num_siblings = (ebx & 0xff0000) >> 16;
-
- if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1 ) {
-
- if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
- smp_num_siblings = 1;
- return;
- }
-
- index_msb = get_count_order(smp_num_siblings);
- c->phys_proc_id = phys_pkg_id(index_msb);
-
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
- index_msb = get_count_order(smp_num_siblings) ;
-
- core_bits = get_count_order(c->x86_max_cores);
-
- c->cpu_core_id = phys_pkg_id(index_msb) &
- ((1 << core_bits) - 1);
- }
-out:
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
- printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
- }
-
-#endif
-}
-
-/*
- * find out the number of processor cores on the die
- */
-static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, t;
-
- if (c->cpuid_level < 4)
- return 1;
-
- cpuid_count(4, 0, &eax, &t, &t, &t);
-
- if (eax & 0x1f)
- return ((eax >> 26) + 1);
- else
- return 1;
-}
-
-static void srat_detect_node(void)
-{
-#ifdef CONFIG_NUMA
- unsigned node;
- int cpu = smp_processor_id();
- int apicid = hard_smp_processor_id();
-
- /* Don't do the funky fallback heuristics the AMD version employs
- for now. */
- node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
- node = first_node(node_online_map);
- numa_set_node(cpu, node);
-
- if (acpi_numa > 0)
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
-#endif
-}
-
-static void __cpuinit init_intel(struct cpuinfo_x86 *c)
-{
- /* Cache sizes */
- unsigned n;
-
- init_intel_cacheinfo(c);
- if (c->cpuid_level > 9 ) {
- unsigned eax = cpuid_eax(10);
- /* Check for version and the number of counters */
- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
- set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
- }
-
- n = c->extended_cpuid_level;
- if (n >= 0x80000008) {
- unsigned eax = cpuid_eax(0x80000008);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- /* CPUID workaround for Intel 0F34 CPU */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 0xF && c->x86_model == 0x3 &&
- c->x86_mask == 0x4)
- c->x86_phys_bits = 36;
- }
-
- if (c->x86 == 15)
- c->x86_cache_alignment = c->x86_clflush_size * 2;
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- c->x86_max_cores = intel_num_cpu_cores(c);
-
- srat_detect_node();
-}
-
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
-{
- char *v = c->x86_vendor_id;
-
- if (!strcmp(v, "AuthenticAMD"))
- c->x86_vendor = X86_VENDOR_AMD;
- else if (!strcmp(v, "GenuineIntel"))
- c->x86_vendor = X86_VENDOR_INTEL;
- else
- c->x86_vendor = X86_VENDOR_UNKNOWN;
-}
-
-struct cpu_model_info {
- int vendor;
- int family;
- char *model_names[16];
-};
-
-/* Do some early cpuid on the boot CPU to get some parameter that are
- needed before check_bugs. Everything advanced is in identify_cpu
- below. */
-void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
-{
- u32 tfms;
-
- c->loops_per_jiffy = loops_per_jiffy;
- c->x86_cache_size = -1;
- c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
- c->x86_vendor_id[0] = '\0'; /* Unset */
- c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_clflush_size = 64;
- c->x86_cache_alignment = c->x86_clflush_size;
- c->x86_max_cores = 1;
- c->extended_cpuid_level = 0;
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
-
- /* Get vendor name */
- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
- (unsigned int *)&c->x86_vendor_id[0],
- (unsigned int *)&c->x86_vendor_id[8],
- (unsigned int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c);
-
- /* Initialize the standard set of capabilities */
- /* Note that the vendor-specific code below might override */
-
- /* Intel-defined flags: level 0x00000001 */
- if (c->cpuid_level >= 0x00000001) {
- __u32 misc;
- cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
- &c->x86_capability[0]);
- c->x86 = (tfms >> 8) & 0xf;
- c->x86_model = (tfms >> 4) & 0xf;
- c->x86_mask = tfms & 0xf;
- if (c->x86 == 0xf)
- c->x86 += (tfms >> 20) & 0xff;
- if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- if (c->x86_capability[0] & (1<<19))
- c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
- } else {
- /* Have CPUID level 0 only - unheard of */
- c->x86 = 4;
- }
-
-#ifdef CONFIG_SMP
- c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
-#endif
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
-{
- int i;
- u32 xlvl;
-
- early_identify_cpu(c);
-
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- c->extended_cpuid_level = xlvl;
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
- }
- if (xlvl >= 0x80000004)
- get_model_name(c); /* Default name */
- }
-
- /* Transmeta-defined flags: level 0x80860001 */
- xlvl = cpuid_eax(0x80860000);
- if ((xlvl & 0xffff0000) == 0x80860000) {
- /* Don't set x86_cpuid_level here for now to not confuse. */
- if (xlvl >= 0x80860001)
- c->x86_capability[2] = cpuid_edx(0x80860001);
- }
-
- c->apicid = phys_pkg_id(0);
-
- /*
- * Vendor-specific initialization. In this section we
- * canonicalize the feature flags, meaning if there are
- * features a certain CPU supports which CPUID doesn't
- * tell us, CPUID claiming incorrect flags, or other bugs,
- * we handle them here.
- *
- * At the end of this section, c->x86_capability better
- * indicate the features this CPU genuinely supports!
- */
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- init_amd(c);
- break;
-
- case X86_VENDOR_INTEL:
- init_intel(c);
- break;
-
- case X86_VENDOR_UNKNOWN:
- default:
- display_cacheinfo(c);
- break;
- }
-
- select_idle_routine(c);
- detect_ht(c);
-
- /*
- * On SMP, boot_cpu_data holds the common feature set between
- * all CPUs; so make sure that we indicate which features are
- * common between the CPUs. The first time this routine gets
- * executed, c == &boot_cpu_data.
- */
- if (c != &boot_cpu_data) {
- /* AND the already accumulated flags with these */
- for (i = 0 ; i < NCAPINTS ; i++)
- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
- }
-
-#ifdef CONFIG_X86_MCE
- mcheck_init(c);
-#endif
- if (c == &boot_cpu_data)
- mtrr_bp_init();
- else
- mtrr_ap_init();
-#ifdef CONFIG_NUMA
- numa_add_cpu(smp_processor_id());
-#endif
-}
-
-
-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
-{
- if (c->x86_model_id[0])
- printk("%s", c->x86_model_id);
-
- if (c->x86_mask || c->cpuid_level >= 0)
- printk(" stepping %02x\n", c->x86_mask);
- else
- printk("\n");
-}
-
-/*
- * Get CPU information for use by the procfs.
- */
-
-static int show_cpuinfo(struct seq_file *m, void *v)
-{
- struct cpuinfo_x86 *c = v;
-
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static char *x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
- "constant_tsc", NULL, NULL,
- "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static char *x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- NULL,
- /* nothing */ /* constant_tsc - moved to flags */
- };
-
-
-#ifdef CONFIG_SMP
- if (!cpu_online(c-cpu_data))
- return 0;
-#endif
-
- seq_printf(m,"processor\t: %u\n"
- "vendor_id\t: %s\n"
- "cpu family\t: %d\n"
- "model\t\t: %d\n"
- "model name\t: %s\n",
- (unsigned)(c-cpu_data),
- c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
- c->x86,
- (int)c->x86_model,
- c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
- else
- seq_printf(m, "stepping\t: unknown\n");
-
- if (cpu_has(c,X86_FEATURE_TSC)) {
- unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
- if (!freq)
- freq = cpu_khz;
- seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- freq / 1000, (freq % 1000));
- }
-
- /* Cache size */
- if (c->x86_cache_size >= 0)
- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-
-#ifdef CONFIG_SMP
- if (smp_num_siblings * c->x86_max_cores > 1) {
- int cpu = c - cpu_data;
- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
- }
-#endif
-
- seq_printf(m,
- "fpu\t\t: yes\n"
- "fpu_exception\t: yes\n"
- "cpuid level\t: %d\n"
- "wp\t\t: yes\n"
- "flags\t\t:",
- c->cpuid_level);
-
- {
- int i;
- for ( i = 0 ; i < 32*NCAPINTS ; i++ )
- if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
- seq_printf(m, " %s", x86_cap_flags[i]);
- }
-
- seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
- c->loops_per_jiffy/(500000/HZ),
- (c->loops_per_jiffy/(5000/HZ)) % 100);
-
- if (c->x86_tlbsize > 0)
- seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
- seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
- seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
-
- seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
- c->x86_phys_bits, c->x86_virt_bits);
-
- seq_printf(m, "power management:");
- {
- unsigned i;
- for (i = 0; i < 32; i++)
- if (c->x86_power & (1 << i)) {
- if (i < ARRAY_SIZE(x86_power_flags) &&
- x86_power_flags[i])
- seq_printf(m, "%s%s",
- x86_power_flags[i][0]?" ":"",
- x86_power_flags[i]);
- else
- seq_printf(m, " [%d]", i);
- }
- }
-
- seq_printf(m, "\n\n");
-
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- return *pos < NR_CPUS ? cpu_data + *pos : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
-
-#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
-#include <linux/platform_device.h>
-static __init int add_pcspkr(void)
-{
- struct platform_device *pd;
- int ret;
-
- if (!is_initial_xendomain())
- return 0;
-
- pd = platform_device_alloc("pcspkr", -1);
- if (!pd)
- return -ENOMEM;
-
- ret = platform_device_add(pd);
- if (ret)
- platform_device_put(pd);
-
- return ret;
-}
-device_initcall(add_pcspkr);
-#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
deleted file mode 100644
index 2230e33867..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * X86-64 specific CPU setup.
- * Copyright (C) 1995 Linus Torvalds
- * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
- * See setup.c for older changelog.
- *
- * Jun Nakajima <jun.nakajima@intel.com>
- * Modified for Xen
- *
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/bootmem.h>
-#include <linux/bitops.h>
-#include <linux/module.h>
-#include <asm/bootsetup.h>
-#include <asm/pda.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/atomic.h>
-#include <asm/mmu_context.h>
-#include <asm/smp.h>
-#include <asm/i387.h>
-#include <asm/percpu.h>
-#include <asm/proto.h>
-#include <asm/sections.h>
-#ifdef CONFIG_XEN
-#include <asm/hypervisor.h>
-#endif
-
-char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
-
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
-
-#ifndef CONFIG_X86_NO_IDT
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-#endif
-
-char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
-
-unsigned long __supported_pte_mask __read_mostly = ~0UL;
-EXPORT_SYMBOL(__supported_pte_mask);
-static int do_not_nx __cpuinitdata = 0;
-
-/* noexec=on|off
-Control non executable mappings for 64bit processes.
-
-on Enable(default)
-off Disable
-*/
-int __init nonx_setup(char *str)
-{
- if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
- do_not_nx = 0;
- } else if (!strncmp(str, "off", 3)) {
- do_not_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- }
- return 1;
-}
-__setup("noexec=", nonx_setup); /* parsed early actually */
-
-int force_personality32 = 0;
-
-/* noexec32=on|off
-Control non executable heap for 32bit processes.
-To control the stack too use noexec=off
-
-on PROT_READ does not imply PROT_EXEC for 32bit processes
-off PROT_READ implies PROT_EXEC (default)
-*/
-static int __init nonx32_setup(char *str)
-{
- if (!strcmp(str, "on"))
- force_personality32 &= ~READ_IMPLIES_EXEC;
- else if (!strcmp(str, "off"))
- force_personality32 |= READ_IMPLIES_EXEC;
- return 1;
-}
-__setup("noexec32=", nonx32_setup);
-
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
-void __init setup_per_cpu_areas(void)
-{
- int i;
- unsigned long size;
-
-#ifdef CONFIG_HOTPLUG_CPU
- prefill_possible_map();
-#endif
-
- /* Copy section for each CPU (we discard the original) */
- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
-#ifdef CONFIG_MODULES
- if (size < PERCPU_ENOUGH_ROOM)
- size = PERCPU_ENOUGH_ROOM;
-#endif
-
- for_each_cpu_mask (i, cpu_possible_map) {
- char *ptr;
-
- if (!NODE_DATA(cpu_to_node(i))) {
- printk("cpu with no node %d, num_online_nodes %d\n",
- i, num_online_nodes());
- ptr = alloc_bootmem(size);
- } else {
- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
- }
- if (!ptr)
- panic("Cannot allocate cpu data for CPU %d\n", i);
- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- }
-}
-
-#ifdef CONFIG_XEN
-static void switch_pt(void)
-{
- xen_pt_switch(__pa(init_level4_pgt));
- xen_new_user_pt(__pa(init_level4_user_pgt));
-}
-
-void __cpuinit cpu_gdt_init(struct desc_ptr *gdt_descr)
-{
- unsigned long frames[16];
- unsigned long va;
- int f;
-
- for (va = gdt_descr->address, f = 0;
- va < gdt_descr->address + gdt_descr->size;
- va += PAGE_SIZE, f++) {
- frames[f] = virt_to_mfn(va);
- make_page_readonly(
- (void *)va, XENFEAT_writable_descriptor_tables);
- }
- if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
- sizeof (struct desc_struct)))
- BUG();
-}
-#else
-static void switch_pt(void)
-{
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
-}
-
-void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
-{
- asm volatile("lgdt %0" :: "m" (*gdt_descr));
- asm volatile("lidt %0" :: "m" (idt_descr));
-}
-#endif
-
-void pda_init(int cpu)
-{
- struct x8664_pda *pda = cpu_pda(cpu);
-
- /* Setup up data that may be needed in __get_free_pages early */
- asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
-#ifndef CONFIG_XEN
- wrmsrl(MSR_GS_BASE, pda);
-#else
- HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, (unsigned long)pda);
-#endif
- pda->cpunumber = cpu;
- pda->irqcount = -1;
- pda->kernelstack =
- (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
-
- if (cpu == 0) {
-#ifdef CONFIG_XEN
- xen_init_pt();
-#endif
- /* others are initialized in smpboot.c */
- pda->pcurrent = &init_task;
- pda->irqstackptr = boot_cpu_stack;
- } else {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d", cpu);
- }
-
- switch_pt();
-
- pda->irqstackptr += IRQSTACKSIZE-64;
-}
-
-#ifndef CONFIG_X86_NO_TSS
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
-__attribute__((section(".bss.page_aligned")));
-#endif
-
-/* May not be marked __init: used by software suspend */
-void syscall_init(void)
-{
-#ifndef CONFIG_XEN
- /*
- * LSTAR and STAR live in a bit strange symbiosis.
- * They both write to the same internal register. STAR allows to set CS/DS
- * but only a 32bit target. LSTAR sets the 64bit rip.
- */
- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
- wrmsrl(MSR_LSTAR, system_call);
-
- /* Flags to clear on syscall */
- wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
-#endif
-#ifdef CONFIG_IA32_EMULATION
- syscall32_cpu_init ();
-#endif
-}
-
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || do_not_nx) {
- __supported_pte_mask &= ~_PAGE_NX;
- }
-}
-
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- * A lot of state is already set up in PDA init.
- */
-void __cpuinit cpu_init (void)
-{
- int cpu = stack_smp_processor_id();
-#ifndef CONFIG_X86_NO_TSS
- struct tss_struct *t = &per_cpu(init_tss, cpu);
- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
- unsigned long v;
- char *estacks = NULL;
- unsigned i;
-#endif
- struct task_struct *me;
-
- /* CPU 0 is initialised in head64.c */
- if (cpu != 0) {
- pda_init(cpu);
- zap_low_mappings(cpu);
- }
-#ifndef CONFIG_X86_NO_TSS
- else
- estacks = boot_exception_stacks;
-#endif
-
- me = current;
-
- if (cpu_test_and_set(cpu, cpu_initialized))
- panic("CPU#%d already initialized!\n", cpu);
-
- printk("Initializing CPU#%d\n", cpu);
-
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-
- /*
- * Initialize the per-CPU GDT with the boot GDT,
- * and set up the GDT descriptor:
- */
-#ifndef CONFIG_XEN
- if (cpu)
- memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
-#endif
-
- cpu_gdt_descr[cpu].size = GDT_SIZE;
- cpu_gdt_init(&cpu_gdt_descr[cpu]);
-
- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
- syscall_init();
-
- wrmsrl(MSR_FS_BASE, 0);
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
- barrier();
-
- check_efer();
-
-#ifndef CONFIG_X86_NO_TSS
- /*
- * set up and load the per-CPU TSS
- */
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- if (cpu) {
- static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
- };
-
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception stack %ld %d\n",
- v, cpu);
- }
- switch (v + 1) {
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- case DEBUG_STACK:
- cpu_pda(cpu)->debugstack = (unsigned long)estacks;
- estacks += DEBUG_STKSZ;
- break;
-#endif
- default:
- estacks += EXCEPTION_STKSZ;
- break;
- }
- orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
- }
-
- t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
- /*
- * <= is required because the CPU will access up to
- * 8 bits beyond the end of the IO permission bitmap.
- */
- for (i = 0; i <= IO_BITMAP_LONGS; i++)
- t->io_bitmap[i] = ~0UL;
-#endif
-
- atomic_inc(&init_mm.mm_count);
- me->active_mm = &init_mm;
- if (me->mm)
- BUG();
- enter_lazy_tlb(&init_mm, me);
-
-#ifndef CONFIG_X86_NO_TSS
- set_tss_desc(cpu, t);
-#endif
-#ifndef CONFIG_XEN
- load_TR_desc();
-#endif
- load_LDT(&init_mm.context);
-
- /*
- * Clear all 6 debug registers:
- */
-
- set_debugreg(0UL, 0);
- set_debugreg(0UL, 1);
- set_debugreg(0UL, 2);
- set_debugreg(0UL, 3);
- set_debugreg(0UL, 6);
- set_debugreg(0UL, 7);
-
- fpu_init();
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
deleted file mode 100644
index 32761b5f8a..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
+++ /dev/null
@@ -1,600 +0,0 @@
-/*
- * Intel SMP support routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- * (c) 2002,2003 Andi Kleen, SuSE Labs.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
-#include <linux/smp.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/interrupt.h>
-
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mach_apic.h>
-#include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
-#ifdef CONFIG_XEN
-#include <xen/evtchn.h>
-#endif
-
-#ifndef CONFIG_XEN
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- *
- * More scalable flush, from Andi Kleen
- *
- * To avoid global state use 8 different call vectors.
- * Each CPU uses a specific vector to trigger flushes on other
- * CPUs. Depending on the received vector the target CPUs look into
- * the right per cpu variable for the flush data.
- *
- * With more than 8 CPUs they are hashed to the 8 available
- * vectors. The limited global vector space forces us to this right now.
- * In future when interrupts are split into per CPU domains this could be
- * fixed, at the cost of triggering multiple IPIs in some cases.
- */
-
-union smp_flush_state {
- struct {
- cpumask_t flush_cpumask;
- struct mm_struct *flush_mm;
- unsigned long flush_va;
-#define FLUSH_ALL -1ULL
- spinlock_t tlbstate_lock;
- };
- char pad[SMP_CACHE_BYTES];
-} ____cacheline_aligned;
-
-/* State is put into the per CPU data section, but padded
- to a full cache line because other CPUs can access it and we don't
- want false sharing in the per cpu data segment. */
-static DEFINE_PER_CPU(union smp_flush_state, flush_state);
-#endif
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- */
-static inline void leave_mm(unsigned long cpu)
-{
- if (read_pda(mmu_state) == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
-}
-
-#ifndef CONFIG_XEN
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
- * 1a2) set cpu mmu_state to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu mmu_state to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu mmu_state is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
- */
-
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
-{
- int cpu;
- int sender;
- union smp_flush_state *f;
-
- cpu = smp_processor_id();
- /*
- * orig_rax contains the negated interrupt vector.
- * Use that to determine where the sender put the data.
- */
- sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
- f = &per_cpu(flush_state, sender);
-
- if (!cpu_isset(cpu, f->flush_cpumask))
- goto out;
- /*
- * This was a BUG() but until someone can quote me the
- * line from the intel manual that guarantees an IPI to
- * multiple CPUs is retried _only_ on the erroring CPUs
- * its staying as a return
- *
- * BUG();
- */
-
- if (f->flush_mm == read_pda(active_mm)) {
- if (read_pda(mmu_state) == TLBSTATE_OK) {
- if (f->flush_va == FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(f->flush_va);
- } else
- leave_mm(cpu);
- }
-out:
- ack_APIC_irq();
- cpu_clear(cpu, f->flush_cpumask);
-}
-
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
- unsigned long va)
-{
- int sender;
- union smp_flush_state *f;
-
- /* Caller has disabled preemption */
- sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
- f = &per_cpu(flush_state, sender);
-
- /* Could avoid this lock when
- num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
- probably not worth checking this for a cache-hot lock. */
- spin_lock(&f->tlbstate_lock);
-
- f->flush_mm = mm;
- f->flush_va = va;
- cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
-
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
-
- while (!cpus_empty(f->flush_cpumask))
- cpu_relax();
-
- f->flush_mm = NULL;
- f->flush_va = 0;
- spin_unlock(&f->tlbstate_lock);
-}
-
-int __cpuinit init_smp_flush(void)
-{
- int i;
- for_each_cpu_mask(i, cpu_possible_map) {
- spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
- }
- return 0;
-}
-
-core_initcall(init_smp_flush);
-
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_current_task);
-
-void flush_tlb_mm (struct mm_struct * mm)
-{
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- leave_mm(smp_processor_id());
- }
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_mm);
-
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{
- struct mm_struct *mm = vma->vm_mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if(current->mm)
- __flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
- }
-
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, va);
-
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-static void do_flush_tlb_all(void* info)
-{
- unsigned long cpu = smp_processor_id();
-
- __flush_tlb_all();
- if (read_pda(mmu_state) == TLBSTATE_LAZY)
- leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-}
-#else
-asmlinkage void smp_invalidate_interrupt (void)
-{ return; }
-void flush_tlb_current_task(void)
-{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
-void flush_tlb_mm (struct mm_struct * mm)
-{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
-void flush_tlb_all(void)
-{ xen_tlb_flush_all(); }
-#endif /* Xen */
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-
-void smp_send_reschedule(int cpu)
-{
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-static struct call_data_struct * call_data;
-
-void lock_ipi_call_lock(void)
-{
- spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
-/*
- * this function sends a 'generic call function' IPI to one other CPU
- * in the system.
- *
- * cpu is a standard Linux logical CPU number.
- */
-static void
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = 1;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (!wait)
- return;
-
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-}
-
-/*
- * smp_call_function_single - Run a function on another CPU
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Currently unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Retrurns 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int me = get_cpu();
- if (cpu == me) {
- WARN_ON(1);
- put_cpu();
- return -EBUSY;
- }
- spin_lock_bh(&call_lock);
- __smp_call_function_single(cpu, func, info, nonatomic, wait);
- spin_unlock_bh(&call_lock);
- put_cpu();
- return 0;
-}
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-static void __smp_call_function (void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = num_online_cpus()-1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
-#ifndef CONFIG_XEN
- cpu_relax();
-#else
- barrier();
-#endif
-
- if (!wait)
- return;
-
- while (atomic_read(&data.finished) != cpus)
-#ifndef CONFIG_XEN
- cpu_relax();
-#else
- barrier();
-#endif
-}
-
-/*
- * smp_call_function - run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
- * @wait: If true, wait (atomically) until function has completed on other
- * CPUs.
- *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute func or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- * Actually there are a few legal cases, like panic.
- */
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- spin_lock(&call_lock);
- __smp_call_function(func,info,nonatomic,wait);
- spin_unlock(&call_lock);
- return 0;
-}
-EXPORT_SYMBOL(smp_call_function);
-
-void smp_stop_cpu(void)
-{
- unsigned long flags;
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_save(flags);
-#ifndef CONFIG_XEN
- disable_local_APIC();
-#endif
- local_irq_restore(flags);
-}
-
-static void smp_really_stop_cpu(void *dummy)
-{
- smp_stop_cpu();
- for (;;)
- halt();
-}
-
-void smp_send_stop(void)
-{
- int nolock = 0;
-#ifndef CONFIG_XEN
- if (reboot_force)
- return;
-#endif
- /* Don't deadlock on the call lock in panic */
- if (!spin_trylock(&call_lock)) {
- /* ignore locking because we have panicked anyways */
- nolock = 1;
- }
- __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
- if (!nolock)
- spin_unlock(&call_lock);
-
- local_irq_disable();
-#ifndef CONFIG_XEN
- disable_local_APIC();
-#endif
- local_irq_enable();
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-#ifndef CONFIG_XEN
-asmlinkage void smp_reschedule_interrupt(void)
-#else
-asmlinkage irqreturn_t smp_reschedule_interrupt(void)
-#endif
-{
-#ifndef CONFIG_XEN
- ack_APIC_irq();
-#else
- return IRQ_HANDLED;
-#endif
-}
-
-#ifndef CONFIG_XEN
-asmlinkage void smp_call_function_interrupt(void)
-#else
-asmlinkage irqreturn_t smp_call_function_interrupt(void)
-#endif
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
-#ifndef CONFIG_XEN
- ack_APIC_irq();
-#endif
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- exit_idle();
- irq_enter();
- (*func)(info);
- irq_exit();
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
-#ifdef CONFIG_XEN
- return IRQ_HANDLED;
-#endif
-}
-
-int safe_smp_processor_id(void)
-{
-#ifdef CONFIG_XEN
- return smp_processor_id();
-#else
- unsigned apicid, i;
-
- if (disable_apic)
- return 0;
-
- apicid = hard_smp_processor_id();
- if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
- return apicid;
-
- for (i = 0; i < NR_CPUS; ++i) {
- if (x86_cpu_to_apicid[i] == apicid)
- return i;
- }
-
- /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
- * or called too early. Either way, we must be CPU 0. */
- if (x86_cpu_to_apicid[0] == BAD_APICID)
- return 0;
-
- return 0; /* Should not happen */
-#endif
-}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
deleted file mode 100644
index 313c73fc4a..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
+++ /dev/null
@@ -1,1175 +0,0 @@
-/*
- * linux/arch/x86-64/traps.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'entry.S'.
- */
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/nmi.h>
-#include <linux/kprobes.h>
-#include <linux/kexec.h>
-#include <linux/unwind.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/debugreg.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/kdebug.h>
-#include <asm/processor.h>
-#include <asm/unwind.h>
-#include <asm/smp.h>
-#include <asm/pgalloc.h>
-#include <asm/pda.h>
-#include <asm/proto.h>
-#include <asm/nmi.h>
-
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void double_fault(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void reserved(void);
-asmlinkage void alignment_check(void);
-asmlinkage void machine_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-
-ATOMIC_NOTIFIER_HEAD(die_chain);
-EXPORT_SYMBOL(die_chain);
-
-int register_die_notifier(struct notifier_block *nb)
-{
- vmalloc_sync_all();
- return atomic_notifier_chain_register(&die_chain, nb);
-}
-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
-
-int unregister_die_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&die_chain, nb);
-}
-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
-
-static inline void conditional_sti(struct pt_regs *regs)
-{
- if (regs->eflags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
- preempt_disable();
- if (regs->eflags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void preempt_conditional_cli(struct pt_regs *regs)
-{
- if (regs->eflags & X86_EFLAGS_IF)
- local_irq_disable();
- /* Make sure to not schedule here because we could be running
- on an exception stack. */
- preempt_enable_no_resched();
-}
-
-static int kstack_depth_to_print = 12;
-#ifdef CONFIG_STACK_UNWIND
-static int call_trace = 1;
-#else
-#define call_trace (-1)
-#endif
-
-#ifdef CONFIG_KALLSYMS
-# include <linux/kallsyms.h>
-void printk_address(unsigned long address)
-{
- unsigned long offset = 0, symsize;
- const char *symname;
- char *modname;
- char *delim = ":";
- char namebuf[128];
-
- symname = kallsyms_lookup(address, &symsize, &offset,
- &modname, namebuf);
- if (!symname) {
- printk(" [<%016lx>]\n", address);
- return;
- }
- if (!modname)
- modname = delim = "";
- printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
- address, delim, modname, delim, symname, offset, symsize);
-}
-#else
-void printk_address(unsigned long address)
-{
- printk(" [<%016lx>]\n", address);
-}
-#endif
-
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, const char **idp)
-{
-#ifndef CONFIG_X86_NO_TSS
- static char ids[][8] = {
- [DEBUG_STACK - 1] = "#DB",
- [NMI_STACK - 1] = "NMI",
- [DOUBLEFAULT_STACK - 1] = "#DF",
- [STACKFAULT_STACK - 1] = "#SS",
- [MCE_STACK - 1] = "#MC",
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
-#endif
- };
- unsigned k;
-
- /*
- * Iterate over all exception stacks, and figure out whether
- * 'stack' is in one of them:
- */
- for (k = 0; k < N_EXCEPTION_STACKS; k++) {
- unsigned long end;
-
- /*
- * set 'end' to the end of the exception stack.
- */
- switch (k + 1) {
- /*
- * TODO: this block is not needed i think, because
- * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
- * properly too.
- */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- case DEBUG_STACK:
- end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
- break;
-#endif
- default:
- end = per_cpu(orig_ist, cpu).ist[k];
- break;
- }
- /*
- * Is 'stack' above this exception frame's end?
- * If yes then skip to the next frame.
- */
- if (stack >= end)
- continue;
- /*
- * Is 'stack' above this exception frame's start address?
- * If yes then we found the right frame.
- */
- if (stack >= end - EXCEPTION_STKSZ) {
- /*
- * Make sure we only iterate through an exception
- * stack once. If it comes up for the second time
- * then there's something wrong going on - just
- * break out and return NULL:
- */
- if (*usedp & (1U << k))
- break;
- *usedp |= 1U << k;
- *idp = ids[k];
- return (unsigned long *)end;
- }
- /*
- * If this is a debug stack, and if it has a larger size than
- * the usual exception stacks, then 'stack' might still
- * be within the lower portion of the debug stack:
- */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
- unsigned j = N_EXCEPTION_STACKS - 1;
-
- /*
- * Black magic. A large debug stack is composed of
- * multiple exception stack entries, which we
- * iterate through now. Dont look:
- */
- do {
- ++j;
- end -= EXCEPTION_STKSZ;
- ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
- } while (stack < end - EXCEPTION_STKSZ);
- if (*usedp & (1U << j))
- break;
- *usedp |= 1U << j;
- *idp = ids[j];
- return (unsigned long *)end;
- }
-#endif
- }
-#endif
- return NULL;
-}
-
-static int show_trace_unwind(struct unwind_frame_info *info, void *context)
-{
- int n = 0;
-
- while (unwind(info) == 0 && UNW_PC(info)) {
- n++;
- printk_address(UNW_PC(info));
- if (arch_unw_user_mode(info))
- break;
- }
- return n;
-}
-
-/*
- * x86-64 can have upto three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
-{
- const unsigned cpu = safe_smp_processor_id();
- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
- unsigned used = 0;
-
- printk("\nCall Trace:\n");
-
- if (!tsk)
- tsk = current;
-
- if (call_trace >= 0) {
- int unw_ret = 0;
- struct unwind_frame_info info;
-
- if (regs) {
- if (unwind_init_frame_info(&info, tsk, regs) == 0)
- unw_ret = show_trace_unwind(&info, NULL);
- } else if (tsk == current)
- unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
- else {
- if (unwind_init_blocked(&info, tsk) == 0)
- unw_ret = show_trace_unwind(&info, NULL);
- }
- if (unw_ret > 0) {
- if (call_trace == 1 && !arch_unw_user_mode(&info)) {
- print_symbol("DWARF2 unwinder stuck at %s\n",
- UNW_PC(&info));
- if ((long)UNW_SP(&info) < 0) {
- printk("Leftover inexact backtrace:\n");
- stack = (unsigned long *)UNW_SP(&info);
- } else
- printk("Full inexact backtrace again:\n");
- } else if (call_trace >= 1)
- return;
- else
- printk("Full inexact backtrace again:\n");
- } else
- printk("Inexact backtrace:\n");
- }
-
- /*
- * Print function call entries within a stack. 'cond' is the
- * "end of stackframe" condition, that the 'stack++'
- * iteration will eventually trigger.
- */
-#define HANDLE_STACK(cond) \
- do while (cond) { \
- unsigned long addr = *stack++; \
- if (kernel_text_address(addr)) { \
- /* \
- * If the address is either in the text segment of the \
- * kernel, or in the region which contains vmalloc'ed \
- * memory, it *may* be the address of a calling \
- * routine; if so, print it so that someone tracing \
- * down the cause of the crash will be able to figure \
- * out the call path that was taken. \
- */ \
- printk_address(addr); \
- } \
- } while (0)
-
- /*
- * Print function call entries in all stacks, starting at the
- * current stack address. If the stacks consist of nested
- * exceptions
- */
- for ( ; ; ) {
- const char *id;
- unsigned long *estack_end;
- estack_end = in_exception_stack(cpu, (unsigned long)stack,
- &used, &id);
-
- if (estack_end) {
- printk(" <%s>", id);
- HANDLE_STACK (stack < estack_end);
- printk(" <EOE>");
- /*
- * We link to the next stack via the
- * second-to-last pointer (index -2 to end) in the
- * exception stack:
- */
- stack = (unsigned long *) estack_end[-2];
- continue;
- }
- if (irqstack_end) {
- unsigned long *irqstack;
- irqstack = irqstack_end -
- (IRQSTACKSIZE - 64) / sizeof(*irqstack);
-
- if (stack >= irqstack && stack < irqstack_end) {
- printk(" <IRQ>");
- HANDLE_STACK (stack < irqstack_end);
- /*
- * We link to the next stack (which would be
- * the process stack normally) the last
- * pointer (index -1 to end) in the IRQ stack:
- */
- stack = (unsigned long *) (irqstack_end[-1]);
- irqstack_end = NULL;
- printk(" <EOI>");
- continue;
- }
- }
- break;
- }
-
- /*
- * This prints the process stack:
- */
- HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
-#undef HANDLE_STACK
-
- printk("\n");
-}
-
-static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
-{
- unsigned long *stack;
- int i;
- const int cpu = safe_smp_processor_id();
- unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
-
- // debugging aid: "show_stack(NULL, NULL);" prints the
- // back trace for this cpu.
-
- if (rsp == NULL) {
- if (tsk)
- rsp = (unsigned long *)tsk->thread.rsp;
- else
- rsp = (unsigned long *)&rsp;
- }
-
- stack = rsp;
- for(i=0; i < kstack_depth_to_print; i++) {
- if (stack >= irqstack && stack <= irqstack_end) {
- if (stack == irqstack_end) {
- stack = (unsigned long *) (irqstack_end[-1]);
- printk(" <EOI> ");
- }
- } else {
- if (((long) stack & (THREAD_SIZE-1)) == 0)
- break;
- }
- if (i && ((i % 4) == 0))
- printk("\n");
- printk(" %016lx", *stack++);
- touch_nmi_watchdog();
- }
- show_trace(tsk, regs, rsp);
-}
-
-void show_stack(struct task_struct *tsk, unsigned long * rsp)
-{
- _show_stack(tsk, NULL, rsp);
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long dummy;
- show_trace(NULL, NULL, &dummy);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void show_registers(struct pt_regs *regs)
-{
- int i;
- int in_kernel = !user_mode(regs);
- unsigned long rsp;
- const int cpu = safe_smp_processor_id();
- struct task_struct *cur = cpu_pda(cpu)->pcurrent;
-
- rsp = regs->rsp;
-
- printk("CPU %d ", cpu);
- __show_regs(regs);
- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
- cur->comm, cur->pid, task_thread_info(cur), cur);
-
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
- */
- if (in_kernel) {
-
- printk("Stack: ");
- _show_stack(NULL, regs, (unsigned long*)rsp);
-
- printk("\nCode: ");
- if (regs->rip < PAGE_OFFSET)
- goto bad;
-
- for (i=0; i<20; i++) {
- unsigned char c;
- if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
-bad:
- printk(" Bad RIP value.");
- break;
- }
- printk("%02x ", c);
- }
- }
- printk("\n");
-}
-
-void handle_BUG(struct pt_regs *regs)
-{
- struct bug_frame f;
- long len;
- const char *prefix = "";
-
- if (user_mode(regs))
- return;
- if (__copy_from_user(&f, (const void __user *) regs->rip,
- sizeof(struct bug_frame)))
- return;
- if (f.filename >= 0 ||
- f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
- return;
- len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1;
- if (len < 0 || len >= PATH_MAX)
- f.filename = (int)(long)"unmapped filename";
- else if (len > 50) {
- f.filename += len - 50;
- prefix = "...";
- }
- printk("----------- [cut here ] --------- [please bite here ] ---------\n");
- printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line);
-}
-
-#ifdef CONFIG_BUG
-void out_of_line_bug(void)
-{
- BUG();
-}
-EXPORT_SYMBOL(out_of_line_bug);
-#endif
-
-static DEFINE_SPINLOCK(die_lock);
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- int cpu = safe_smp_processor_id();
- unsigned long flags;
-
- /* racy, but better than risking deadlock. */
- local_irq_save(flags);
- if (!spin_trylock(&die_lock)) {
- if (cpu == die_owner)
- /* nested oops. should stop eventually */;
- else
- spin_lock(&die_lock);
- }
- die_nest_count++;
- die_owner = cpu;
- console_verbose();
- bust_spinlocks(1);
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags)
-{
- die_owner = -1;
- bust_spinlocks(0);
- die_nest_count--;
- if (die_nest_count)
- /* We still own the lock */
- local_irq_restore(flags);
- else
- /* Nest count reaches zero, release the lock. */
- spin_unlock_irqrestore(&die_lock, flags);
- if (panic_on_oops)
- panic("Fatal exception");
-}
-
-void __kprobes __die(const char * str, struct pt_regs * regs, long err)
-{
- static int die_counter;
- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
- show_registers(regs);
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP ");
- printk_address(regs->rip);
- printk(" RSP <%016lx>\n", regs->rsp);
- if (kexec_should_crash(current))
- crash_kexec(regs);
-}
-
-void die(const char * str, struct pt_regs * regs, long err)
-{
- unsigned long flags = oops_begin();
-
- handle_BUG(regs);
- __die(str, regs, err);
- oops_end(flags);
- do_exit(SIGSEGV);
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-void __kprobes die_nmi(char *str, struct pt_regs *regs)
-{
- unsigned long flags = oops_begin();
-
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- printk(str, safe_smp_processor_id());
- show_registers(regs);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (panic_on_timeout || panic_on_oops)
- panic("nmi watchdog");
- printk("console shuts up ...\n");
- oops_end(flags);
- nmi_exit();
- local_irq_enable();
- do_exit(SIGSEGV);
-}
-#endif
-
-static void __kprobes do_trap(int trapnr, int signr, char *str,
- struct pt_regs * regs, long error_code,
- siginfo_t *info)
-{
- struct task_struct *tsk = current;
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
-
- if (user_mode(regs)) {
- if (exception_trace && unhandled_signal(tsk, signr))
- printk(KERN_INFO
- "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
- tsk->comm, tsk->pid, str,
- regs->rip, regs->rsp, error_code);
-
- if (info)
- force_sig_info(signr, info, tsk);
- else
- force_sig(signr, tsk);
- return;
- }
-
-
- /* kernel trap */
- {
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup)
- regs->rip = fixup->fixup;
- else
- die(str, regs, error_code);
- return;
- }
-}
-
-#define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, NULL); \
-}
-
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, &info); \
-}
-
-DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
-DO_ERROR( 4, SIGSEGV, "overflow", overflow)
-DO_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip)
-DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR(18, SIGSEGV, "reserved", reserved)
-
-/* Runs on IST stack */
-asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
- if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
- 12, SIGBUS) == NOTIFY_STOP)
- return;
- preempt_conditional_sti(regs);
- do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
-}
-
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
-{
- static const char str[] = "double fault";
- struct task_struct *tsk = current;
-
- /* Return not checked because double check cannot be ignored */
- notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 8;
-
- /* This is always a kernel trap and never fixable (and thus must
- never return). */
- for (;;)
- die(str, regs, error_code);
-}
-
-asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
- long error_code)
-{
- struct task_struct *tsk = current;
-
- conditional_sti(regs);
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
-
- if (user_mode(regs)) {
- if (exception_trace && unhandled_signal(tsk, SIGSEGV))
- printk(KERN_INFO
- "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
- tsk->comm, tsk->pid,
- regs->rip, regs->rsp, error_code);
-
- force_sig(SIGSEGV, tsk);
- return;
- }
-
- /* kernel gp */
- {
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return;
- }
- if (notify_die(DIE_GPF, "general protection fault", regs,
- error_code, 13, SIGSEGV) == NOTIFY_STOP)
- return;
- die("general protection fault", regs, error_code);
- }
-}
-
-static __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
- printk("You probably have a hardware problem with your RAM chips\n");
-
-#if 0 /* XEN */
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
-#endif /* XEN */
-}
-
-static __kprobes void
-io_check_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
-#if 0 /* XEN */
- /* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
- mdelay(2000);
- reason &= ~8;
- outb(reason, 0x61);
-#endif /* XEN */
-}
-
-static __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
- printk("Dazed and confused, but trying to continue\n");
- printk("Do you have a strange power saving mode enabled?\n");
-}
-
-/* Runs on IST stack. This code must keep interrupts off all the time.
- Nested NMIs are prevented by the CPU. */
-asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
-{
- unsigned char reason = 0;
- int cpu;
-
- cpu = smp_processor_id();
-
- /* Only the BSP gets external NMIs from the system. */
- if (!cpu)
- reason = get_nmi_reason();
-
- if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Ok, so this is none of the documented NMI sources,
- * so it must be the NMI watchdog.
- */
- if (nmi_watchdog > 0) {
- nmi_watchdog_tick(regs,reason);
- return;
- }
-#endif
- unknown_nmi_error(reason, regs);
- return;
- }
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
-
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
-}
-
-/* runs on IST stack. */
-asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
-{
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
- return;
- }
- preempt_conditional_sti(regs);
- do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
-}
-
-/* Help handler running on IST stack to switch back to user stack
- for scheduling or signal handling. The actual stack switch is done in
- entry.S */
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
-{
- struct pt_regs *regs = eregs;
- /* Did already sync */
- if (eregs == (struct pt_regs *)eregs->rsp)
- ;
- /* Exception from user space */
- else if (user_mode(eregs))
- regs = task_pt_regs(current);
- /* Exception from kernel and interrupts are enabled. Move to
- kernel process stack. */
- else if (eregs->eflags & X86_EFLAGS_IF)
- regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs));
- if (eregs != regs)
- *regs = *eregs;
- return regs;
-}
-
-/* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
- unsigned long error_code)
-{
- unsigned long condition;
- struct task_struct *tsk = current;
- siginfo_t info;
-
- get_debugreg(condition, 6);
-
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
- return;
-
- preempt_conditional_sti(regs);
-
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7) {
- goto clear_dr7;
- }
- }
-
- tsk->thread.debugreg6 = condition;
-
- /* Mask out spurious TF errors due to lazy TF clearing */
- if (condition & DR_STEP) {
- /*
- * The TF error should be masked out only if the current
- * process is not traced and if the TRAP flag has been set
- * previously by a tracing process (condition detected by
- * the PT_DTRACE flag); remember that the i386 TRAP flag
- * can be modified by the process itself in user mode,
- * allowing programs to debug themselves without the ptrace()
- * interface.
- */
- if (!user_mode(regs))
- goto clear_TF_reenable;
- /*
- * Was the TF flag set by a debugger? If so, clear it now,
- * so that register information is correct.
- */
- if (tsk->ptrace & PT_DTRACE) {
- regs->eflags &= ~TF_MASK;
- tsk->ptrace &= ~PT_DTRACE;
- }
- }
-
- /* Ok, finally something we can handle */
- tsk->thread.trap_no = 1;
- tsk->thread.error_code = error_code;
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL;
- force_sig_info(SIGTRAP, &info, tsk);
-
-clear_dr7:
- set_debugreg(0UL, 7);
- preempt_conditional_cli(regs);
- return;
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->eflags &= ~TF_MASK;
- preempt_conditional_cli(regs);
-}
-
-static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
-{
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return 1;
- }
- notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
- /* Illegal floating point operation in the kernel */
- current->thread.trap_no = trapnr;
- die(str, regs, 0);
- return 0;
-}
-
-/*
- * Note that we play around with the 'TS' bit in an attempt to get
- * the correct behaviour even in the presence of the asynchronous
- * IRQ13 behaviour
- */
-asmlinkage void do_coprocessor_error(struct pt_regs *regs)
-{
- void __user *rip = (void __user *)(regs->rip);
- struct task_struct * task;
- siginfo_t info;
- unsigned short cwd, swd;
-
- conditional_sti(regs);
- if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel x87 math error", 16))
- return;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 16;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = rip;
- /*
- * (~cwd & swd) will mask out exceptions that are not set to unmasked
- * status. 0x3f is the exception bits in these regs, 0x200 is the
- * C1 reg you need in case of a stack fault, 0x040 is the stack
- * fault bit. We should only be taking one exception at a time,
- * so if this combination doesn't produce any single exception,
- * then we have a bad program that isn't synchronizing its FPU usage
- * and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
- */
- cwd = get_fpu_cwd(task);
- swd = get_fpu_swd(task);
- switch (swd & ~cwd & 0x3f) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void bad_intr(void)
-{
- printk("bad interrupt");
-}
-
-asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
-{
- void __user *rip = (void __user *)(regs->rip);
- struct task_struct * task;
- siginfo_t info;
- unsigned short mxcsr;
-
- conditional_sti(regs);
- if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel simd math error", 19))
- return;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 19;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = rip;
- /*
- * The SIMD FPU exceptions are handled a little differently, as there
- * is only a single status/control register. Thus, to determine which
- * unmasked exception was caught we must mask the exception mask bits
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
- */
- mxcsr = get_fpu_mxcsr(task);
- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
-{
-}
-
-#if 0
-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
-{
-}
-#endif
-
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
-{
-}
-
-/*
- * 'math_state_restore()' saves the current math information in the
- * old math state array, and gets the new ones from the current task
- *
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
- * Don't touch unless you *really* know how it works.
- */
-asmlinkage void math_state_restore(void)
-{
- struct task_struct *me = current;
- /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
-
- if (!used_math())
- init_fpu(me);
- restore_fpu_checking(&me->thread.i387.fxsave);
- task_thread_info(me)->status |= TS_USEDFPU;
-}
-
-
-/*
- * NB. All these are "interrupt gates" (i.e. events_mask is set) because we
- * specify <dpl>|4 in the second field.
- */
-static trap_info_t trap_table[] = {
- { 0, 0|4, __KERNEL_CS, (unsigned long)divide_error },
- { 1, 0|4, __KERNEL_CS, (unsigned long)debug },
- { 3, 3|4, __KERNEL_CS, (unsigned long)int3 },
- { 4, 3|4, __KERNEL_CS, (unsigned long)overflow },
- { 5, 0|4, __KERNEL_CS, (unsigned long)bounds },
- { 6, 0|4, __KERNEL_CS, (unsigned long)invalid_op },
- { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available },
- { 9, 0|4, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun},
- { 10, 0|4, __KERNEL_CS, (unsigned long)invalid_TSS },
- { 11, 0|4, __KERNEL_CS, (unsigned long)segment_not_present },
- { 12, 0|4, __KERNEL_CS, (unsigned long)stack_segment },
- { 13, 0|4, __KERNEL_CS, (unsigned long)general_protection },
- { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault },
- { 15, 0|4, __KERNEL_CS, (unsigned long)spurious_interrupt_bug },
- { 16, 0|4, __KERNEL_CS, (unsigned long)coprocessor_error },
- { 17, 0|4, __KERNEL_CS, (unsigned long)alignment_check },
-#ifdef CONFIG_X86_MCE
- { 18, 0|4, __KERNEL_CS, (unsigned long)machine_check },
-#endif
- { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
-#ifdef CONFIG_IA32_EMULATION
- { IA32_SYSCALL_VECTOR, 3|4, __KERNEL_CS, (unsigned long)ia32_syscall},
-#endif
- { 0, 0, 0, 0 }
-};
-
-void __init trap_init(void)
-{
- int ret;
-
- ret = HYPERVISOR_set_trap_table(trap_table);
-
- if (ret)
- printk("HYPERVISOR_set_trap_table faild: error %d\n",
- ret);
-
- /*
- * Should be a barrier for any external CPU state.
- */
- cpu_init();
-}
-
-void smp_trap_init(trap_info_t *trap_ctxt)
-{
- trap_info_t *t = trap_table;
-
- for (t = trap_table; t->address; t++) {
- trap_ctxt[t->vector].flags = t->flags;
- trap_ctxt[t->vector].cs = t->cs;
- trap_ctxt[t->vector].address = t->address;
- }
-}
-
-
-/* Actual parsing is done early in setup.c. */
-static int __init oops_dummy(char *s)
-{
- panic_on_oops = 1;
- return 1;
-}
-__setup("oops=", oops_dummy);
-
-static int __init kstack_setup(char *s)
-{
- kstack_depth_to_print = simple_strtoul(s,NULL,0);
- return 1;
-}
-__setup("kstack=", kstack_setup);
-
-#ifdef CONFIG_STACK_UNWIND
-static int __init call_trace_setup(char *s)
-{
- if (strcmp(s, "old") == 0)
- call_trace = -1;
- else if (strcmp(s, "both") == 0)
- call_trace = 0;
- else if (strcmp(s, "newfallback") == 0)
- call_trace = 1;
- else if (strcmp(s, "new") == 0)
- call_trace = 2;
- return 1;
-}
-__setup("call_trace=", call_trace_setup);
-#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/vsyscall-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/vsyscall-xen.c
deleted file mode 100644
index ed452cebc2..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/vsyscall-xen.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/vsyscall.c
- *
- * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright 2003 Andi Kleen, SuSE Labs.
- *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
- *
- * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
- * at virtual address -10Mbyte+1024bytes etc... There are at max 4
- * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
- * jumping out of line if necessary. We cannot add more with this
- * mechanism because older kernels won't return -ENOSYS.
- * If we want more than four we need a vDSO.
- *
- * Note: the concept clashes with user mode linux. If you use UML and
- * want per guest time just set the kernel.vsyscall64 sysctl to 0.
- */
-
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/seqlock.h>
-#include <linux/jiffies.h>
-#include <linux/sysctl.h>
-
-#include <asm/vsyscall.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/fixmap.h>
-#include <asm/errno.h>
-#include <asm/io.h>
-
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-
-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
-
-#include <asm/unistd.h>
-
-static __always_inline void timeval_normalize(struct timeval * tv)
-{
- time_t __sec;
-
- __sec = tv->tv_usec / 1000000;
- if (__sec) {
- tv->tv_usec %= 1000000;
- tv->tv_sec += __sec;
- }
-}
-
-static __always_inline void do_vgettimeofday(struct timeval * tv)
-{
- long sequence, t;
- unsigned long sec, usec;
-
- do {
- sequence = read_seqbegin(&__xtime_lock);
-
- sec = __xtime.tv_sec;
- usec = (__xtime.tv_nsec / 1000) +
- (__jiffies - __wall_jiffies) * (1000000 / HZ);
-
- if (__vxtime.mode != VXTIME_HPET) {
- t = get_cycles_sync();
- if (t < __vxtime.last_tsc)
- t = __vxtime.last_tsc;
- usec += ((t - __vxtime.last_tsc) *
- __vxtime.tsc_quot) >> 32;
- /* See comment in x86_64 do_gettimeofday. */
- } else {
- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot) >> 32;
- }
- } while (read_seqretry(&__xtime_lock, sequence));
-
- tv->tv_sec = sec + usec / 1000000;
- tv->tv_usec = usec % 1000000;
-}
-
-/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
-static __always_inline void do_get_tz(struct timezone * tz)
-{
- *tz = __sys_tz;
-}
-
-static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- int ret;
- asm volatile("vsysc2: syscall"
- : "=a" (ret)
- : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
- return ret;
-}
-
-static __always_inline long time_syscall(long *t)
-{
- long secs;
- asm volatile("vsysc1: syscall"
- : "=a" (secs)
- : "0" (__NR_time),"D" (t) : __syscall_clobber);
- return secs;
-}
-
-int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
-{
- if (!__sysctl_vsyscall)
- return gettimeofday(tv,tz);
- if (tv)
- do_vgettimeofday(tv);
- if (tz)
- do_get_tz(tz);
- return 0;
-}
-
-/* This will break when the xtime seconds get inaccurate, but that is
- * unlikely */
-time_t __vsyscall(1) vtime(time_t *t)
-{
- if (!__sysctl_vsyscall)
- return time_syscall(t);
- else if (t)
- *t = __xtime.tv_sec;
- return __xtime.tv_sec;
-}
-
-long __vsyscall(2) venosys_0(void)
-{
- return -ENOSYS;
-}
-
-long __vsyscall(3) venosys_1(void)
-{
- return -ENOSYS;
-}
-
-#ifdef CONFIG_SYSCTL
-
-#define SYSCALL 0x050f
-#define NOP2 0x9090
-
-/*
- * NOP out syscall in vsyscall page when not needed.
- */
-static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- extern u16 vsysc1, vsysc2;
- u16 *map1, *map2;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- if (!write)
- return ret;
- /* gcc has some trouble with __va(__pa()), so just do it this
- way. */
- map1 = ioremap(__pa_symbol(&vsysc1), 2);
- if (!map1)
- return -ENOMEM;
- map2 = ioremap(__pa_symbol(&vsysc2), 2);
- if (!map2) {
- ret = -ENOMEM;
- goto out;
- }
- if (!sysctl_vsyscall) {
- *map1 = SYSCALL;
- *map2 = SYSCALL;
- } else {
- *map1 = NOP2;
- *map2 = NOP2;
- }
- iounmap(map2);
-out:
- iounmap(map1);
- return ret;
-}
-
-static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
-{
- return -ENOSYS;
-}
-
-static ctl_table kernel_table2[] = {
- { .ctl_name = 99, .procname = "vsyscall64",
- .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
- .strategy = vsyscall_sysctl_nostrat,
- .proc_handler = vsyscall_sysctl_change },
- { 0, }
-};
-
-static ctl_table kernel_root_table2[] = {
- { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
- .child = kernel_table2 },
- { 0 },
-};
-
-#endif
-
-static void __init map_vsyscall(void)
-{
- extern char __vsyscall_0;
- unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
-
- __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
-}
-
-#ifdef CONFIG_XEN
-static void __init map_vsyscall_user(void)
-{
- extern void __set_fixmap_user(enum fixed_addresses, unsigned long, pgprot_t);
- extern char __vsyscall_0;
- unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
-
- __set_fixmap_user(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
-}
-#endif
-
-static int __init vsyscall_init(void)
-{
- BUG_ON(((unsigned long) &vgettimeofday !=
- VSYSCALL_ADDR(__NR_vgettimeofday)));
- BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
- BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
- map_vsyscall();
-#ifdef CONFIG_XEN
- map_vsyscall_user();
- sysctl_vsyscall = 0; /* disable vgettimeofay() */
-#endif
-#ifdef CONFIG_SYSCTL
- register_sysctl_table(kernel_root_table2, 0);
-#endif
- return 0;
-}
-
-__initcall(vsyscall_init);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/xen_entry.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/xen_entry.S
deleted file mode 100644
index b3d7f19405..0000000000
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/xen_entry.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copied from arch/xen/i386/kernel/entry.S
- */
-/* Offsets into shared_info_t. */
-#define evtchn_upcall_pending /* 0 */
-#define evtchn_upcall_mask 1
-
-#define sizeof_vcpu_shift 6
-
-#ifdef CONFIG_SMP
-//#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
-//#define preempt_enable(reg) decl threadinfo_preempt_count(reg)
-#define preempt_disable(reg)
-#define preempt_enable(reg)
-#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp) ; \
- movq %gs:pda_cpunumber,reg ; \
- shl $32, reg ; \
- shr $32-sizeof_vcpu_shift,reg ; \
- addq HYPERVISOR_shared_info,reg
-#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp) ; \
-#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
-#else
-#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg
-#define XEN_PUT_VCPU_INFO(reg)
-#define XEN_PUT_VCPU_INFO_fixup
-#endif
-
-#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
- XEN_LOCKED_BLOCK_EVENTS(reg) ; \
- XEN_PUT_VCPU_INFO(reg)
-#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
- XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \
- XEN_PUT_VCPU_INFO(reg)
-#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
-
-VGCF_IN_SYSCALL = (1<<8)
-
-