diff options
Diffstat (limited to 'linux-2.6-xen-sparse/arch/ia64')
43 files changed, 0 insertions, 28857 deletions
diff --git a/linux-2.6-xen-sparse/arch/ia64/Kconfig b/linux-2.6-xen-sparse/arch/ia64/Kconfig deleted file mode 100644 index 4991dd4a2b..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig +++ /dev/null @@ -1,587 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. -# - -mainmenu "IA-64 Linux Kernel Configuration" - -source "init/Kconfig" - -menu "Processor type and features" - -config IA64 - bool - default y - help - The Itanium Processor Family is Intel's 64-bit successor to - the 32-bit X86 line. The IA-64 Linux project has a home - page at <http://www.linuxia64.org/> and a mailing list at - <linux-ia64@vger.kernel.org>. - -config 64BIT - bool - default y - -config MMU - bool - default y - -config SWIOTLB - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_CALIBRATE_DELAY - bool - default y - -config TIME_INTERPOLATION - bool - default y - -config DMI - bool - default y - -config EFI - bool - default y - -config GENERIC_IOMAP - bool - default y - -config XEN - bool "Xen hypervisor support" - default y - help - Enable Xen hypervisor support. Resulting kernel runs - both as a guest OS on Xen and natively on hardware. - -config XEN_IA64_VDSO_PARAVIRT - bool - depends on XEN && !ITANIUM - default y - help - vDSO paravirtualization - -config XEN_IA64_EXPOSE_P2M - bool "Xen/IA64 exposure p2m table" - depends on XEN - default y - help - expose p2m from xen - -config XEN_IA64_EXPOSE_P2M_USE_DTR - bool "Xen/IA64 map p2m table with dtr" - depends on XEN_IA64_EXPOSE_P2M - default y - help - use dtr to map the exposed p2m table - -config SCHED_NO_NO_OMIT_FRAME_POINTER - bool - default y - -config IA64_UNCACHED_ALLOCATOR - bool - select GENERIC_ALLOCATOR - -config DMA_IS_DMA32 - bool - default y - -config DMA_IS_NORMAL - bool - depends on IA64_SGI_SN2 - default y - -config AUDIT_ARCH - bool - default y - -choice - prompt "System type" - default IA64_GENERIC - -config IA64_GENERIC - bool "generic" - select ACPI - select PCI - select NUMA - select ACPI_NUMA - help - This selects the system type of your hardware. A "generic" kernel - will run on any supported IA-64 system. However, if you configure - a kernel for your specific system, it will be faster and smaller. - - generic For any supported IA-64 system - DIG-compliant For DIG ("Developer's Interface Guide") compliant systems - HP-zx1/sx1000 For HP systems - HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices. - SGI-SN2 For SGI Altix systems - Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/> - - If you don't know what to do, choose "generic". - -config IA64_DIG - bool "DIG-compliant" - -config IA64_HP_ZX1 - bool "HP-zx1/sx1000" - help - Build a kernel that runs on HP zx1 and sx1000 systems. This adds - support for the HP I/O MMU. - -config IA64_HP_ZX1_SWIOTLB - bool "HP-zx1/sx1000 with software I/O TLB" - help - Build a kernel that runs on HP zx1 and sx1000 systems even when they - have broken PCI devices which cannot DMA to full 32 bits. Apart - from support for the HP I/O MMU, this includes support for the software - I/O TLB, which allows supporting the broken devices at the expense of - wasting some kernel memory (about 2MB by default). - -config IA64_SGI_SN2 - bool "SGI-SN2" - help - Selecting this option will optimize the kernel for use on sn2 based - systems, but the resulting kernel binary will not run on other - types of ia64 systems. If you have an SGI Altix system, it's safe - to select this option. If in doubt, select ia64 generic support - instead. - -config IA64_HP_SIM - bool "Ski-simulator" - -config IA64_XEN - bool "Xen guest" - depends on XEN - -endchoice - -choice - prompt "Processor type" - default ITANIUM - -config ITANIUM - bool "Itanium" - help - Select your IA-64 processor type. The default is Itanium. - This choice is safe for all IA-64 systems, but may not perform - optimally on systems with, say, Itanium 2 or newer processors. - -config MCKINLEY - bool "Itanium 2" - help - Select this to configure for an Itanium 2 (McKinley) processor. - -endchoice - -choice - prompt "Kernel page size" - default IA64_PAGE_SIZE_16KB - -config IA64_PAGE_SIZE_4KB - bool "4KB" - help - This lets you select the page size of the kernel. For best IA-64 - performance, a page size of 8KB or 16KB is recommended. For best - IA-32 compatibility, a page size of 4KB should be selected (the vast - majority of IA-32 binaries work perfectly fine with a larger page - size). For Itanium 2 or newer systems, a page size of 64KB can also - be selected. - - 4KB For best IA-32 compatibility - 8KB For best IA-64 performance - 16KB For best IA-64 performance - 64KB Requires Itanium 2 or newer processor. - - If you don't know what to do, choose 16KB. - -config IA64_PAGE_SIZE_8KB - bool "8KB" - -config IA64_PAGE_SIZE_16KB - bool "16KB" - -config IA64_PAGE_SIZE_64KB - depends on !ITANIUM - bool "64KB" - -endchoice - -choice - prompt "Page Table Levels" - default PGTABLE_3 - -config PGTABLE_3 - bool "3 Levels" - -config PGTABLE_4 - depends on !IA64_PAGE_SIZE_64KB - bool "4 Levels" - -endchoice - -source kernel/Kconfig.hz - -config IA64_BRL_EMU - bool - depends on ITANIUM - default y - -# align cache-sensitive data to 128 bytes -config IA64_L1_CACHE_SHIFT - int - default "7" if MCKINLEY - default "6" if ITANIUM - -config IA64_CYCLONE - bool "Cyclone (EXA) Time Source support" - help - Say Y here to enable support for IBM EXA Cyclone time source. - If you're unsure, answer N. - -config IOSAPIC - bool - depends on !IA64_HP_SIM - default y - -config IA64_SGI_SN_XP - tristate "Support communication between SGI SSIs" - depends on IA64_GENERIC || IA64_SGI_SN2 - select IA64_UNCACHED_ALLOCATOR - help - An SGI machine can be divided into multiple Single System - Images which act independently of each other and have - hardware based memory protection from the others. Enabling - this feature will allow for direct communication between SSIs - based on a network adapter and DMA messaging. - -config FORCE_MAX_ZONEORDER - int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE - range 11 17 if !HUGETLB_PAGE - default "17" if HUGETLB_PAGE - default "11" - -config SMP - bool "Symmetric multi-processing support" - help - This enables support for systems with more than one CPU. If you have - a system with only one CPU, say N. If you have a system with more - than one CPU, say Y. - - If you say N here, the kernel will run on single and multiprocessor - systems, but will use only one CPU of a multiprocessor system. If - you say Y here, the kernel will run on many, but not all, - single processor systems. On a single processor system, the kernel - will run faster if you say N here. - - See also the <file:Documentation/smp.txt> and the SMP-HOWTO - available at <http://www.tldp.org/docs.html#howto>. - - If you don't know what to do here, say N. - -config NR_CPUS - int "Maximum number of CPUs (2-1024)" - range 2 1024 - depends on SMP - default "1024" - help - You should set this to the number of CPUs in your system, but - keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but - only use 2 CPUs on a >2 CPU system. Setting this to a value larger - than 64 will cause the use of a CPU mask array, causing a small - performance hit. - -config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && EXPERIMENTAL - select HOTPLUG - default n - ---help--- - Say Y here to experiment with turning CPUs off and on. CPUs - can be controlled through /sys/devices/system/cpu/cpu#. - Say N if you want to disable CPU hotplug. - -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - -config SCHED_SMT - bool "SMT scheduler support" - depends on SMP - help - Improves the CPU scheduler's decision making when dealing with - Intel IA64 chips with MultiThreading at a cost of slightly increased - overhead in some places. If unsure say N here. - -config PERMIT_BSP_REMOVE - bool "Support removal of Bootstrap Processor" - depends on HOTPLUG_CPU - default n - ---help--- - Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU - support. - -config FORCE_CPEI_RETARGET - bool "Force assumption that CPEI can be re-targetted" - depends on PERMIT_BSP_REMOVE - default n - ---help--- - Say Y if you need to force the assumption that CPEI can be re-targetted to - any cpu in the system. This hint is available via ACPI 3.0 specifications. - Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. - This option it useful to enable this feature on older BIOS's as well. - You can also enable this by using boot command line option force_cpei=1. - -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. - -source "mm/Kconfig" - -config ARCH_SELECT_MEMORY_MODEL - def_bool y - -config ARCH_DISCONTIGMEM_ENABLE - def_bool y - help - Say Y to support efficient handling of discontiguous physical memory, - for architectures which are either NUMA (Non-Uniform Memory Access) - or have huge holes in the physical address space for other reasons. - See <file:Documentation/vm/numa> for more. - -config ARCH_FLATMEM_ENABLE - def_bool y - -config ARCH_SPARSEMEM_ENABLE - def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE - -config ARCH_DISCONTIGMEM_DEFAULT - def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) - depends on ARCH_DISCONTIGMEM_ENABLE - -config NUMA - bool "NUMA support" - depends on !IA64_HP_SIM && !FLATMEM - default y if IA64_SGI_SN2 - help - Say Y to compile the kernel to support NUMA (Non-Uniform Memory - Access). This option is for configuring high-end multiprocessor - server systems. If in doubt, say N. - -config NODES_SHIFT - int "Max num nodes shift(3-10)" - range 3 10 - default "10" - depends on NEED_MULTIPLE_NODES - help - This option specifies the maximum number of nodes in your SSI system. - MAX_NUMNODES will be 2^(This value). - If in doubt, use the default. - -# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. -# VIRTUAL_MEM_MAP has been retained for historical reasons. -config VIRTUAL_MEM_MAP - bool "Virtual mem map" - depends on !SPARSEMEM - default y if !IA64_HP_SIM - help - Say Y to compile the kernel with support for a virtual mem map. - This code also only takes effect if a memory hole of greater than - 1 Gb is found during boot. You must turn this option on if you - require the DISCONTIGMEM option for your machine. If you are - unsure, say Y. - -config HOLES_IN_ZONE - bool - default y if VIRTUAL_MEM_MAP - -config HAVE_ARCH_EARLY_PFN_TO_NID - def_bool y - depends on NEED_MULTIPLE_NODES - -config HAVE_ARCH_NODEDATA_EXTENSION - def_bool y - depends on NUMA - -config IA32_SUPPORT - bool "Support for Linux/x86 binaries" - help - IA-64 processors can execute IA-32 (X86) instructions. By - saying Y here, the kernel will include IA-32 system call - emulation support which makes it possible to transparently - run IA-32 Linux binaries on an IA-64 Linux system. - If in doubt, say Y. - -config COMPAT - bool - depends on IA32_SUPPORT - default y - -config IA64_MCA_RECOVERY - tristate "MCA recovery from errors other than TLB." - -config PERFMON - bool "Performance monitor support" - help - Selects whether support for the IA-64 performance monitor hardware - is included in the kernel. This makes some kernel data-structures a - little bigger and slows down execution a bit, but it is generally - a good idea to turn this on. If you're unsure, say Y. - -config IA64_PALINFO - tristate "/proc/pal support" - help - If you say Y here, you are able to get PAL (Processor Abstraction - Layer) information in /proc/pal. This contains useful information - about the processors in your systems, such as cache and TLB sizes - and the PAL firmware version in use. - - To use this option, you have to ensure that the "/proc file system - support" (CONFIG_PROC_FS) is enabled, too. - -config SGI_SN - def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) - -source "drivers/sn/Kconfig" - -source "drivers/firmware/Kconfig" - -source "fs/Kconfig.binfmt" - -endmenu - -menu "Power management and ACPI" - -source "kernel/power/Kconfig" - -source "drivers/acpi/Kconfig" - -if PM - -source "arch/ia64/kernel/cpufreq/Kconfig" - -endif - -endmenu - -if !IA64_HP_SIM - -menu "Bus options (PCI, PCMCIA)" - -config PCI - bool "PCI support" - help - Real IA-64 machines all have PCI/PCI-X/PCI Express busses. Say Y - here unless you are using a simulator without PCI support. - -config PCI_DOMAINS - bool - default PCI - -config XEN_PCIDEV_FRONTEND - bool "Xen PCI Frontend" - depends on PCI && XEN - default y - help - The PCI device frontend driver allows the kernel to import arbitrary - PCI devices from a PCI backend to support PCI driver domains. - -config XEN_PCIDEV_FE_DEBUG - bool "Xen PCI Frontend Debugging" - depends on XEN_PCIDEV_FRONTEND - default n - help - Enables some debug statements within the PCI Frontend. - -source "drivers/pci/pcie/Kconfig" - -source "drivers/pci/Kconfig" - -source "drivers/pci/hotplug/Kconfig" - -source "drivers/pcmcia/Kconfig" - -endmenu - -endif - -source "net/Kconfig" - -source "drivers/Kconfig" - -source "fs/Kconfig" - -source "lib/Kconfig" - -# -# Use the generic interrupt handling code in kernel/irq/: -# -config GENERIC_HARDIRQS - bool - default y - -config GENERIC_IRQ_PROBE - bool - default y - -config GENERIC_PENDING_IRQ - bool - depends on GENERIC_HARDIRQS && SMP - default y - -config IRQ_PER_CPU - bool - default y - -source "arch/ia64/hp/sim/Kconfig" - -menu "Instrumentation Support" - depends on EXPERIMENTAL - -source "arch/ia64/oprofile/Kconfig" - -config KPROBES - bool "Kprobes (EXPERIMENTAL)" - depends on EXPERIMENTAL && MODULES - help - Kprobes allows you to trap at almost any kernel address and - execute a callback function. register_kprobe() establishes - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -endmenu - -source "arch/ia64/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" - -# -# override default values of drivers/xen/Kconfig -# -if XEN -config XEN_SMPBOOT - default n -endif - -source "drivers/xen/Kconfig" diff --git a/linux-2.6-xen-sparse/arch/ia64/Makefile b/linux-2.6-xen-sparse/arch/ia64/Makefile deleted file mode 100644 index 9c7c05626b..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/Makefile +++ /dev/null @@ -1,106 +0,0 @@ -# -# ia64/Makefile -# -# This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com> -# - -NM := $(CROSS_COMPILE)nm -B -READELF := $(CROSS_COMPILE)readelf - -export AWK - -CHECKFLAGS += -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__ - -OBJCOPYFLAGS := --strip-all -LDFLAGS_vmlinux := -static -LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds -AFLAGS_KERNEL := -mconstant-gp -EXTRA := - -cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ - -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -CFLAGS_KERNEL := -mconstant-gp - -GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") -CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)") - -ifeq ($(GAS_STATUS),buggy) -$(error Sorry, you need a newer version of the assember, one that is built from \ - a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \ - static binary of such an assembler at: \ - \ - ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) -endif - -ifeq ($(call cc-version),0304) - cflags-$(CONFIG_ITANIUM) += -mtune=merced - cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley -endif - -CFLAGS += $(cflags-y) - -cppflags-$(CONFIG_XEN) += \ - -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) - -CPPFLAGS += $(cppflags-y) - -head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o - -libs-y += arch/ia64/lib/ -core-y += arch/ia64/kernel/ arch/ia64/mm/ -core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/ -core-$(CONFIG_IA64_DIG) += arch/ia64/dig/ -core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ -core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ -core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ -core-$(CONFIG_IA64_XEN) += arch/ia64/dig/ -core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ -core-$(CONFIG_XEN) += arch/ia64/xen/ - -drivers-$(CONFIG_PCI) += arch/ia64/pci/ -drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ -drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ -drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ -drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/ -drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/ - -boot := arch/ia64/hp/sim/boot - -PHONY += boot compressed check - -all: compressed unwcheck - -compressed: vmlinux.gz - -vmlinuz: vmlinux.gz - -vmlinux.gz: vmlinux - $(Q)$(MAKE) $(build)=$(boot) $@ - -unwcheck: vmlinux - -$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $< - -archclean: - $(Q)$(MAKE) $(clean)=$(boot) - -CLEAN_FILES += vmlinux.gz bootloader - -boot: lib/lib.a vmlinux - $(Q)$(MAKE) $(build)=$(boot) $@ - -install: - -yes | sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)" - -define archhelp - echo '* compressed - Build compressed kernel image' - echo ' install - Install compressed kernel image' - echo ' boot - Build vmlinux and bootloader for Ski simulator' - echo '* unwcheck - Check vmlinux for invalid unwind info' -endef diff --git a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c b/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c deleted file mode 100644 index c0f6eac819..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c +++ /dev/null @@ -1,2160 +0,0 @@ -/* -** IA64 System Bus Adapter (SBA) I/O MMU manager -** -** (c) Copyright 2002-2005 Alex Williamson -** (c) Copyright 2002-2003 Grant Grundler -** (c) Copyright 2002-2005 Hewlett-Packard Company -** -** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code) -** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code) -** -** This program is free software; you can redistribute it and/or modify -** it under the terms of the GNU General Public License as published by -** the Free Software Foundation; either version 2 of the License, or -** (at your option) any later version. -** -** -** This module initializes the IOC (I/O Controller) found on HP -** McKinley machines and their successors. -** -*/ - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/pci.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/acpi.h> -#include <linux/efi.h> -#include <linux/nodemask.h> -#include <linux/bitops.h> /* hweight64() */ - -#include <asm/delay.h> /* ia64_get_itc() */ -#include <asm/io.h> -#include <asm/page.h> /* PAGE_OFFSET */ -#include <asm/dma.h> -#include <asm/system.h> /* wmb() */ - -#include <asm/acpi-ext.h> - -#define PFX "IOC: " - -/* -** Enabling timing search of the pdir resource map. Output in /proc. -** Disabled by default to optimize performance. -*/ -#undef PDIR_SEARCH_TIMING - -/* -** This option allows cards capable of 64bit DMA to bypass the IOMMU. If -** not defined, all DMA will be 32bit and go through the TLB. -** There's potentially a conflict in the bio merge code with us -** advertising an iommu, but then bypassing it. Since I/O MMU bypassing -** appears to give more performance than bio-level virtual merging, we'll -** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to -** completely restrict DMA to the IOMMU. -*/ -#define ALLOW_IOV_BYPASS - -/* -** This option specifically allows/disallows bypassing scatterlists with -** multiple entries. Coalescing these entries can allow better DMA streaming -** and in some cases shows better performance than entirely bypassing the -** IOMMU. Performance increase on the order of 1-2% sequential output/input -** using bonnie++ on a RAID0 MD device (sym2 & mpt). -*/ -#undef ALLOW_IOV_BYPASS_SG - -/* -** If a device prefetches beyond the end of a valid pdir entry, it will cause -** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should -** disconnect on 4k boundaries and prevent such issues. If the device is -** particularly agressive, this option will keep the entire pdir valid such -** that prefetching will hit a valid address. This could severely impact -** error containment, and is therefore off by default. The page that is -** used for spill-over is poisoned, so that should help debugging somewhat. -*/ -#undef FULL_VALID_PDIR - -#define ENABLE_MARK_CLEAN - -/* -** The number of debug flags is a clue - this code is fragile. NOTE: since -** tightening the use of res_lock the resource bitmap and actual pdir are no -** longer guaranteed to stay in sync. The sanity checking code isn't going to -** like that. -*/ -#undef DEBUG_SBA_INIT -#undef DEBUG_SBA_RUN -#undef DEBUG_SBA_RUN_SG -#undef DEBUG_SBA_RESOURCE -#undef ASSERT_PDIR_SANITY -#undef DEBUG_LARGE_SG_ENTRIES -#undef DEBUG_BYPASS - -#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY) -#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive -#endif - -#define SBA_INLINE __inline__ -/* #define SBA_INLINE */ - -#ifdef DEBUG_SBA_INIT -#define DBG_INIT(x...) printk(x) -#else -#define DBG_INIT(x...) -#endif - -#ifdef DEBUG_SBA_RUN -#define DBG_RUN(x...) printk(x) -#else -#define DBG_RUN(x...) -#endif - -#ifdef DEBUG_SBA_RUN_SG -#define DBG_RUN_SG(x...) printk(x) -#else -#define DBG_RUN_SG(x...) -#endif - - -#ifdef DEBUG_SBA_RESOURCE -#define DBG_RES(x...) printk(x) -#else -#define DBG_RES(x...) -#endif - -#ifdef DEBUG_BYPASS -#define DBG_BYPASS(x...) printk(x) -#else -#define DBG_BYPASS(x...) -#endif - -#ifdef ASSERT_PDIR_SANITY -#define ASSERT(expr) \ - if(!(expr)) { \ - printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \ - panic(#expr); \ - } -#else -#define ASSERT(expr) -#endif - -/* -** The number of pdir entries to "free" before issuing -** a read to PCOM register to flush out PCOM writes. -** Interacts with allocation granularity (ie 4 or 8 entries -** allocated and free'd/purged at a time might make this -** less interesting). -*/ -#define DELAYED_RESOURCE_CNT 64 - -#define PCI_DEVICE_ID_HP_SX2000_IOC 0x12ec - -#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP) -#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP) -#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP) -#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP) -#define SX2000_IOC_ID ((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP) - -#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */ - -#define IOC_FUNC_ID 0x000 -#define IOC_FCLASS 0x008 /* function class, bist, header, rev... */ -#define IOC_IBASE 0x300 /* IO TLB */ -#define IOC_IMASK 0x308 -#define IOC_PCOM 0x310 -#define IOC_TCNFG 0x318 -#define IOC_PDIR_BASE 0x320 - -#define IOC_ROPE0_CFG 0x500 -#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */ - - -/* AGP GART driver looks for this */ -#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL - -/* -** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register) -** -** Some IOCs (sx1000) can run at the above pages sizes, but are -** really only supported using the IOC at a 4k page size. -** -** iovp_size could only be greater than PAGE_SIZE if we are -** confident the drivers really only touch the next physical -** page iff that driver instance owns it. -*/ -static unsigned long iovp_size; -static unsigned long iovp_shift; -static unsigned long iovp_mask; - -struct ioc { - void __iomem *ioc_hpa; /* I/O MMU base address */ - char *res_map; /* resource map, bit == pdir entry */ - u64 *pdir_base; /* physical base address */ - unsigned long ibase; /* pdir IOV Space base */ - unsigned long imask; /* pdir IOV Space mask */ - - unsigned long *res_hint; /* next avail IOVP - circular search */ - unsigned long dma_mask; - spinlock_t res_lock; /* protects the resource bitmap, but must be held when */ - /* clearing pdir to prevent races with allocations. */ - unsigned int res_bitshift; /* from the RIGHT! */ - unsigned int res_size; /* size of resource map in bytes */ -#ifdef CONFIG_NUMA - unsigned int node; /* node where this IOC lives */ -#endif -#if DELAYED_RESOURCE_CNT > 0 - spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */ - /* than res_lock for bigger systems. */ - int saved_cnt; - struct sba_dma_pair { - dma_addr_t iova; - size_t size; - } saved[DELAYED_RESOURCE_CNT]; -#endif - -#ifdef PDIR_SEARCH_TIMING -#define SBA_SEARCH_SAMPLE 0x100 - unsigned long avg_search[SBA_SEARCH_SAMPLE]; - unsigned long avg_idx; /* current index into avg_search */ -#endif - - /* Stuff we don't need in performance path */ - struct ioc *next; /* list of IOC's in system */ - acpi_handle handle; /* for multiple IOC's */ - const char *name; - unsigned int func_id; - unsigned int rev; /* HW revision of chip */ - u32 iov_size; - unsigned int pdir_size; /* in bytes, determined by IOV Space size */ - struct pci_dev *sac_only_dev; -}; - -static struct ioc *ioc_list; -static int reserve_sba_gart = 1; - -static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t); -static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t); - -#define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset) - -#ifdef FULL_VALID_PDIR -static u64 prefetch_spill_page; -#endif - -#ifdef CONFIG_PCI -# define GET_IOC(dev) (((dev)->bus == &pci_bus_type) \ - ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL) -#else -# define GET_IOC(dev) NULL -#endif - -/* -** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up -** (or rather not merge) DMA's into managable chunks. -** On parisc, this is more of the software/tuning constraint -** rather than the HW. I/O MMU allocation alogorithms can be -** faster with smaller size is (to some degree). -*/ -#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size) - -#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1)) - -/************************************ -** SBA register read and write support -** -** BE WARNED: register writes are posted. -** (ie follow writes which must reach HW with a read) -** -*/ -#define READ_REG(addr) __raw_readq(addr) -#define WRITE_REG(val, addr) __raw_writeq(val, addr) - -#ifdef DEBUG_SBA_INIT - -/** - * sba_dump_tlb - debugging only - print IOMMU operating parameters - * @hpa: base address of the IOMMU - * - * Print the size/location of the IO MMU PDIR. - */ -static void -sba_dump_tlb(char *hpa) -{ - DBG_INIT("IO TLB at 0x%p\n", (void *)hpa); - DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE)); - DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK)); - DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG)); - DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE)); - DBG_INIT("\n"); -} -#endif - - -#ifdef ASSERT_PDIR_SANITY - -/** - * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @msg: text to print ont the output line. - * @pide: pdir index. - * - * Print one entry of the IO MMU PDIR in human readable form. - */ -static void -sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) -{ - /* start printing from lowest pde in rval */ - u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)]; - unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)]; - uint rcnt; - - printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", - msg, rptr, pide & (BITS_PER_LONG - 1), *rptr); - - rcnt = 0; - while (rcnt < BITS_PER_LONG) { - printk(KERN_DEBUG "%s %2d %p %016Lx\n", - (rcnt == (pide & (BITS_PER_LONG - 1))) - ? " -->" : " ", - rcnt, ptr, (unsigned long long) *ptr ); - rcnt++; - ptr++; - } - printk(KERN_DEBUG "%s", msg); -} - - -/** - * sba_check_pdir - debugging only - consistency checker - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @msg: text to print ont the output line. - * - * Verify the resource map and pdir state is consistent - */ -static int -sba_check_pdir(struct ioc *ioc, char *msg) -{ - u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]); - u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */ - u64 *pptr = ioc->pdir_base; /* pdir ptr */ - uint pide = 0; - - while (rptr < rptr_end) { - u64 rval; - int rcnt; /* number of bits we might check */ - - rval = *rptr; - rcnt = 64; - - while (rcnt) { - /* Get last byte and highest bit from that */ - u32 pde = ((u32)((*pptr >> (63)) & 0x1)); - if ((rval & 0x1) ^ pde) - { - /* - ** BUMMER! -- res_map != pdir -- - ** Dump rval and matching pdir entries - */ - sba_dump_pdir_entry(ioc, msg, pide); - return(1); - } - rcnt--; - rval >>= 1; /* try the next bit */ - pptr++; - pide++; - } - rptr++; /* look at next word of res_map */ - } - /* It'd be nice if we always got here :^) */ - return 0; -} - - -/** - * sba_dump_sg - debugging only - print Scatter-Gather list - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @startsg: head of the SG list - * @nents: number of entries in SG list - * - * print the SG list so we can verify it's correct by hand. - */ -static void -sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) -{ - while (nents-- > 0) { - printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents, - startsg->dma_address, startsg->dma_length, - sba_sg_address(startsg)); - startsg++; - } -} - -static void -sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) -{ - struct scatterlist *the_sg = startsg; - int the_nents = nents; - - while (the_nents-- > 0) { - if (sba_sg_address(the_sg) == 0x0UL) - sba_dump_sg(NULL, startsg, nents); - the_sg++; - } -} - -#endif /* ASSERT_PDIR_SANITY */ - - - - -/************************************************************** -* -* I/O Pdir Resource Management -* -* Bits set in the resource map are in use. -* Each bit can represent a number of pages. -* LSbs represent lower addresses (IOVA's). -* -***************************************************************/ -#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */ - -/* Convert from IOVP to IOVA and vice versa. */ -#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset)) -#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase)) - -#define PDIR_ENTRY_SIZE sizeof(u64) - -#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift) - -#define RESMAP_MASK(n) ~(~0UL << (n)) -#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1) - - -/** - * For most cases the normal get_order is sufficient, however it limits us - * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity. - * It only incurs about 1 clock cycle to use this one with the static variable - * and makes the code more intuitive. - */ -static SBA_INLINE int -get_iovp_order (unsigned long size) -{ - long double d = size - 1; - long order; - - order = ia64_getf_exp(d); - order = order - iovp_shift - 0xffff + 1; - if (order < 0) - order = 0; - return order; -} - -/** - * sba_search_bitmap - find free space in IO PDIR resource bitmap - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @bits_wanted: number of entries we need. - * @use_hint: use res_hint to indicate where to start looking - * - * Find consecutive free bits in resource bitmap. - * Each bit represents one entry in the IO Pdir. - * Cool perf optimization: search for log2(size) bits at a time. - */ -static SBA_INLINE unsigned long -sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) -{ - unsigned long *res_ptr; - unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); - unsigned long flags, pide = ~0UL; - - ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); - ASSERT(res_ptr < res_end); - - spin_lock_irqsave(&ioc->res_lock, flags); - - /* Allow caller to force a search through the entire resource space */ - if (likely(use_hint)) { - res_ptr = ioc->res_hint; - } else { - res_ptr = (ulong *)ioc->res_map; - ioc->res_bitshift = 0; - } - - /* - * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts - * if a TLB entry is purged while in use. sba_mark_invalid() - * purges IOTLB entries in power-of-two sizes, so we also - * allocate IOVA space in power-of-two sizes. - */ - bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift); - - if (likely(bits_wanted == 1)) { - unsigned int bitshiftcnt; - for(; res_ptr < res_end ; res_ptr++) { - if (likely(*res_ptr != ~0UL)) { - bitshiftcnt = ffz(*res_ptr); - *res_ptr |= (1UL << bitshiftcnt); - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ - pide += bitshiftcnt; - ioc->res_bitshift = bitshiftcnt + bits_wanted; - goto found_it; - } - } - goto not_found; - - } - - if (likely(bits_wanted <= BITS_PER_LONG/2)) { - /* - ** Search the resource bit map on well-aligned values. - ** "o" is the alignment. - ** We need the alignment to invalidate I/O TLB using - ** SBA HW features in the unmap path. - */ - unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift); - uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o); - unsigned long mask, base_mask; - - base_mask = RESMAP_MASK(bits_wanted); - mask = base_mask << bitshiftcnt; - - DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr); - for(; res_ptr < res_end ; res_ptr++) - { - DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); - ASSERT(0 != mask); - for (; mask ; mask <<= o, bitshiftcnt += o) { - if(0 == ((*res_ptr) & mask)) { - *res_ptr |= mask; /* mark resources busy! */ - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ - pide += bitshiftcnt; - ioc->res_bitshift = bitshiftcnt + bits_wanted; - goto found_it; - } - } - - bitshiftcnt = 0; - mask = base_mask; - - } - - } else { - int qwords, bits, i; - unsigned long *end; - - qwords = bits_wanted >> 6; /* /64 */ - bits = bits_wanted - (qwords * BITS_PER_LONG); - - end = res_end - qwords; - - for (; res_ptr < end; res_ptr++) { - for (i = 0 ; i < qwords ; i++) { - if (res_ptr[i] != 0) - goto next_ptr; - } - if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits)) - continue; - - /* Found it, mark it */ - for (i = 0 ; i < qwords ; i++) - res_ptr[i] = ~0UL; - res_ptr[i] |= RESMAP_MASK(bits); - - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ - res_ptr += qwords; - ioc->res_bitshift = bits; - goto found_it; -next_ptr: - ; - } - } - -not_found: - prefetch(ioc->res_map); - ioc->res_hint = (unsigned long *) ioc->res_map; - ioc->res_bitshift = 0; - spin_unlock_irqrestore(&ioc->res_lock, flags); - return (pide); - -found_it: - ioc->res_hint = res_ptr; - spin_unlock_irqrestore(&ioc->res_lock, flags); - return (pide); -} - - -/** - * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @size: number of bytes to create a mapping for - * - * Given a size, find consecutive unmarked and then mark those bits in the - * resource bit map. - */ -static int -sba_alloc_range(struct ioc *ioc, size_t size) -{ - unsigned int pages_needed = size >> iovp_shift; -#ifdef PDIR_SEARCH_TIMING - unsigned long itc_start; -#endif - unsigned long pide; - - ASSERT(pages_needed); - ASSERT(0 == (size & ~iovp_mask)); - -#ifdef PDIR_SEARCH_TIMING - itc_start = ia64_get_itc(); -#endif - /* - ** "seek and ye shall find"...praying never hurts either... - */ - pide = sba_search_bitmap(ioc, pages_needed, 1); - if (unlikely(pide >= (ioc->res_size << 3))) { - pide = sba_search_bitmap(ioc, pages_needed, 0); - if (unlikely(pide >= (ioc->res_size << 3))) { -#if DELAYED_RESOURCE_CNT > 0 - unsigned long flags; - - /* - ** With delayed resource freeing, we can give this one more shot. We're - ** getting close to being in trouble here, so do what we can to make this - ** one count. - */ - spin_lock_irqsave(&ioc->saved_lock, flags); - if (ioc->saved_cnt > 0) { - struct sba_dma_pair *d; - int cnt = ioc->saved_cnt; - - d = &(ioc->saved[ioc->saved_cnt - 1]); - - spin_lock(&ioc->res_lock); - while (cnt--) { - sba_mark_invalid(ioc, d->iova, d->size); - sba_free_range(ioc, d->iova, d->size); - d--; - } - ioc->saved_cnt = 0; - READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ - spin_unlock(&ioc->res_lock); - } - spin_unlock_irqrestore(&ioc->saved_lock, flags); - - pide = sba_search_bitmap(ioc, pages_needed, 0); - if (unlikely(pide >= (ioc->res_size << 3))) - panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", - ioc->ioc_hpa); -#else - panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", - ioc->ioc_hpa); -#endif - } - } - -#ifdef PDIR_SEARCH_TIMING - ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed; - ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1; -#endif - - prefetchw(&(ioc->pdir_base[pide])); - -#ifdef ASSERT_PDIR_SANITY - /* verify the first enable bit is clear */ - if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) { - sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide); - } -#endif - - DBG_RES("%s(%x) %d -> %lx hint %x/%x\n", - __FUNCTION__, size, pages_needed, pide, - (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), - ioc->res_bitshift ); - - return (pide); -} - - -/** - * sba_free_range - unmark bits in IO PDIR resource bitmap - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @iova: IO virtual address which was previously allocated. - * @size: number of bytes to create a mapping for - * - * clear bits in the ioc's resource map - */ -static SBA_INLINE void -sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size) -{ - unsigned long iovp = SBA_IOVP(ioc, iova); - unsigned int pide = PDIR_INDEX(iovp); - unsigned int ridx = pide >> 3; /* convert bit to byte address */ - unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]); - int bits_not_wanted = size >> iovp_shift; - unsigned long m; - - /* Round up to power-of-two size: see AR2305 note above */ - bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift); - for (; bits_not_wanted > 0 ; res_ptr++) { - - if (unlikely(bits_not_wanted > BITS_PER_LONG)) { - - /* these mappings start 64bit aligned */ - *res_ptr = 0UL; - bits_not_wanted -= BITS_PER_LONG; - pide += BITS_PER_LONG; - - } else { - - /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */ - m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1)); - bits_not_wanted = 0; - - DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size, - bits_not_wanted, m, pide, res_ptr, *res_ptr); - - ASSERT(m != 0); - ASSERT(bits_not_wanted); - ASSERT((*res_ptr & m) == m); /* verify same bits are set */ - *res_ptr &= ~m; - } - } -} - - -/************************************************************** -* -* "Dynamic DMA Mapping" support (aka "Coherent I/O") -* -***************************************************************/ - -/** - * sba_io_pdir_entry - fill in one IO PDIR entry - * @pdir_ptr: pointer to IO PDIR entry - * @vba: Virtual CPU address of buffer to map - * - * SBA Mapping Routine - * - * Given a virtual address (vba, arg1) sba_io_pdir_entry() - * loads the I/O PDIR entry pointed to by pdir_ptr (arg0). - * Each IO Pdir entry consists of 8 bytes as shown below - * (LSB == bit 0): - * - * 63 40 11 7 0 - * +-+---------------------+----------------------------------+----+--------+ - * |V| U | PPN[39:12] | U | FF | - * +-+---------------------+----------------------------------+----+--------+ - * - * V == Valid Bit - * U == Unused - * PPN == Physical Page Number - * - * The physical address fields are filled with the results of virt_to_phys() - * on the vba. - */ - -#if 1 -#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = \ - ((virt_to_bus((void *)vba) & ~0xFFFULL) | 0x8000000000000000ULL) -#else -void SBA_INLINE -sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba) -{ - *pdir_ptr = ((virt_to_bus((void *)vba) & ~0xFFFULL) | - 0x80000000000000FFULL); -} -#endif - -#ifdef ENABLE_MARK_CLEAN -/** - * Since DMA is i-cache coherent, any (complete) pages that were written via - * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to - * flush them when they get mapped into an executable vm-area. - */ -static void -mark_clean (void *addr, size_t size) -{ - unsigned long pg_addr, end; - -#ifdef CONFIG_XEN - /* XXX: Bad things happen starting domUs when this is enabled. */ - if (is_running_on_xen()) - return; -#endif - - pg_addr = PAGE_ALIGN((unsigned long) addr); - end = (unsigned long) addr + size; - while (pg_addr + PAGE_SIZE <= end) { - struct page *page = virt_to_page((void *)pg_addr); - set_bit(PG_arch_1, &page->flags); - pg_addr += PAGE_SIZE; - } -} -#endif - -/** - * sba_mark_invalid - invalidate one or more IO PDIR entries - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @iova: IO Virtual Address mapped earlier - * @byte_cnt: number of bytes this mapping covers. - * - * Marking the IO PDIR entry(ies) as Invalid and invalidate - * corresponding IO TLB entry. The PCOM (Purge Command Register) - * is to purge stale entries in the IO TLB when unmapping entries. - * - * The PCOM register supports purging of multiple pages, with a minium - * of 1 page and a maximum of 2GB. Hardware requires the address be - * aligned to the size of the range being purged. The size of the range - * must be a power of 2. The "Cool perf optimization" in the - * allocation routine helps keep that true. - */ -static SBA_INLINE void -sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) -{ - u32 iovp = (u32) SBA_IOVP(ioc,iova); - - int off = PDIR_INDEX(iovp); - - /* Must be non-zero and rounded up */ - ASSERT(byte_cnt > 0); - ASSERT(0 == (byte_cnt & ~iovp_mask)); - -#ifdef ASSERT_PDIR_SANITY - /* Assert first pdir entry is set */ - if (!(ioc->pdir_base[off] >> 60)) { - sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp)); - } -#endif - - if (byte_cnt <= iovp_size) - { - ASSERT(off < ioc->pdir_size); - - iovp |= iovp_shift; /* set "size" field for PCOM */ - -#ifndef FULL_VALID_PDIR - /* - ** clear I/O PDIR entry "valid" bit - ** Do NOT clear the rest - save it for debugging. - ** We should only clear bits that have previously - ** been enabled. - */ - ioc->pdir_base[off] &= ~(0x80000000000000FFULL); -#else - /* - ** If we want to maintain the PDIR as valid, put in - ** the spill page so devices prefetching won't - ** cause a hard fail. - */ - ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); -#endif - } else { - u32 t = get_iovp_order(byte_cnt) + iovp_shift; - - iovp |= t; - ASSERT(t <= 31); /* 2GB! Max value of "size" field */ - - do { - /* verify this pdir entry is enabled */ - ASSERT(ioc->pdir_base[off] >> 63); -#ifndef FULL_VALID_PDIR - /* clear I/O Pdir entry "valid" bit first */ - ioc->pdir_base[off] &= ~(0x80000000000000FFULL); -#else - ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); -#endif - off++; - byte_cnt -= iovp_size; - } while (byte_cnt > 0); - } - - WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM); -} - -/** - * sba_map_single - map one buffer and return IOVA for DMA - * @dev: instance of PCI owned by the driver that's asking. - * @addr: driver buffer to map. - * @size: number of bytes to map in driver buffer. - * @dir: R/W or both. - * - * See Documentation/DMA-mapping.txt - */ -dma_addr_t -sba_map_single(struct device *dev, void *addr, size_t size, int dir) -{ - struct ioc *ioc; - dma_addr_t iovp; - dma_addr_t offset; - u64 *pdir_start; - int pide; -#ifdef ASSERT_PDIR_SANITY - unsigned long flags; -#endif -#ifdef ALLOW_IOV_BYPASS - unsigned long pci_addr = virt_to_bus(addr); - - ASSERT(to_pci_dev(dev)->dma_mask); - /* - ** Check if the PCI device can DMA to ptr... if so, just return ptr - */ - if (likely(pci_addr & ~to_pci_dev(dev)->dma_mask) == 0 && - !range_straddles_page_boundary(addr, size)) { - /* - ** Device is bit capable of DMA'ing to the buffer... - ** just return the PCI address of ptr - */ - DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n", - to_pci_dev(dev)->dma_mask, pci_addr); - return pci_addr; - } -#endif - ioc = GET_IOC(dev); - ASSERT(ioc); - - prefetch(ioc->res_hint); - - ASSERT(size > 0); - ASSERT(size <= DMA_CHUNK_SIZE); - - /* save offset bits */ - offset = ((dma_addr_t) (long) addr) & ~iovp_mask; - - /* round up to nearest iovp_size */ - size = (size + offset + ~iovp_mask) & iovp_mask; - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check before sba_map_single()")) - panic("Sanity check failed"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - pide = sba_alloc_range(ioc, size); - - iovp = (dma_addr_t) pide << iovp_shift; - - DBG_RUN("%s() 0x%p -> 0x%lx\n", - __FUNCTION__, addr, (long) iovp | offset); - - pdir_start = &(ioc->pdir_base[pide]); - - while (size > 0) { - ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */ - sba_io_pdir_entry(pdir_start, (unsigned long) addr); - - DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start); - - addr += iovp_size; - size -= iovp_size; - pdir_start++; - } - /* force pdir update */ - wmb(); - - /* form complete address */ -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check after sba_map_single()"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - return SBA_IOVA(ioc, iovp, offset); -} - -#ifdef ENABLE_MARK_CLEAN -static SBA_INLINE void -sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) -{ - u32 iovp = (u32) SBA_IOVP(ioc,iova); - int off = PDIR_INDEX(iovp); - void *addr; - - if (size <= iovp_size) { - addr = bus_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, size); - } else { - do { - addr = bus_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, min(size, iovp_size)); - off++; - size -= iovp_size; - } while (size > 0); - } -} -#endif - -/** - * sba_unmap_single - unmap one IOVA and free resources - * @dev: instance of PCI owned by the driver that's asking. - * @iova: IOVA of driver buffer previously mapped. - * @size: number of bytes mapped in driver buffer. - * @dir: R/W or both. - * - * See Documentation/DMA-mapping.txt - */ -void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) -{ - struct ioc *ioc; -#if DELAYED_RESOURCE_CNT > 0 - struct sba_dma_pair *d; -#endif - unsigned long flags; - dma_addr_t offset; - - ioc = GET_IOC(dev); - ASSERT(ioc); - -#ifdef ALLOW_IOV_BYPASS - if (likely((iova & ioc->imask) != ioc->ibase)) { - /* - ** Address does not fall w/in IOVA, must be bypassing - */ - DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova); - -#ifdef ENABLE_MARK_CLEAN - if (dir == DMA_FROM_DEVICE) { - mark_clean(bus_to_virt(iova), size); - } -#endif - return; - } -#endif - offset = iova & ~iovp_mask; - - DBG_RUN("%s() iovp 0x%lx/%x\n", - __FUNCTION__, (long) iova, size); - - iova ^= offset; /* clear offset bits */ - size += offset; - size = ROUNDUP(size, iovp_size); - -#ifdef ENABLE_MARK_CLEAN - if (dir == DMA_FROM_DEVICE) - sba_mark_clean(ioc, iova, size); -#endif - -#if DELAYED_RESOURCE_CNT > 0 - spin_lock_irqsave(&ioc->saved_lock, flags); - d = &(ioc->saved[ioc->saved_cnt]); - d->iova = iova; - d->size = size; - if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) { - int cnt = ioc->saved_cnt; - spin_lock(&ioc->res_lock); - while (cnt--) { - sba_mark_invalid(ioc, d->iova, d->size); - sba_free_range(ioc, d->iova, d->size); - d--; - } - ioc->saved_cnt = 0; - READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ - spin_unlock(&ioc->res_lock); - } - spin_unlock_irqrestore(&ioc->saved_lock, flags); -#else /* DELAYED_RESOURCE_CNT == 0 */ - spin_lock_irqsave(&ioc->res_lock, flags); - sba_mark_invalid(ioc, iova, size); - sba_free_range(ioc, iova, size); - READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif /* DELAYED_RESOURCE_CNT == 0 */ -} - - -/** - * sba_alloc_coherent - allocate/map shared mem for DMA - * @dev: instance of PCI owned by the driver that's asking. - * @size: number of bytes mapped in driver buffer. - * @dma_handle: IOVA of new buffer. - * - * See Documentation/DMA-mapping.txt - */ -void * -sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags) -{ - struct ioc *ioc; - void *addr; - - ioc = GET_IOC(dev); - ASSERT(ioc); - -#ifdef CONFIG_NUMA - { - struct page *page; - page = alloc_pages_node(ioc->node == MAX_NUMNODES ? - numa_node_id() : ioc->node, flags, - get_order(size)); - - if (unlikely(!page)) - return NULL; - - addr = page_address(page); - } -#else - addr = (void *) __get_free_pages(flags, get_order(size)); -#endif - if (unlikely(!addr)) - return NULL; - - memset(addr, 0, size); - -#ifdef ALLOW_IOV_BYPASS -#ifdef CONFIG_XEN - if (xen_create_contiguous_region((unsigned long)addr, get_order(size), - fls64(dev->coherent_dma_mask))) - goto iommu_map; -#endif - *dma_handle = virt_to_bus(addr); - ASSERT(dev->coherent_dma_mask); - /* - ** Check if the PCI device can DMA to ptr... if so, just return ptr - */ - if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) { - DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n", - dev->coherent_dma_mask, *dma_handle); - - return addr; - } -#ifdef CONFIG_XEN -iommu_map: -#endif -#endif - - /* - * If device can't bypass or bypass is disabled, pass the 32bit fake - * device to map single to get an iova mapping. - */ - *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0); - - return addr; -} - - -/** - * sba_free_coherent - free/unmap shared mem for DMA - * @dev: instance of PCI owned by the driver that's asking. - * @size: number of bytes mapped in driver buffer. - * @vaddr: virtual address IOVA of "consistent" buffer. - * @dma_handler: IO virtual address of "consistent" buffer. - * - * See Documentation/DMA-mapping.txt - */ -void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) -{ -#if defined(ALLOW_IOV_BYPASS) && defined(CONFIG_XEN) - struct ioc *ioc = GET_IOC(dev); - - if (likely((dma_handle & ioc->imask) != ioc->ibase)) - xen_destroy_contiguous_region((unsigned long)vaddr, - get_order(size)); -#endif - sba_unmap_single(dev, dma_handle, size, 0); - free_pages((unsigned long) vaddr, get_order(size)); -} - - -/* -** Since 0 is a valid pdir_base index value, can't use that -** to determine if a value is valid or not. Use a flag to indicate -** the SG list entry contains a valid pdir index. -*/ -#define PIDE_FLAG 0x1UL - -#ifdef DEBUG_LARGE_SG_ENTRIES -int dump_run_sg = 0; -#endif - - -/** - * sba_fill_pdir - write allocated SG entries into IO PDIR - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @startsg: list of IOVA/size pairs - * @nents: number of entries in startsg list - * - * Take preprocessed SG list and write corresponding entries - * in the IO PDIR. - */ - -static SBA_INLINE int -sba_fill_pdir( - struct ioc *ioc, - struct scatterlist *startsg, - int nents) -{ - struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ - int n_mappings = 0; - u64 *pdirp = NULL; - unsigned long dma_offset = 0; - - dma_sg--; - while (nents-- > 0) { - int cnt = startsg->dma_length; - startsg->dma_length = 0; - -#ifdef DEBUG_LARGE_SG_ENTRIES - if (dump_run_sg) - printk(" %2d : %08lx/%05x %p\n", - nents, startsg->dma_address, cnt, - sba_sg_address(startsg)); -#else - DBG_RUN_SG(" %d : %08lx/%05x %p\n", - nents, startsg->dma_address, cnt, - sba_sg_address(startsg)); -#endif - /* - ** Look for the start of a new DMA stream - */ - if (startsg->dma_address & PIDE_FLAG) { - u32 pide = startsg->dma_address & ~PIDE_FLAG; - dma_offset = (unsigned long) pide & ~iovp_mask; - startsg->dma_address = 0; - dma_sg++; - dma_sg->dma_address = pide | ioc->ibase; - pdirp = &(ioc->pdir_base[pide >> iovp_shift]); - n_mappings++; - } - - /* - ** Look for a VCONTIG chunk - */ - if (cnt) { - unsigned long vaddr = (unsigned long) sba_sg_address(startsg); - ASSERT(pdirp); - - /* Since multiple Vcontig blocks could make up - ** one DMA stream, *add* cnt to dma_len. - */ - dma_sg->dma_length += cnt; - cnt += dma_offset; - dma_offset=0; /* only want offset on first chunk */ - cnt = ROUNDUP(cnt, iovp_size); - do { - sba_io_pdir_entry(pdirp, vaddr); - vaddr += iovp_size; - cnt -= iovp_size; - pdirp++; - } while (cnt > 0); - } - startsg++; - } - /* force pdir update */ - wmb(); - -#ifdef DEBUG_LARGE_SG_ENTRIES - dump_run_sg = 0; -#endif - return(n_mappings); -} - - -/* -** Two address ranges are DMA contiguous *iff* "end of prev" and -** "start of next" are both on an IOV page boundary. -** -** (shift left is a quick trick to mask off upper bits) -*/ -#define DMA_CONTIG(__X, __Y) \ - (((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL) - - -/** - * sba_coalesce_chunks - preprocess the SG list - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @startsg: list of IOVA/size pairs - * @nents: number of entries in startsg list - * - * First pass is to walk the SG list and determine where the breaks are - * in the DMA stream. Allocates PDIR entries but does not fill them. - * Returns the number of DMA chunks. - * - * Doing the fill separate from the coalescing/allocation keeps the - * code simpler. Future enhancement could make one pass through - * the sglist do both. - */ -static SBA_INLINE int -sba_coalesce_chunks( struct ioc *ioc, - struct scatterlist *startsg, - int nents) -{ - struct scatterlist *vcontig_sg; /* VCONTIG chunk head */ - unsigned long vcontig_len; /* len of VCONTIG chunk */ - unsigned long vcontig_end; - struct scatterlist *dma_sg; /* next DMA stream head */ - unsigned long dma_offset, dma_len; /* start/len of DMA stream */ - int n_mappings = 0; - - while (nents > 0) { - unsigned long vaddr = (unsigned long) sba_sg_address(startsg); - - /* - ** Prepare for first/next DMA stream - */ - dma_sg = vcontig_sg = startsg; - dma_len = vcontig_len = vcontig_end = startsg->length; - vcontig_end += vaddr; - dma_offset = vaddr & ~iovp_mask; - - /* PARANOID: clear entries */ - startsg->dma_address = startsg->dma_length = 0; - - /* - ** This loop terminates one iteration "early" since - ** it's always looking one "ahead". - */ - while (--nents > 0) { - unsigned long vaddr; /* tmp */ - - startsg++; - - /* PARANOID */ - startsg->dma_address = startsg->dma_length = 0; - - /* catch brokenness in SCSI layer */ - ASSERT(startsg->length <= DMA_CHUNK_SIZE); - - /* - ** First make sure current dma stream won't - ** exceed DMA_CHUNK_SIZE if we coalesce the - ** next entry. - */ - if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask) - > DMA_CHUNK_SIZE) - break; - - /* - ** Then look for virtually contiguous blocks. - ** - ** append the next transaction? - */ - vaddr = (unsigned long) sba_sg_address(startsg); - if (vcontig_end == vaddr) - { - vcontig_len += startsg->length; - vcontig_end += startsg->length; - dma_len += startsg->length; - continue; - } - -#ifdef DEBUG_LARGE_SG_ENTRIES - dump_run_sg = (vcontig_len > iovp_size); -#endif - - /* - ** Not virtually contigous. - ** Terminate prev chunk. - ** Start a new chunk. - ** - ** Once we start a new VCONTIG chunk, dma_offset - ** can't change. And we need the offset from the first - ** chunk - not the last one. Ergo Successive chunks - ** must start on page boundaries and dove tail - ** with it's predecessor. - */ - vcontig_sg->dma_length = vcontig_len; - - vcontig_sg = startsg; - vcontig_len = startsg->length; - - /* - ** 3) do the entries end/start on page boundaries? - ** Don't update vcontig_end until we've checked. - */ - if (DMA_CONTIG(vcontig_end, vaddr)) - { - vcontig_end = vcontig_len + vaddr; - dma_len += vcontig_len; - continue; - } else { - break; - } - } - - /* - ** End of DMA Stream - ** Terminate last VCONTIG block. - ** Allocate space for DMA stream. - */ - vcontig_sg->dma_length = vcontig_len; - dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask; - ASSERT(dma_len <= DMA_CHUNK_SIZE); - dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG - | (sba_alloc_range(ioc, dma_len) << iovp_shift) - | dma_offset); - n_mappings++; - } - - return n_mappings; -} - - -/** - * sba_map_sg - map Scatter/Gather list - * @dev: instance of PCI owned by the driver that's asking. - * @sglist: array of buffer/length pairs - * @nents: number of entries in list - * @dir: R/W or both. - * - * See Documentation/DMA-mapping.txt - */ -int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir) -{ - struct ioc *ioc; - int coalesced, filled = 0; -#ifdef ASSERT_PDIR_SANITY - unsigned long flags; -#endif -#ifdef ALLOW_IOV_BYPASS_SG - struct scatterlist *sg; -#endif - - DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents); - ioc = GET_IOC(dev); - ASSERT(ioc); - -#ifdef ALLOW_IOV_BYPASS_SG - ASSERT(to_pci_dev(dev)->dma_mask); - if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) { - for (sg = sglist ; filled < nents ; filled++, sg++){ - sg->dma_length = sg->length; - sg->dma_address = virt_to_bus(sba_sg_address(sg)); - } - return filled; - } -#endif - /* Fast path single entry scatterlists. */ - if (nents == 1) { - sglist->dma_length = sglist->length; - sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir); - return 1; - } - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check before sba_map_sg()")) - { - sba_dump_sg(ioc, sglist, nents); - panic("Check before sba_map_sg()"); - } - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - prefetch(ioc->res_hint); - - /* - ** First coalesce the chunks and allocate I/O pdir space - ** - ** If this is one DMA stream, we can properly map using the - ** correct virtual address associated with each DMA page. - ** w/o this association, we wouldn't have coherent DMA! - ** Access to the virtual address is what forces a two pass algorithm. - */ - coalesced = sba_coalesce_chunks(ioc, sglist, nents); - - /* - ** Program the I/O Pdir - ** - ** map the virtual addresses to the I/O Pdir - ** o dma_address will contain the pdir index - ** o dma_len will contain the number of bytes to map - ** o address contains the virtual address. - */ - filled = sba_fill_pdir(ioc, sglist, nents); - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check after sba_map_sg()")) - { - sba_dump_sg(ioc, sglist, nents); - panic("Check after sba_map_sg()\n"); - } - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - ASSERT(coalesced == filled); - DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled); - - return filled; -} - - -/** - * sba_unmap_sg - unmap Scatter/Gather list - * @dev: instance of PCI owned by the driver that's asking. - * @sglist: array of buffer/length pairs - * @nents: number of entries in list - * @dir: R/W or both. - * - * See Documentation/DMA-mapping.txt - */ -void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) -{ -#ifdef ASSERT_PDIR_SANITY - struct ioc *ioc; - unsigned long flags; -#endif - - DBG_RUN_SG("%s() START %d entries, %p,%x\n", - __FUNCTION__, nents, sba_sg_address(sglist), sglist->length); - -#ifdef ASSERT_PDIR_SANITY - ioc = GET_IOC(dev); - ASSERT(ioc); - - spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check before sba_unmap_sg()"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - while (nents && sglist->dma_length) { - - sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir); - sglist++; - nents--; - } - - DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__, nents); - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check after sba_unmap_sg()"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - -} - -/************************************************************** -* -* Initialization and claim -* -***************************************************************/ - -static void __init -ioc_iova_init(struct ioc *ioc) -{ - int tcnfg; - int agp_found = 0; - struct pci_dev *device = NULL; -#ifdef FULL_VALID_PDIR - unsigned long index; -#endif - - /* - ** Firmware programs the base and size of a "safe IOVA space" - ** (one that doesn't overlap memory or LMMIO space) in the - ** IBASE and IMASK registers. - */ - ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL; - ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL; - - ioc->iov_size = ~ioc->imask + 1; - - DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n", - __FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask, - ioc->iov_size >> 20); - - switch (iovp_size) { - case 4*1024: tcnfg = 0; break; - case 8*1024: tcnfg = 1; break; - case 16*1024: tcnfg = 2; break; - case 64*1024: tcnfg = 3; break; - default: - panic(PFX "Unsupported IOTLB page size %ldK", - iovp_size >> 10); - break; - } - WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG); - - ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE; - ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL, - get_order(ioc->pdir_size)); - if (!ioc->pdir_base) - panic(PFX "Couldn't allocate I/O Page Table\n"); - -#ifdef CONFIG_XEN - /* The page table needs to be pinned in Xen memory */ - if (xen_create_contiguous_region((unsigned long)ioc->pdir_base, - get_order(ioc->pdir_size), 0)) - panic(PFX "Couldn't contiguously map I/O Page Table\n"); -#endif - memset(ioc->pdir_base, 0, ioc->pdir_size); - - DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__, - iovp_size >> 10, ioc->pdir_base, ioc->pdir_size); - - ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base); - WRITE_REG(virt_to_bus(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE); - - /* - ** If an AGP device is present, only use half of the IOV space - ** for PCI DMA. Unfortunately we can't know ahead of time - ** whether GART support will actually be used, for now we - ** can just key on an AGP device found in the system. - ** We program the next pdir index after we stop w/ a key for - ** the GART code to handshake on. - */ - for_each_pci_dev(device) - agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP); - - if (agp_found && reserve_sba_gart) { - printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n", - ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2); - ioc->pdir_size /= 2; - ((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE; - } -#ifdef FULL_VALID_PDIR - /* - ** Check to see if the spill page has been allocated, we don't need more than - ** one across multiple SBAs. - */ - if (!prefetch_spill_page) { - char *spill_poison = "SBAIOMMU POISON"; - int poison_size = 16; - void *poison_addr, *addr; - - addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size)); - if (!addr) - panic(PFX "Couldn't allocate PDIR spill page\n"); - - poison_addr = addr; - for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size) - memcpy(poison_addr, spill_poison, poison_size); - - prefetch_spill_page = virt_to_bus(addr); - - DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page); - } - /* - ** Set all the PDIR entries valid w/ the spill page as the target - */ - for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++) - ((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page); -#endif - - /* Clear I/O TLB of any possible entries */ - WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM); - READ_REG(ioc->ioc_hpa + IOC_PCOM); - - /* Enable IOVA translation */ - WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE); - READ_REG(ioc->ioc_hpa + IOC_IBASE); -} - -static void __init -ioc_resource_init(struct ioc *ioc) -{ - spin_lock_init(&ioc->res_lock); -#if DELAYED_RESOURCE_CNT > 0 - spin_lock_init(&ioc->saved_lock); -#endif - - /* resource map size dictated by pdir_size */ - ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */ - ioc->res_size >>= 3; /* convert bit count to byte count */ - DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size); - - ioc->res_map = (char *) __get_free_pages(GFP_KERNEL, - get_order(ioc->res_size)); - if (!ioc->res_map) - panic(PFX "Couldn't allocate resource map\n"); - - memset(ioc->res_map, 0, ioc->res_size); - /* next available IOVP - circular search */ - ioc->res_hint = (unsigned long *) ioc->res_map; - -#ifdef ASSERT_PDIR_SANITY - /* Mark first bit busy - ie no IOVA 0 */ - ioc->res_map[0] = 0x1; - ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE; -#endif -#ifdef FULL_VALID_PDIR - /* Mark the last resource used so we don't prefetch beyond IOVA space */ - ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */ - ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF - | prefetch_spill_page); -#endif - - DBG_INIT("%s() res_map %x %p\n", __FUNCTION__, - ioc->res_size, (void *) ioc->res_map); -} - -static void __init -ioc_sac_init(struct ioc *ioc) -{ - struct pci_dev *sac = NULL; - struct pci_controller *controller = NULL; - - /* - * pci_alloc_coherent() must return a DMA address which is - * SAC (single address cycle) addressable, so allocate a - * pseudo-device to enforce that. - */ - sac = kmalloc(sizeof(*sac), GFP_KERNEL); - if (!sac) - panic(PFX "Couldn't allocate struct pci_dev"); - memset(sac, 0, sizeof(*sac)); - - controller = kmalloc(sizeof(*controller), GFP_KERNEL); - if (!controller) - panic(PFX "Couldn't allocate struct pci_controller"); - memset(controller, 0, sizeof(*controller)); - - controller->iommu = ioc; - sac->sysdata = controller; - sac->dma_mask = 0xFFFFFFFFUL; -#ifdef CONFIG_PCI - sac->dev.bus = &pci_bus_type; -#endif - ioc->sac_only_dev = sac; -} - -static void __init -ioc_zx1_init(struct ioc *ioc) -{ - unsigned long rope_config; - unsigned int i; - - if (ioc->rev < 0x20) - panic(PFX "IOC 2.0 or later required for IOMMU support\n"); - - /* 38 bit memory controller + extra bit for range displaced by MMIO */ - ioc->dma_mask = (0x1UL << 39) - 1; - - /* - ** Clear ROPE(N)_CONFIG AO bit. - ** Disables "NT Ordering" (~= !"Relaxed Ordering") - ** Overrides bit 1 in DMA Hint Sets. - ** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701. - */ - for (i=0; i<(8*8); i+=8) { - rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i); - rope_config &= ~IOC_ROPE_AO; - WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i); - } -} - -typedef void (initfunc)(struct ioc *); - -struct ioc_iommu { - u32 func_id; - char *name; - initfunc *init; -}; - -static struct ioc_iommu ioc_iommu_info[] __initdata = { - { ZX1_IOC_ID, "zx1", ioc_zx1_init }, - { ZX2_IOC_ID, "zx2", NULL }, - { SX1000_IOC_ID, "sx1000", NULL }, - { SX2000_IOC_ID, "sx2000", NULL }, -}; - -static struct ioc * __init -ioc_init(u64 hpa, void *handle) -{ - struct ioc *ioc; - struct ioc_iommu *info; - - ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); - if (!ioc) - return NULL; - - memset(ioc, 0, sizeof(*ioc)); - - ioc->next = ioc_list; - ioc_list = ioc; - - ioc->handle = handle; - ioc->ioc_hpa = ioremap(hpa, 0x1000); - - ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID); - ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL; - ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL; /* conservative */ - - for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) { - if (ioc->func_id == info->func_id) { - ioc->name = info->name; - if (info->init) - (info->init)(ioc); - } - } - - iovp_size = (1 << iovp_shift); - iovp_mask = ~(iovp_size - 1); - - DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__, - PAGE_SIZE >> 10, iovp_size >> 10); - - if (!ioc->name) { - ioc->name = kmalloc(24, GFP_KERNEL); - if (ioc->name) - sprintf((char *) ioc->name, "Unknown (%04x:%04x)", - ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF); - else - ioc->name = "Unknown"; - } - - ioc_iova_init(ioc); - ioc_resource_init(ioc); - ioc_sac_init(ioc); - - if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask) - ia64_max_iommu_merge_mask = ~iovp_mask; - - printk(KERN_INFO PFX - "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n", - ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF, - hpa, ioc->iov_size >> 20, ioc->ibase); - - return ioc; -} - - - -/************************************************************************** -** -** SBA initialization code (HW and SW) -** -** o identify SBA chip itself -** o FIXME: initialize DMA hints for reasonable defaults -** -**************************************************************************/ - -#ifdef CONFIG_PROC_FS -static void * -ioc_start(struct seq_file *s, loff_t *pos) -{ - struct ioc *ioc; - loff_t n = *pos; - - for (ioc = ioc_list; ioc; ioc = ioc->next) - if (!n--) - return ioc; - - return NULL; -} - -static void * -ioc_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct ioc *ioc = v; - - ++*pos; - return ioc->next; -} - -static void -ioc_stop(struct seq_file *s, void *v) -{ -} - -static int -ioc_show(struct seq_file *s, void *v) -{ - struct ioc *ioc = v; - unsigned long *res_ptr = (unsigned long *)ioc->res_map; - int i, used = 0; - - seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", - ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); -#ifdef CONFIG_NUMA - if (ioc->node != MAX_NUMNODES) - seq_printf(s, "NUMA node : %d\n", ioc->node); -#endif - seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); - seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024); - - for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr) - used += hweight64(*res_ptr); - - seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3); - seq_printf(s, "PDIR used : %d entries\n", used); - -#ifdef PDIR_SEARCH_TIMING - { - unsigned long i = 0, avg = 0, min, max; - min = max = ioc->avg_search[0]; - for (i = 0; i < SBA_SEARCH_SAMPLE; i++) { - avg += ioc->avg_search[i]; - if (ioc->avg_search[i] > max) max = ioc->avg_search[i]; - if (ioc->avg_search[i] < min) min = ioc->avg_search[i]; - } - avg /= SBA_SEARCH_SAMPLE; - seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n", - min, avg, max); - } -#endif -#ifndef ALLOW_IOV_BYPASS - seq_printf(s, "IOVA bypass disabled\n"); -#endif - return 0; -} - -static struct seq_operations ioc_seq_ops = { - .start = ioc_start, - .next = ioc_next, - .stop = ioc_stop, - .show = ioc_show -}; - -static int -ioc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ioc_seq_ops); -} - -static struct file_operations ioc_fops = { - .open = ioc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static void __init -ioc_proc_init(void) -{ - struct proc_dir_entry *dir, *entry; - - dir = proc_mkdir("bus/mckinley", NULL); - if (!dir) - return; - - entry = create_proc_entry(ioc_list->name, 0, dir); - if (entry) - entry->proc_fops = &ioc_fops; -} -#endif - -static void -sba_connect_bus(struct pci_bus *bus) -{ - acpi_handle handle, parent; - acpi_status status; - struct ioc *ioc; - - if (!PCI_CONTROLLER(bus)) - panic(PFX "no sysdata on bus %d!\n", bus->number); - - if (PCI_CONTROLLER(bus)->iommu) - return; - - handle = PCI_CONTROLLER(bus)->acpi_handle; - if (!handle) - return; - - /* - * The IOC scope encloses PCI root bridges in the ACPI - * namespace, so work our way out until we find an IOC we - * claimed previously. - */ - do { - for (ioc = ioc_list; ioc; ioc = ioc->next) - if (ioc->handle == handle) { - PCI_CONTROLLER(bus)->iommu = ioc; - return; - } - - status = acpi_get_parent(handle, &parent); - handle = parent; - } while (ACPI_SUCCESS(status)); - - printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); -} - -#ifdef CONFIG_NUMA -static void __init -sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) -{ - unsigned int node; - int pxm; - - ioc->node = MAX_NUMNODES; - - pxm = acpi_get_pxm(handle); - - if (pxm < 0) - return; - - node = pxm_to_node(pxm); - - if (node >= MAX_NUMNODES || !node_online(node)) - return; - - ioc->node = node; - return; -} -#else -#define sba_map_ioc_to_node(ioc, handle) -#endif - -static int __init -acpi_sba_ioc_add(struct acpi_device *device) -{ - struct ioc *ioc; - acpi_status status; - u64 hpa, length; - struct acpi_buffer buffer; - struct acpi_device_info *dev_info; - - status = hp_acpi_csr_space(device->handle, &hpa, &length); - if (ACPI_FAILURE(status)) - return 1; - - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(device->handle, &buffer); - if (ACPI_FAILURE(status)) - return 1; - dev_info = buffer.pointer; - - /* - * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI - * root bridges, and its CSR space includes the IOC function. - */ - if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) { - hpa += ZX1_IOC_OFFSET; - /* zx1 based systems default to kernel page size iommu pages */ - if (!iovp_shift) - iovp_shift = min(PAGE_SHIFT, 16); - } - kfree(dev_info); - - /* - * default anything not caught above or specified on cmdline to 4k - * iommu page size - */ - if (!iovp_shift) - iovp_shift = 12; - - ioc = ioc_init(hpa, device->handle); - if (!ioc) - return 1; - - /* setup NUMA node association */ - sba_map_ioc_to_node(ioc, device->handle); - return 0; -} - -static struct acpi_driver acpi_sba_ioc_driver = { - .name = "IOC IOMMU Driver", - .ids = "HWP0001,HWP0004", - .ops = { - .add = acpi_sba_ioc_add, - }, -}; - -static int __init -sba_init(void) -{ - if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb")) - return 0; - - acpi_bus_register_driver(&acpi_sba_ioc_driver); - if (!ioc_list) { -#ifdef CONFIG_IA64_GENERIC - extern int swiotlb_late_init_with_default_size (size_t size); - - /* - * If we didn't find something sba_iommu can claim, we - * need to setup the swiotlb and switch to the dig machvec. - */ - if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) - panic("Unable to find SBA IOMMU or initialize " - "software I/O TLB: Try machvec=dig boot option"); - machvec_init("dig"); -#else - panic("Unable to find SBA IOMMU: Try a generic or DIG kernel"); -#endif - return 0; - } - -#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB) - /* - * hpzx1_swiotlb needs to have a fairly small swiotlb bounce - * buffer setup to support devices with smaller DMA masks than - * sba_iommu can handle. - */ - if (ia64_platform_is("hpzx1_swiotlb")) { - extern void hwsw_init(void); - - hwsw_init(); - } -#endif - -#ifdef CONFIG_PCI - { - struct pci_bus *b = NULL; - while ((b = pci_find_next_bus(b)) != NULL) - sba_connect_bus(b); - } -#endif - -#ifdef CONFIG_PROC_FS - ioc_proc_init(); -#endif - return 0; -} - -subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */ - -static int __init -nosbagart(char *str) -{ - reserve_sba_gart = 0; - return 1; -} - -int -sba_dma_supported (struct device *dev, u64 mask) -{ - /* make sure it's at least 32bit capable */ - return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); -} - -int -sba_dma_mapping_error (dma_addr_t dma_addr) -{ - return 0; -} - -__setup("nosbagart", nosbagart); - -static int __init -sba_page_override(char *str) -{ - unsigned long page_size; - - page_size = memparse(str, &str); - switch (page_size) { - case 4096: - case 8192: - case 16384: - case 65536: - iovp_shift = ffs(page_size) - 1; - break; - default: - printk("%s: unknown/unsupported iommu page size %ld\n", - __FUNCTION__, page_size); - } - - return 1; -} - -__setup("sbapagesize=",sba_page_override); - -EXPORT_SYMBOL(sba_dma_mapping_error); -EXPORT_SYMBOL(sba_map_single); -EXPORT_SYMBOL(sba_unmap_single); -EXPORT_SYMBOL(sba_map_sg); -EXPORT_SYMBOL(sba_unmap_sg); -EXPORT_SYMBOL(sba_dma_supported); -EXPORT_SYMBOL(sba_alloc_coherent); -EXPORT_SYMBOL(sba_free_coherent); diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile b/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile deleted file mode 100644 index ad8215a3c5..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -# -# Makefile for the linux kernel. -# - -extra-y := head.o init_task.o vmlinux.lds - -obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ - irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ - salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ - unwind.o mca.o mca_asm.o topology.o - -obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o -obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o -obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o -obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o - -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += acpi-processor.o -endif - -obj-$(CONFIG_IA64_PALINFO) += palinfo.o -obj-$(CONFIG_IOSAPIC) += iosapic.o -obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_SMP) += smp.o smpboot.o -obj-$(CONFIG_NUMA) += numa.o -obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o -obj-$(CONFIG_IA64_CYCLONE) += cyclone.o -obj-$(CONFIG_CPU_FREQ) += cpufreq/ -obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o -obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o -obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o -obj-$(CONFIG_AUDIT) += audit.o -mca_recovery-y += mca_drv.o mca_drv_asm.o - -# The gate DSO image is built using a special linker script. -targets += gate.so gate-syms.o - -extra-y += gate.so gate-syms.o gate.lds gate.o - -# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. -CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 - -CPPFLAGS_gate.lds := -P -C -U$(ARCH) - -quiet_cmd_gate = GATE $@ - cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ - -GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) -$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE - $(call if_changed,gate) - -$(obj)/built-in.o: $(obj)/gate-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o - -GATECFLAGS_gate-syms.o = -r -$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE - $(call if_changed,gate) - -# gate-data.o contains the gate DSO image as data in section .data.gate. -# We must build gate.so before we can assemble it. -# Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/gate-data.o: $(obj)/gate.so diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c b/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c deleted file mode 100644 index 54e2fb8ab7..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c +++ /dev/null @@ -1,1010 +0,0 @@ -/* - * acpi.c - Architecture-Specific Low-Level ACPI Support - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> - * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. - * David Mosberger-Tang <davidm@hpl.hp.com> - * Copyright (C) 2000 Intel Corp. - * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com> - * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> - * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com> - * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com> - * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de> - * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/smp.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/irq.h> -#include <linux/acpi.h> -#include <linux/efi.h> -#include <linux/mmzone.h> -#include <linux/nodemask.h> -#include <asm/io.h> -#include <asm/iosapic.h> -#include <asm/machvec.h> -#include <asm/page.h> -#include <asm/system.h> -#include <asm/numa.h> -#include <asm/sal.h> -#include <asm/cyclone.h> - -#define BAD_MADT_ENTRY(entry, end) ( \ - (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ - ((acpi_table_entry_header *)entry)->length < sizeof(*entry)) - -#define PREFIX "ACPI: " - -void (*pm_idle) (void); -EXPORT_SYMBOL(pm_idle); -void (*pm_power_off) (void); -EXPORT_SYMBOL(pm_power_off); - -unsigned char acpi_kbd_controller_present = 1; -unsigned char acpi_legacy_devices; - -unsigned int acpi_cpei_override; -unsigned int acpi_cpei_phys_cpuid; - -#define MAX_SAPICS 256 -u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = {[0 ... MAX_SAPICS - 1] = -1 }; - -EXPORT_SYMBOL(ia64_acpiid_to_sapicid); - -const char *acpi_get_sysname(void) -{ -#ifdef CONFIG_IA64_GENERIC - unsigned long rsdp_phys; - struct acpi20_table_rsdp *rsdp; - struct acpi_table_xsdt *xsdt; - struct acpi_table_header *hdr; - - rsdp_phys = acpi_find_rsdp(); - if (!rsdp_phys) { - printk(KERN_ERR - "ACPI 2.0 RSDP not found, default to \"dig\"\n"); - return "dig"; - } - - rsdp = (struct acpi20_table_rsdp *)__va(rsdp_phys); - if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { - printk(KERN_ERR - "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); - return "dig"; - } - - xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address); - hdr = &xsdt->header; - if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { - printk(KERN_ERR - "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); - return "dig"; - } - - if (!strcmp(hdr->oem_id, "HP")) { - return "hpzx1"; - } else if (!strcmp(hdr->oem_id, "SGI")) { - return "sn2"; -#ifdef CONFIG_XEN - } else if (is_running_on_xen() && !strcmp(hdr->oem_id, "XEN")) { - return "xen"; -#endif - } - - return "dig"; -#else -# if defined (CONFIG_IA64_HP_SIM) - return "hpsim"; -# elif defined (CONFIG_IA64_HP_ZX1) - return "hpzx1"; -# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB) - return "hpzx1_swiotlb"; -# elif defined (CONFIG_IA64_SGI_SN2) - return "sn2"; -# elif defined (CONFIG_IA64_DIG) - return "dig"; -# elif defined (CONFIG_IA64_XEN) - return "xen"; -# else -# error Unknown platform. Fix acpi.c. -# endif -#endif -} - -#ifdef CONFIG_ACPI - -#define ACPI_MAX_PLATFORM_INTERRUPTS 256 - -/* Array to record platform interrupt vectors for generic interrupt routing. */ -int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { - [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 -}; - -enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; - -/* - * Interrupt routing API for device drivers. Provides interrupt vector for - * a generic platform event. Currently only CPEI is implemented. - */ -int acpi_request_vector(u32 int_type) -{ - int vector = -1; - - if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { - /* corrected platform error interrupt */ - vector = platform_intr_list[int_type]; - } else - printk(KERN_ERR - "acpi_request_vector(): invalid interrupt type\n"); - return vector; -} - -char *__acpi_map_table(unsigned long phys_addr, unsigned long size) -{ - return __va(phys_addr); -} - -/* -------------------------------------------------------------------------- - Boot-time Table Parsing - -------------------------------------------------------------------------- */ - -static int total_cpus __initdata; -static int available_cpus __initdata; -struct acpi_table_madt *acpi_madt __initdata; -static u8 has_8259; - -static int __init -acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header, - const unsigned long end) -{ - struct acpi_table_lapic_addr_ovr *lapic; - - lapic = (struct acpi_table_lapic_addr_ovr *)header; - - if (BAD_MADT_ENTRY(lapic, end)) - return -EINVAL; - - if (lapic->address) { - iounmap(ipi_base_addr); - ipi_base_addr = ioremap(lapic->address, 0); - } - return 0; -} - -static int __init -acpi_parse_lsapic(acpi_table_entry_header * header, const unsigned long end) -{ - struct acpi_table_lsapic *lsapic; - - lsapic = (struct acpi_table_lsapic *)header; - - if (BAD_MADT_ENTRY(lsapic, end)) - return -EINVAL; - - if (lsapic->flags.enabled) { -#ifdef CONFIG_SMP - smp_boot_data.cpu_phys_id[available_cpus] = - (lsapic->id << 8) | lsapic->eid; -#endif - ia64_acpiid_to_sapicid[lsapic->acpi_id] = - (lsapic->id << 8) | lsapic->eid; - ++available_cpus; - } - - total_cpus++; - return 0; -} - -static int __init -acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end) -{ - struct acpi_table_lapic_nmi *lacpi_nmi; - - lacpi_nmi = (struct acpi_table_lapic_nmi *)header; - - if (BAD_MADT_ENTRY(lacpi_nmi, end)) - return -EINVAL; - - /* TBD: Support lapic_nmi entries */ - return 0; -} - -static int __init -acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end) -{ - struct acpi_table_iosapic *iosapic; - - iosapic = (struct acpi_table_iosapic *)header; - - if (BAD_MADT_ENTRY(iosapic, end)) - return -EINVAL; - - return iosapic_init(iosapic->address, iosapic->global_irq_base); -} - -static unsigned int __initdata acpi_madt_rev; - -static int __init -acpi_parse_plat_int_src(acpi_table_entry_header * header, - const unsigned long end) -{ - struct acpi_table_plat_int_src *plintsrc; - int vector; - - plintsrc = (struct acpi_table_plat_int_src *)header; - - if (BAD_MADT_ENTRY(plintsrc, end)) - return -EINVAL; - - /* - * Get vector assignment for this interrupt, set attributes, - * and program the IOSAPIC routing table. - */ - vector = iosapic_register_platform_intr(plintsrc->type, - plintsrc->global_irq, - plintsrc->iosapic_vector, - plintsrc->eid, - plintsrc->id, - (plintsrc->flags.polarity == - 1) ? IOSAPIC_POL_HIGH : - IOSAPIC_POL_LOW, - (plintsrc->flags.trigger == - 1) ? IOSAPIC_EDGE : - IOSAPIC_LEVEL); - - platform_intr_list[plintsrc->type] = vector; - if (acpi_madt_rev > 1) { - acpi_cpei_override = plintsrc->plint_flags.cpei_override_flag; - } - - /* - * Save the physical id, so we can check when its being removed - */ - acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff; - - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU -unsigned int can_cpei_retarget(void) -{ - extern int cpe_vector; - extern unsigned int force_cpei_retarget; - - /* - * Only if CPEI is supported and the override flag - * is present, otherwise return that its re-targettable - * if we are in polling mode. - */ - if (cpe_vector > 0) { - if (acpi_cpei_override || force_cpei_retarget) - return 1; - else - return 0; - } - return 1; -} - -unsigned int is_cpu_cpei_target(unsigned int cpu) -{ - unsigned int logical_id; - - logical_id = cpu_logical_id(acpi_cpei_phys_cpuid); - - if (logical_id == cpu) - return 1; - else - return 0; -} - -void set_cpei_target_cpu(unsigned int cpu) -{ - acpi_cpei_phys_cpuid = cpu_physical_id(cpu); -} -#endif - -unsigned int get_cpei_target_cpu(void) -{ - return acpi_cpei_phys_cpuid; -} - -static int __init -acpi_parse_int_src_ovr(acpi_table_entry_header * header, - const unsigned long end) -{ - struct acpi_table_int_src_ovr *p; - - p = (struct acpi_table_int_src_ovr *)header; - - if (BAD_MADT_ENTRY(p, end)) - return -EINVAL; - - iosapic_override_isa_irq(p->bus_irq, p->global_irq, - (p->flags.polarity == - 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, - (p->flags.trigger == - 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); - return 0; -} - -static int __init -acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end) -{ - struct acpi_table_nmi_src *nmi_src; - - nmi_src = (struct acpi_table_nmi_src *)header; - - if (BAD_MADT_ENTRY(nmi_src, end)) - return -EINVAL; - - /* TBD: Support nimsrc entries */ - return 0; -} - -static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) { - - /* - * Unfortunately ITC_DRIFT is not yet part of the - * official SAL spec, so the ITC_DRIFT bit is not - * set by the BIOS on this hardware. - */ - sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; - - cyclone_setup(); - } -} - -static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size) -{ - if (!phys_addr || !size) - return -EINVAL; - - acpi_madt = (struct acpi_table_madt *)__va(phys_addr); - - acpi_madt_rev = acpi_madt->header.revision; - - /* remember the value for reference after free_initmem() */ -#ifdef CONFIG_ITANIUM - has_8259 = 1; /* Firmware on old Itanium systems is broken */ -#else - has_8259 = acpi_madt->flags.pcat_compat; -#endif - iosapic_system_init(has_8259); - - /* Get base address of IPI Message Block */ - - if (acpi_madt->lapic_address) - ipi_base_addr = ioremap(acpi_madt->lapic_address, 0); - - printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr); - - acpi_madt_oem_check(acpi_madt->header.oem_id, - acpi_madt->header.oem_table_id); - - return 0; -} - -#ifdef CONFIG_ACPI_NUMA - -#undef SLIT_DEBUG - -#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) - -static int __initdata srat_num_cpus; /* number of cpus */ -static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; -#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) -#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) -static struct acpi_table_slit __initdata *slit_table; - -static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa) -{ - int pxm; - - pxm = pa->proximity_domain; - if (ia64_platform_is("sn2")) - pxm += pa->reserved[0] << 8; - return pxm; -} - -static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma) -{ - int pxm; - - pxm = ma->proximity_domain; - if (ia64_platform_is("sn2")) - pxm += ma->reserved1[0] << 8; - return pxm; -} - -/* - * ACPI 2.0 SLIT (System Locality Information Table) - * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf - */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ - u32 len; - - len = sizeof(struct acpi_table_header) + 8 - + slit->localities * slit->localities; - if (slit->header.length != len) { - printk(KERN_ERR - "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", - len, slit->header.length); - memset(numa_slit, 10, sizeof(numa_slit)); - return; - } - slit_table = slit; -} - -void __init -acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) -{ - int pxm; - - if (!pa->flags.enabled) - return; - - pxm = get_processor_proximity_domain(pa); - - /* record this node in proximity bitmap */ - pxm_bit_set(pxm); - - node_cpuid[srat_num_cpus].phys_id = - (pa->apic_id << 8) | (pa->lsapic_eid); - /* nid should be overridden as logical node id later */ - node_cpuid[srat_num_cpus].nid = pxm; - srat_num_cpus++; -} - -void __init -acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) -{ - unsigned long paddr, size; - int pxm; - struct node_memblk_s *p, *q, *pend; - - pxm = get_memory_proximity_domain(ma); - - /* fill node memory chunk structure */ - paddr = ma->base_addr_hi; - paddr = (paddr << 32) | ma->base_addr_lo; - size = ma->length_hi; - size = (size << 32) | ma->length_lo; - - /* Ignore disabled entries */ - if (!ma->flags.enabled) - return; - - /* record this node in proximity bitmap */ - pxm_bit_set(pxm); - - /* Insertion sort based on base address */ - pend = &node_memblk[num_node_memblks]; - for (p = &node_memblk[0]; p < pend; p++) { - if (paddr < p->start_paddr) - break; - } - if (p < pend) { - for (q = pend - 1; q >= p; q--) - *(q + 1) = *q; - } - p->start_paddr = paddr; - p->size = size; - p->nid = pxm; - num_node_memblks++; -} - -void __init acpi_numa_arch_fixup(void) -{ - int i, j, node_from, node_to; - - /* If there's no SRAT, fix the phys_id and mark node 0 online */ - if (srat_num_cpus == 0) { - node_set_online(0); - node_cpuid[0].phys_id = hard_smp_processor_id(); - return; - } - - /* - * MCD - This can probably be dropped now. No need for pxm ID to node ID - * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. - */ - nodes_clear(node_online_map); - for (i = 0; i < MAX_PXM_DOMAINS; i++) { - if (pxm_bit_test(i)) { - int nid = acpi_map_pxm_to_node(i); - node_set_online(nid); - } - } - - /* set logical node id in memory chunk structure */ - for (i = 0; i < num_node_memblks; i++) - node_memblk[i].nid = pxm_to_node(node_memblk[i].nid); - - /* assign memory bank numbers for each chunk on each node */ - for_each_online_node(i) { - int bank; - - bank = 0; - for (j = 0; j < num_node_memblks; j++) - if (node_memblk[j].nid == i) - node_memblk[j].bank = bank++; - } - - /* set logical node id in cpu structure */ - for (i = 0; i < srat_num_cpus; i++) - node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); - - printk(KERN_INFO "Number of logical nodes in system = %d\n", - num_online_nodes()); - printk(KERN_INFO "Number of memory chunks in system = %d\n", - num_node_memblks); - - if (!slit_table) - return; - memset(numa_slit, -1, sizeof(numa_slit)); - for (i = 0; i < slit_table->localities; i++) { - if (!pxm_bit_test(i)) - continue; - node_from = pxm_to_node(i); - for (j = 0; j < slit_table->localities; j++) { - if (!pxm_bit_test(j)) - continue; - node_to = pxm_to_node(j); - node_distance(node_from, node_to) = - slit_table->entry[i * slit_table->localities + j]; - } - } - -#ifdef SLIT_DEBUG - printk("ACPI 2.0 SLIT locality table:\n"); - for_each_online_node(i) { - for_each_online_node(j) - printk("%03d ", node_distance(i, j)); - printk("\n"); - } -#endif -} -#endif /* CONFIG_ACPI_NUMA */ - -/* - * success: return IRQ number (>=0) - * failure: return < 0 - */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) -{ - if (has_8259 && gsi < 16) - return isa_irq_to_vector(gsi); - - return iosapic_register_intr(gsi, - (polarity == - ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : - IOSAPIC_POL_LOW, - (triggering == - ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : - IOSAPIC_LEVEL); -} - -EXPORT_SYMBOL(acpi_register_gsi); - -void acpi_unregister_gsi(u32 gsi) -{ - iosapic_unregister_intr(gsi); -} - -EXPORT_SYMBOL(acpi_unregister_gsi); - -static int __init acpi_parse_fadt(unsigned long phys_addr, unsigned long size) -{ - struct acpi_table_header *fadt_header; - struct fadt_descriptor *fadt; - - if (!phys_addr || !size) - return -EINVAL; - - fadt_header = (struct acpi_table_header *)__va(phys_addr); - if (fadt_header->revision != 3) - return -ENODEV; /* Only deal with ACPI 2.0 FADT */ - - fadt = (struct fadt_descriptor *)fadt_header; - - if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) - acpi_kbd_controller_present = 0; - - if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) - acpi_legacy_devices = 1; - - acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); - return 0; -} - -unsigned long __init acpi_find_rsdp(void) -{ - unsigned long rsdp_phys = 0; - - if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) - rsdp_phys = efi.acpi20; - else if (efi.acpi != EFI_INVALID_TABLE_ADDR) - printk(KERN_WARNING PREFIX - "v1.0/r0.71 tables no longer supported\n"); - return rsdp_phys; -} - -int __init acpi_boot_init(void) -{ - - /* - * MADT - * ---- - * Parse the Multiple APIC Description Table (MADT), if exists. - * Note that this table provides platform SMP configuration - * information -- the successor to MPS tables. - */ - - if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) { - printk(KERN_ERR PREFIX "Can't find MADT\n"); - goto skip_madt; - } - - /* Local APIC */ - - if (acpi_table_parse_madt - (ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0) - printk(KERN_ERR PREFIX - "Error parsing LAPIC address override entry\n"); - - if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) - < 1) - printk(KERN_ERR PREFIX - "Error parsing MADT - no LAPIC entries\n"); - - if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) - < 0) - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - - /* I/O APIC */ - - if (acpi_table_parse_madt - (ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) - printk(KERN_ERR PREFIX - "Error parsing MADT - no IOSAPIC entries\n"); - - /* System-Level Interrupt Routing */ - - if (acpi_table_parse_madt - (ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, - ACPI_MAX_PLATFORM_INTERRUPTS) < 0) - printk(KERN_ERR PREFIX - "Error parsing platform interrupt source entry\n"); - - if (acpi_table_parse_madt - (ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0) - printk(KERN_ERR PREFIX - "Error parsing interrupt source overrides entry\n"); - - if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0) - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - skip_madt: - - /* - * FADT says whether a legacy keyboard controller is present. - * The FADT also contains an SCI_INT line, by which the system - * gets interrupts such as power and sleep buttons. If it's not - * on a Legacy interrupt, it needs to be setup. - */ - if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1) - printk(KERN_ERR PREFIX "Can't find FADT\n"); - -#ifdef CONFIG_SMP - if (available_cpus == 0) { - printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); - printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); - smp_boot_data.cpu_phys_id[available_cpus] = - hard_smp_processor_id(); - available_cpus = 1; /* We've got at least one of these, no? */ - } - smp_boot_data.cpu_count = available_cpus; - - smp_build_cpu_map(); -# ifdef CONFIG_ACPI_NUMA - if (srat_num_cpus == 0) { - int cpu, i = 1; - for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) - if (smp_boot_data.cpu_phys_id[cpu] != - hard_smp_processor_id()) - node_cpuid[i++].phys_id = - smp_boot_data.cpu_phys_id[cpu]; - } -# endif -#endif -#ifdef CONFIG_ACPI_NUMA - build_cpu_to_node_map(); -#endif - /* Make boot-up look pretty */ - printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, - total_cpus); - return 0; -} - -int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) -{ - int vector; - - if (has_8259 && gsi < 16) - *irq = isa_irq_to_vector(gsi); - else { - vector = gsi_to_vector(gsi); - if (vector == -1) - return -1; - - *irq = vector; - } - return 0; -} - -/* - * ACPI based hotplug CPU support - */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static -int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) -{ -#ifdef CONFIG_ACPI_NUMA - int pxm_id; - - pxm_id = acpi_get_pxm(handle); - - /* - * Assuming that the container driver would have set the proximity - * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag - */ - node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); - - node_cpuid[cpu].phys_id = physid; -#endif - return (0); -} - -int additional_cpus __initdata = -1; - -static __init int setup_additional_cpus(char *s) -{ - if (s) - additional_cpus = simple_strtol(s, NULL, 0); - - return 0; -} - -early_param("additional_cpus", setup_additional_cpus); - -/* - * cpu_possible_map should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and dont expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_map on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with additional_cpus=NUM - * - Otherwise don't reserve additional CPUs. - */ -__init void prefill_possible_map(void) -{ - int i; - int possible, disabled_cpus; - - disabled_cpus = total_cpus - available_cpus; - - if (additional_cpus == -1) { - if (disabled_cpus > 0) - additional_cpus = disabled_cpus; - else - additional_cpus = 0; - } - - possible = available_cpus + additional_cpus; - - if (possible > NR_CPUS) - possible = NR_CPUS; - - printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", - possible, max((possible - available_cpus), 0)); - - for (i = 0; i < possible; i++) - cpu_set(i, cpu_possible_map); -} - -int acpi_map_lsapic(acpi_handle handle, int *pcpu) -{ - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - struct acpi_table_lsapic *lsapic; - cpumask_t tmp_map; - long physid; - int cpu; - - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) - return -EINVAL; - - if (!buffer.length || !buffer.pointer) - return -EINVAL; - - obj = buffer.pointer; - if (obj->type != ACPI_TYPE_BUFFER || - obj->buffer.length < sizeof(*lsapic)) { - kfree(buffer.pointer); - return -EINVAL; - } - - lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer; - - if ((lsapic->header.type != ACPI_MADT_LSAPIC) || - (!lsapic->flags.enabled)) { - kfree(buffer.pointer); - return -EINVAL; - } - - physid = ((lsapic->id << 8) | (lsapic->eid)); - - kfree(buffer.pointer); - buffer.length = ACPI_ALLOCATE_BUFFER; - buffer.pointer = NULL; - - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - if (cpu >= NR_CPUS) - return -EINVAL; - - acpi_map_cpu2node(handle, cpu, physid); - - cpu_set(cpu, cpu_present_map); - ia64_cpu_to_sapicid[cpu] = physid; - ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu]; - - *pcpu = cpu; - return (0); -} - -EXPORT_SYMBOL(acpi_map_lsapic); - -int acpi_unmap_lsapic(int cpu) -{ - int i; - - for (i = 0; i < MAX_SAPICS; i++) { - if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) { - ia64_acpiid_to_sapicid[i] = -1; - break; - } - } - ia64_cpu_to_sapicid[cpu] = -1; - cpu_clear(cpu, cpu_present_map); - -#ifdef CONFIG_ACPI_NUMA - /* NUMA specific cleanup's */ -#endif - - return (0); -} - -EXPORT_SYMBOL(acpi_unmap_lsapic); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - -#ifdef CONFIG_ACPI_NUMA -static acpi_status __devinit -acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) -{ - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - struct acpi_table_iosapic *iosapic; - unsigned int gsi_base; - int pxm, node; - - /* Only care about objects w/ a method that returns the MADT */ - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) - return AE_OK; - - if (!buffer.length || !buffer.pointer) - return AE_OK; - - obj = buffer.pointer; - if (obj->type != ACPI_TYPE_BUFFER || - obj->buffer.length < sizeof(*iosapic)) { - kfree(buffer.pointer); - return AE_OK; - } - - iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer; - - if (iosapic->header.type != ACPI_MADT_IOSAPIC) { - kfree(buffer.pointer); - return AE_OK; - } - - gsi_base = iosapic->global_irq_base; - - kfree(buffer.pointer); - - /* - * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell - * us which node to associate this with. - */ - pxm = acpi_get_pxm(handle); - if (pxm < 0) - return AE_OK; - - node = pxm_to_node(pxm); - - if (node >= MAX_NUMNODES || !node_online(node) || - cpus_empty(node_to_cpumask(node))) - return AE_OK; - - /* We know a gsi to node mapping! */ - map_iosapic_to_node(gsi_base, node); - return AE_OK; -} - -static int __init -acpi_map_iosapics (void) -{ - acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL); - return 0; -} - -fs_initcall(acpi_map_iosapics); -#endif /* CONFIG_ACPI_NUMA */ - -int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) -{ - int err; - - if ((err = iosapic_init(phys_addr, gsi_base))) - return err; - -#ifdef CONFIG_ACPI_NUMA - acpi_map_iosapic(handle, 0, NULL, NULL); -#endif /* CONFIG_ACPI_NUMA */ - - return 0; -} - -EXPORT_SYMBOL(acpi_register_ioapic); - -int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) -{ - return iosapic_remove(gsi_base); -} - -EXPORT_SYMBOL(acpi_unregister_ioapic); - -#endif /* CONFIG_ACPI */ diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c deleted file mode 100644 index 2aa8c101aa..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - */ - -#define ASM_OFFSETS_C 1 - -#include <linux/sched.h> - -#include <asm-ia64/processor.h> -#include <asm-ia64/ptrace.h> -#include <asm-ia64/siginfo.h> -#include <asm-ia64/sigcontext.h> -#include <asm-ia64/mca.h> - -#include "../kernel/sigframe.h" - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -void foo(void) -{ - DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); - DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info)); - DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs)); - DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack)); - DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo)); - DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64)); - DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); - DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); - - BLANK(); - - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); - - BLANK(); - - DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); - DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); - DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); - DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); - DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); - DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); - DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); - DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); - DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); - DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); - DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); - - BLANK(); - - DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock)); - - BLANK(); - - DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, - group_stop_count)); - DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); - - BLANK(); - - DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6)); - DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7)); - DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd)); - DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd)); - DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8)); - DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9)); - DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10)); - DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11)); - DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr)); - DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip)); - DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs)); - DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat)); - DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs)); - DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc)); - DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat)); - - DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore)); - DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr)); - DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0)); - DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs)); - DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1)); - DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12)); - DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13)); - DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr)); - DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15)); - DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14)); - DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2)); - DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3)); - DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16)); - DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17)); - DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18)); - DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19)); - DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20)); - DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21)); - DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22)); - DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23)); - DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24)); - DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25)); - DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26)); - DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27)); - DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28)); - DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29)); - DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30)); - DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31)); - DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv)); - DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6)); - DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7)); - DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8)); - DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9)); - DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10)); - DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11)); - - BLANK(); - - DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat)); - DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr)); - DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2)); - DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3)); - DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4)); - DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5)); - DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12)); - DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13)); - DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14)); - DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15)); - DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16)); - DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17)); - DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18)); - DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19)); - DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20)); - DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21)); - DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22)); - DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23)); - DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24)); - DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25)); - DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26)); - DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27)); - DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28)); - DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29)); - DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30)); - DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31)); - DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4)); - DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5)); - DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6)); - DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7)); - DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0)); - DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1)); - DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2)); - DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3)); - DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4)); - DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5)); - DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs)); - DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc)); - DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat)); - DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat)); - DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore)); - DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr)); - - BLANK(); - - DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip)); - DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp)); - DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr)); - DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat)); - DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat)); - DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0])); - DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm)); - DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags)); - DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6])); - DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr)); - DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12])); - DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base)); - DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs)); - - BLANK(); - - DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal)); - - BLANK(); - - DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0)); - DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1)); - DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2)); - DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler)); - DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc)); - BLANK(); - /* for assembly files which can't include sched.h: */ - DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); - DEFINE(IA64_CLONE_VM, CLONE_VM); - - BLANK(); - DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, - offsetof (struct cpuinfo_ia64, nsec_per_cyc)); - DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET, - offsetof (struct cpuinfo_ia64, ptce_base)); - DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET, - offsetof (struct cpuinfo_ia64, ptce_count)); - DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET, - offsetof (struct cpuinfo_ia64, ptce_stride)); - BLANK(); - DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, - offsetof (struct timespec, tv_nsec)); - - DEFINE(CLONE_SETTLS_BIT, 19); -#if CLONE_SETTLS != (1<<19) -# error "CLONE_SETTLS_BIT incorrect, please fix" -#endif - - BLANK(); - DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET, - offsetof (struct ia64_mca_cpu, mca_stack)); - DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, - offsetof (struct ia64_mca_cpu, init_stack)); - BLANK(); - DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET, - offsetof (struct ia64_sal_os_state, os_gp)); - DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, - offsetof (struct ia64_sal_os_state, proc_state_param)); - DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET, - offsetof (struct ia64_sal_os_state, sal_ra)); - DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET, - offsetof (struct ia64_sal_os_state, sal_gp)); - DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, - offsetof (struct ia64_sal_os_state, pal_min_state)); - DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET, - offsetof (struct ia64_sal_os_state, os_status)); - DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET, - offsetof (struct ia64_sal_os_state, context)); - DEFINE(IA64_SAL_OS_STATE_SIZE, - sizeof (struct ia64_sal_os_state)); - BLANK(); - - DEFINE(IA64_PMSA_GR_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_gr)); - DEFINE(IA64_PMSA_BANK1_GR_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_bank1_gr)); - DEFINE(IA64_PMSA_PR_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_pr)); - DEFINE(IA64_PMSA_BR0_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_br0)); - DEFINE(IA64_PMSA_RSC_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_rsc)); - DEFINE(IA64_PMSA_IIP_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_iip)); - DEFINE(IA64_PMSA_IPSR_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_ipsr)); - DEFINE(IA64_PMSA_IFS_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_ifs)); - DEFINE(IA64_PMSA_XIP_OFFSET, - offsetof (struct pal_min_state_area_s, pmsa_xip)); - BLANK(); - - /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ - DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr)); - DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source)); - DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift)); - DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc)); - DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset)); - DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle)); - DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter)); - DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter)); - DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask)); - DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU); - DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64); - DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32); - DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec)); - -#ifdef CONFIG_XEN - BLANK(); - -#define DEFINE_MAPPED_REG_OFS(sym, field) \ - DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(mapped_regs_t, field))) - - DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr); - DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr); - DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip); - DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs); - DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs); - DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr); - DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa); - DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa); - DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim); - DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha); - DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir); - DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled); - DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); - DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); - DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); - DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat); - DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat); -#endif /* CONFIG_XEN */ -} diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S deleted file mode 100644 index f46bdcf401..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S +++ /dev/null @@ -1,1620 +0,0 @@ -/* - * ia64/kernel/entry.S - * - * Kernel entry points. - * - * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * Copyright (C) 1999, 2002-2003 - * Asit Mallick <Asit.K.Mallick@intel.com> - * Don Dugger <Don.Dugger@intel.com> - * Suresh Siddha <suresh.b.siddha@intel.com> - * Fenghua Yu <fenghua.yu@intel.com> - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> - */ -/* - * ia64_switch_to now places correct virtual mapping in in TR2 for - * kernel stack. This allows us to handle interrupts without changing - * to physical mode. - * - * Jonathan Nicklin <nicklin@missioncriticallinux.com> - * Patrick O'Rourke <orourke@missioncriticallinux.com> - * 11/07/2000 - */ -/* - * Global (preserved) predicate usage on syscall entry/exit path: - * - * pKStk: See entry.h. - * pUStk: See entry.h. - * pSys: See entry.h. - * pNonSys: !pSys - */ - - -#include <asm/asmmacro.h> -#include <asm/cache.h> -#include <asm/errno.h> -#include <asm/kregs.h> -#include <asm/asm-offsets.h> -#include <asm/pgtable.h> -#include <asm/percpu.h> -#include <asm/processor.h> -#include <asm/thread_info.h> -#include <asm/unistd.h> - -#include "minstate.h" - - /* - * execve() is special because in case of success, we need to - * setup a null register window frame. - */ -ENTRY(ia64_execve) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,4,0 - mov loc0=rp - .body - mov out0=in0 // filename - ;; // stop bit between alloc and call - mov out1=in1 // argv - mov out2=in2 // envp - add out3=16,sp // regs - br.call.sptk.many rp=sys_execve -.ret0: -#ifdef CONFIG_IA32_SUPPORT - /* - * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers - * from pt_regs. - */ - adds r16=PT(CR_IPSR)+16,sp - ;; - ld8 r16=[r16] -#endif - cmp4.ge p6,p7=r8,r0 - mov ar.pfs=loc1 // restore ar.pfs - sxt4 r8=r8 // return 64-bit result - ;; - stf.spill [sp]=f0 -(p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode... - mov rp=loc0 -(p6) mov ar.pfs=r0 // clear ar.pfs on success -(p7) br.ret.sptk.many rp - - /* - * In theory, we'd have to zap this state only to prevent leaking of - * security sensitive state (e.g., if current->mm->dumpable is zero). However, - * this executes in less than 20 cycles even on Itanium, so it's not worth - * optimizing for...). - */ - mov ar.unat=0; mov ar.lc=0 - mov r4=0; mov f2=f0; mov b1=r0 - mov r5=0; mov f3=f0; mov b2=r0 - mov r6=0; mov f4=f0; mov b3=r0 - mov r7=0; mov f5=f0; mov b4=r0 - ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 - ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 - ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 - ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 - ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 - ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 - ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 -#ifdef CONFIG_IA32_SUPPORT - tbit.nz p6,p0=r16, IA64_PSR_IS_BIT - movl loc0=ia64_ret_from_ia32_execve - ;; -(p6) mov rp=loc0 -#endif - br.ret.sptk.many rp -END(ia64_execve) - -/* - * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, - * u64 tls) - */ -GLOBAL_ENTRY(sys_clone2) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp - mov loc0=rp - mov loc1=r16 // save ar.pfs across do_fork - .body - mov out1=in1 - mov out3=in2 - tbit.nz p6,p0=in0,CLONE_SETTLS_BIT - mov out4=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID - ;; -(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread() - mov out5=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID - adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s - mov out0=in0 // out0 = clone_flags - br.call.sptk.many rp=do_fork -.ret1: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone2) - -/* - * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) - * Deprecated. Use sys_clone2() instead. - */ -GLOBAL_ENTRY(sys_clone) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp - mov loc0=rp - mov loc1=r16 // save ar.pfs across do_fork - .body - mov out1=in1 - mov out3=16 // stacksize (compensates for 16-byte scratch area) - tbit.nz p6,p0=in0,CLONE_SETTLS_BIT - mov out4=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID - ;; -(p6) st8 [r2]=in4 // store TLS in r13 (tp) - mov out5=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID - adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = ®s - mov out0=in0 // out0 = clone_flags - br.call.sptk.many rp=do_fork -.ret2: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone) - -/* - * prev_task <- ia64_switch_to(struct task_struct *next) - * With Ingo's new scheduler, interrupts are disabled when this routine gets - * called. The code starting at .map relies on this. The rest of the code - * doesn't care about the interrupt masking status. - */ -GLOBAL_ENTRY(__ia64_switch_to) - .prologue - alloc r16=ar.pfs,1,0,0,0 - DO_SAVE_SWITCH_STACK - .body - - adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - movl r25=init_task - mov r27=IA64_KR(CURRENT_STACK) - adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 - dep r20=0,in0,61,3 // physical address of "next" - ;; - st8 [r22]=sp // save kernel stack pointer of old task - shr.u r26=r20,IA64_GRANULE_SHIFT - cmp.eq p7,p6=r25,in0 - ;; - /* - * If we've already mapped this task's page, we can skip doing it again. - */ -(p6) cmp.eq p7,p6=r26,r27 -(p6) br.cond.dpnt .map - ;; -.done: - ld8 sp=[r21] // load kernel stack pointer of new task - mov IA64_KR(CURRENT)=in0 // update "current" application register - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer - ;; - DO_LOAD_SWITCH_STACK - -#ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.many rp // boogie on out in new context - -.map: - rsm psr.ic // interrupts (psr.i) are already disabled here - movl r25=PAGE_KERNEL - ;; - srlz.d - or r23=r25,r20 // construct PA | page properties - mov r25=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r25 - mov cr.ifa=in0 // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK - mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped... - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ssm psr.ic // reenable the psr.ic bit - ;; - srlz.d - br.cond.sptk .done -END(__ia64_switch_to) - -/* - * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This - * means that we may get an interrupt with "sp" pointing to the new kernel stack while - * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, - * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a - * problem. Also, we don't need to specify unwind information for preserved registers - * that are not modified in save_switch_stack as the right unwind information is already - * specified at the call-site of save_switch_stack. - */ - -/* - * save_switch_stack: - * - r16 holds ar.pfs - * - b7 holds address to return to - * - rp (b0) holds return address to save - */ -GLOBAL_ENTRY(save_switch_stack) - .prologue - .altrp b7 - flushrs // flush dirty regs to backing store (must be first in insn group) - .save @priunat,r17 - mov r17=ar.unat // preserve caller's - .body -#ifdef CONFIG_ITANIUM - adds r2=16+128,sp - adds r3=16+64,sp - adds r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,16 // spill r4 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl.nt1 [r2],128 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl [r2] - lfetch.fault.excl [r3] - adds r15=SW(R5)+16,sp -#else - add r2=16+3*128,sp - add r3=16,sp - add r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 - ;; - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 - lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 - ;; - lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 - lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 - adds r15=SW(R5)+16,sp -#endif - ;; - st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 - mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 - add r2=SW(F2)+16,sp // r2 = &sw->f2 - ;; - st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 - mov.m r18=ar.fpsr // preserve fpsr - add r3=SW(F3)+16,sp // r3 = &sw->f3 - ;; - stf.spill [r2]=f2,32 - mov.m r19=ar.rnat - mov r21=b0 - - stf.spill [r3]=f3,32 - st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 - mov r22=b1 - ;; - // since we're done with the spills, read and save ar.unat: - mov.m r29=ar.unat - mov.m r20=ar.bspstore - mov r23=b2 - stf.spill [r2]=f4,32 - stf.spill [r3]=f5,32 - mov r24=b3 - ;; - st8 [r14]=r21,SW(B1)-SW(B0) // save b0 - st8 [r15]=r23,SW(B3)-SW(B2) // save b2 - mov r25=b4 - mov r26=b5 - ;; - st8 [r14]=r22,SW(B4)-SW(B1) // save b1 - st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 - mov r21=ar.lc // I-unit - stf.spill [r2]=f12,32 - stf.spill [r3]=f13,32 - ;; - st8 [r14]=r25,SW(B5)-SW(B4) // save b4 - st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs - stf.spill [r2]=f14,32 - stf.spill [r3]=f15,32 - ;; - st8 [r14]=r26 // save b5 - st8 [r15]=r21 // save ar.lc - stf.spill [r2]=f16,32 - stf.spill [r3]=f17,32 - ;; - stf.spill [r2]=f18,32 - stf.spill [r3]=f19,32 - ;; - stf.spill [r2]=f20,32 - stf.spill [r3]=f21,32 - ;; - stf.spill [r2]=f22,32 - stf.spill [r3]=f23,32 - ;; - stf.spill [r2]=f24,32 - stf.spill [r3]=f25,32 - ;; - stf.spill [r2]=f26,32 - stf.spill [r3]=f27,32 - ;; - stf.spill [r2]=f28,32 - stf.spill [r3]=f29,32 - ;; - stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) - stf.spill [r3]=f31,SW(PR)-SW(F31) - add r14=SW(CALLER_UNAT)+16,sp - ;; - st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat - st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat - mov r21=pr - ;; - st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat - st8 [r3]=r21 // save predicate registers - ;; - st8 [r2]=r20 // save ar.bspstore - st8 [r14]=r18 // save fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(save_switch_stack) - -/* - * load_switch_stack: - * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) - * - b7 holds address to return to - * - must not touch r8-r11 - */ -GLOBAL_ENTRY(load_switch_stack) - .prologue - .altrp b7 - - .body - lfetch.fault.nt1 [sp] - adds r2=SW(AR_BSPSTORE)+16,sp - adds r3=SW(AR_UNAT)+16,sp - mov ar.rsc=0 // put RSE into enforced lazy mode - adds r14=SW(CALLER_UNAT)+16,sp - adds r15=SW(AR_FPSR)+16,sp - ;; - ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore - ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat - ;; - ld8 r21=[r2],16 // restore b0 - ld8 r22=[r3],16 // restore b1 - ;; - ld8 r23=[r2],16 // restore b2 - ld8 r24=[r3],16 // restore b3 - ;; - ld8 r25=[r2],16 // restore b4 - ld8 r26=[r3],16 // restore b5 - ;; - ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs - ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc - ;; - ld8 r28=[r2] // restore pr - ld8 r30=[r3] // restore rnat - ;; - ld8 r18=[r14],16 // restore caller's unat - ld8 r19=[r15],24 // restore fpsr - ;; - ldf.fill f2=[r14],32 - ldf.fill f3=[r15],32 - ;; - ldf.fill f4=[r14],32 - ldf.fill f5=[r15],32 - ;; - ldf.fill f12=[r14],32 - ldf.fill f13=[r15],32 - ;; - ldf.fill f14=[r14],32 - ldf.fill f15=[r15],32 - ;; - ldf.fill f16=[r14],32 - ldf.fill f17=[r15],32 - ;; - ldf.fill f18=[r14],32 - ldf.fill f19=[r15],32 - mov b0=r21 - ;; - ldf.fill f20=[r14],32 - ldf.fill f21=[r15],32 - mov b1=r22 - ;; - ldf.fill f22=[r14],32 - ldf.fill f23=[r15],32 - mov b2=r23 - ;; - mov ar.bspstore=r27 - mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 - mov b3=r24 - ;; - ldf.fill f24=[r14],32 - ldf.fill f25=[r15],32 - mov b4=r25 - ;; - ldf.fill f26=[r14],32 - ldf.fill f27=[r15],32 - mov b5=r26 - ;; - ldf.fill f28=[r14],32 - ldf.fill f29=[r15],32 - mov ar.pfs=r16 - ;; - ldf.fill f30=[r14],32 - ldf.fill f31=[r15],24 - mov ar.lc=r17 - ;; - ld8.fill r4=[r14],16 - ld8.fill r5=[r15],16 - mov pr=r28,-1 - ;; - ld8.fill r6=[r14],16 - ld8.fill r7=[r15],16 - - mov ar.unat=r18 // restore caller's unat - mov ar.rnat=r30 // must restore after bspstore but before rsc! - mov ar.fpsr=r19 // restore fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(load_switch_stack) - -GLOBAL_ENTRY(prefetch_stack) - add r14 = -IA64_SWITCH_STACK_SIZE, sp - add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 - ;; - ld8 r16 = [r15] // load next's stack pointer - lfetch.fault.excl [r14], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault [r16], 128 - br.ret.sptk.many rp -END(prefetch_stack) - -GLOBAL_ENTRY(execve) - mov r15=__NR_execve // put syscall number in place - break __BREAK_SYSCALL - br.ret.sptk.many rp -END(execve) - -GLOBAL_ENTRY(clone) - mov r15=__NR_clone // put syscall number in place - break __BREAK_SYSCALL - br.ret.sptk.many rp -END(clone) - - /* - * Invoke a system call, but do some tracing before and after the call. - * We MUST preserve the current register frame throughout this routine - * because some system calls (such as ia64_execve) directly - * manipulate ar.pfs. - */ -GLOBAL_ENTRY(__ia64_trace_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * We need to preserve the scratch registers f6-f11 in case the system - * call is sigreturn. - */ - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - ldf.fill f6=[r16],32 - ldf.fill f7=[r17],32 - ;; - ldf.fill f8=[r16],32 - ldf.fill f9=[r17],32 - ;; - ldf.fill f10=[r16] - ldf.fill f11=[r17] - // the syscall number may have changed, so re-load it and re-calculate the - // syscall entry-point: - adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) - ;; - ld8 r15=[r15] - mov r3=NR_syscalls - 1 - ;; - adds r15=-1024,r15 - movl r16=sys_call_table - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 - ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - ;; - mov b6=r20 - br.call.sptk.many rp=b6 // do the syscall -.strace_check_retval: - cmp.lt p6,p0=r8,r0 // syscall failed? - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - ;; // avoid RAW on r10 -.strace_save_retval: -.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 -.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk - br.cond.sptk .work_pending_syscall_end - -strace_error: - ld8 r3=[r2] // load pt_regs.r8 - sub r9=0,r8 // negate return value to get errno value - ;; - cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? - adds r3=16,r2 // r3=&pt_regs.r10 - ;; -(p6) mov r10=-1 -(p6) mov r8=r9 - br.cond.sptk .strace_save_retval -END(__ia64_trace_syscall) - - /* - * When traced and returning from sigreturn, we invoke syscall_trace but then - * go straight to ia64_leave_kernel rather than ia64_leave_syscall. - */ -GLOBAL_ENTRY(ia64_strace_leave_kernel) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -} -.ret4: br.cond.sptk ia64_leave_kernel -END(ia64_strace_leave_kernel) - -GLOBAL_ENTRY(__ia64_ret_from_clone) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - /* - * We need to call schedule_tail() to complete the scheduling process. - * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the - * address of the previously executing task. - */ - br.call.sptk.many rp=ia64_invoke_schedule_tail -} -.ret8: - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] - ;; - mov r8=0 - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 - ;; - cmp.ne p6,p0=r2,r0 -(p6) br.cond.spnt .strace_check_retval - ;; // added stop bits to prevent r8 dependency -END(__ia64_ret_from_clone) - // fall through -GLOBAL_ENTRY(ia64_ret_from_syscall) - PT_REGS_UNWIND_INFO(0) - cmp.ge p6,p7=r8,r0 // syscall executed successfully? - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - mov r10=r0 // clear error indication in r10 -(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure - ;; - // don't fall through, ia64_leave_syscall may be #define'd - br.cond.sptk.few ia64_leave_syscall - ;; -END(ia64_ret_from_syscall) -/* - * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't - * need to switch to bank 0 and doesn't restore the scratch registers. - * To avoid leaking kernel bits, the scratch registers are set to - * the following known-to-be-safe values: - * - * r1: restored (global pointer) - * r2: cleared - * r3: 1 (when returning to user-level) - * r8-r11: restored (syscall return value(s)) - * r12: restored (user-level stack pointer) - * r13: restored (user-level thread pointer) - * r14: set to __kernel_syscall_via_epc - * r15: restored (syscall #) - * r16-r17: cleared - * r18: user-level b6 - * r19: cleared - * r20: user-level ar.fpsr - * r21: user-level b0 - * r22: cleared - * r23: user-level ar.bspstore - * r24: user-level ar.rnat - * r25: user-level ar.unat - * r26: user-level ar.pfs - * r27: user-level ar.rsc - * r28: user-level ip - * r29: user-level psr - * r30: user-level cfm - * r31: user-level pr - * f6-f11: cleared - * pr: restored (user-level pr) - * b0: restored (user-level rp) - * b6: restored - * b7: set to __kernel_syscall_via_epc - * ar.unat: restored (user-level ar.unat) - * ar.pfs: restored (user-level ar.pfs) - * ar.rsc: restored (user-level ar.rsc) - * ar.rnat: restored (user-level ar.rnat) - * ar.bspstore: restored (user-level ar.bspstore) - * ar.fpsr: restored (user-level ar.fpsr) - * ar.ccv: cleared - * ar.csd: cleared - * ar.ssd: cleared - */ -GLOBAL_ENTRY(__ia64_leave_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. - */ -#ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else /* !CONFIG_PREEMPT */ -(pUStk) rsm psr.i - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.work_processed_syscall: - adds r2=PT(LOADRS)+16,r12 - adds r3=PT(AR_BSPSTORE)+16,r12 - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - nop.i 0 - ;; - mov r16=ar.bsp // M2 get existing backing store pointer - ld8 r18=[r2],PT(R9)-PT(B6) // load b6 -(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) -(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? -(p6) br.cond.spnt .work_pending_syscall - ;; - // start restoring the state saved on the kernel stack (struct pt_regs): - ld8 r9=[r2],PT(CR_IPSR)-PT(R9) - ld8 r11=[r3],PT(CR_IIP)-PT(R11) -(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! - ;; - invala // M0|1 invalidate ALAT - rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection - cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs - - ld8 r29=[r2],16 // M0|1 load cr.ipsr - ld8 r28=[r3],16 // M0|1 load cr.iip - mov r22=r0 // A clear r22 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; - ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled - nop 0 - ;; - ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 - ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc - mov f6=f0 // F clear f6 - ;; - ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) - ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates - mov f7=f0 // F clear f7 - ;; - ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr - ld8.fill r1=[r3],16 // M0|1 load r1 -(pUStk) mov r17=1 // A - ;; -(pUStk) st1 [r14]=r17 // M2|3 - ld8.fill r13=[r3],16 // M0|1 - mov f8=f0 // F clear f8 - ;; - ld8.fill r12=[r2] // M0|1 restore r12 (sp) - ld8.fill r15=[r3] // M0|1 restore r15 - mov b6=r18 // I0 restore b6 - - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A - mov f9=f0 // F clear f9 -(pKStk) br.cond.dpnt.many skip_rbs_switch // B - - srlz.d // M0 ensure interruption collection is off (for cover) - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition - cover // B add current frame into dirty partition & set cr.ifs - ;; -(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 - mov r19=ar.bsp // M2 get new backing store pointer - mov f10=f0 // F clear f10 - - nop.m 0 - movl r14=__kernel_syscall_via_epc // X - ;; - mov.m ar.csd=r0 // M2 clear ar.csd - mov.m ar.ccv=r0 // M2 clear ar.ccv - mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) - - mov.m ar.ssd=r0 // M2 clear ar.ssd - mov f11=f0 // F clear f11 - br.cond.sptk.many rbs_switch // B -END(__ia64_leave_syscall) - -#ifdef CONFIG_IA32_SUPPORT -GLOBAL_ENTRY(ia64_ret_from_ia32_execve) - PT_REGS_UNWIND_INFO(0) - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - ;; - .mem.offset 0,0 - st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit - .mem.offset 8,0 - st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit - ;; - // don't fall through, ia64_leave_kernel may be #define'd - br.cond.sptk.few ia64_leave_kernel - ;; -END(ia64_ret_from_ia32_execve) -#endif /* CONFIG_IA32_SUPPORT */ -GLOBAL_ENTRY(__ia64_leave_kernel) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. - */ -#ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else -(pUStk) rsm psr.i - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.work_processed_kernel: - adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r17] // load current_thread_info()->flags - adds r21=PT(PR)+16,r12 - ;; - - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) - adds r30=PT(AR_CCV)+16,r12 -(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv -(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd -(p6) br.cond.spnt .work_pending - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) - mov ar.ccv=r15 - ;; - ldf.fill f11=[r2] - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) - ;; -(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled - nop.i 0 - nop.i 0 - ;; - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16 // load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r21=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] -(pUStk) mov r17=1 - ;; - ld8.fill r3=[r16] -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - mov r16=ar.bsp // get existing backing store pointer - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 - ;; - ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 -(pKStk) br.cond.dpnt skip_rbs_switch - - /* - * Restore user backing store. - * - * NOTE: alloc, loadrs, and cover can't be predicated. - */ -(pNonSys) br.cond.dpnt dont_preserve_current_frame - cover // add current frame into dirty partition and set cr.ifs - ;; - mov r19=ar.bsp // get new backing store pointer -rbs_switch: - sub r16=r16,r18 // krbs = old bsp - size of dirty partition - cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs - ;; - sub r19=r19,r16 // calculate total byte size of dirty partition - add r18=64,r18 // don't force in0-in7 into memory... - ;; - shl r19=r19,16 // shift size of dirty partition into loadrs position - ;; -dont_preserve_current_frame: - /* - * To prevent leaking bits between the kernel and user-space, - * we must clear the stacked registers in the "invalid" partition here. - * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, - * 5 registers/cycle on McKinley). - */ -# define pRecurse p6 -# define pReturn p7 -#ifdef CONFIG_ITANIUM -# define Nregs 10 -#else -# define Nregs 14 -#endif - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r17 - mov in1=0 - ;; - TEXT_ALIGN(32) -rse_clear_invalid: -#ifdef CONFIG_ITANIUM - // cycle 0 - { .mii - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 -}{ .mfb - add out1=1,in1 // increment recursion count - nop.f 0 - nop.b 0 // can't do br.call here because of alloc (WAW on CFM) - ;; -}{ .mfi // cycle 1 - mov loc1=0 - nop.f 0 - mov loc2=0 -}{ .mib - mov loc3=0 - mov loc4=0 -(pRecurse) br.call.sptk.many b0=rse_clear_invalid - -}{ .mfi // cycle 2 - mov loc5=0 - nop.f 0 - cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret -}{ .mib - mov loc6=0 - mov loc7=0 -(pReturn) br.ret.sptk.many b0 -} -#else /* !CONFIG_ITANIUM */ - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 -(pRecurse) br.call.dptk.few b0=rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 -(pReturn) br.ret.dptk.many b0 -#endif /* !CONFIG_ITANIUM */ -# undef pRecurse -# undef pReturn - ;; - alloc r17=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; -skip_rbs_switch: - mov ar.unat=r25 // M2 -(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22 -(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise - ;; -(pUStk) mov ar.bspstore=r23 // M2 -(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp -(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise - ;; - mov cr.ipsr=r29 // M2 - mov ar.pfs=r26 // I0 -(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise - -(p9) mov cr.ifs=r30 // M2 - mov b0=r21 // I0 -(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise - - mov ar.fpsr=r20 // M2 - mov cr.iip=r28 // M2 - nop 0 - ;; -(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode - nop 0 -(pLvSys)mov r2=r0 - - mov ar.rsc=r27 // M2 - mov pr=r31,-1 // I0 - rfi // B - - /* - * On entry: - * r20 = ¤t->thread_info->pre_count (if CONFIG_PREEMPT) - * r31 = current->thread_info->flags - * On exit: - * p6 = TRUE if work-pending-check needs to be redone - */ -.work_pending_syscall: - add r2=-8,r2 - add r3=-8,r3 - ;; - st8 [r2]=r8 - st8 [r3]=r10 -.work_pending: - tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? -(p6) br.cond.sptk.few .notify -#ifdef CONFIG_PREEMPT -(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1 - ;; -(pKStk) st4 [r20]=r21 - ssm psr.i // enable interrupts -#endif - br.call.spnt.many rp=schedule -.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 - rsm psr.i // disable interrupts - ;; -#ifdef CONFIG_PREEMPT -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; -(pKStk) st4 [r20]=r0 // preempt_count() <- 0 -#endif -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // re-check - -.notify: -(pUStk) br.call.spnt.many rp=notify_resume_user -.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // don't re-check - -.work_pending_syscall_end: - adds r2=PT(R8)+16,r12 - adds r3=PT(R10)+16,r12 - ;; - ld8 r8=[r2] - ld8 r10=[r3] - br.cond.sptk.many .work_processed_syscall // re-check - -END(__ia64_leave_kernel) - -ENTRY(handle_syscall_error) - /* - * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could - * lead us to mistake a negative return value as a failed syscall. Those syscall - * must deposit a non-zero value in pt_regs.r8 to indicate an error. If - * pt_regs.r8 is zero, we assume that the call completed successfully. - */ - PT_REGS_UNWIND_INFO(0) - ld8 r3=[r2] // load pt_regs.r8 - ;; - cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? - ;; -(p7) mov r10=-1 -(p7) sub r8=0,r8 // negate return value to get errno - br.cond.sptk ia64_leave_syscall -END(handle_syscall_error) - - /* - * Invoke schedule_tail(task) while preserving in0-in7, which may be needed - * in case a system call gets restarted. - */ -GLOBAL_ENTRY(ia64_invoke_schedule_tail) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,1,0 - mov loc0=rp - mov out0=r8 // Address of previous task - ;; - br.call.sptk.many rp=schedule_tail -.ret11: mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(ia64_invoke_schedule_tail) - - /* - * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to - * be set up by the caller. We declare 8 input registers so the system call - * args get preserved, in case we need to restart a system call. - */ -GLOBAL_ENTRY(notify_resume_user) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! - mov r9=ar.unat - mov loc0=rp // save return address - mov out0=0 // there is no "oldset" - adds out1=8,sp // out1=&sigscratch->ar_pfs -(pSys) mov out2=1 // out2==1 => we're in a syscall - ;; -(pNonSys) mov out2=0 // out2==0 => not a syscall - .fframe 16 - .spillsp ar.unat, 16 - st8 [sp]=r9,-16 // allocate space for ar.unat and save it - st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch - .body - br.call.sptk.many rp=do_notify_resume_user -.ret15: .restore sp - adds sp=16,sp // pop scratch stack space - ;; - ld8 r9=[sp] // load new unat from sigscratch->scratch_unat - mov rp=loc0 - ;; - mov ar.unat=r9 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(notify_resume_user) - -GLOBAL_ENTRY(sys_rt_sigsuspend) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! - mov r9=ar.unat - mov loc0=rp // save return address - mov out0=in0 // mask - mov out1=in1 // sigsetsize - adds out2=8,sp // out2=&sigscratch->ar_pfs - ;; - .fframe 16 - .spillsp ar.unat, 16 - st8 [sp]=r9,-16 // allocate space for ar.unat and save it - st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch - .body - br.call.sptk.many rp=ia64_rt_sigsuspend -.ret17: .restore sp - adds sp=16,sp // pop scratch stack space - ;; - ld8 r9=[sp] // load new unat from sw->caller_unat - mov rp=loc0 - ;; - mov ar.unat=r9 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(sys_rt_sigsuspend) - -ENTRY(sys_rt_sigreturn) - PT_REGS_UNWIND_INFO(0) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - alloc r2=ar.pfs,8,0,1,0 - .prologue - PT_REGS_SAVES(16) - adds sp=-16,sp - .body - cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... - ;; - /* - * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined - * syscall-entry path does not save them we save them here instead. Note: we - * don't need to save any other registers that are not saved by the stream-lined - * syscall path, because restore_sigcontext() restores them. - */ - adds r16=PT(F6)+32,sp - adds r17=PT(F7)+32,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - adds out0=16,sp // out0 = &sigscratch - br.call.sptk.many rp=ia64_rt_sigreturn -.ret19: .restore sp,0 - adds sp=16,sp - ;; - ld8 r9=[sp] // load new ar.unat - mov.sptk b7=r8,__ia64_leave_kernel - ;; - mov ar.unat=r9 - br.many b7 -END(sys_rt_sigreturn) - -GLOBAL_ENTRY(ia64_prepare_handle_unaligned) - .prologue - /* - * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 - */ - mov r16=r0 - DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt -.ret21: .body - DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel -END(ia64_prepare_handle_unaligned) - - // - // unw_init_running(void (*callback)(info, arg), void *arg) - // -# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) - -GLOBAL_ENTRY(unw_init_running) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc loc1=ar.pfs,2,3,3,0 - ;; - ld8 loc2=[in0],8 - mov loc0=rp - mov r16=loc1 - DO_SAVE_SWITCH_STACK - .body - - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE - SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) - adds sp=-EXTRA_FRAME_SIZE,sp - .body - ;; - adds out0=16,sp // &info - mov out1=r13 // current - adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack - br.call.sptk.many rp=unw_init_frame_info -1: adds out0=16,sp // &info - mov b6=loc2 - mov loc2=gp // save gp across indirect function call - ;; - ld8 gp=[in0] - mov out1=in1 // arg - br.call.sptk.many rp=b6 // invoke the callback function -1: mov gp=loc2 // restore gp - - // For now, we don't allow changing registers from within - // unw_init_running; if we ever want to allow that, we'd - // have to do a load_switch_stack here: - .restore sp - adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp - - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(unw_init_running) - - .rodata - .align 8 - .globl sys_call_table -sys_call_table: - data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S. - data8 sys_exit // 1025 - data8 sys_read - data8 sys_write - data8 sys_open - data8 sys_close - data8 sys_creat // 1030 - data8 sys_link - data8 sys_unlink - data8 ia64_execve - data8 sys_chdir - data8 sys_fchdir // 1035 - data8 sys_utimes - data8 sys_mknod - data8 sys_chmod - data8 sys_chown - data8 sys_lseek // 1040 - data8 sys_getpid - data8 sys_getppid - data8 sys_mount - data8 sys_umount - data8 sys_setuid // 1045 - data8 sys_getuid - data8 sys_geteuid - data8 sys_ptrace - data8 sys_access - data8 sys_sync // 1050 - data8 sys_fsync - data8 sys_fdatasync - data8 sys_kill - data8 sys_rename - data8 sys_mkdir // 1055 - data8 sys_rmdir - data8 sys_dup - data8 sys_pipe - data8 sys_times - data8 ia64_brk // 1060 - data8 sys_setgid - data8 sys_getgid - data8 sys_getegid - data8 sys_acct - data8 sys_ioctl // 1065 - data8 sys_fcntl - data8 sys_umask - data8 sys_chroot - data8 sys_ustat - data8 sys_dup2 // 1070 - data8 sys_setreuid - data8 sys_setregid - data8 sys_getresuid - data8 sys_setresuid - data8 sys_getresgid // 1075 - data8 sys_setresgid - data8 sys_getgroups - data8 sys_setgroups - data8 sys_getpgid - data8 sys_setpgid // 1080 - data8 sys_setsid - data8 sys_getsid - data8 sys_sethostname - data8 sys_setrlimit - data8 sys_getrlimit // 1085 - data8 sys_getrusage - data8 sys_gettimeofday - data8 sys_settimeofday - data8 sys_select - data8 sys_poll // 1090 - data8 sys_symlink - data8 sys_readlink - data8 sys_uselib - data8 sys_swapon - data8 sys_swapoff // 1095 - data8 sys_reboot - data8 sys_truncate - data8 sys_ftruncate - data8 sys_fchmod - data8 sys_fchown // 1100 - data8 ia64_getpriority - data8 sys_setpriority - data8 sys_statfs - data8 sys_fstatfs - data8 sys_gettid // 1105 - data8 sys_semget - data8 sys_semop - data8 sys_semctl - data8 sys_msgget - data8 sys_msgsnd // 1110 - data8 sys_msgrcv - data8 sys_msgctl - data8 sys_shmget - data8 sys_shmat - data8 sys_shmdt // 1115 - data8 sys_shmctl - data8 sys_syslog - data8 sys_setitimer - data8 sys_getitimer - data8 sys_ni_syscall // 1120 /* was: ia64_oldstat */ - data8 sys_ni_syscall /* was: ia64_oldlstat */ - data8 sys_ni_syscall /* was: ia64_oldfstat */ - data8 sys_vhangup - data8 sys_lchown - data8 sys_remap_file_pages // 1125 - data8 sys_wait4 - data8 sys_sysinfo - data8 sys_clone - data8 sys_setdomainname - data8 sys_newuname // 1130 - data8 sys_adjtimex - data8 sys_ni_syscall /* was: ia64_create_module */ - data8 sys_init_module - data8 sys_delete_module - data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */ - data8 sys_ni_syscall /* was: sys_query_module */ - data8 sys_quotactl - data8 sys_bdflush - data8 sys_sysfs - data8 sys_personality // 1140 - data8 sys_ni_syscall // sys_afs_syscall - data8 sys_setfsuid - data8 sys_setfsgid - data8 sys_getdents - data8 sys_flock // 1145 - data8 sys_readv - data8 sys_writev - data8 sys_pread64 - data8 sys_pwrite64 - data8 sys_sysctl // 1150 - data8 sys_mmap - data8 sys_munmap - data8 sys_mlock - data8 sys_mlockall - data8 sys_mprotect // 1155 - data8 ia64_mremap - data8 sys_msync - data8 sys_munlock - data8 sys_munlockall - data8 sys_sched_getparam // 1160 - data8 sys_sched_setparam - data8 sys_sched_getscheduler - data8 sys_sched_setscheduler - data8 sys_sched_yield - data8 sys_sched_get_priority_max // 1165 - data8 sys_sched_get_priority_min - data8 sys_sched_rr_get_interval - data8 sys_nanosleep - data8 sys_nfsservctl - data8 sys_prctl // 1170 - data8 sys_getpagesize - data8 sys_mmap2 - data8 sys_pciconfig_read - data8 sys_pciconfig_write - data8 sys_perfmonctl // 1175 - data8 sys_sigaltstack - data8 sys_rt_sigaction - data8 sys_rt_sigpending - data8 sys_rt_sigprocmask - data8 sys_rt_sigqueueinfo // 1180 - data8 sys_rt_sigreturn - data8 sys_rt_sigsuspend - data8 sys_rt_sigtimedwait - data8 sys_getcwd - data8 sys_capget // 1185 - data8 sys_capset - data8 sys_sendfile64 - data8 sys_ni_syscall // sys_getpmsg (STREAMS) - data8 sys_ni_syscall // sys_putpmsg (STREAMS) - data8 sys_socket // 1190 - data8 sys_bind - data8 sys_connect - data8 sys_listen - data8 sys_accept - data8 sys_getsockname // 1195 - data8 sys_getpeername - data8 sys_socketpair - data8 sys_send - data8 sys_sendto - data8 sys_recv // 1200 - data8 sys_recvfrom - data8 sys_shutdown - data8 sys_setsockopt - data8 sys_getsockopt - data8 sys_sendmsg // 1205 - data8 sys_recvmsg - data8 sys_pivot_root - data8 sys_mincore - data8 sys_madvise - data8 sys_newstat // 1210 - data8 sys_newlstat - data8 sys_newfstat - data8 sys_clone2 - data8 sys_getdents64 - data8 sys_getunwind // 1215 - data8 sys_readahead - data8 sys_setxattr - data8 sys_lsetxattr - data8 sys_fsetxattr - data8 sys_getxattr // 1220 - data8 sys_lgetxattr - data8 sys_fgetxattr - data8 sys_listxattr - data8 sys_llistxattr - data8 sys_flistxattr // 1225 - data8 sys_removexattr - data8 sys_lremovexattr - data8 sys_fremovexattr - data8 sys_tkill - data8 sys_futex // 1230 - data8 sys_sched_setaffinity - data8 sys_sched_getaffinity - data8 sys_set_tid_address - data8 sys_fadvise64_64 - data8 sys_tgkill // 1235 - data8 sys_exit_group - data8 sys_lookup_dcookie - data8 sys_io_setup - data8 sys_io_destroy - data8 sys_io_getevents // 1240 - data8 sys_io_submit - data8 sys_io_cancel - data8 sys_epoll_create - data8 sys_epoll_ctl - data8 sys_epoll_wait // 1245 - data8 sys_restart_syscall - data8 sys_semtimedop - data8 sys_timer_create - data8 sys_timer_settime - data8 sys_timer_gettime // 1250 - data8 sys_timer_getoverrun - data8 sys_timer_delete - data8 sys_clock_settime - data8 sys_clock_gettime - data8 sys_clock_getres // 1255 - data8 sys_clock_nanosleep - data8 sys_fstatfs64 - data8 sys_statfs64 - data8 sys_mbind - data8 sys_get_mempolicy // 1260 - data8 sys_set_mempolicy - data8 sys_mq_open - data8 sys_mq_unlink - data8 sys_mq_timedsend - data8 sys_mq_timedreceive // 1265 - data8 sys_mq_notify - data8 sys_mq_getsetattr - data8 sys_ni_syscall // reserved for kexec_load - data8 sys_ni_syscall // reserved for vserver - data8 sys_waitid // 1270 - data8 sys_add_key - data8 sys_request_key - data8 sys_keyctl - data8 sys_ioprio_set - data8 sys_ioprio_get // 1275 - data8 sys_move_pages - data8 sys_inotify_init - data8 sys_inotify_add_watch - data8 sys_inotify_rm_watch - data8 sys_migrate_pages // 1280 - data8 sys_openat - data8 sys_mkdirat - data8 sys_mknodat - data8 sys_fchownat - data8 sys_futimesat // 1285 - data8 sys_newfstatat - data8 sys_unlinkat - data8 sys_renameat - data8 sys_linkat - data8 sys_symlinkat // 1290 - data8 sys_readlinkat - data8 sys_fchmodat - data8 sys_faccessat - data8 sys_ni_syscall // reserved for pselect - data8 sys_ni_syscall // 1295 reserved for ppoll - data8 sys_unshare - data8 sys_splice - data8 sys_ni_syscall // reserved for set_robust_list - data8 sys_ni_syscall // reserved for get_robust_list - data8 sys_sync_file_range // 1300 - data8 sys_tee - data8 sys_vmsplice - - .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S b/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S deleted file mode 100644 index 98b5d15855..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S +++ /dev/null @@ -1,925 +0,0 @@ -/* - * This file contains the light-weight system call handlers (fsyscall-handlers). - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). - * 18-Feb-03 louisk Implement fsys_gettimeofday(). - * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, - * probably broke it along the way... ;-) - * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make - * it capable of using memory based clocks without falling back to C code. - */ - -#include <asm/asmmacro.h> -#include <asm/errno.h> -#include <asm/asm-offsets.h> -#include <asm/percpu.h> -#include <asm/thread_info.h> -#include <asm/sal.h> -#include <asm/signal.h> -#include <asm/system.h> -#include <asm/unistd.h> - -#include "entry.h" - -/* - * See Documentation/ia64/fsys.txt for details on fsyscalls. - * - * On entry to an fsyscall handler: - * r10 = 0 (i.e., defaults to "successful syscall return") - * r11 = saved ar.pfs (a user-level value) - * r15 = system call number - * r16 = "current" task pointer (in normal kernel-mode, this is in r13) - * r32-r39 = system call arguments - * b6 = return address (a user-level value) - * ar.pfs = previous frame-state (a user-level value) - * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) - * all other registers may contain values passed in from user-mode - * - * On return from an fsyscall handler: - * r11 = saved ar.pfs (as passed into the fsyscall handler) - * r15 = system call number (as passed into the fsyscall handler) - * r32-r39 = system call arguments (as passed into the fsyscall handler) - * b6 = return address (as passed into the fsyscall handler) - * ar.pfs = previous frame-state (as passed into the fsyscall handler) - */ - -ENTRY(fsys_ni_syscall) - .prologue - .altrp b6 - .body - mov r8=ENOSYS - mov r10=-1 - FSYS_RETURN -END(fsys_ni_syscall) - -ENTRY(fsys_getpid) - .prologue - .altrp b6 - .body - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - ld4 r9=[r9] - add r8=IA64_TASK_TGID_OFFSET,r16 - ;; - and r9=TIF_ALLWORK_MASK,r9 - ld4 r8=[r8] // r8 = current->tgid - ;; - cmp.ne p8,p0=0,r9 -(p8) br.spnt.many fsys_fallback_syscall - FSYS_RETURN -END(fsys_getpid) - -ENTRY(fsys_getppid) - .prologue - .altrp b6 - .body - add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 - ;; - ld8 r17=[r17] // r17 = current->group_leader - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - - ld4 r9=[r9] - add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = ¤t->group_leader->real_parent - ;; - and r9=TIF_ALLWORK_MASK,r9 - -1: ld8 r18=[r17] // r18 = current->group_leader->real_parent - ;; - cmp.ne p8,p0=0,r9 - add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = ¤t->group_leader->real_parent->tgid - ;; - - /* - * The .acq is needed to ensure that the read of tgid has returned its data before - * we re-check "real_parent". - */ - ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid -#ifdef CONFIG_SMP - /* - * Re-read current->group_leader->real_parent. - */ - ld8 r19=[r17] // r19 = current->group_leader->real_parent -(p8) br.spnt.many fsys_fallback_syscall - ;; - cmp.ne p6,p0=r18,r19 // did real_parent change? - mov r19=0 // i must not leak kernel bits... -(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check - ;; - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... -#else - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... - mov r19=0 // i must not leak kernel bits... -#endif - FSYS_RETURN -END(fsys_getppid) - -ENTRY(fsys_set_tid_address) - .prologue - .altrp b6 - .body - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - ld4 r9=[r9] - tnat.z p6,p7=r32 // check argument register for being NaT - ;; - and r9=TIF_ALLWORK_MASK,r9 - add r8=IA64_TASK_PID_OFFSET,r16 - add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 - ;; - ld4 r8=[r8] - cmp.ne p8,p0=0,r9 - mov r17=-1 - ;; -(p6) st8 [r18]=r32 -(p7) st8 [r18]=r17 -(p8) br.spnt.many fsys_fallback_syscall - ;; - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... - FSYS_RETURN -END(fsys_set_tid_address) - -/* - * Ensure that the time interpolator structure is compatible with the asm code - */ -#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \ - || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4 -#error fsys_gettimeofday incompatible with changes to struct time_interpolator -#endif -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_DIVIDE_BY_1000 0x4000 -#define CLOCK_ADD_MONOTONIC 0x8000 - -ENTRY(fsys_gettimeofday) - .prologue - .altrp b6 - .body - mov r31 = r32 - tnat.nz p6,p0 = r33 // guard against NaT argument -(p6) br.cond.spnt.few .fail_einval - mov r30 = CLOCK_DIVIDE_BY_1000 - ;; -.gettime: - // Register map - // Incoming r31 = pointer to address where to place result - // r30 = flags determining how time is processed - // r2,r3 = temp r4-r7 preserved - // r8 = result nanoseconds - // r9 = result seconds - // r10 = temporary storage for clock difference - // r11 = preserved: saved ar.pfs - // r12 = preserved: memory stack - // r13 = preserved: thread pointer - // r14 = address of mask / mask - // r15 = preserved: system call number - // r16 = preserved: current task pointer - // r17 = wall to monotonic use - // r18 = time_interpolator->offset - // r19 = address of wall_to_monotonic - // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address - // r21 = shift factor - // r22 = address of time interpolator->last_counter - // r23 = address of time_interpolator->last_cycle - // r24 = adress of time_interpolator->offset - // r25 = last_cycle value - // r26 = last_counter value - // r27 = pointer to xtime - // r28 = sequence number at the beginning of critcal section - // r29 = address of seqlock - // r30 = time processing flags / memory address - // r31 = pointer to result - // Predicates - // p6,p7 short term use - // p8 = timesource ar.itc - // p9 = timesource mmio64 - // p10 = timesource mmio32 - // p11 = timesource not to be handled by asm code - // p12 = memory time source ( = p9 | p10) - // p13 = do cmpxchg with time_interpolator_last_cycle - // p14 = Divide by 1000 - // p15 = Add monotonic - // - // Note that instructions are optimized for McKinley. McKinley can process two - // bundles simultaneously and therefore we continuously try to feed the CPU - // two bundles and then a stop. - tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure - mov pr = r30,0xc000 // Set predicates according to function - add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 - movl r20 = time_interpolator - ;; - ld8 r20 = [r20] // get pointer to time_interpolator structure - movl r29 = xtime_lock - ld4 r2 = [r2] // process work pending flags - movl r27 = xtime - ;; // only one bundle here - ld8 r21 = [r20] // first quad with control information - and r2 = TIF_ALLWORK_MASK,r2 -(p6) br.cond.spnt.few .fail_einval // deferred branch - ;; - add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20 - extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc - extr r8 = r21,0,16 // time_interpolator->source - cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled -(p6) br.cond.spnt.many fsys_fallback_syscall - ;; - cmp.eq p8,p12 = 0,r8 // Check for cpu timer - cmp.eq p9,p0 = 1,r8 // MMIO64 ? - extr r2 = r21,24,8 // time_interpolator->jitter - cmp.eq p10,p0 = 2,r8 // MMIO32 ? - cmp.ltu p11,p0 = 2,r8 // function or other clock -(p11) br.cond.spnt.many fsys_fallback_syscall - ;; - setf.sig f7 = r3 // Setup for scaling of counter -(p15) movl r19 = wall_to_monotonic -(p12) ld8 r30 = [r10] - cmp.ne p13,p0 = r2,r0 // need jitter compensation? - extr r21 = r21,16,8 // shift factor - ;; -.time_redo: - .pred.rel.mutex p8,p9,p10 - ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes -(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! - add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20 -(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues.. -(p10) ld4 r2 = [r30] // readw(ti->address) -(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20 - ;; // could be removed by moving the last add upward - ld8 r26 = [r22] // time_interpolator->last_counter -(p13) ld8 r25 = [r23] // time interpolator->last_cycle - add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20 -(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET - ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET - add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20 - ;; - ld8 r18 = [r24] // time_interpolator->offset - ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec -(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) - ;; - ld8 r14 = [r14] // time_interpolator->mask -(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared - sub r10 = r2,r26 // current_counter - last_counter - ;; -(p6) sub r10 = r25,r26 // time we got was less than last_cycle -(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg - ;; - and r10 = r10,r14 // Apply mask - ;; - setf.sig f8 = r10 - nop.i 123 - ;; -(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv -EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time - xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) -(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs - ;; -(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET -(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo - // simulate tbit.nz.or p7,p0 = r28,0 - and r28 = ~1,r28 // Make sequence even to force retry if odd - getf.sig r2 = f8 - mf - add r8 = r8,r18 // Add time interpolator offset - ;; - ld4 r10 = [r29] // xtime_lock.sequence -(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs - shr.u r2 = r2,r21 - ;; // overloaded 3 bundles! - // End critical section. - add r8 = r8,r2 // Add xtime.nsecs - cmp4.ne.or p7,p0 = r28,r10 -(p7) br.cond.dpnt.few .time_redo // sequence number changed ? - // Now r8=tv->tv_nsec and r9=tv->tv_sec - mov r10 = r0 - movl r2 = 1000000000 - add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 -(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack - ;; -.time_normalize: - mov r21 = r8 - cmp.ge p6,p0 = r8,r2 -(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time - ;; -(p14) setf.sig f8 = r20 -(p6) sub r8 = r8,r2 -(p6) add r9 = 1,r9 // two nops before the branch. -(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod -(p6) br.cond.dpnt.few .time_normalize - ;; - // Divided by 8 though shift. Now divide by 125 - // The compiler was able to do that with a multiply - // and a shift and we do the same -EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles -(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it... - ;; - mov r8 = r0 -(p14) getf.sig r2 = f8 - ;; -(p14) shr.u r21 = r2, 4 - ;; -EX(.fail_efault, st8 [r31] = r9) -EX(.fail_efault, st8 [r23] = r21) - FSYS_RETURN -.fail_einval: - mov r8 = EINVAL - mov r10 = -1 - FSYS_RETURN -.fail_efault: - mov r8 = EFAULT - mov r10 = -1 - FSYS_RETURN -END(fsys_gettimeofday) - -ENTRY(fsys_clock_gettime) - .prologue - .altrp b6 - .body - cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 - // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC -(p6) br.spnt.few fsys_fallback_syscall - mov r31 = r33 - shl r30 = r32,15 - br.many .gettime -END(fsys_clock_gettime) - -/* - * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize). - */ -#if _NSIG_WORDS != 1 -# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1. -#endif -ENTRY(fsys_rt_sigprocmask) - .prologue - .altrp b6 - .body - - add r2=IA64_TASK_BLOCKED_OFFSET,r16 - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - cmp4.ltu p6,p0=SIG_SETMASK,r32 - - cmp.ne p15,p0=r0,r34 // oset != NULL? - tnat.nz p8,p0=r34 - add r31=IA64_TASK_SIGHAND_OFFSET,r16 - ;; - ld8 r3=[r2] // read/prefetch current->blocked - ld4 r9=[r9] - tnat.nz.or p6,p0=r35 - - cmp.ne.or p6,p0=_NSIG_WORDS*8,r35 - tnat.nz.or p6,p0=r32 -(p6) br.spnt.few .fail_einval // fail with EINVAL - ;; -#ifdef CONFIG_SMP - ld8 r31=[r31] // r31 <- current->sighand -#endif - and r9=TIF_ALLWORK_MASK,r9 - tnat.nz.or p8,p0=r33 - ;; - cmp.ne p7,p0=0,r9 - cmp.eq p6,p0=r0,r33 // set == NULL? - add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock -(p8) br.spnt.few .fail_efault // fail with EFAULT -(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work... -(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask - - /* Argh, we actually have to do some work and _update_ the signal mask: */ - -EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set -EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set - mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1)) - ;; - - rsm psr.i // mask interrupt delivery - mov ar.ccv=0 - andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP - -#ifdef CONFIG_SMP - mov r17=1 - ;; - cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock - mov r8=EINVAL // default to EINVAL - ;; - ld8 r3=[r2] // re-read current->blocked now that we hold the lock - cmp4.ne p6,p0=r18,r0 -(p6) br.cond.spnt.many .lock_contention - ;; -#else - ld8 r3=[r2] // re-read current->blocked now that we hold the lock - mov r8=EINVAL // default to EINVAL -#endif - add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16 - add r19=IA64_TASK_SIGNAL_OFFSET,r16 - cmp4.eq p6,p0=SIG_BLOCK,r32 - ;; - ld8 r19=[r19] // r19 <- current->signal - cmp4.eq p7,p0=SIG_UNBLOCK,r32 - cmp4.eq p8,p0=SIG_SETMASK,r32 - ;; - ld8 r18=[r18] // r18 <- current->pending.signal - .pred.rel.mutex p6,p7,p8 -(p6) or r14=r3,r14 // SIG_BLOCK -(p7) andcm r14=r3,r14 // SIG_UNBLOCK - -(p8) mov r14=r14 // SIG_SETMASK -(p6) mov r8=0 // clear error code - // recalc_sigpending() - add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19 - - add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19 - ;; - ld4 r17=[r17] // r17 <- current->signal->group_stop_count -(p7) mov r8=0 // clear error code - - ld8 r19=[r19] // r19 <- current->signal->shared_pending - ;; - cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)? -(p8) mov r8=0 // clear error code - - or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending - ;; - // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked: - andcm r18=r18,r14 - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - -(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending - mov r19=0 // i must not leak kernel bits... -(p6) br.cond.dpnt.many .sig_pending - ;; - -1: ld4 r17=[r9] // r17 <- current->thread_info->flags - ;; - mov ar.ccv=r17 - and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING) - ;; - - st8 [r2]=r14 // update current->blocked with new mask - cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18 - ;; - cmp.ne p6,p0=r17,r8 // update failed? -(p6) br.cond.spnt.few 1b // yes -> retry - -#ifdef CONFIG_SMP - st4.rel [r31]=r0 // release the lock -#endif - ssm psr.i - ;; - - srlz.d // ensure psr.i is set again - mov r18=0 // i must not leak kernel bits... - -.store_mask: -EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset -EX(.fail_efault, (p15) st8 [r34]=r3) - mov r2=0 // i must not leak kernel bits... - mov r3=0 // i must not leak kernel bits... - mov r8=0 // return 0 - mov r9=0 // i must not leak kernel bits... - mov r14=0 // i must not leak kernel bits... - mov r17=0 // i must not leak kernel bits... - mov r31=0 // i must not leak kernel bits... - FSYS_RETURN - -.sig_pending: -#ifdef CONFIG_SMP - st4.rel [r31]=r0 // release the lock -#endif - ssm psr.i - ;; - srlz.d - br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall - -#ifdef CONFIG_SMP -.lock_contention: - /* Rather than spinning here, fall back on doing a heavy-weight syscall. */ - ssm psr.i - ;; - srlz.d - br.sptk.many fsys_fallback_syscall -#endif -END(fsys_rt_sigprocmask) - -ENTRY(fsys_fallback_syscall) - .prologue - .altrp b6 - .body - /* - * We only get here from light-weight syscall handlers. Thus, we already - * know that r15 contains a valid syscall number. No need to re-check. - */ - adds r17=-1024,r15 - movl r14=sys_call_table - ;; -#ifdef CONFIG_XEN - movl r18=running_on_xen;; - ld4 r18=[r18];; - // p14 = running_on_xen - // p15 = !running_on_xen - cmp.ne p14,p15=r0,r18 - ;; -(p14) movl r18=XSI_PSR_I_ADDR;; -(p14) ld8 r18=[r18] -(p14) mov r29=1;; -(p14) st1 [r18]=r29 -(p15) rsm psr.i -#else - rsm psr.i -#endif - shladd r18=r17,3,r14 - ;; - ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point -#ifdef CONFIG_XEN -(p14) mov r27=r8 -(p14) XEN_HYPER_GET_PSR - ;; -(p14) mov r29=r8 -(p14) mov r8=r27 -(p15) mov r29=psr // read psr (12 cyc load latency) -#else - mov r29=psr // read psr (12 cyc load latency) -#endif - mov r27=ar.rsc - mov r21=ar.fpsr - mov r26=ar.pfs -END(fsys_fallback_syscall) - /* FALL THROUGH */ -GLOBAL_ENTRY(fsys_bubble_down) - .prologue - .altrp b6 - .body - /* - * We get here for syscalls that don't have a lightweight - * handler. For those, we need to bubble down into the kernel - * and that requires setting up a minimal pt_regs structure, - * and initializing the CPU state more or less as if an - * interruption had occurred. To make syscall-restarts work, - * we setup pt_regs such that cr_iip points to the second - * instruction in syscall_via_break. Decrementing the IP - * hence will restart the syscall via break and not - * decrementing IP will return us to the caller, as usual. - * Note that we preserve the value of psr.pp rather than - * initializing it from dcr.pp. This makes it possible to - * distinguish fsyscall execution from other privileged - * execution. - * - * On entry: - * - normal fsyscall handler register usage, except - * that we also have: - * - r18: address of syscall entry point - * - r21: ar.fpsr - * - r26: ar.pfs - * - r27: ar.rsc - * - r29: psr - * - * We used to clear some PSR bits here but that requires slow - * serialization. Fortuntely, that isn't really necessary. - * The rationale is as follows: we used to clear bits - * ~PSR_PRESERVED_BITS in PSR.L. Since - * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we - * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. - * However, - * - * PSR.BE : already is turned off in __kernel_syscall_via_epc() - * PSR.AC : don't care (kernel normally turns PSR.AC on) - * PSR.I : already turned off by the time fsys_bubble_down gets - * invoked - * PSR.DFL: always 0 (kernel never turns it on) - * PSR.DFH: don't care --- kernel never touches f32-f127 on its own - * initiative - * PSR.DI : always 0 (kernel never turns it on) - * PSR.SI : always 0 (kernel never turns it on) - * PSR.DB : don't care --- kernel never enables kernel-level - * breakpoints - * PSR.TB : must be 0 already; if it wasn't zero on entry to - * __kernel_syscall_via_epc, the branch to fsys_bubble_down - * will trigger a taken branch; the taken-trap-handler then - * converts the syscall into a break-based system-call. - */ - /* - * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. - * The rest we have to synthesize. - */ -# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ - | (0x1 << IA64_PSR_RI_BIT) \ - | IA64_PSR_BN | IA64_PSR_I) - - invala // M0|1 - movl r14=ia64_ret_from_syscall // X - - nop.m 0 - movl r28=__kernel_syscall_via_break // X create cr.iip - ;; - - mov r2=r16 // A get task addr to addl-addressable register - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A - mov r31=pr // I0 save pr (2 cyc) - ;; - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag - addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS - add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A - ;; - ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store - nop.i 0 - ;; - mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 - nop.m 0 - nop.i 0 - ;; - mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore - mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) - nop.i 0 - ;; - mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS - movl r8=PSR_ONE_BITS // X - ;; - mov r25=ar.unat // M2 (5 cyc) save ar.unat - mov r19=b6 // I0 save b6 (2 cyc) - mov r20=r1 // A save caller's gp in r20 - ;; - or r29=r8,r29 // A construct cr.ipsr value to save - mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) - addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack - - mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) - cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 - br.call.sptk.many b7=ia64_syscall_setup // B - ;; - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - mov rp=r14 // I0 set the real return addr - and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A - ;; -#ifdef CONFIG_XEN - movl r14=running_on_xen;; - ld4 r14=[r14];; - // p14 = running_on_xen - // p15 = !running_on_xen - cmp.ne p14,p15=r0,r14 - ;; -(p14) movl r28=XSI_PSR_I_ADDR;; -(p14) ld8 r28=[r28];; -(p14) adds r28=-1,r28;; // event_pending -(p14) ld1 r14=[r28];; -(p14) cmp.ne.unc p13,p14=r14,r0;; -(p13) XEN_HYPER_SSM_I -(p14) adds r28=1,r28;; // event_mask -(p14) st1 [r28]=r0;; -(p15) ssm psr.i -#else - ssm psr.i // M2 we're on kernel stacks now, reenable irqs -#endif - cmp.eq p8,p0=r3,r0 // A -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - - nop.m 0 -(p8) br.call.sptk.many b6=b6 // B (ignore return address) - br.cond.spnt ia64_trace_syscall // B -END(fsys_bubble_down) - - .rodata - .align 8 - .globl fsyscall_table - - data8 fsys_bubble_down -fsyscall_table: - data8 fsys_ni_syscall - data8 0 // exit // 1025 - data8 0 // read - data8 0 // write - data8 0 // open - data8 0 // close - data8 0 // creat // 1030 - data8 0 // link - data8 0 // unlink - data8 0 // execve - data8 0 // chdir - data8 0 // fchdir // 1035 - data8 0 // utimes - data8 0 // mknod - data8 0 // chmod - data8 0 // chown - data8 0 // lseek // 1040 - data8 fsys_getpid // getpid - data8 fsys_getppid // getppid - data8 0 // mount - data8 0 // umount - data8 0 // setuid // 1045 - data8 0 // getuid - data8 0 // geteuid - data8 0 // ptrace - data8 0 // access - data8 0 // sync // 1050 - data8 0 // fsync - data8 0 // fdatasync - data8 0 // kill - data8 0 // rename - data8 0 // mkdir // 1055 - data8 0 // rmdir - data8 0 // dup - data8 0 // pipe - data8 0 // times - data8 0 // brk // 1060 - data8 0 // setgid - data8 0 // getgid - data8 0 // getegid - data8 0 // acct - data8 0 // ioctl // 1065 - data8 0 // fcntl - data8 0 // umask - data8 0 // chroot - data8 0 // ustat - data8 0 // dup2 // 1070 - data8 0 // setreuid - data8 0 // setregid - data8 0 // getresuid - data8 0 // setresuid - data8 0 // getresgid // 1075 - data8 0 // setresgid - data8 0 // getgroups - data8 0 // setgroups - data8 0 // getpgid - data8 0 // setpgid // 1080 - data8 0 // setsid - data8 0 // getsid - data8 0 // sethostname - data8 0 // setrlimit - data8 0 // getrlimit // 1085 - data8 0 // getrusage - data8 fsys_gettimeofday // gettimeofday - data8 0 // settimeofday - data8 0 // select - data8 0 // poll // 1090 - data8 0 // symlink - data8 0 // readlink - data8 0 // uselib - data8 0 // swapon - data8 0 // swapoff // 1095 - data8 0 // reboot - data8 0 // truncate - data8 0 // ftruncate - data8 0 // fchmod - data8 0 // fchown // 1100 - data8 0 // getpriority - data8 0 // setpriority - data8 0 // statfs - data8 0 // fstatfs - data8 0 // gettid // 1105 - data8 0 // semget - data8 0 // semop - data8 0 // semctl - data8 0 // msgget - data8 0 // msgsnd // 1110 - data8 0 // msgrcv - data8 0 // msgctl - data8 0 // shmget - data8 0 // shmat - data8 0 // shmdt // 1115 - data8 0 // shmctl - data8 0 // syslog - data8 0 // setitimer - data8 0 // getitimer - data8 0 // 1120 - data8 0 - data8 0 - data8 0 // vhangup - data8 0 // lchown - data8 0 // remap_file_pages // 1125 - data8 0 // wait4 - data8 0 // sysinfo - data8 0 // clone - data8 0 // setdomainname - data8 0 // newuname // 1130 - data8 0 // adjtimex - data8 0 - data8 0 // init_module - data8 0 // delete_module - data8 0 // 1135 - data8 0 - data8 0 // quotactl - data8 0 // bdflush - data8 0 // sysfs - data8 0 // personality // 1140 - data8 0 // afs_syscall - data8 0 // setfsuid - data8 0 // setfsgid - data8 0 // getdents - data8 0 // flock // 1145 - data8 0 // readv - data8 0 // writev - data8 0 // pread64 - data8 0 // pwrite64 - data8 0 // sysctl // 1150 - data8 0 // mmap - data8 0 // munmap - data8 0 // mlock - data8 0 // mlockall - data8 0 // mprotect // 1155 - data8 0 // mremap - data8 0 // msync - data8 0 // munlock - data8 0 // munlockall - data8 0 // sched_getparam // 1160 - data8 0 // sched_setparam - data8 0 // sched_getscheduler - data8 0 // sched_setscheduler - data8 0 // sched_yield - data8 0 // sched_get_priority_max // 1165 - data8 0 // sched_get_priority_min - data8 0 // sched_rr_get_interval - data8 0 // nanosleep - data8 0 // nfsservctl - data8 0 // prctl // 1170 - data8 0 // getpagesize - data8 0 // mmap2 - data8 0 // pciconfig_read - data8 0 // pciconfig_write - data8 0 // perfmonctl // 1175 - data8 0 // sigaltstack - data8 0 // rt_sigaction - data8 0 // rt_sigpending - data8 fsys_rt_sigprocmask // rt_sigprocmask - data8 0 // rt_sigqueueinfo // 1180 - data8 0 // rt_sigreturn - data8 0 // rt_sigsuspend - data8 0 // rt_sigtimedwait - data8 0 // getcwd - data8 0 // capget // 1185 - data8 0 // capset - data8 0 // sendfile - data8 0 - data8 0 - data8 0 // socket // 1190 - data8 0 // bind - data8 0 // connect - data8 0 // listen - data8 0 // accept - data8 0 // getsockname // 1195 - data8 0 // getpeername - data8 0 // socketpair - data8 0 // send - data8 0 // sendto - data8 0 // recv // 1200 - data8 0 // recvfrom - data8 0 // shutdown - data8 0 // setsockopt - data8 0 // getsockopt - data8 0 // sendmsg // 1205 - data8 0 // recvmsg - data8 0 // pivot_root - data8 0 // mincore - data8 0 // madvise - data8 0 // newstat // 1210 - data8 0 // newlstat - data8 0 // newfstat - data8 0 // clone2 - data8 0 // getdents64 - data8 0 // getunwind // 1215 - data8 0 // readahead - data8 0 // setxattr - data8 0 // lsetxattr - data8 0 // fsetxattr - data8 0 // getxattr // 1220 - data8 0 // lgetxattr - data8 0 // fgetxattr - data8 0 // listxattr - data8 0 // llistxattr - data8 0 // flistxattr // 1225 - data8 0 // removexattr - data8 0 // lremovexattr - data8 0 // fremovexattr - data8 0 // tkill - data8 0 // futex // 1230 - data8 0 // sched_setaffinity - data8 0 // sched_getaffinity - data8 fsys_set_tid_address // set_tid_address - data8 0 // fadvise64_64 - data8 0 // tgkill // 1235 - data8 0 // exit_group - data8 0 // lookup_dcookie - data8 0 // io_setup - data8 0 // io_destroy - data8 0 // io_getevents // 1240 - data8 0 // io_submit - data8 0 // io_cancel - data8 0 // epoll_create - data8 0 // epoll_ctl - data8 0 // epoll_wait // 1245 - data8 0 // restart_syscall - data8 0 // semtimedop - data8 0 // timer_create - data8 0 // timer_settime - data8 0 // timer_gettime // 1250 - data8 0 // timer_getoverrun - data8 0 // timer_delete - data8 0 // clock_settime - data8 fsys_clock_gettime // clock_gettime - - // fill in zeros for the remaining entries - .zero: - .space fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S deleted file mode 100644 index e242e36b04..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S +++ /dev/null @@ -1,478 +0,0 @@ -/* - * This file contains the code that gets mapped at the upper end of each task's text - * region. For now, it contains the signal trampoline code only. - * - * Copyright (C) 1999-2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ - - -#include <asm/asmmacro.h> -#include <asm/errno.h> -#include <asm/asm-offsets.h> -#include <asm/sigcontext.h> -#include <asm/system.h> -#include <asm/unistd.h> -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -# include <asm/privop.h> -#endif - -/* - * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, - * complications with the linker (which likes to create PLT stubs for branches - * to targets outside the shared object) and to avoid multi-phase kernel builds, we - * simply create minimalistic "patch lists" in special ELF sections. - */ - .section ".data.patch.fsyscall_table", "a" - .previous -#define LOAD_FSYSCALL_TABLE(reg) \ -[1:] movl reg=0; \ - .xdata4 ".data.patch.fsyscall_table", 1b-. - - .section ".data.patch.brl_fsys_bubble_down", "a" - .previous -#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ -[1:](pr)brl.cond.sptk 0; \ - .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-. - -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT - // The page in which hyperprivop lives must be pinned by ITR. - // However vDSO area isn't pinned. So issuing hyperprivop - // from vDSO page causes trouble that Kevin pointed out. - // After clearing vpsr.ic, the vcpu is pre-empted and the itlb - // is flushed. Then vcpu get cpu again, tlb miss fault occures. - // However it results in nested dtlb fault because vpsr.ic is off. - // To avoid such a situation, we jump into the kernel text area - // which is pinned, and then issue hyperprivop and return back - // to vDSO page. - // This is Dan Magenheimer's idea. - - // Currently is_running_on_xen() is defined as running_on_xen. - // If is_running_on_xen() is a real function, we must update - // according to it. - .section ".data.patch.running_on_xen", "a" - .previous -#define LOAD_RUNNING_ON_XEN(reg) \ -[1:] movl reg=0; \ - .xdata4 ".data.patch.running_on_xen", 1b-. - - .section ".data.patch.brl_xen_ssm_i_0", "a" - .previous -#define BRL_COND_XEN_SSM_I_0(pr) \ -[1:](pr)brl.cond.sptk 0; \ - .xdata4 ".data.patch.brl_xen_ssm_i_0", 1b-. - - .section ".data.patch.brl_xen_ssm_i_1", "a" - .previous -#define BRL_COND_XEN_SSM_I_1(pr) \ -[1:](pr)brl.cond.sptk 0; \ - .xdata4 ".data.patch.brl_xen_ssm_i_1", 1b-. -#endif - -GLOBAL_ENTRY(__kernel_syscall_via_break) - .prologue - .altrp b6 - .body - /* - * Note: for (fast) syscall restart to work, the break instruction must be - * the first one in the bundle addressed by syscall_via_break. - */ -{ .mib - break 0x100000 - nop.i 0 - br.ret.sptk.many b6 -} -END(__kernel_syscall_via_break) - -/* - * On entry: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * b6 = return address - * On exit: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * all other "scratch" registers: undefined - * all "preserved" registers: same as on entry - */ - -GLOBAL_ENTRY(__kernel_syscall_via_epc) - .prologue - .altrp b6 - .body -{ - /* - * Note: the kernel cannot assume that the first two instructions in this - * bundle get executed. The remaining code must be safe even if - * they do not get executed. - */ - adds r17=-1024,r15 // A - mov r10=0 // A default to successful syscall execution - epc // B causes split-issue -} - ;; -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT - // r20 = 1 - // r22 = &vcpu->vcpu_info->evtchn_upcall_mask - // r23 = &vpsr.ic - // r24 = &vcpu->vcpu_info->evtchn_upcall_pending - // r25 = tmp - // r28 = &running_on_xen - // r30 = running_on_xen - // r31 = tmp - // p11 = tmp - // p12 = running_on_xen - // p13 = !running_on_xen - // p14 = tmp - // p15 = tmp -#define isXen p12 -#define isRaw p13 - LOAD_RUNNING_ON_XEN(r28) - movl r22=XSI_PSR_I_ADDR - ;; - ld8 r22=[r22] - ;; - movl r23=XSI_PSR_IC - adds r24=-1,r22 - mov r20=1 - ;; - ld4 r30=[r28] - ;; - cmp.ne isXen,isRaw=r0,r30 - ;; -(isRaw) rsm psr.be | psr.i -(isXen) st1 [r22]=r20 -(isXen) rum psr.be - ;; -#else - rsm psr.be | psr.i // M2 (5 cyc to srlz.d) -#endif - LOAD_FSYSCALL_TABLE(r14) // X - ;; - mov r16=IA64_KR(CURRENT) // M2 (12 cyc) - shladd r18=r17,3,r14 // A - mov r19=NR_syscalls-1 // A - ;; - lfetch [r18] // M0|1 -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -(isRaw) mov r29=psr -(isXen) XEN_HYPER_GET_PSR - ;; -(isXen) mov r29=r8 -#else - mov r29=psr // M2 (12 cyc) -#endif - // If r17 is a NaT, p6 will be zero - cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? - ;; - mov r21=ar.fpsr // M2 (12 cyc) - tnat.nz p10,p9=r15 // I0 - mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) - ;; - srlz.d // M0 (forces split-issue) ensure PSR.BE==0 -(p6) ld8 r18=[r18] // M0|1 - nop.i 0 - ;; - nop.m 0 -(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT - ;; - // p14 = running_on_xen && p8 - // p15 = !running_on_xen && p8 -(p8) cmp.ne.unc p14,p15=r0,r30 - ;; -(p15) ssm psr.i - BRL_COND_XEN_SSM_I_0(p14) - .global .vdso_ssm_i_0_ret -.vdso_ssm_i_0_ret: -#else - nop.i 0 - ;; -(p8) ssm psr.i -#endif -(p6) mov b7=r18 // I0 -(p8) br.dptk.many b7 // B - - mov r27=ar.rsc // M2 (12 cyc) -/* - * brl.cond doesn't work as intended because the linker would convert this branch - * into a branch to a PLT. Perhaps there will be a way to avoid this with some - * future version of the linker. In the meantime, we just use an indirect branch - * instead. - */ -#ifdef CONFIG_ITANIUM -(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry - ;; -(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down - ;; -(p6) mov b7=r14 -(p6) br.sptk.many b7 -#else - BRL_COND_FSYS_BUBBLE_DOWN(p6) -#endif -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -(isRaw) ssm psr.i - BRL_COND_XEN_SSM_I_1(isXen) - .global .vdso_ssm_i_1_ret -.vdso_ssm_i_1_ret: -#else - ssm psr.i -#endif - mov r10=-1 -(p10) mov r8=EINVAL -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT - dv_serialize_data // shut up gas warning. - // we know xen_hyper_ssm_i_0 or xen_hyper_ssm_i_1 - // doesn't change p9 and p10 -#endif -(p9) mov r8=ENOSYS - FSYS_RETURN -END(__kernel_syscall_via_epc) - -# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) -# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) -# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) -# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) -# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) - -# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET -# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET -# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET -# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET -# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET -# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET -# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET -# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET -# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET -# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET -# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET -# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET -# define base0 r2 -# define base1 r3 - /* - * When we get here, the memory stack looks like this: - * - * +===============================+ - * | | - * // struct sigframe // - * | | - * +-------------------------------+ <-- sp+16 - * | 16 byte of scratch | - * | space | - * +-------------------------------+ <-- sp - * - * The register stack looks _exactly_ the way it looked at the time the signal - * occurred. In other words, we're treading on a potential mine-field: each - * incoming general register may be a NaT value (including sp, in which case the - * process ends up dying with a SIGSEGV). - * - * The first thing need to do is a cover to get the registers onto the backing - * store. Once that is done, we invoke the signal handler which may modify some - * of the machine state. After returning from the signal handler, we return - * control to the previous context by executing a sigreturn system call. A signal - * handler may call the rt_sigreturn() function to directly return to a given - * sigcontext. However, the user-level sigreturn() needs to do much more than - * calling the rt_sigreturn() system call as it needs to unwind the stack to - * restore preserved registers that may have been saved on the signal handler's - * call stack. - */ - -#define SIGTRAMP_SAVES \ - .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ - .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ - .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ - .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ - .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ - .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ - .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ - .vframesp SP_OFF+SIGCONTEXT_OFF - -GLOBAL_ENTRY(__kernel_sigtramp) - // describe the state that is active when we get here: - .prologue - SIGTRAMP_SAVES - .body - - .label_state 1 - - adds base0=SIGHANDLER_OFF,sp - adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp - br.call.sptk.many rp=1f -1: - ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel - ld8 r15=[base1] // get address of new RBS base (or NULL) - cover // push args in interrupted frame onto backing store - ;; - cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel) - mov.m r9=ar.bsp // fetch ar.bsp - .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF -(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) -back_from_setup_rbs: - alloc r8=ar.pfs,0,0,3,0 - ld8 out0=[base0],16 // load arg0 (signum) - adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 - ;; - ld8 out1=[base1] // load arg1 (siginfop) - ld8 r10=[r17],8 // get signal handler entry point - ;; - ld8 out2=[base0] // load arg2 (sigcontextp) - ld8 gp=[r17] // get signal handler's global pointer - adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF - st8 [base0]=r9 // save sc_ar_bsp - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - stf.spill [base0]=f6,32 - stf.spill [base1]=f7,32 - ;; - stf.spill [base0]=f8,32 - stf.spill [base1]=f9,32 - mov b6=r10 - ;; - stf.spill [base0]=f10,32 - stf.spill [base1]=f11,32 - ;; - stf.spill [base0]=f12,32 - stf.spill [base1]=f13,32 - ;; - stf.spill [base0]=f14,32 - stf.spill [base1]=f15,32 - br.call.sptk.many rp=b6 // call the signal handler -.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r15=[base0] // fetch sc_ar_bsp - mov r14=ar.bsp - ;; - cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? -(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) - ;; -back_from_restore_rbs: - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - ldf.fill f6=[base0],32 - ldf.fill f7=[base1],32 - ;; - ldf.fill f8=[base0],32 - ldf.fill f9=[base1],32 - ;; - ldf.fill f10=[base0],32 - ldf.fill f11=[base1],32 - ;; - ldf.fill f12=[base0],32 - ldf.fill f13=[base1],32 - ;; - ldf.fill f14=[base0],32 - ldf.fill f15=[base1],32 - mov r15=__NR_rt_sigreturn - .restore sp // pop .prologue - break __BREAK_SYSCALL - - .prologue - SIGTRAMP_SAVES -setup_rbs: - mov ar.rsc=0 // put RSE into enforced lazy mode - ;; - .save ar.rnat, r19 - mov r19=ar.rnat // save RNaT before switching backing store area - adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp - - mov r18=ar.bspstore - mov ar.bspstore=r15 // switch over to new register backing store area - ;; - - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - st8 [r14]=r19 // save sc_ar_rnat - .body - mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 - adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp - ;; - invala - sub r15=r16,r15 - extr.u r20=r18,3,6 - ;; - mov ar.rsc=0xf // set RSE into eager mode, pl 3 - cmp.eq p8,p0=63,r20 - shl r15=r15,16 - ;; - st8 [r14]=r15 // save sc_loadrs -(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now - .restore sp // pop .prologue - br.cond.sptk back_from_setup_rbs - - .prologue - SIGTRAMP_SAVES - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - .body -restore_rbs: - // On input: - // r14 = bsp1 (bsp at the time of return from signal handler) - // r15 = bsp0 (bsp at the time the signal occurred) - // - // Here, we need to calculate bspstore0, the value that ar.bspstore needs - // to be set to, based on bsp0 and the size of the dirty partition on - // the alternate stack (sc_loadrs >> 16). This can be done with the - // following algorithm: - // - // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); - // - // This is what the code below does. - // - alloc r2=ar.pfs,0,0,0,0 // alloc null frame - adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp - adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r17=[r16] - ld8 r16=[r18] // get new rnat - extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) - ;; - mov ar.rsc=r17 // put RSE into enforced lazy mode - shr.u r17=r17,16 - ;; - sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) - shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) - ;; - loadrs // restore dirty partition - extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) - ;; - add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) - ;; - shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 - ;; - sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) - movl r17=0x8208208208208209 - ;; - add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) - setf.sig f7=r17 - cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? - ;; -(p7) adds r18=-62,r18 // delta -= 62 - ;; - setf.sig f6=r18 - ;; - xmpy.h f6=f6,f7 - ;; - getf.sig r17=f6 - ;; - add r17=r17,r18 - shr r18=r18,63 - ;; - shr r17=r17,5 - ;; - sub r17=r17,r18 // r17 = delta/63 - ;; - add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) - ;; - shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) - ;; - mov ar.bspstore=r15 // switch back to old register backing store area - ;; - mov ar.rnat=r16 // restore RNaT - mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) - // invala not necessary as that will happen when returning to user-mode - br.cond.sptk back_from_restore_rbs -END(__kernel_sigtramp) diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S deleted file mode 100644 index 58582ccdfe..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its - * virtual address, with only one read-only segment and one execute-only segment (both fit - * in one page). This script controls its layout. - */ - - -#include <asm/system.h> - -SECTIONS -{ - . = GATE_ADDR + SIZEOF_HEADERS; - - .hash : { *(.hash) } :readable - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - .dynamic : { *(.dynamic) } :readable :dynamic - - /* - * This linker script is used both with -r and with -shared. For the layouts to match, - * we need to skip more than enough space for the dynamic symbol table et al. If this - * amount is insufficient, ld -shared will barf. Just increase it here. - */ - . = GATE_ADDR + 0x500; - - .data.patch : { - __start_gate_mckinley_e9_patchlist = .; - *(.data.patch.mckinley_e9) - __end_gate_mckinley_e9_patchlist = .; - - __start_gate_vtop_patchlist = .; - *(.data.patch.vtop) - __end_gate_vtop_patchlist = .; - - __start_gate_fsyscall_patchlist = .; - *(.data.patch.fsyscall_table) - __end_gate_fsyscall_patchlist = .; - - __start_gate_brl_fsys_bubble_down_patchlist = .; - *(.data.patch.brl_fsys_bubble_down) - __end_gate_brl_fsys_bubble_down_patchlist = .; - -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT - __start_gate_running_on_xen_patchlist = .; - *(.data.patch.running_on_xen) - __end_gate_running_on_xen_patchlist = .; - - __start_gate_brl_xen_ssm_i_0_patchlist = .; - *(.data.patch.brl_xen_ssm_i_0) - __end_gate_brl_xen_ssm_i_0_patchlist = .; - - __start_gate_brl_xen_ssm_i_1_patchlist = .; - *(.data.patch.brl_xen_ssm_i_1) - __end_gate_brl_xen_ssm_i_1_patchlist = .; -#endif - } :readable - .IA_64.unwind_info : { *(.IA_64.unwind_info*) } - .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind -#ifdef HAVE_BUGGY_SEGREL - .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable -#else - . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); - .text : { *(.text) *(.text.*) } :epc -#endif - - /DISCARD/ : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - *(__ex_table) - *(__mca_table) - } -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ -#ifndef HAVE_BUGGY_SEGREL - epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ -#endif - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.5 { - global: - __kernel_syscall_via_break; - __kernel_syscall_via_epc; - __kernel_sigtramp; - - local: *; - }; -} - -/* The ELF entry point can be used to set the AT_SYSINFO value. */ -ENTRY(__kernel_syscall_via_epc) diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/head.S b/linux-2.6-xen-sparse/arch/ia64/kernel/head.S deleted file mode 100644 index dded6f24f1..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/head.S +++ /dev/null @@ -1,1229 +0,0 @@ -/* - * Here is where the ball gets rolling as far as the kernel is concerned. - * When control is transferred to _start, the bootload has already - * loaded us to the correct address. All that's left to do here is - * to set up the kernel's global pointer and jump to the kernel - * entry point. - * - * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * Stephane Eranian <eranian@hpl.hp.com> - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> - * Copyright (C) 1999 Intel Corp. - * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> - * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> - * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com> - * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. - * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com> - * Support for CPU Hotplug - */ - - -#include <asm/asmmacro.h> -#include <asm/fpu.h> -#include <asm/kregs.h> -#include <asm/mmu_context.h> -#include <asm/asm-offsets.h> -#include <asm/pal.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/system.h> -#include <asm/mca_asm.h> - -#ifdef CONFIG_HOTPLUG_CPU -#define SAL_PSR_BITS_TO_SET \ - (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) - -#define SAVE_FROM_REG(src, ptr, dest) \ - mov dest=src;; \ - st8 [ptr]=dest,0x08 - -#define RESTORE_REG(reg, ptr, _tmp) \ - ld8 _tmp=[ptr],0x08;; \ - mov reg=_tmp - -#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -1: \ - SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ - add _idx=1,_idx;; \ - br.cloop.sptk.many 1b - -#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ - add _idx=1, _idx;; \ - br.cloop.sptk.many _lbl - -#define SAVE_ONE_RR(num, _reg, _tmp) \ - movl _tmp=(num<<61);; \ - mov _reg=rr[_tmp] - -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ - SAVE_ONE_RR(0,_r0, _tmp);; \ - SAVE_ONE_RR(1,_r1, _tmp);; \ - SAVE_ONE_RR(2,_r2, _tmp);; \ - SAVE_ONE_RR(3,_r3, _tmp);; \ - SAVE_ONE_RR(4,_r4, _tmp);; \ - SAVE_ONE_RR(5,_r5, _tmp);; \ - SAVE_ONE_RR(6,_r6, _tmp);; \ - SAVE_ONE_RR(7,_r7, _tmp);; - -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ - st8 [ptr]=_r0, 8;; \ - st8 [ptr]=_r1, 8;; \ - st8 [ptr]=_r2, 8;; \ - st8 [ptr]=_r3, 8;; \ - st8 [ptr]=_r4, 8;; \ - st8 [ptr]=_r5, 8;; \ - st8 [ptr]=_r6, 8;; \ - st8 [ptr]=_r7, 8;; - -#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ - mov ar.lc=0x08-1;; \ - movl _idx1=0x00;; \ -RestRR: \ - dep.z _idx2=_idx1,61,3;; \ - ld8 _tmp=[ptr],8;; \ - mov rr[_idx2]=_tmp;; \ - srlz.d;; \ - add _idx1=1,_idx1;; \ - br.cloop.sptk.few RestRR - -#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ - movl reg1=sal_state_for_booting_cpu;; \ - ld8 reg2=[reg1];; - -/* - * Adjust region registers saved before starting to save - * break regs and rest of the states that need to be preserved. - */ -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ - SAVE_FROM_REG(b0,_reg1,_reg2);; \ - SAVE_FROM_REG(b1,_reg1,_reg2);; \ - SAVE_FROM_REG(b2,_reg1,_reg2);; \ - SAVE_FROM_REG(b3,_reg1,_reg2);; \ - SAVE_FROM_REG(b4,_reg1,_reg2);; \ - SAVE_FROM_REG(b5,_reg1,_reg2);; \ - st8 [_reg1]=r1,0x08;; \ - st8 [_reg1]=r12,0x08;; \ - st8 [_reg1]=r13,0x08;; \ - SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ - st8 [_reg1]=r4,0x08;; \ - st8 [_reg1]=r5,0x08;; \ - st8 [_reg1]=r6,0x08;; \ - st8 [_reg1]=r7,0x08;; \ - st8 [_reg1]=_pred,0x08;; \ - SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ - stf.spill.nta [_reg1]=f2,16;; \ - stf.spill.nta [_reg1]=f3,16;; \ - stf.spill.nta [_reg1]=f4,16;; \ - stf.spill.nta [_reg1]=f5,16;; \ - stf.spill.nta [_reg1]=f16,16;; \ - stf.spill.nta [_reg1]=f17,16;; \ - stf.spill.nta [_reg1]=f18,16;; \ - stf.spill.nta [_reg1]=f19,16;; \ - stf.spill.nta [_reg1]=f20,16;; \ - stf.spill.nta [_reg1]=f21,16;; \ - stf.spill.nta [_reg1]=f22,16;; \ - stf.spill.nta [_reg1]=f23,16;; \ - stf.spill.nta [_reg1]=f24,16;; \ - stf.spill.nta [_reg1]=f25,16;; \ - stf.spill.nta [_reg1]=f26,16;; \ - stf.spill.nta [_reg1]=f27,16;; \ - stf.spill.nta [_reg1]=f28,16;; \ - stf.spill.nta [_reg1]=f29,16;; \ - stf.spill.nta [_reg1]=f30,16;; \ - stf.spill.nta [_reg1]=f31,16;; - -#else -#define SET_AREA_FOR_BOOTING_CPU(a1, a2) -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) -#endif - -#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ - movl _tmp1=(num << 61);; \ - mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ - mov rr[_tmp1]=_tmp2 - - .section __special_page_section,"ax" - - .global empty_zero_page -empty_zero_page: - .skip PAGE_SIZE - - .global swapper_pg_dir -swapper_pg_dir: - .skip PAGE_SIZE - - .rodata -halt_msg: - stringz "Halting kernel\n" - - .text - - .global start_ap - - /* - * Start the kernel. When the bootloader passes control to _start(), r28 - * points to the address of the boot parameter area. Execution reaches - * here in physical mode. - */ -GLOBAL_ENTRY(_start) -start_ap: - .prologue - .save rp, r0 // terminate unwind chain with a NULL rp - .body - - rsm psr.i | psr.ic - ;; - srlz.i - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - /* - * Save the region registers, predicate before they get clobbered - */ - SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); - mov r25=pr;; - - /* - * Initialize kernel region registers: - * rr[0]: VHPT enabled, page size = PAGE_SHIFT - * rr[1]: VHPT enabled, page size = PAGE_SHIFT - * rr[2]: VHPT enabled, page size = PAGE_SHIFT - * rr[3]: VHPT enabled, page size = PAGE_SHIFT - * rr[4]: VHPT enabled, page size = PAGE_SHIFT - * rr[5]: VHPT enabled, page size = PAGE_SHIFT - * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * We initialize all of them to prevent inadvertently assuming - * something about the state of address translation early in boot. - */ - SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; - SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; - /* - * Now pin mappings into the TLB for kernel text and data - */ - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - mov r3=ip - movl r18=PAGE_KERNEL - ;; - dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT - ;; - or r18=r2,r18 - ;; - srlz.i - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - srlz.i - - /* - * Switch into virtual mode: - */ - movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ - |IA64_PSR_DI) - ;; - mov cr.ipsr=r16 - movl r17=1f - ;; - mov cr.iip=r17 - mov cr.ifs=r0 - ;; - rfi - ;; -1: // now we are in virtual mode - - SET_AREA_FOR_BOOTING_CPU(r2, r16); - - STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); - SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) - ;; - - // set IVT entry point---can't access I/O ports without it - movl r3=ia64_ivt - ;; - mov cr.iva=r3 - movl r2=FPSR_DEFAULT - ;; - srlz.i - movl gp=__gp - - mov ar.fpsr=r2 - ;; - -#define isAP p2 // are we an Application Processor? -#define isBP p3 // are we the Bootstrap Processor? - -#ifdef CONFIG_SMP - /* - * Find the init_task for the currently booting CPU. At poweron, and in - * UP mode, task_for_booting_cpu is NULL. - */ - movl r3=task_for_booting_cpu - ;; - ld8 r3=[r3] - movl r2=init_task - ;; - cmp.eq isBP,isAP=r3,r0 - ;; -(isAP) mov r2=r3 -#else - movl r2=init_task - cmp.eq isBP,isAP=r0,r0 -#endif - ;; - tpa r3=r2 // r3 == phys addr of task struct - mov r16=-1 -(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it - - // load mapping for stack (virtaddr in r2, physaddr in r3) - rsm psr.ic - movl r17=PAGE_KERNEL - ;; - srlz.d - dep r18=0,r3,0,12 - ;; - or r18=r17,r18 - dep r2=-1,r3,61,3 // IMVA of task - ;; - mov r17=rr[r2] - shr.u r16=r3,IA64_GRANULE_SHIFT - ;; - dep r17=0,r17,8,24 - ;; - mov cr.itir=r17 - mov cr.ifa=r2 - - mov r19=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r19]=r18 - ;; - ssm psr.ic - srlz.d - ;; - -.load_current: - // load the "current" pointer (r13) and ar.k6 with the current task - mov IA64_KR(CURRENT)=r2 // virtual address - mov IA64_KR(CURRENT_STACK)=r16 - mov r13=r2 - /* - * Reserve space at the top of the stack for "struct pt_regs". Kernel - * threads don't store interesting values in that structure, but the space - * still needs to be there because time-critical stuff such as the context - * switching can be implemented more efficiently (for example, __switch_to() - * always sets the psr.dfh bit of the task it is switching to). - */ - - addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 - addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE - mov ar.rsc=0 // place RSE in enforced lazy mode - ;; - loadrs // clear the dirty partition - mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base - ;; - mov ar.bspstore=r2 // establish the new RSE stack - ;; - mov ar.rsc=0x3 // place RSE in eager mode - -(isBP) dep r28=-1,r28,61,3 // make address virtual -(isBP) movl r2=ia64_boot_param - ;; -(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader - -#ifdef CONFIG_XEN - // Note: isBP is used by the subprogram. - br.call.sptk.many rp=early_xen_setup - ;; -#endif - -#ifdef CONFIG_SMP -(isAP) br.call.sptk.many rp=start_secondary -.ret0: -(isAP) br.cond.sptk self -#endif - - // This is executed by the bootstrap processor (bsp) only: - -#ifdef CONFIG_IA64_FW_EMU - // initialize PAL & SAL emulator: - br.call.sptk.many rp=sys_fw_init -.ret1: -#endif - br.call.sptk.many rp=start_kernel -.ret2: addl r3=@ltoff(halt_msg),gp - ;; - alloc r2=ar.pfs,8,0,2,0 - ;; - ld8 out0=[r3] - br.call.sptk.many b0=console_print - -self: hint @pause - br.sptk.many self // endless loop -END(_start) - -GLOBAL_ENTRY(ia64_save_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - mov r20=ar.lc // preserve ar.lc - mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=0 - add r19=IA64_NUM_DBG_REGS*8,in0 - ;; -1: mov r16=dbr[r18] -#ifdef CONFIG_ITANIUM - ;; - srlz.d -#endif - mov r17=ibr[r18] - add r18=1,r18 - ;; - st8.nta [in0]=r16,8 - st8.nta [r19]=r17,8 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_save_debug_regs) - -GLOBAL_ENTRY(ia64_load_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - lfetch.nta [in0] - mov r20=ar.lc // preserve ar.lc - add r19=IA64_NUM_DBG_REGS*8,in0 - mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=-1 - ;; -1: ld8.nta r16=[in0],8 - ld8.nta r17=[r19],8 - add r18=1,r18 - ;; - mov dbr[r18]=r16 -#ifdef CONFIG_ITANIUM - ;; - srlz.d // Errata 132 (NoFix status) -#endif - mov ibr[r18]=r17 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_load_debug_regs) - -GLOBAL_ENTRY(__ia64_save_fpu) - alloc r2=ar.pfs,1,4,0,0 - adds loc0=96*16-16,in0 - adds loc1=96*16-16-128,in0 - ;; - stf.spill.nta [loc0]=f127,-256 - stf.spill.nta [loc1]=f119,-256 - ;; - stf.spill.nta [loc0]=f111,-256 - stf.spill.nta [loc1]=f103,-256 - ;; - stf.spill.nta [loc0]=f95,-256 - stf.spill.nta [loc1]=f87,-256 - ;; - stf.spill.nta [loc0]=f79,-256 - stf.spill.nta [loc1]=f71,-256 - ;; - stf.spill.nta [loc0]=f63,-256 - stf.spill.nta [loc1]=f55,-256 - adds loc2=96*16-32,in0 - ;; - stf.spill.nta [loc0]=f47,-256 - stf.spill.nta [loc1]=f39,-256 - adds loc3=96*16-32-128,in0 - ;; - stf.spill.nta [loc2]=f126,-256 - stf.spill.nta [loc3]=f118,-256 - ;; - stf.spill.nta [loc2]=f110,-256 - stf.spill.nta [loc3]=f102,-256 - ;; - stf.spill.nta [loc2]=f94,-256 - stf.spill.nta [loc3]=f86,-256 - ;; - stf.spill.nta [loc2]=f78,-256 - stf.spill.nta [loc3]=f70,-256 - ;; - stf.spill.nta [loc2]=f62,-256 - stf.spill.nta [loc3]=f54,-256 - adds loc0=96*16-48,in0 - ;; - stf.spill.nta [loc2]=f46,-256 - stf.spill.nta [loc3]=f38,-256 - adds loc1=96*16-48-128,in0 - ;; - stf.spill.nta [loc0]=f125,-256 - stf.spill.nta [loc1]=f117,-256 - ;; - stf.spill.nta [loc0]=f109,-256 - stf.spill.nta [loc1]=f101,-256 - ;; - stf.spill.nta [loc0]=f93,-256 - stf.spill.nta [loc1]=f85,-256 - ;; - stf.spill.nta [loc0]=f77,-256 - stf.spill.nta [loc1]=f69,-256 - ;; - stf.spill.nta [loc0]=f61,-256 - stf.spill.nta [loc1]=f53,-256 - adds loc2=96*16-64,in0 - ;; - stf.spill.nta [loc0]=f45,-256 - stf.spill.nta [loc1]=f37,-256 - adds loc3=96*16-64-128,in0 - ;; - stf.spill.nta [loc2]=f124,-256 - stf.spill.nta [loc3]=f116,-256 - ;; - stf.spill.nta [loc2]=f108,-256 - stf.spill.nta [loc3]=f100,-256 - ;; - stf.spill.nta [loc2]=f92,-256 - stf.spill.nta [loc3]=f84,-256 - ;; - stf.spill.nta [loc2]=f76,-256 - stf.spill.nta [loc3]=f68,-256 - ;; - stf.spill.nta [loc2]=f60,-256 - stf.spill.nta [loc3]=f52,-256 - adds loc0=96*16-80,in0 - ;; - stf.spill.nta [loc2]=f44,-256 - stf.spill.nta [loc3]=f36,-256 - adds loc1=96*16-80-128,in0 - ;; - stf.spill.nta [loc0]=f123,-256 - stf.spill.nta [loc1]=f115,-256 - ;; - stf.spill.nta [loc0]=f107,-256 - stf.spill.nta [loc1]=f99,-256 - ;; - stf.spill.nta [loc0]=f91,-256 - stf.spill.nta [loc1]=f83,-256 - ;; - stf.spill.nta [loc0]=f75,-256 - stf.spill.nta [loc1]=f67,-256 - ;; - stf.spill.nta [loc0]=f59,-256 - stf.spill.nta [loc1]=f51,-256 - adds loc2=96*16-96,in0 - ;; - stf.spill.nta [loc0]=f43,-256 - stf.spill.nta [loc1]=f35,-256 - adds loc3=96*16-96-128,in0 - ;; - stf.spill.nta [loc2]=f122,-256 - stf.spill.nta [loc3]=f114,-256 - ;; - stf.spill.nta [loc2]=f106,-256 - stf.spill.nta [loc3]=f98,-256 - ;; - stf.spill.nta [loc2]=f90,-256 - stf.spill.nta [loc3]=f82,-256 - ;; - stf.spill.nta [loc2]=f74,-256 - stf.spill.nta [loc3]=f66,-256 - ;; - stf.spill.nta [loc2]=f58,-256 - stf.spill.nta [loc3]=f50,-256 - adds loc0=96*16-112,in0 - ;; - stf.spill.nta [loc2]=f42,-256 - stf.spill.nta [loc3]=f34,-256 - adds loc1=96*16-112-128,in0 - ;; - stf.spill.nta [loc0]=f121,-256 - stf.spill.nta [loc1]=f113,-256 - ;; - stf.spill.nta [loc0]=f105,-256 - stf.spill.nta [loc1]=f97,-256 - ;; - stf.spill.nta [loc0]=f89,-256 - stf.spill.nta [loc1]=f81,-256 - ;; - stf.spill.nta [loc0]=f73,-256 - stf.spill.nta [loc1]=f65,-256 - ;; - stf.spill.nta [loc0]=f57,-256 - stf.spill.nta [loc1]=f49,-256 - adds loc2=96*16-128,in0 - ;; - stf.spill.nta [loc0]=f41,-256 - stf.spill.nta [loc1]=f33,-256 - adds loc3=96*16-128-128,in0 - ;; - stf.spill.nta [loc2]=f120,-256 - stf.spill.nta [loc3]=f112,-256 - ;; - stf.spill.nta [loc2]=f104,-256 - stf.spill.nta [loc3]=f96,-256 - ;; - stf.spill.nta [loc2]=f88,-256 - stf.spill.nta [loc3]=f80,-256 - ;; - stf.spill.nta [loc2]=f72,-256 - stf.spill.nta [loc3]=f64,-256 - ;; - stf.spill.nta [loc2]=f56,-256 - stf.spill.nta [loc3]=f48,-256 - ;; - stf.spill.nta [loc2]=f40 - stf.spill.nta [loc3]=f32 - br.ret.sptk.many rp -END(__ia64_save_fpu) - -GLOBAL_ENTRY(__ia64_load_fpu) - alloc r2=ar.pfs,1,2,0,0 - adds r3=128,in0 - adds r14=256,in0 - adds r15=384,in0 - mov loc0=512 - mov loc1=-1024+16 - ;; - ldf.fill.nta f32=[in0],loc0 - ldf.fill.nta f40=[ r3],loc0 - ldf.fill.nta f48=[r14],loc0 - ldf.fill.nta f56=[r15],loc0 - ;; - ldf.fill.nta f64=[in0],loc0 - ldf.fill.nta f72=[ r3],loc0 - ldf.fill.nta f80=[r14],loc0 - ldf.fill.nta f88=[r15],loc0 - ;; - ldf.fill.nta f96=[in0],loc1 - ldf.fill.nta f104=[ r3],loc1 - ldf.fill.nta f112=[r14],loc1 - ldf.fill.nta f120=[r15],loc1 - ;; - ldf.fill.nta f33=[in0],loc0 - ldf.fill.nta f41=[ r3],loc0 - ldf.fill.nta f49=[r14],loc0 - ldf.fill.nta f57=[r15],loc0 - ;; - ldf.fill.nta f65=[in0],loc0 - ldf.fill.nta f73=[ r3],loc0 - ldf.fill.nta f81=[r14],loc0 - ldf.fill.nta f89=[r15],loc0 - ;; - ldf.fill.nta f97=[in0],loc1 - ldf.fill.nta f105=[ r3],loc1 - ldf.fill.nta f113=[r14],loc1 - ldf.fill.nta f121=[r15],loc1 - ;; - ldf.fill.nta f34=[in0],loc0 - ldf.fill.nta f42=[ r3],loc0 - ldf.fill.nta f50=[r14],loc0 - ldf.fill.nta f58=[r15],loc0 - ;; - ldf.fill.nta f66=[in0],loc0 - ldf.fill.nta f74=[ r3],loc0 - ldf.fill.nta f82=[r14],loc0 - ldf.fill.nta f90=[r15],loc0 - ;; - ldf.fill.nta f98=[in0],loc1 - ldf.fill.nta f106=[ r3],loc1 - ldf.fill.nta f114=[r14],loc1 - ldf.fill.nta f122=[r15],loc1 - ;; - ldf.fill.nta f35=[in0],loc0 - ldf.fill.nta f43=[ r3],loc0 - ldf.fill.nta f51=[r14],loc0 - ldf.fill.nta f59=[r15],loc0 - ;; - ldf.fill.nta f67=[in0],loc0 - ldf.fill.nta f75=[ r3],loc0 - ldf.fill.nta f83=[r14],loc0 - ldf.fill.nta f91=[r15],loc0 - ;; - ldf.fill.nta f99=[in0],loc1 - ldf.fill.nta f107=[ r3],loc1 - ldf.fill.nta f115=[r14],loc1 - ldf.fill.nta f123=[r15],loc1 - ;; - ldf.fill.nta f36=[in0],loc0 - ldf.fill.nta f44=[ r3],loc0 - ldf.fill.nta f52=[r14],loc0 - ldf.fill.nta f60=[r15],loc0 - ;; - ldf.fill.nta f68=[in0],loc0 - ldf.fill.nta f76=[ r3],loc0 - ldf.fill.nta f84=[r14],loc0 - ldf.fill.nta f92=[r15],loc0 - ;; - ldf.fill.nta f100=[in0],loc1 - ldf.fill.nta f108=[ r3],loc1 - ldf.fill.nta f116=[r14],loc1 - ldf.fill.nta f124=[r15],loc1 - ;; - ldf.fill.nta f37=[in0],loc0 - ldf.fill.nta f45=[ r3],loc0 - ldf.fill.nta f53=[r14],loc0 - ldf.fill.nta f61=[r15],loc0 - ;; - ldf.fill.nta f69=[in0],loc0 - ldf.fill.nta f77=[ r3],loc0 - ldf.fill.nta f85=[r14],loc0 - ldf.fill.nta f93=[r15],loc0 - ;; - ldf.fill.nta f101=[in0],loc1 - ldf.fill.nta f109=[ r3],loc1 - ldf.fill.nta f117=[r14],loc1 - ldf.fill.nta f125=[r15],loc1 - ;; - ldf.fill.nta f38 =[in0],loc0 - ldf.fill.nta f46 =[ r3],loc0 - ldf.fill.nta f54 =[r14],loc0 - ldf.fill.nta f62 =[r15],loc0 - ;; - ldf.fill.nta f70 =[in0],loc0 - ldf.fill.nta f78 =[ r3],loc0 - ldf.fill.nta f86 =[r14],loc0 - ldf.fill.nta f94 =[r15],loc0 - ;; - ldf.fill.nta f102=[in0],loc1 - ldf.fill.nta f110=[ r3],loc1 - ldf.fill.nta f118=[r14],loc1 - ldf.fill.nta f126=[r15],loc1 - ;; - ldf.fill.nta f39 =[in0],loc0 - ldf.fill.nta f47 =[ r3],loc0 - ldf.fill.nta f55 =[r14],loc0 - ldf.fill.nta f63 =[r15],loc0 - ;; - ldf.fill.nta f71 =[in0],loc0 - ldf.fill.nta f79 =[ r3],loc0 - ldf.fill.nta f87 =[r14],loc0 - ldf.fill.nta f95 =[r15],loc0 - ;; - ldf.fill.nta f103=[in0] - ldf.fill.nta f111=[ r3] - ldf.fill.nta f119=[r14] - ldf.fill.nta f127=[r15] - br.ret.sptk.many rp -END(__ia64_load_fpu) - -GLOBAL_ENTRY(__ia64_init_fpu) - stf.spill [sp]=f0 // M3 - mov f32=f0 // F - nop.b 0 - - ldfps f33,f34=[sp] // M0 - ldfps f35,f36=[sp] // M1 - mov f37=f0 // F - ;; - - setf.s f38=r0 // M2 - setf.s f39=r0 // M3 - mov f40=f0 // F - - ldfps f41,f42=[sp] // M0 - ldfps f43,f44=[sp] // M1 - mov f45=f0 // F - - setf.s f46=r0 // M2 - setf.s f47=r0 // M3 - mov f48=f0 // F - - ldfps f49,f50=[sp] // M0 - ldfps f51,f52=[sp] // M1 - mov f53=f0 // F - - setf.s f54=r0 // M2 - setf.s f55=r0 // M3 - mov f56=f0 // F - - ldfps f57,f58=[sp] // M0 - ldfps f59,f60=[sp] // M1 - mov f61=f0 // F - - setf.s f62=r0 // M2 - setf.s f63=r0 // M3 - mov f64=f0 // F - - ldfps f65,f66=[sp] // M0 - ldfps f67,f68=[sp] // M1 - mov f69=f0 // F - - setf.s f70=r0 // M2 - setf.s f71=r0 // M3 - mov f72=f0 // F - - ldfps f73,f74=[sp] // M0 - ldfps f75,f76=[sp] // M1 - mov f77=f0 // F - - setf.s f78=r0 // M2 - setf.s f79=r0 // M3 - mov f80=f0 // F - - ldfps f81,f82=[sp] // M0 - ldfps f83,f84=[sp] // M1 - mov f85=f0 // F - - setf.s f86=r0 // M2 - setf.s f87=r0 // M3 - mov f88=f0 // F - - /* - * When the instructions are cached, it would be faster to initialize - * the remaining registers with simply mov instructions (F-unit). - * This gets the time down to ~29 cycles. However, this would use up - * 33 bundles, whereas continuing with the above pattern yields - * 10 bundles and ~30 cycles. - */ - - ldfps f89,f90=[sp] // M0 - ldfps f91,f92=[sp] // M1 - mov f93=f0 // F - - setf.s f94=r0 // M2 - setf.s f95=r0 // M3 - mov f96=f0 // F - - ldfps f97,f98=[sp] // M0 - ldfps f99,f100=[sp] // M1 - mov f101=f0 // F - - setf.s f102=r0 // M2 - setf.s f103=r0 // M3 - mov f104=f0 // F - - ldfps f105,f106=[sp] // M0 - ldfps f107,f108=[sp] // M1 - mov f109=f0 // F - - setf.s f110=r0 // M2 - setf.s f111=r0 // M3 - mov f112=f0 // F - - ldfps f113,f114=[sp] // M0 - ldfps f115,f116=[sp] // M1 - mov f117=f0 // F - - setf.s f118=r0 // M2 - setf.s f119=r0 // M3 - mov f120=f0 // F - - ldfps f121,f122=[sp] // M0 - ldfps f123,f124=[sp] // M1 - mov f125=f0 // F - - setf.s f126=r0 // M2 - setf.s f127=r0 // M3 - br.ret.sptk.many rp // F -END(__ia64_init_fpu) - -/* - * Switch execution mode from virtual to physical - * - * Inputs: - * r16 = new psr to establish - * Output: - * r19 = old virtual address of ar.bsp - * r20 = old virtual address of sp - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_phys) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_phys,r15 - - mov r19=ar.bsp - mov r20=sp - mov r14=rp // get return address into a general register - ;; - - // going to physical mode, use tpa to translate virt->phys - tpa r17=r19 - tpa r3=r3 - tpa sp=sp - tpa r14=r14 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r17 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_phys) - -/* - * Switch execution mode from physical to virtual - * - * Inputs: - * r16 = new psr to establish - * r19 = new bspstore to establish - * r20 = new sp to establish - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_virt) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_virt,r15 - - mov r14=rp // get return address into a general register - ;; - - // going to virtual - // - for code addresses, set upper bits of addr to KERNEL_START - // - for stack addresses, copy from input argument - movl r18=KERNEL_START - dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - mov sp=r20 - ;; - or r3=r3,r18 - or r14=r14,r18 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r19 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_virt) - -GLOBAL_ENTRY(ia64_delay_loop) - .prologue -{ nop 0 // work around GAS unwind info generation bug... - .save ar.lc,r2 - mov r2=ar.lc - .body - ;; - mov ar.lc=r32 -} - ;; - // force loop to be 32-byte aligned (GAS bug means we cannot use .align - // inside function body without corrupting unwind info). -{ nop 0 } -1: br.cloop.sptk.few 1b - ;; - mov ar.lc=r2 - br.ret.sptk.many rp -END(ia64_delay_loop) - -/* - * Return a CPU-local timestamp in nano-seconds. This timestamp is - * NOT synchronized across CPUs its return value must never be - * compared against the values returned on another CPU. The usage in - * kernel/sched.c ensures that. - * - * The return-value of sched_clock() is NOT supposed to wrap-around. - * If it did, it would cause some scheduling hiccups (at the worst). - * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even - * that would happen only once every 5+ years. - * - * The code below basically calculates: - * - * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT - * - * except that the multiplication and the shift are done with 128-bit - * intermediate precision so that we can produce a full 64-bit result. - */ -GLOBAL_ENTRY(sched_clock) - addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 - mov.m r9=ar.itc // fetch cycle-counter (35 cyc) - ;; - ldf8 f8=[r8] - ;; - setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... - ;; - xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) - xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product - ;; - getf.sig r8=f10 // (5 cyc) - getf.sig r9=f11 - ;; - shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT - br.ret.sptk.many rp -END(sched_clock) - -GLOBAL_ENTRY(start_kernel_thread) - .prologue - .save rp, r0 // this is the end of the call-chain - .body - alloc r2 = ar.pfs, 0, 0, 2, 0 - mov out0 = r9 - mov out1 = r11;; - br.call.sptk.many rp = kernel_thread_helper;; - mov out0 = r8 - br.call.sptk.many rp = sys_exit;; -1: br.sptk.few 1b // not reached -END(start_kernel_thread) - -#ifdef CONFIG_IA64_BRL_EMU - -/* - * Assembly routines used by brl_emu.c to set preserved register state. - */ - -#define SET_REG(reg) \ - GLOBAL_ENTRY(ia64_set_##reg); \ - alloc r16=ar.pfs,1,0,0,0; \ - mov reg=r32; \ - ;; \ - br.ret.sptk.many rp; \ - END(ia64_set_##reg) - -SET_REG(b1); -SET_REG(b2); -SET_REG(b3); -SET_REG(b4); -SET_REG(b5); - -#endif /* CONFIG_IA64_BRL_EMU */ - -#ifdef CONFIG_SMP - /* - * This routine handles spinlock contention. It uses a non-standard calling - * convention to avoid converting leaf routines into interior routines. Because - * of this special convention, there are several restrictions: - * - * - do not use gp relative variables, this code is called from the kernel - * and from modules, r1 is undefined. - * - do not use stacked registers, the caller owns them. - * - do not use the scratch stack space, the caller owns it. - * - do not use any registers other than the ones listed below - * - * Inputs: - * ar.pfs - saved CFM of caller - * ar.ccv - 0 (and available for use) - * r27 - flags from spin_lock_irqsave or 0. Must be preserved. - * r28 - available for use. - * r29 - available for use. - * r30 - available for use. - * r31 - address of lock, available for use. - * b6 - return address - * p14 - available for use. - * p15 - used to track flag status. - * - * If you patch this code to use more registers, do not forget to update - * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h. - */ - -#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) - -GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) - .prologue - .save ar.pfs, r0 // this code effectively has a zero frame size - .save rp, r28 - .body - nop 0 - tbit.nz p15,p0=r27,IA64_PSR_I_BIT - .restore sp // pop existing prologue after next insn - mov b6 = r28 - .prologue - .save ar.pfs, r0 - .altrp b6 - .body - ;; -(p15) ssm psr.i // reenable interrupts if they were on - // DavidM says that srlz.d is slow and is not required in this case -.wait: - // exponential backoff, kdb, lockmeter etc. go in here - hint @pause - ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word - nop 0 - ;; - cmp4.ne p14,p0=r30,r0 -(p14) br.cond.sptk.few .wait -(p15) rsm psr.i // disable interrupts if we reenabled them - br.cond.sptk.few b6 // lock is now free, try to acquire - .global ia64_spinlock_contention_pre3_4_end // for kernprof -ia64_spinlock_contention_pre3_4_end: -END(ia64_spinlock_contention_pre3_4) - -#else - -GLOBAL_ENTRY(ia64_spinlock_contention) - .prologue - .altrp b6 - .body - tbit.nz p15,p0=r27,IA64_PSR_I_BIT - ;; -.wait: -(p15) ssm psr.i // reenable interrupts if they were on - // DavidM says that srlz.d is slow and is not required in this case -.wait2: - // exponential backoff, kdb, lockmeter etc. go in here - hint @pause - ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word - ;; - cmp4.ne p14,p0=r30,r0 - mov r30 = 1 -(p14) br.cond.sptk.few .wait2 -(p15) rsm psr.i // disable interrupts if we reenabled them - ;; - cmpxchg4.acq r30=[r31], r30, ar.ccv - ;; - cmp4.ne p14,p0=r0,r30 -(p14) br.cond.sptk.few .wait - - br.ret.sptk.many b6 // lock is now taken -END(ia64_spinlock_contention) - -#endif - -#ifdef CONFIG_HOTPLUG_CPU -GLOBAL_ENTRY(ia64_jump_to_sal) - alloc r16=ar.pfs,1,0,0,0;; - rsm psr.i | psr.ic -{ - flushrs - srlz.i -} - tpa r25=in0 - movl r18=tlb_purge_done;; - DATA_VA_TO_PA(r18);; - mov b1=r18 // Return location - movl r18=ia64_do_tlb_purge;; - DATA_VA_TO_PA(r18);; - mov b2=r18 // doing tlb_flush work - mov ar.rsc=0 // Put RSE in enforced lazy, LE mode - movl r17=1f;; - DATA_VA_TO_PA(r17);; - mov cr.iip=r17 - movl r16=SAL_PSR_BITS_TO_SET;; - mov cr.ipsr=r16 - mov cr.ifs=r0;; - rfi;; -1: - /* - * Invalidate all TLB data/inst - */ - br.sptk.many b2;; // jump to tlb purge code - -tlb_purge_done: - RESTORE_REGION_REGS(r25, r17,r18,r19);; - RESTORE_REG(b0, r25, r17);; - RESTORE_REG(b1, r25, r17);; - RESTORE_REG(b2, r25, r17);; - RESTORE_REG(b3, r25, r17);; - RESTORE_REG(b4, r25, r17);; - RESTORE_REG(b5, r25, r17);; - ld8 r1=[r25],0x08;; - ld8 r12=[r25],0x08;; - ld8 r13=[r25],0x08;; - RESTORE_REG(ar.fpsr, r25, r17);; - RESTORE_REG(ar.pfs, r25, r17);; - RESTORE_REG(ar.rnat, r25, r17);; - RESTORE_REG(ar.unat, r25, r17);; - RESTORE_REG(ar.bspstore, r25, r17);; - RESTORE_REG(cr.dcr, r25, r17);; - RESTORE_REG(cr.iva, r25, r17);; - RESTORE_REG(cr.pta, r25, r17);; - RESTORE_REG(cr.itv, r25, r17);; - RESTORE_REG(cr.pmv, r25, r17);; - RESTORE_REG(cr.cmcv, r25, r17);; - RESTORE_REG(cr.lrr0, r25, r17);; - RESTORE_REG(cr.lrr1, r25, r17);; - ld8 r4=[r25],0x08;; - ld8 r5=[r25],0x08;; - ld8 r6=[r25],0x08;; - ld8 r7=[r25],0x08;; - ld8 r17=[r25],0x08;; - mov pr=r17,-1;; - RESTORE_REG(ar.lc, r25, r17);; - /* - * Now Restore floating point regs - */ - ldf.fill.nta f2=[r25],16;; - ldf.fill.nta f3=[r25],16;; - ldf.fill.nta f4=[r25],16;; - ldf.fill.nta f5=[r25],16;; - ldf.fill.nta f16=[r25],16;; - ldf.fill.nta f17=[r25],16;; - ldf.fill.nta f18=[r25],16;; - ldf.fill.nta f19=[r25],16;; - ldf.fill.nta f20=[r25],16;; - ldf.fill.nta f21=[r25],16;; - ldf.fill.nta f22=[r25],16;; - ldf.fill.nta f23=[r25],16;; - ldf.fill.nta f24=[r25],16;; - ldf.fill.nta f25=[r25],16;; - ldf.fill.nta f26=[r25],16;; - ldf.fill.nta f27=[r25],16;; - ldf.fill.nta f28=[r25],16;; - ldf.fill.nta f29=[r25],16;; - ldf.fill.nta f30=[r25],16;; - ldf.fill.nta f31=[r25],16;; - - /* - * Now that we have done all the register restores - * we are now ready for the big DIVE to SAL Land - */ - ssm psr.ic;; - srlz.d;; - br.ret.sptk.many b0;; -END(ia64_jump_to_sal) -#endif /* CONFIG_HOTPLUG_CPU */ - -#endif /* CONFIG_SMP */ diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c b/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c deleted file mode 100644 index 1541b57a5c..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c +++ /dev/null @@ -1,1253 +0,0 @@ -/* - * I/O SAPIC support. - * - * Copyright (C) 1999 Intel Corp. - * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> - * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com> - * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co. - * David Mosberger-Tang <davidm@hpl.hp.com> - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> - * - * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O - * APIC code. In particular, we now have separate - * handlers for edge and level triggered - * interrupts. - * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector - * allocation PCI to vector mapping, shared PCI - * interrupts. - * 00/10/27 D. Mosberger Document things a bit more to make them more - * understandable. Clean up much of the old - * IOSAPIC cruft. - * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts - * and fixes for ACPI S5(SoftOff) support. - * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT - * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt - * vectors in iosapic_set_affinity(), - * initializations for /proc/irq/#/smp_affinity - * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. - * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq - * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to - * IOSAPIC mapping error - * 02/07/29 T. Kochi Allocate interrupt vectors dynamically - * 02/08/04 T. Kochi Cleaned up terminology (irq, global system - * interrupt, vector, etc.) - * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's - * pci_irq code. - * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. - * Remove iosapic_address & gsi_base from - * external interfaces. Rationalize - * __init/__devinit attributes. - * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004 - * Updated to work with irq migration necessary - * for CPU Hotplug - */ -/* - * Here is what the interrupt logic between a PCI device and the kernel looks - * like: - * - * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, - * INTD). The device is uniquely identified by its bus-, and slot-number - * (the function number does not matter here because all functions share - * the same interrupt lines). - * - * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC - * controller. Multiple interrupt lines may have to share the same - * IOSAPIC pin (if they're level triggered and use the same polarity). - * Each interrupt line has a unique Global System Interrupt (GSI) number - * which can be calculated as the sum of the controller's base GSI number - * and the IOSAPIC pin number to which the line connects. - * - * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the - * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then - * sent to the CPU. - * - * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is - * used as architecture-independent interrupt handling mechanism in Linux. - * As an IRQ is a number, we have to have - * IA-64 interrupt vector number <-> IRQ number mapping. On smaller - * systems, we use one-to-one mapping between IA-64 vector and IRQ. A - * platform can implement platform_irq_to_vector(irq) and - * platform_local_vector_to_irq(vector) APIs to differentiate the mapping. - * Please see also include/asm-ia64/hw_irq.h for those APIs. - * - * To sum up, there are three levels of mappings involved: - * - * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ - * - * Note: The term "IRQ" is loosely used everywhere in Linux kernel to - * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ - * (isa_irq) is the only exception in this source code. - */ - -#include <linux/acpi.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/pci.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/string.h> -#include <linux/bootmem.h> - -#include <asm/delay.h> -#include <asm/hw_irq.h> -#include <asm/io.h> -#include <asm/iosapic.h> -#include <asm/machvec.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/system.h> - -#undef DEBUG_INTERRUPT_ROUTING - -#ifdef DEBUG_INTERRUPT_ROUTING -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) -#endif - -#define NR_PREALLOCATE_RTE_ENTRIES \ - (PAGE_SIZE / sizeof(struct iosapic_rte_info)) -#define RTE_PREALLOCATED (1) - -static DEFINE_SPINLOCK(iosapic_lock); - -/* - * These tables map IA-64 vectors to the IOSAPIC pin that generates this - * vector. - */ - -struct iosapic_rte_info { - struct list_head rte_list; /* node in list of RTEs sharing the - * same vector */ - char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this - * IOSAPIC */ - char rte_index; /* IOSAPIC RTE index */ - int refcnt; /* reference counter */ - unsigned int flags; /* flags */ -} ____cacheline_aligned; - -static struct iosapic_intr_info { - struct list_head rtes; /* RTEs using this vector (empty => - * not an IOSAPIC interrupt) */ - int count; /* # of RTEs that shares this vector */ - u32 low32; /* current value of low word of - * Redirection table entry */ - unsigned int dest; /* destination CPU physical ID */ - unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ - unsigned char polarity: 1; /* interrupt polarity - * (see iosapic.h) */ - unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ -} iosapic_intr_info[IA64_NUM_VECTORS]; - -static struct iosapic { - char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this - * IOSAPIC */ - unsigned short num_rte; /* # of RTEs on this IOSAPIC */ - int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ -#ifdef CONFIG_NUMA - unsigned short node; /* numa node association via pxm */ -#endif -} iosapic_lists[NR_IOSAPICS]; - -static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */ - -static int iosapic_kmalloc_ok; -static LIST_HEAD(free_rte_list); - -#ifdef CONFIG_XEN -#include <xen/interface/xen.h> -#include <xen/interface/physdev.h> -#include <asm/hypervisor.h> -static inline unsigned int xen_iosapic_read(char __iomem *iosapic, unsigned int reg) -{ - struct physdev_apic apic_op; - int ret; - - apic_op.apic_physbase = (unsigned long)iosapic - - __IA64_UNCACHED_OFFSET; - apic_op.reg = reg; - ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); - if (ret) - return ret; - return apic_op.value; -} - -static inline void xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) -{ - struct physdev_apic apic_op; - - apic_op.apic_physbase = (unsigned long)iosapic - - __IA64_UNCACHED_OFFSET; - apic_op.reg = reg; - apic_op.value = val; - HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); -} - -static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg) -{ - if (!is_running_on_xen()) { - writel(reg, iosapic + IOSAPIC_REG_SELECT); - return readl(iosapic + IOSAPIC_WINDOW); - } else - return xen_iosapic_read(iosapic, reg); -} - -static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) -{ - if (!is_running_on_xen()) { - writel(reg, iosapic + IOSAPIC_REG_SELECT); - writel(val, iosapic + IOSAPIC_WINDOW); - } else - xen_iosapic_write(iosapic, reg, val); -} - -int xen_assign_irq_vector(int irq) -{ - struct physdev_irq irq_op; - - irq_op.irq = irq; - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) - return -ENOSPC; - - return irq_op.vector; -} - -void xen_free_irq_vector(int vector) -{ - struct physdev_irq irq_op; - - irq_op.vector = vector; - if (HYPERVISOR_physdev_op(PHYSDEVOP_free_irq_vector, &irq_op)) - printk(KERN_WARNING "%s: xen_free_irq_vecotr fail vector=%d\n", - __FUNCTION__, vector); -} -#endif /* XEN */ - -/* - * Find an IOSAPIC associated with a GSI - */ -static inline int -find_iosapic (unsigned int gsi) -{ - int i; - - for (i = 0; i < NR_IOSAPICS; i++) { - if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < - iosapic_lists[i].num_rte) - return i; - } - - return -1; -} - -static inline int -_gsi_to_vector (unsigned int gsi) -{ - struct iosapic_intr_info *info; - struct iosapic_rte_info *rte; - - for (info = iosapic_intr_info; info < - iosapic_intr_info + IA64_NUM_VECTORS; ++info) - list_for_each_entry(rte, &info->rtes, rte_list) - if (rte->gsi_base + rte->rte_index == gsi) - return info - iosapic_intr_info; - return -1; -} - -/* - * Translate GSI number to the corresponding IA-64 interrupt vector. If no - * entry exists, return -1. - */ -inline int -gsi_to_vector (unsigned int gsi) -{ - return _gsi_to_vector(gsi); -} - -int -gsi_to_irq (unsigned int gsi) -{ - unsigned long flags; - int irq; - /* - * XXX fix me: this assumes an identity mapping between IA-64 vector - * and Linux irq numbers... - */ - spin_lock_irqsave(&iosapic_lock, flags); - { - irq = _gsi_to_vector(gsi); - } - spin_unlock_irqrestore(&iosapic_lock, flags); - - return irq; -} - -static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, - unsigned int vec) -{ - struct iosapic_rte_info *rte; - - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) - if (rte->gsi_base + rte->rte_index == gsi) - return rte; - return NULL; -} - -static void -set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) -{ - unsigned long pol, trigger, dmode; - u32 low32, high32; - char __iomem *addr; - int rte_index; - char redir; - struct iosapic_rte_info *rte; - - DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); - - rte = gsi_vector_to_rte(gsi, vector); - if (!rte) - return; /* not an IOSAPIC interrupt */ - - rte_index = rte->rte_index; - addr = rte->addr; - pol = iosapic_intr_info[vector].polarity; - trigger = iosapic_intr_info[vector].trigger; - dmode = iosapic_intr_info[vector].dmode; - - redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; - -#ifdef CONFIG_SMP - { - unsigned int irq; - - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == vector) { - set_irq_affinity_info(irq, - (int)(dest & 0xffff), - redir); - break; - } - } -#endif - - low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | - (trigger << IOSAPIC_TRIGGER_SHIFT) | - (dmode << IOSAPIC_DELIVERY_SHIFT) | - ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) | - vector); - - /* dest contains both id and eid */ - high32 = (dest << IOSAPIC_DEST_SHIFT); - - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - iosapic_intr_info[vector].low32 = low32; - iosapic_intr_info[vector].dest = dest; -} - -static void -nop (unsigned int irq) -{ - /* do nothing... */ -} - -static void -mask_irq (unsigned int irq) -{ - unsigned long flags; - char __iomem *addr; - u32 low32; - int rte_index; - ia64_vector vec = irq_to_vector(irq); - struct iosapic_rte_info *rte; - - if (list_empty(&iosapic_intr_info[vec].rtes)) - return; /* not an IOSAPIC interrupt! */ - - spin_lock_irqsave(&iosapic_lock, flags); - { - /* set only the mask bit */ - low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } - } - spin_unlock_irqrestore(&iosapic_lock, flags); -} - -static void -unmask_irq (unsigned int irq) -{ - unsigned long flags; - char __iomem *addr; - u32 low32; - int rte_index; - ia64_vector vec = irq_to_vector(irq); - struct iosapic_rte_info *rte; - - if (list_empty(&iosapic_intr_info[vec].rtes)) - return; /* not an IOSAPIC interrupt! */ - - spin_lock_irqsave(&iosapic_lock, flags); - { - low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } - } - spin_unlock_irqrestore(&iosapic_lock, flags); -} - - -static void -iosapic_set_affinity (unsigned int irq, cpumask_t mask) -{ -#ifdef CONFIG_SMP - unsigned long flags; - u32 high32, low32; - int dest, rte_index; - char __iomem *addr; - int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; - ia64_vector vec; - struct iosapic_rte_info *rte; - - irq &= (~IA64_IRQ_REDIRECTED); - vec = irq_to_vector(irq); - - if (cpus_empty(mask)) - return; - - dest = cpu_physical_id(first_cpu(mask)); - - if (list_empty(&iosapic_intr_info[vec].rtes)) - return; /* not an IOSAPIC interrupt */ - - set_irq_affinity_info(irq, dest, redir); - - /* dest contains both id and eid */ - high32 = dest << IOSAPIC_DEST_SHIFT; - - spin_lock_irqsave(&iosapic_lock, flags); - { - low32 = iosapic_intr_info[vec].low32 & - ~(7 << IOSAPIC_DELIVERY_SHIFT); - - if (redir) - /* change delivery mode to lowest priority */ - low32 |= (IOSAPIC_LOWEST_PRIORITY << - IOSAPIC_DELIVERY_SHIFT); - else - /* change delivery mode to fixed */ - low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); - - iosapic_intr_info[vec].low32 = low32; - iosapic_intr_info[vec].dest = dest; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), - high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } - } - spin_unlock_irqrestore(&iosapic_lock, flags); -#endif -} - -/* - * Handlers for level-triggered interrupts. - */ - -static unsigned int -iosapic_startup_level_irq (unsigned int irq) -{ - unmask_irq(irq); - return 0; -} - -static void -iosapic_end_level_irq (unsigned int irq) -{ - ia64_vector vec = irq_to_vector(irq); - struct iosapic_rte_info *rte; - - move_native_irq(irq); - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) - iosapic_eoi(rte->addr, vec); -} - -#define iosapic_shutdown_level_irq mask_irq -#define iosapic_enable_level_irq unmask_irq -#define iosapic_disable_level_irq mask_irq -#define iosapic_ack_level_irq nop - -struct hw_interrupt_type irq_type_iosapic_level = { - .typename = "IO-SAPIC-level", - .startup = iosapic_startup_level_irq, - .shutdown = iosapic_shutdown_level_irq, - .enable = iosapic_enable_level_irq, - .disable = iosapic_disable_level_irq, - .ack = iosapic_ack_level_irq, - .end = iosapic_end_level_irq, - .set_affinity = iosapic_set_affinity -}; - -/* - * Handlers for edge-triggered interrupts. - */ - -static unsigned int -iosapic_startup_edge_irq (unsigned int irq) -{ - unmask_irq(irq); - /* - * IOSAPIC simply drops interrupts pended while the - * corresponding pin was masked, so we can't know if an - * interrupt is pending already. Let's hope not... - */ - return 0; -} - -static void -iosapic_ack_edge_irq (unsigned int irq) -{ - irq_desc_t *idesc = irq_desc + irq; - - move_native_irq(irq); - /* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. This prevents IRQ storms from unhandled - * devices. - */ - if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == - (IRQ_PENDING|IRQ_DISABLED)) - mask_irq(irq); -} - -#define iosapic_enable_edge_irq unmask_irq -#define iosapic_disable_edge_irq nop -#define iosapic_end_edge_irq nop - -struct hw_interrupt_type irq_type_iosapic_edge = { - .typename = "IO-SAPIC-edge", - .startup = iosapic_startup_edge_irq, - .shutdown = iosapic_disable_edge_irq, - .enable = iosapic_enable_edge_irq, - .disable = iosapic_disable_edge_irq, - .ack = iosapic_ack_edge_irq, - .end = iosapic_end_edge_irq, - .set_affinity = iosapic_set_affinity -}; - -unsigned int -iosapic_version (char __iomem *addr) -{ - /* - * IOSAPIC Version Register return 32 bit structure like: - * { - * unsigned int version : 8; - * unsigned int reserved1 : 8; - * unsigned int max_redir : 8; - * unsigned int reserved2 : 8; - * } - */ - return iosapic_read(addr, IOSAPIC_VERSION); -} - -static int iosapic_find_sharable_vector (unsigned long trigger, - unsigned long pol) -{ - int i, vector = -1, min_count = -1; - struct iosapic_intr_info *info; - - /* - * shared vectors for edge-triggered interrupts are not - * supported yet - */ - if (trigger == IOSAPIC_EDGE) - return -1; - - for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) { - info = &iosapic_intr_info[i]; - if (info->trigger == trigger && info->polarity == pol && - (info->dmode == IOSAPIC_FIXED || info->dmode == - IOSAPIC_LOWEST_PRIORITY)) { - if (min_count == -1 || info->count < min_count) { - vector = i; - min_count = info->count; - } - } - } - - return vector; -} - -/* - * if the given vector is already owned by other, - * assign a new vector for the other and make the vector available - */ -static void __init -iosapic_reassign_vector (int vector) -{ - int new_vector; - - if (!list_empty(&iosapic_intr_info[vector].rtes)) { - new_vector = assign_irq_vector(AUTO_ASSIGN); - if (new_vector < 0) - panic("%s: out of interrupt vectors!\n", __FUNCTION__); - printk(KERN_INFO "Reassigning vector %d to %d\n", - vector, new_vector); - memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], - sizeof(struct iosapic_intr_info)); - INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes); - list_move(iosapic_intr_info[vector].rtes.next, - &iosapic_intr_info[new_vector].rtes); - memset(&iosapic_intr_info[vector], 0, - sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].low32 = IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); - } -} - -static struct iosapic_rte_info *iosapic_alloc_rte (void) -{ - int i; - struct iosapic_rte_info *rte; - int preallocated = 0; - - if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { - rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * - NR_PREALLOCATE_RTE_ENTRIES); - if (!rte) - return NULL; - for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) - list_add(&rte->rte_list, &free_rte_list); - } - - if (!list_empty(&free_rte_list)) { - rte = list_entry(free_rte_list.next, struct iosapic_rte_info, - rte_list); - list_del(&rte->rte_list); - preallocated++; - } else { - rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC); - if (!rte) - return NULL; - } - - memset(rte, 0, sizeof(struct iosapic_rte_info)); - if (preallocated) - rte->flags |= RTE_PREALLOCATED; - - return rte; -} - -static void iosapic_free_rte (struct iosapic_rte_info *rte) -{ - if (rte->flags & RTE_PREALLOCATED) - list_add_tail(&rte->rte_list, &free_rte_list); - else - kfree(rte); -} - -static inline int vector_is_shared (int vector) -{ - return (iosapic_intr_info[vector].count > 1); -} - -static int -register_intr (unsigned int gsi, int vector, unsigned char delivery, - unsigned long polarity, unsigned long trigger) -{ - irq_desc_t *idesc; - struct hw_interrupt_type *irq_type; - int rte_index; - int index; - unsigned long gsi_base; - void __iomem *iosapic_address; - struct iosapic_rte_info *rte; - - index = find_iosapic(gsi); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", - __FUNCTION__, gsi); - return -ENODEV; - } - - iosapic_address = iosapic_lists[index].addr; - gsi_base = iosapic_lists[index].gsi_base; - - rte = gsi_vector_to_rte(gsi, vector); - if (!rte) { - rte = iosapic_alloc_rte(); - if (!rte) { - printk(KERN_WARNING "%s: cannot allocate memory\n", - __FUNCTION__); - return -ENOMEM; - } - - rte_index = gsi - gsi_base; - rte->rte_index = rte_index; - rte->addr = iosapic_address; - rte->gsi_base = gsi_base; - rte->refcnt++; - list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes); - iosapic_intr_info[vector].count++; - iosapic_lists[index].rtes_inuse++; - } - else if (vector_is_shared(vector)) { - struct iosapic_intr_info *info = &iosapic_intr_info[vector]; - if (info->trigger != trigger || info->polarity != polarity) { - printk (KERN_WARNING - "%s: cannot override the interrupt\n", - __FUNCTION__); - return -EINVAL; - } - } - - iosapic_intr_info[vector].polarity = polarity; - iosapic_intr_info[vector].dmode = delivery; - iosapic_intr_info[vector].trigger = trigger; - - if (is_running_on_xen()) - return 0; - - if (trigger == IOSAPIC_EDGE) - irq_type = &irq_type_iosapic_edge; - else - irq_type = &irq_type_iosapic_level; - - idesc = irq_desc + vector; - if (idesc->chip != irq_type) { - if (idesc->chip != &no_irq_type) - printk(KERN_WARNING - "%s: changing vector %d from %s to %s\n", - __FUNCTION__, vector, - idesc->chip->typename, irq_type->typename); - idesc->chip = irq_type; - } - return 0; -} - -static unsigned int -get_target_cpu (unsigned int gsi, int vector) -{ -#ifdef CONFIG_SMP - static int cpu = -1; - extern int cpe_vector; - - /* - * In case of vector shared by multiple RTEs, all RTEs that - * share the vector need to use the same destination CPU. - */ - if (!list_empty(&iosapic_intr_info[vector].rtes)) - return iosapic_intr_info[vector].dest; - - /* - * If the platform supports redirection via XTP, let it - * distribute interrupts. - */ - if (smp_int_redirect & SMP_IRQ_REDIRECTION) - return cpu_physical_id(smp_processor_id()); - - /* - * Some interrupts (ACPI SCI, for instance) are registered - * before the BSP is marked as online. - */ - if (!cpu_online(smp_processor_id())) - return cpu_physical_id(smp_processor_id()); - -#ifdef CONFIG_ACPI - if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) - return get_cpei_target_cpu(); -#endif - -#ifdef CONFIG_NUMA - { - int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; - cpumask_t cpu_mask; - - iosapic_index = find_iosapic(gsi); - if (iosapic_index < 0 || - iosapic_lists[iosapic_index].node == MAX_NUMNODES) - goto skip_numa_setup; - - cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); - - for_each_cpu_mask(numa_cpu, cpu_mask) { - if (!cpu_online(numa_cpu)) - cpu_clear(numa_cpu, cpu_mask); - } - - num_cpus = cpus_weight(cpu_mask); - - if (!num_cpus) - goto skip_numa_setup; - - /* Use vector assignment to distribute across cpus in node */ - cpu_index = vector % num_cpus; - - for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) - numa_cpu = next_cpu(numa_cpu, cpu_mask); - - if (numa_cpu != NR_CPUS) - return cpu_physical_id(numa_cpu); - } -skip_numa_setup: -#endif - /* - * Otherwise, round-robin interrupt vectors across all the - * processors. (It'd be nice if we could be smarter in the - * case of NUMA.) - */ - do { - if (++cpu >= NR_CPUS) - cpu = 0; - } while (!cpu_online(cpu)); - - return cpu_physical_id(cpu); -#else /* CONFIG_SMP */ - return cpu_physical_id(smp_processor_id()); -#endif -} - -/* - * ACPI can describe IOSAPIC interrupts via static tables and namespace - * methods. This provides an interface to register those interrupts and - * program the IOSAPIC RTE. - */ -int -iosapic_register_intr (unsigned int gsi, - unsigned long polarity, unsigned long trigger) -{ - int vector, mask = 1, err; - unsigned int dest; - unsigned long flags; - struct iosapic_rte_info *rte; - u32 low32; -again: - /* - * If this GSI has already been registered (i.e., it's a - * shared interrupt, or we lost a race to register it), - * don't touch the RTE. - */ - spin_lock_irqsave(&iosapic_lock, flags); - { - vector = gsi_to_vector(gsi); - if (vector > 0) { - rte = gsi_vector_to_rte(gsi, vector); - rte->refcnt++; - spin_unlock_irqrestore(&iosapic_lock, flags); - return vector; - } - } - spin_unlock_irqrestore(&iosapic_lock, flags); - - /* If vector is running out, we try to find a sharable vector */ - vector = assign_irq_vector(AUTO_ASSIGN); - if (vector < 0) { - vector = iosapic_find_sharable_vector(trigger, polarity); - if (vector < 0) - return -ENOSPC; - } - - spin_lock_irqsave(&irq_desc[vector].lock, flags); - spin_lock(&iosapic_lock); - { - if (gsi_to_vector(gsi) > 0) { - if (list_empty(&iosapic_intr_info[vector].rtes)) - free_irq_vector(vector); - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, - flags); - goto again; - } - - dest = get_target_cpu(gsi, vector); - err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, - polarity, trigger); - if (err < 0) { - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, - flags); - return err; - } - - /* - * If the vector is shared and already unmasked for - * other interrupt sources, don't mask it. - */ - low32 = iosapic_intr_info[vector].low32; - if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK)) - mask = 0; - set_rte(gsi, vector, dest, mask); - } - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, flags); - - printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); - - return vector; -} - -void -iosapic_unregister_intr (unsigned int gsi) -{ - unsigned long flags; - int irq, vector, index; - irq_desc_t *idesc; - u32 low32; - unsigned long trigger, polarity; - unsigned int dest; - struct iosapic_rte_info *rte; - - /* - * If the irq associated with the gsi is not found, - * iosapic_unregister_intr() is unbalanced. We need to check - * this again after getting locks. - */ - irq = gsi_to_irq(gsi); - if (irq < 0) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", - gsi); - WARN_ON(1); - return; - } - vector = irq_to_vector(irq); - - idesc = irq_desc + irq; - spin_lock_irqsave(&idesc->lock, flags); - spin_lock(&iosapic_lock); - { - if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) { - printk(KERN_ERR - "iosapic_unregister_intr(%u) unbalanced\n", - gsi); - WARN_ON(1); - goto out; - } - - if (--rte->refcnt > 0) - goto out; - - /* Mask the interrupt */ - low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; - iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), - low32); - - /* Remove the rte entry from the list */ - list_del(&rte->rte_list); - iosapic_intr_info[vector].count--; - iosapic_free_rte(rte); - index = find_iosapic(gsi); - iosapic_lists[index].rtes_inuse--; - WARN_ON(iosapic_lists[index].rtes_inuse < 0); - - trigger = iosapic_intr_info[vector].trigger; - polarity = iosapic_intr_info[vector].polarity; - dest = iosapic_intr_info[vector].dest; - printk(KERN_INFO - "GSI %u (%s, %s) -> CPU %d (0x%04x)" - " vector %d unregistered\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); - - if (list_empty(&iosapic_intr_info[vector].rtes)) { - /* Sanity check */ - BUG_ON(iosapic_intr_info[vector].count); - - /* Clear the interrupt controller descriptor */ - idesc->chip = &no_irq_type; - - /* Clear the interrupt information */ - memset(&iosapic_intr_info[vector], 0, - sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].low32 |= IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); - - if (idesc->action) { - printk(KERN_ERR - "interrupt handlers still exist on" - "IRQ %u\n", irq); - WARN_ON(1); - } - - /* Free the interrupt vector */ - free_irq_vector(vector); - } - } - out: - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); -} - -/* - * ACPI calls this when it finds an entry for a platform interrupt. - */ -int __init -iosapic_register_platform_intr (u32 int_type, unsigned int gsi, - int iosapic_vector, u16 eid, u16 id, - unsigned long polarity, unsigned long trigger) -{ - static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; - unsigned char delivery; - int vector, mask = 0; - unsigned int dest = ((id << 8) | eid) & 0xffff; - - switch (int_type) { - case ACPI_INTERRUPT_PMI: - vector = iosapic_vector; - /* - * since PMI vector is alloc'd by FW(ACPI) not by kernel, - * we need to make sure the vector is available - */ - iosapic_reassign_vector(vector); - delivery = IOSAPIC_PMI; - break; - case ACPI_INTERRUPT_INIT: - vector = assign_irq_vector(AUTO_ASSIGN); - if (vector < 0) - panic("%s: out of interrupt vectors!\n", __FUNCTION__); - delivery = IOSAPIC_INIT; - break; - case ACPI_INTERRUPT_CPEI: - vector = IA64_CPE_VECTOR; - delivery = IOSAPIC_LOWEST_PRIORITY; - mask = 1; - break; - default: - printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__, - int_type); - return -1; - } - - register_intr(gsi, vector, delivery, polarity, trigger); - - printk(KERN_INFO - "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" - " vector %d\n", - int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown", - int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); - - set_rte(gsi, vector, dest, mask); - return vector; -} - -/* - * ACPI calls this when it finds an entry for a legacy ISA IRQ override. - */ -void __init -iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, - unsigned long polarity, - unsigned long trigger) -{ - int vector; - unsigned int dest = cpu_physical_id(smp_processor_id()); - - vector = isa_irq_to_vector(isa_irq); - - register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); - - DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", - isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", - polarity == IOSAPIC_POL_HIGH ? "high" : "low", - cpu_logical_id(dest), dest, vector); - - set_rte(gsi, vector, dest, 1); -} - -void __init -iosapic_system_init (int system_pcat_compat) -{ - int vector; - - for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) { - iosapic_intr_info[vector].low32 = IOSAPIC_MASK; - /* mark as unused */ - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); - } - - pcat_compat = system_pcat_compat; - if (is_running_on_xen()) - return; - - if (pcat_compat) { - /* - * Disable the compatibility mode interrupts (8259 style), - * needs IN/OUT support enabled. - */ - printk(KERN_INFO - "%s: Disabling PC-AT compatible 8259 interrupts\n", - __FUNCTION__); - outb(0xff, 0xA1); - outb(0xff, 0x21); - } -} - -static inline int -iosapic_alloc (void) -{ - int index; - - for (index = 0; index < NR_IOSAPICS; index++) - if (!iosapic_lists[index].addr) - return index; - - printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__); - return -1; -} - -static inline void -iosapic_free (int index) -{ - memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0])); -} - -static inline int -iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) -{ - int index; - unsigned int gsi_end, base, end; - - /* check gsi range */ - gsi_end = gsi_base + ((ver >> 16) & 0xff); - for (index = 0; index < NR_IOSAPICS; index++) { - if (!iosapic_lists[index].addr) - continue; - - base = iosapic_lists[index].gsi_base; - end = base + iosapic_lists[index].num_rte - 1; - - if (gsi_end < base || end < gsi_base) - continue; /* OK */ - - return -EBUSY; - } - return 0; -} - -int __devinit -iosapic_init (unsigned long phys_addr, unsigned int gsi_base) -{ - int num_rte, err, index; - unsigned int isa_irq, ver; - char __iomem *addr; - unsigned long flags; - - spin_lock_irqsave(&iosapic_lock, flags); - { - addr = ioremap(phys_addr, 0); - ver = iosapic_version(addr); - - if ((err = iosapic_check_gsi_range(gsi_base, ver))) { - iounmap(addr); - spin_unlock_irqrestore(&iosapic_lock, flags); - return err; - } - - /* - * The MAX_REDIR register holds the highest input pin - * number (starting from 0). - * We add 1 so that we can use it for number of pins (= RTEs) - */ - num_rte = ((ver >> 16) & 0xff) + 1; - - index = iosapic_alloc(); - iosapic_lists[index].addr = addr; - iosapic_lists[index].gsi_base = gsi_base; - iosapic_lists[index].num_rte = num_rte; -#ifdef CONFIG_NUMA - iosapic_lists[index].node = MAX_NUMNODES; -#endif - } - spin_unlock_irqrestore(&iosapic_lock, flags); - - if ((gsi_base == 0) && pcat_compat) { - /* - * Map the legacy ISA devices into the IOSAPIC data. Some of - * these may get reprogrammed later on with data from the ACPI - * Interrupt Source Override table. - */ - for (isa_irq = 0; isa_irq < 16; ++isa_irq) - iosapic_override_isa_irq(isa_irq, isa_irq, - IOSAPIC_POL_HIGH, - IOSAPIC_EDGE); - } - return 0; -} - -#ifdef CONFIG_HOTPLUG -int -iosapic_remove (unsigned int gsi_base) -{ - int index, err = 0; - unsigned long flags; - - spin_lock_irqsave(&iosapic_lock, flags); - { - index = find_iosapic(gsi_base); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", - __FUNCTION__, gsi_base); - goto out; - } - - if (iosapic_lists[index].rtes_inuse) { - err = -EBUSY; - printk(KERN_WARNING - "%s: IOSAPIC for GSI base %u is busy\n", - __FUNCTION__, gsi_base); - goto out; - } - - iounmap(iosapic_lists[index].addr); - iosapic_free(index); - } - out: - spin_unlock_irqrestore(&iosapic_lock, flags); - return err; -} -#endif /* CONFIG_HOTPLUG */ - -#ifdef CONFIG_NUMA -void __devinit -map_iosapic_to_node(unsigned int gsi_base, int node) -{ - int index; - - index = find_iosapic(gsi_base); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", - __FUNCTION__, gsi_base); - return; - } - iosapic_lists[index].node = node; - return; -} -#endif - -static int __init iosapic_enable_kmalloc (void) -{ - iosapic_kmalloc_ok = 1; - return 0; -} -core_initcall (iosapic_enable_kmalloc); diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c b/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c deleted file mode 100644 index 5a9db93417..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c +++ /dev/null @@ -1,649 +0,0 @@ -/* - * linux/arch/ia64/kernel/irq.c - * - * Copyright (C) 1998-2001 Hewlett-Packard Co - * Stephane Eranian <eranian@hpl.hp.com> - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 6/10/99: Updated to bring in sync with x86 version to facilitate - * support for SMP and different interrupt controllers. - * - * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector - * PCI to vector allocation routine. - * 04/14/2004 Ashok Raj <ashok.raj@intel.com> - * Added CPU Hotplug handling for IPF. - */ - -#include <linux/module.h> - -#include <linux/jiffies.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/kernel_stat.h> -#include <linux/slab.h> -#include <linux/ptrace.h> -#include <linux/random.h> /* for rand_initialize_irq() */ -#include <linux/signal.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/threads.h> -#include <linux/bitops.h> -#ifdef CONFIG_XEN -#include <linux/cpu.h> -#endif - -#include <asm/delay.h> -#include <asm/intrinsics.h> -#include <asm/io.h> -#include <asm/hw_irq.h> -#include <asm/machvec.h> -#include <asm/pgtable.h> -#include <asm/system.h> - -#ifdef CONFIG_PERFMON -# include <asm/perfmon.h> -#endif - -#define IRQ_DEBUG 0 - -/* These can be overridden in platform_irq_init */ -int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; -int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; - -/* default base addr of IPI table */ -void __iomem *ipi_base_addr = ((void __iomem *) - (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); - -/* - * Legacy IRQ to IA-64 vector translation table. - */ -__u8 isa_irq_to_vector_map[16] = { - /* 8259 IRQ translation, first 16 entries */ - 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, - 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21 -}; -EXPORT_SYMBOL(isa_irq_to_vector_map); - -static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)]; - -int -assign_irq_vector (int irq) -{ - int pos, vector; - -#ifdef CONFIG_XEN - if (is_running_on_xen()) { - extern int xen_assign_irq_vector(int); - return xen_assign_irq_vector(irq); - } -#endif - again: - pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS); - vector = IA64_FIRST_DEVICE_VECTOR + pos; - if (vector > IA64_LAST_DEVICE_VECTOR) - return -ENOSPC; - if (test_and_set_bit(pos, ia64_vector_mask)) - goto again; - return vector; -} - -void -free_irq_vector (int vector) -{ - int pos; - - if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) - return; - -#ifdef CONFIG_XEN - if (is_running_on_xen()) { - extern void xen_free_irq_vector(int); - xen_free_irq_vector(vector); - return; - } -#endif - pos = vector - IA64_FIRST_DEVICE_VECTOR; - if (!test_and_clear_bit(pos, ia64_vector_mask)) - printk(KERN_WARNING "%s: double free!\n", __FUNCTION__); -} - -int -reserve_irq_vector (int vector) -{ - int pos; - - if (vector < IA64_FIRST_DEVICE_VECTOR || - vector > IA64_LAST_DEVICE_VECTOR) - return -EINVAL; - - pos = vector - IA64_FIRST_DEVICE_VECTOR; - return test_and_set_bit(pos, ia64_vector_mask); -} - -#ifdef CONFIG_SMP -# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) -#else -# define IS_RESCHEDULE(vec) (0) -#endif -/* - * That's where the IVT branches when we get an external - * interrupt. This branches to the correct hardware IRQ handler via - * function ptr. - */ -void -ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) -{ - unsigned long saved_tpr; - -#if IRQ_DEBUG - { - unsigned long bsp, sp; - - /* - * Note: if the interrupt happened while executing in - * the context switch routine (ia64_switch_to), we may - * get a spurious stack overflow here. This is - * because the register and the memory stack are not - * switched atomically. - */ - bsp = ia64_getreg(_IA64_REG_AR_BSP); - sp = ia64_getreg(_IA64_REG_SP); - - if ((sp - bsp) < 1024) { - static unsigned char count; - static long last_time; - - if (jiffies - last_time > 5*HZ) - count = 0; - if (++count < 5) { - last_time = jiffies; - printk("ia64_handle_irq: DANGER: less than " - "1KB of free stack space!!\n" - "(bsp=0x%lx, sp=%lx)\n", bsp, sp); - } - } - } -#endif /* IRQ_DEBUG */ - - /* - * Always set TPR to limit maximum interrupt nesting depth to - * 16 (without this, it would be ~240, which could easily lead - * to kernel stack overflows). - */ - irq_enter(); - saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); - ia64_srlz_d(); - while (vector != IA64_SPURIOUS_INT_VECTOR) { - if (!IS_RESCHEDULE(vector)) { - ia64_setreg(_IA64_REG_CR_TPR, vector); - ia64_srlz_d(); - - __do_IRQ(local_vector_to_irq(vector), regs); - - /* - * Disable interrupts and send EOI: - */ - local_irq_disable(); - ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); - } - ia64_eoi(); - vector = ia64_get_ivr(); - } - /* - * This must be done *after* the ia64_eoi(). For example, the keyboard softirq - * handler needs to be able to wait for further keyboard interrupts, which can't - * come through until ia64_eoi() has been done. - */ - irq_exit(); -} - -#ifdef CONFIG_HOTPLUG_CPU -/* - * This function emulates a interrupt processing when a cpu is about to be - * brought down. - */ -void ia64_process_pending_intr(void) -{ - ia64_vector vector; - unsigned long saved_tpr; - extern unsigned int vectors_in_migration[NR_IRQS]; - - vector = ia64_get_ivr(); - - irq_enter(); - saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); - ia64_srlz_d(); - - /* - * Perform normal interrupt style processing - */ - while (vector != IA64_SPURIOUS_INT_VECTOR) { - if (!IS_RESCHEDULE(vector)) { - ia64_setreg(_IA64_REG_CR_TPR, vector); - ia64_srlz_d(); - - /* - * Now try calling normal ia64_handle_irq as it would have got called - * from a real intr handler. Try passing null for pt_regs, hopefully - * it will work. I hope it works!. - * Probably could shared code. - */ - vectors_in_migration[local_vector_to_irq(vector)]=0; - __do_IRQ(local_vector_to_irq(vector), NULL); - - /* - * Disable interrupts and send EOI - */ - local_irq_disable(); - ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); - } - ia64_eoi(); - vector = ia64_get_ivr(); - } - irq_exit(); -} -#endif - - -#ifdef CONFIG_SMP -extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs); - -static struct irqaction ipi_irqaction = { - .handler = handle_IPI, - .flags = IRQF_DISABLED, - .name = "IPI" -}; -#endif - -#ifdef CONFIG_XEN -#include <xen/evtchn.h> -#include <xen/interface/callback.h> - -static DEFINE_PER_CPU(int, timer_irq) = -1; -static DEFINE_PER_CPU(int, ipi_irq) = -1; -static DEFINE_PER_CPU(int, resched_irq) = -1; -static DEFINE_PER_CPU(int, cmc_irq) = -1; -static DEFINE_PER_CPU(int, cmcp_irq) = -1; -static DEFINE_PER_CPU(int, cpep_irq) = -1; -static char timer_name[NR_CPUS][15]; -static char ipi_name[NR_CPUS][15]; -static char resched_name[NR_CPUS][15]; -static char cmc_name[NR_CPUS][15]; -static char cmcp_name[NR_CPUS][15]; -static char cpep_name[NR_CPUS][15]; - -struct saved_irq { - unsigned int irq; - struct irqaction *action; -}; -/* 16 should be far optimistic value, since only several percpu irqs - * are registered early. - */ -#define MAX_LATE_IRQ 16 -static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ]; -static unsigned short late_irq_cnt = 0; -static unsigned short saved_irq_cnt = 0; -static int xen_slab_ready = 0; - -#ifdef CONFIG_SMP -/* Dummy stub. Though we may check RESCHEDULE_VECTOR before __do_IRQ, - * it ends up to issue several memory accesses upon percpu data and - * thus adds unnecessary traffic to other paths. - */ -static irqreturn_t -handle_reschedule(int irq, void *dev_id, struct pt_regs *regs) -{ - - return IRQ_HANDLED; -} - -static struct irqaction resched_irqaction = { - .handler = handle_reschedule, - .flags = SA_INTERRUPT, - .name = "RESCHED" -}; -#endif - -/* - * This is xen version percpu irq registration, which needs bind - * to xen specific evtchn sub-system. One trick here is that xen - * evtchn binding interface depends on kmalloc because related - * port needs to be freed at device/cpu down. So we cache the - * registration on BSP before slab is ready and then deal them - * at later point. For rest instances happening after slab ready, - * we hook them to xen evtchn immediately. - * - * FIXME: MCA is not supported by far, and thus "nomca" boot param is - * required. - */ -static void -xen_register_percpu_irq(unsigned int cpu, unsigned int vec, - struct irqaction *action, int save) -{ - irq_desc_t *desc; - int irq = 0; - - if (xen_slab_ready) { - switch (vec) { - case IA64_TIMER_VECTOR: - sprintf(timer_name[cpu], "%s%d", action->name, cpu); - irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu, - action->handler, action->flags, - timer_name[cpu], action->dev_id); - per_cpu(timer_irq,cpu) = irq; - break; - case IA64_IPI_RESCHEDULE: - sprintf(resched_name[cpu], "%s%d", action->name, cpu); - irq = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, cpu, - action->handler, action->flags, - resched_name[cpu], action->dev_id); - per_cpu(resched_irq,cpu) = irq; - break; - case IA64_IPI_VECTOR: - sprintf(ipi_name[cpu], "%s%d", action->name, cpu); - irq = bind_ipi_to_irqhandler(IPI_VECTOR, cpu, - action->handler, action->flags, - ipi_name[cpu], action->dev_id); - per_cpu(ipi_irq,cpu) = irq; - break; - case IA64_CMC_VECTOR: - sprintf(cmc_name[cpu], "%s%d", action->name, cpu); - irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu, - action->handler, - action->flags, - cmc_name[cpu], - action->dev_id); - per_cpu(cmc_irq,cpu) = irq; - break; - case IA64_CMCP_VECTOR: - sprintf(cmcp_name[cpu], "%s%d", action->name, cpu); - irq = bind_ipi_to_irqhandler(CMCP_VECTOR, cpu, - action->handler, - action->flags, - cmcp_name[cpu], - action->dev_id); - per_cpu(cmcp_irq,cpu) = irq; - break; - case IA64_CPEP_VECTOR: - sprintf(cpep_name[cpu], "%s%d", action->name, cpu); - irq = bind_ipi_to_irqhandler(CPEP_VECTOR, cpu, - action->handler, - action->flags, - cpep_name[cpu], - action->dev_id); - per_cpu(cpep_irq,cpu) = irq; - break; - case IA64_CPE_VECTOR: - case IA64_MCA_RENDEZ_VECTOR: - case IA64_PERFMON_VECTOR: - case IA64_MCA_WAKEUP_VECTOR: - case IA64_SPURIOUS_INT_VECTOR: - /* No need to complain, these aren't supported. */ - break; - default: - printk(KERN_WARNING "Percpu irq %d is unsupported " - "by xen!\n", vec); - break; - } - BUG_ON(irq < 0); - - if (irq > 0) { - /* - * Mark percpu. Without this, migrate_irqs() will - * mark the interrupt for migrations and trigger it - * on cpu hotplug. - */ - desc = irq_desc + irq; - desc->status |= IRQ_PER_CPU; - } - } - - /* For BSP, we cache registered percpu irqs, and then re-walk - * them when initializing APs - */ - if (!cpu && save) { - BUG_ON(saved_irq_cnt == MAX_LATE_IRQ); - saved_percpu_irqs[saved_irq_cnt].irq = vec; - saved_percpu_irqs[saved_irq_cnt].action = action; - saved_irq_cnt++; - if (!xen_slab_ready) - late_irq_cnt++; - } -} - -static void -xen_bind_early_percpu_irq (void) -{ - int i; - - xen_slab_ready = 1; - /* There's no race when accessing this cached array, since only - * BSP will face with such step shortly - */ - for (i = 0; i < late_irq_cnt; i++) - xen_register_percpu_irq(smp_processor_id(), - saved_percpu_irqs[i].irq, - saved_percpu_irqs[i].action, 0); -} - -/* FIXME: There's no obvious point to check whether slab is ready. So - * a hack is used here by utilizing a late time hook. - */ -extern void (*late_time_init)(void); -extern char xen_event_callback; -extern void xen_init_IRQ(void); - -#ifdef CONFIG_HOTPLUG_CPU -static int __devinit -unbind_evtchn_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - if (action == CPU_DEAD) { - /* Unregister evtchn. */ - if (per_cpu(cpep_irq,cpu) >= 0) { - unbind_from_irqhandler(per_cpu(cpep_irq, cpu), NULL); - per_cpu(cpep_irq, cpu) = -1; - } - if (per_cpu(cmcp_irq,cpu) >= 0) { - unbind_from_irqhandler(per_cpu(cmcp_irq, cpu), NULL); - per_cpu(cmcp_irq, cpu) = -1; - } - if (per_cpu(cmc_irq,cpu) >= 0) { - unbind_from_irqhandler(per_cpu(cmc_irq, cpu), NULL); - per_cpu(cmc_irq, cpu) = -1; - } - if (per_cpu(ipi_irq,cpu) >= 0) { - unbind_from_irqhandler (per_cpu(ipi_irq, cpu), NULL); - per_cpu(ipi_irq, cpu) = -1; - } - if (per_cpu(resched_irq,cpu) >= 0) { - unbind_from_irqhandler (per_cpu(resched_irq, cpu), - NULL); - per_cpu(resched_irq, cpu) = -1; - } - if (per_cpu(timer_irq,cpu) >= 0) { - unbind_from_irqhandler (per_cpu(timer_irq, cpu), NULL); - per_cpu(timer_irq, cpu) = -1; - } - } - return NOTIFY_OK; -} - -static struct notifier_block unbind_evtchn_notifier = { - .notifier_call = unbind_evtchn_callback, - .priority = 0 -}; -#endif - -DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); -void xen_smp_intr_init_early(unsigned int cpu) -{ -#ifdef CONFIG_SMP - unsigned int i; - - for (i = 0; i < saved_irq_cnt; i++) - xen_register_percpu_irq(cpu, saved_percpu_irqs[i].irq, - saved_percpu_irqs[i].action, 0); -#endif -} - -void xen_smp_intr_init(void) -{ -#ifdef CONFIG_SMP - unsigned int cpu = smp_processor_id(); - struct callback_register event = { - .type = CALLBACKTYPE_event, - .address = (unsigned long)&xen_event_callback, - }; - - if (cpu == 0) { - /* Initialization was already done for boot cpu. */ -#ifdef CONFIG_HOTPLUG_CPU - /* Register the notifier only once. */ - register_cpu_notifier(&unbind_evtchn_notifier); -#endif - return; - } - - /* This should be piggyback when setup vcpu guest context */ - BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event)); -#endif /* CONFIG_SMP */ -} - -void -xen_irq_init(void) -{ - struct callback_register event = { - .type = CALLBACKTYPE_event, - .address = (unsigned long)&xen_event_callback, - }; - - xen_init_IRQ(); - BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event)); - late_time_init = xen_bind_early_percpu_irq; -#ifdef CONFIG_SMP - register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); -#endif -} - -void -xen_platform_send_ipi(int cpu, int vector, int delivery_mode, int redirect) -{ - int irq = -1; - -#ifdef CONFIG_SMP - /* TODO: we need to call vcpu_up here */ - if (unlikely(vector == ap_wakeup_vector)) { - extern void xen_send_ipi (int cpu, int vec); - - /* XXX - * This should be in __cpu_up(cpu) in ia64 smpboot.c - * like x86. But don't want to modify it, - * keep it untouched. - */ - xen_smp_intr_init_early(cpu); - - xen_send_ipi (cpu, vector); - //vcpu_prepare_and_up(cpu); - return; - } -#endif - - switch (vector) { - case IA64_IPI_VECTOR: - irq = per_cpu(ipi_to_irq, cpu)[IPI_VECTOR]; - break; - case IA64_IPI_RESCHEDULE: - irq = per_cpu(ipi_to_irq, cpu)[RESCHEDULE_VECTOR]; - break; - case IA64_CMCP_VECTOR: - irq = per_cpu(ipi_to_irq, cpu)[CMCP_VECTOR]; - break; - case IA64_CPEP_VECTOR: - irq = per_cpu(ipi_to_irq, cpu)[CPEP_VECTOR]; - break; - default: - printk(KERN_WARNING "Unsupported IPI type 0x%x\n", - vector); - irq = 0; - break; - } - - BUG_ON(irq < 0); - notify_remote_via_irq(irq); - return; -} -#endif /* CONFIG_XEN */ - -void -register_percpu_irq (ia64_vector vec, struct irqaction *action) -{ - irq_desc_t *desc; - unsigned int irq; - -#ifdef CONFIG_XEN - if (is_running_on_xen()) - return xen_register_percpu_irq(smp_processor_id(), - vec, action, 1); -#endif - - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == vec) { - desc = irq_desc + irq; - desc->status |= IRQ_PER_CPU; - desc->chip = &irq_type_ia64_lsapic; - if (action) - setup_irq(irq, action); - } -} - -void __init -init_IRQ (void) -{ - register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); -#ifdef CONFIG_SMP - register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); -#endif -#ifdef CONFIG_PERFMON - pfm_init_percpu(); -#endif - platform_irq_init(); -#ifdef CONFIG_XEN - if (is_running_on_xen() && !ia64_platform_is("xen")) - xen_irq_init(); -#endif -} - -void -ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) -{ - void __iomem *ipi_addr; - unsigned long ipi_data; - unsigned long phys_cpu_id; - -#ifdef CONFIG_XEN - if (is_running_on_xen()) { - xen_platform_send_ipi(cpu, vector, delivery_mode, redirect); - return; - } -#endif - -#ifdef CONFIG_SMP - phys_cpu_id = cpu_physical_id(cpu); -#else - phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff; -#endif - - /* - * cpu number is in 8bit ID and 8bit EID - */ - - ipi_data = (delivery_mode << 8) | (vector & 0xff); - ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3)); - - writeq(ipi_data, ipi_addr); -} diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S b/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S deleted file mode 100644 index af5cc0bc41..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S +++ /dev/null @@ -1,303 +0,0 @@ -/* - * PAL Firmware support - * IA-64 Processor Programmers Reference Vol 2 - * - * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> - * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> - * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co - * David Mosberger <davidm@hpl.hp.com> - * Stephane Eranian <eranian@hpl.hp.com> - * - * 05/22/2000 eranian Added support for stacked register calls - * 05/24/2000 eranian Added support for physical mode static calls - */ - -#include <asm/asmmacro.h> -#include <asm/processor.h> - - .data - .globl pal_entry_point -pal_entry_point: - data8 ia64_pal_default_handler - .text - -/* - * Set the PAL entry point address. This could be written in C code, but we do it here - * to keep it all in one module (besides, it's so trivial that it's - * not a big deal). - * - * in0 Address of the PAL entry point (text address, NOT a function descriptor). - */ -GLOBAL_ENTRY(ia64_pal_handler_init) - alloc r3=ar.pfs,1,0,0,0 - movl r2=pal_entry_point - ;; - st8 [r2]=in0 - br.ret.sptk.many rp -END(ia64_pal_handler_init) - -/* - * Default PAL call handler. This needs to be coded in assembly because it uses - * the static calling convention, i.e., the RSE may not be used and calls are - * done via "br.cond" (not "br.call"). - */ -GLOBAL_ENTRY(ia64_pal_default_handler) - mov r8=-1 - br.cond.sptk.many rp -END(ia64_pal_default_handler) - -/* - * Make a PAL call using the static calling convention. - * - * in0 Index of PAL service - * in1 - in3 Remaining PAL arguments - * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic - * - */ -GLOBAL_ENTRY(__ia64_pal_call_static) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) - alloc loc1 = ar.pfs,5,5,0,0 - movl loc2 = pal_entry_point -1: { - mov r28 = in0 - mov r29 = in1 - mov r8 = ip - } - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - tbit.nz p6,p7 = in4, 0 - adds r8 = 1f-1b,r8 - mov loc4=ar.rsc // save RSE configuration - ;; - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov loc3 = psr - mov loc0 = rp - .body - mov r30 = in2 - -(p6) rsm psr.i | psr.ic - mov r31 = in3 - mov b7 = loc2 - -(p7) rsm psr.i - ;; -(p6) srlz.i - mov rp = r8 - br.cond.sptk.many b7 -1: mov psr.l = loc3 - mov ar.rsc = loc4 // restore RSE configuration - mov ar.pfs = loc1 - mov rp = loc0 - ;; - srlz.d // seralize restoration of psr.l - br.ret.sptk.many b0 -END(__ia64_pal_call_static) - -/* - * Make a PAL call using the stacked registers calling convention. - * - * Inputs: - * in0 Index of PAL service - * in2 - in3 Remaning PAL arguments - */ -GLOBAL_ENTRY(ia64_pal_call_stacked) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) - alloc loc1 = ar.pfs,4,4,4,0 - movl loc2 = pal_entry_point - - mov r28 = in0 // Index MUST be copied to r28 - mov out0 = in0 // AND in0 of PAL function - mov loc0 = rp - .body - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - mov out1 = in1 - mov out2 = in2 - mov out3 = in3 - mov loc3 = psr - ;; - rsm psr.i - mov b7 = loc2 - ;; - br.call.sptk.many rp=b7 // now make the call -.ret0: mov psr.l = loc3 - mov ar.pfs = loc1 - mov rp = loc0 - ;; - srlz.d // serialize restoration of psr.l - br.ret.sptk.many b0 -END(ia64_pal_call_stacked) - -/* - * Make a physical mode PAL call using the static registers calling convention. - * - * Inputs: - * in0 Index of PAL service - * in2 - in3 Remaning PAL arguments - * - * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. - * So we don't need to clear them. - */ -#define PAL_PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PAL_PSR_BITS_TO_SET \ - (IA64_PSR_BN) - - -GLOBAL_ENTRY(ia64_pal_call_phys_static) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) - alloc loc1 = ar.pfs,4,7,0,0 - movl loc2 = pal_entry_point -1: { - mov r28 = in0 // copy procedure index - mov r8 = ip // save ip to compute branch - mov loc0 = rp // save rp - } - .body - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - mov r29 = in1 // first argument - mov r30 = in2 // copy arg2 - mov r31 = in3 // copy arg3 - ;; - mov loc3 = psr // save psr - adds r8 = 1f-1b,r8 // calculate return address for call - ;; - mov loc4=ar.rsc // save RSE configuration - dep.z loc2=loc2,0,61 // convert pal entry point to physical - tpa r8=r8 // convert rp to physical - ;; - mov b7 = loc2 // install target to branch reg - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - movl r16=PAL_PSR_BITS_TO_CLEAR - movl r17=PAL_PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 // add in psr the bits to set - ;; - andcm r16=loc3,r16 // removes bits to clear from psr - br.call.sptk.many rp=ia64_switch_mode_phys -.ret1: mov rp = r8 // install return address (physical) - mov loc5 = r19 - mov loc6 = r20 - br.cond.sptk.many b7 -1: - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 // r16= original psr - mov r19=loc5 - mov r20=loc6 - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode -.ret2: - mov psr.l = loc3 // restore init PSR - - mov ar.pfs = loc1 - mov rp = loc0 - ;; - mov ar.rsc=loc4 // restore RSE configuration - srlz.d // seralize restoration of psr.l - br.ret.sptk.many b0 -END(ia64_pal_call_phys_static) - -/* - * Make a PAL call using the stacked registers in physical mode. - * - * Inputs: - * in0 Index of PAL service - * in2 - in3 Remaning PAL arguments - */ -GLOBAL_ENTRY(ia64_pal_call_phys_stacked) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) - alloc loc1 = ar.pfs,5,7,4,0 - movl loc2 = pal_entry_point -1: { - mov r28 = in0 // copy procedure index - mov loc0 = rp // save rp - } - .body - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - mov loc3 = psr // save psr - ;; - mov loc4=ar.rsc // save RSE configuration - dep.z loc2=loc2,0,61 // convert pal entry point to physical - ;; - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - movl r16=PAL_PSR_BITS_TO_CLEAR - movl r17=PAL_PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 // add in psr the bits to set - mov b7 = loc2 // install target to branch reg - ;; - andcm r16=loc3,r16 // removes bits to clear from psr - br.call.sptk.many rp=ia64_switch_mode_phys - - mov out0 = in0 // first argument - mov out1 = in1 // copy arg2 - mov out2 = in2 // copy arg3 - mov out3 = in3 // copy arg3 - mov loc5 = r19 - mov loc6 = r20 - - br.call.sptk.many rp=b7 // now make the call - - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 // r16= original psr - mov r19=loc5 - mov r20=loc6 - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode - - mov psr.l = loc3 // restore init PSR - mov ar.pfs = loc1 - mov rp = loc0 - ;; - mov ar.rsc=loc4 // restore RSE configuration - srlz.d // seralize restoration of psr.l - br.ret.sptk.many b0 -END(ia64_pal_call_phys_stacked) - -/* - * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15). - * - * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch - * regs fp-low partition. - * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_save_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - stf.spill [in0] = f10,32 - stf.spill [r2] = f11,32 - ;; - stf.spill [in0] = f12,32 - stf.spill [r2] = f13,32 - ;; - stf.spill [in0] = f14,32 - stf.spill [r2] = f15,32 - br.ret.sptk.many rp -END(ia64_save_scratch_fpregs) - -/* - * Load scratch fp scratch regs (fp10-fp15) - * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_load_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - ldf.fill f10 = [in0],32 - ldf.fill f11 = [r2],32 - ;; - ldf.fill f12 = [in0],32 - ldf.fill f13 = [r2],32 - ;; - ldf.fill f14 = [in0],32 - ldf.fill f15 = [r2],32 - br.ret.sptk.many rp -END(ia64_load_scratch_fpregs) diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c deleted file mode 100644 index 73597d2866..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Instruction-patching support. - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ -#include <linux/init.h> -#include <linux/string.h> - -#include <asm/patch.h> -#include <asm/processor.h> -#include <asm/sections.h> -#include <asm/system.h> -#include <asm/unistd.h> - -/* - * This was adapted from code written by Tony Luck: - * - * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle - * like this: - * - * 6 6 5 4 3 2 1 - * 3210987654321098765432109876543210987654321098765432109876543210 - * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG - * - * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB - */ -static u64 -get_imm64 (u64 insn_addr) -{ - u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ - - return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ - ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ - ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ - ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ - ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ - ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ - ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ -} - -/* Patch instruction with "val" where "mask" has 1 bits. */ -void -ia64_patch (u64 insn_addr, u64 mask, u64 val) -{ - u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); -# define insn_mask ((1UL << 41) - 1) - unsigned long shift; - - b0 = b[0]; b1 = b[1]; - shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */ - if (shift >= 64) { - m1 = mask << (shift - 64); - v1 = val << (shift - 64); - } else { - m0 = mask << shift; m1 = mask >> (64 - shift); - v0 = val << shift; v1 = val >> (64 - shift); - b[0] = (b0 & ~m0) | (v0 & m0); - } - b[1] = (b1 & ~m1) | (v1 & m1); -} - -void -ia64_patch_imm64 (u64 insn_addr, u64 val) -{ - /* The assembler may generate offset pointing to either slot 1 - or slot 2 for a long (2-slot) instruction, occupying slots 1 - and 2. */ - insn_addr &= -16UL; - ia64_patch(insn_addr + 2, - 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ - | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ - | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ - | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ - | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); - ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); -} - -void -ia64_patch_imm60 (u64 insn_addr, u64 val) -{ - /* The assembler may generate offset pointing to either slot 1 - or slot 2 for a long (2-slot) instruction, occupying slots 1 - and 2. */ - insn_addr &= -16UL; - ia64_patch(insn_addr + 2, - 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ - | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); - ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18); -} - -/* - * We need sometimes to load the physical address of a kernel - * object. Often we can convert the virtual address to physical - * at execution time, but sometimes (either for performance reasons - * or during error recovery) we cannot to this. Patch the marked - * bundles to load the physical address. - */ -void __init -ia64_patch_vtop (unsigned long start, unsigned long end) -{ - s32 *offp = (s32 *) start; - u64 ip; - - while (offp < (s32 *) end) { - ip = (u64) offp + *offp; - - /* replace virtual address with corresponding physical address: */ - ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip))); - ia64_fc((void *) ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -void __init -ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) -{ - static int first_time = 1; - int need_workaround; - s32 *offp = (s32 *) start; - u64 *wp; - - need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0); - - if (first_time) { - first_time = 0; - if (need_workaround) - printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n"); - else - printk(KERN_INFO "McKinley Errata 9 workaround not needed; " - "disabling it\n"); - } - if (need_workaround) - return; - - while (offp < (s32 *) end) { - wp = (u64 *) ia64_imva((char *) offp + *offp); - wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ - wp[1] = 0x0004000000000200UL; - wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ - wp[3] = 0x0084006880000200UL; - ia64_fc(wp); ia64_fc(wp + 2); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -static void __init -patch_fsyscall_table (unsigned long start, unsigned long end) -{ - extern unsigned long fsyscall_table[NR_syscalls]; - s32 *offp = (s32 *) start; - u64 ip; - - while (offp < (s32 *) end) { - ip = (u64) ia64_imva((char *) offp + *offp); - ia64_patch_imm64(ip, (u64) fsyscall_table); - ia64_fc((void *) ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -static void __init -patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) -{ - extern char fsys_bubble_down[]; - s32 *offp = (s32 *) start; - u64 ip; - - while (offp < (s32 *) end) { - ip = (u64) offp + *offp; - ia64_patch_imm60((u64) ia64_imva((void *) ip), - (u64) (fsys_bubble_down - (ip & -16)) / 16); - ia64_fc((void *) ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -extern char __start_gate_running_on_xen_patchlist[]; -extern char __end_gate_running_on_xen_patchlist[]; - -void -patch_running_on_xen(unsigned long start, unsigned long end) -{ - extern int running_on_xen; - s32 *offp = (s32 *)start; - u64 ip; - - while (offp < (s32 *)end) { - ip = (u64)ia64_imva((char *)offp + *offp); - ia64_patch_imm64(ip, (u64)&running_on_xen); - ia64_fc((void *)ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -static void -patch_brl_symaddr(unsigned long start, unsigned long end, - unsigned long symaddr) -{ - s32 *offp = (s32 *)start; - u64 ip; - - while (offp < (s32 *)end) { - ip = (u64)offp + *offp; - ia64_patch_imm60((u64)ia64_imva((void *)ip), - (u64)(symaddr - (ip & -16)) / 16); - ia64_fc((void *)ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -#define EXTERN_PATCHLIST(name) \ - extern char __start_gate_brl_##name##_patchlist[]; \ - extern char __end_gate_brl_##name##_patchlist[]; \ - extern char name[] - -#define PATCH_BRL_SYMADDR(name) \ - patch_brl_symaddr((unsigned long)__start_gate_brl_##name##_patchlist, \ - (unsigned long)__end_gate_brl_##name##_patchlist, \ - (unsigned long)name) - -static void -patch_brl_in_vdso(void) -{ - EXTERN_PATCHLIST(xen_ssm_i_0); - EXTERN_PATCHLIST(xen_ssm_i_1); - - PATCH_BRL_SYMADDR(xen_ssm_i_0); - PATCH_BRL_SYMADDR(xen_ssm_i_1); -} -#else -#define patch_running_on_xen(start, end) do { } while (0) -#define patch_brl_in_vdso() do { } while (0) -#endif - -void __init -ia64_patch_gate (void) -{ -# define START(name) ((unsigned long) __start_gate_##name##_patchlist) -# define END(name) ((unsigned long)__end_gate_##name##_patchlist) - - patch_fsyscall_table(START(fsyscall), END(fsyscall)); - patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); -#ifdef CONFIG_XEN - patch_running_on_xen(START(running_on_xen), END(running_on_xen)); - patch_brl_in_vdso(); -#endif - ia64_patch_vtop(START(vtop), END(vtop)); - ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); -} diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c b/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c deleted file mode 100644 index 59d277fb8b..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c +++ /dev/null @@ -1,6943 +0,0 @@ -/* - * This file implements the perfmon-2 subsystem which is used - * to program the IA-64 Performance Monitoring Unit (PMU). - * - * The initial version of perfmon.c was written by - * Ganesh Venkitachalam, IBM Corp. - * - * Then it was modified for perfmon-1.x by Stephane Eranian and - * David Mosberger, Hewlett Packard Co. - * - * Version Perfmon-2.x is a rewrite of perfmon-1.x - * by Stephane Eranian, Hewlett Packard Co. - * - * Copyright (C) 1999-2005 Hewlett Packard Co - * Stephane Eranian <eranian@hpl.hp.com> - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * More information about perfmon available at: - * http://www.hpl.hp.com/research/linux/perfmon - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/smp_lock.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/init.h> -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/list.h> -#include <linux/file.h> -#include <linux/poll.h> -#include <linux/vfs.h> -#include <linux/pagemap.h> -#include <linux/mount.h> -#include <linux/bitops.h> -#include <linux/capability.h> -#include <linux/rcupdate.h> -#include <linux/completion.h> - -#include <asm/errno.h> -#include <asm/intrinsics.h> -#include <asm/page.h> -#include <asm/perfmon.h> -#include <asm/processor.h> -#include <asm/signal.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/delay.h> - -#ifdef CONFIG_PERFMON -#ifdef CONFIG_XEN -//#include <xen/xenoprof.h> -#include <xen/interface/xenoprof.h> - -static int xenoprof_is_primary = 0; -#define init_xenoprof_primary(is_primary) (xenoprof_is_primary = (is_primary)) -#define is_xenoprof_primary() (xenoprof_is_primary) -#define XEN_NOT_SUPPORTED_YET \ - do { \ - if (is_running_on_xen()) { \ - printk("%s is not supported yet under xen.\n", \ - __func__); \ - return -ENOSYS; \ - } \ - } while (0) -#else -#define init_xenoprof_primary(is_primary) do { } while (0) -#define is_xenoprof_primary() (0) -#define XEN_NOT_SUPPORTED_YET do { } while (0) -#define HYPERVISOR_perfmon_op(cmd, arg, count) do { } while (0) -#endif - -/* - * perfmon context state - */ -#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ -#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ -#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ -#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ - -#define PFM_INVALID_ACTIVATION (~0UL) - -/* - * depth of message queue - */ -#define PFM_MAX_MSGS 32 -#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) - -/* - * type of a PMU register (bitmask). - * bitmask structure: - * bit0 : register implemented - * bit1 : end marker - * bit2-3 : reserved - * bit4 : pmc has pmc.pm - * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter - * bit6-7 : register type - * bit8-31: reserved - */ -#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ -#define PFM_REG_IMPL 0x1 /* register implemented */ -#define PFM_REG_END 0x2 /* end marker */ -#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ -#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ -#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ -#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ -#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ - -#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) -#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) - -#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) - -/* i assumed unsigned */ -#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) -#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) - -/* XXX: these assume that register i is implemented */ -#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) -#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) -#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) -#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) - -#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value -#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask -#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] -#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] - -#define PFM_NUM_IBRS IA64_NUM_DBG_REGS -#define PFM_NUM_DBRS IA64_NUM_DBG_REGS - -#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) -#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) -#define PFM_CTX_TASK(h) (h)->ctx_task - -#define PMU_PMC_OI 5 /* position of pmc.oi bit */ - -/* XXX: does not support more than 64 PMDs */ -#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) -#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) - -#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) - -#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) -#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) -#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) -#define PFM_CODE_RR 0 /* requesting code range restriction */ -#define PFM_DATA_RR 1 /* requestion data range restriction */ - -#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) -#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) -#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) - -#define RDEP(x) (1UL<<(x)) - -/* - * context protection macros - * in SMP: - * - we need to protect against CPU concurrency (spin_lock) - * - we need to protect against PMU overflow interrupts (local_irq_disable) - * in UP: - * - we need to protect against PMU overflow interrupts (local_irq_disable) - * - * spin_lock_irqsave()/spin_lock_irqrestore(): - * in SMP: local_irq_disable + spin_lock - * in UP : local_irq_disable - * - * spin_lock()/spin_lock(): - * in UP : removed automatically - * in SMP: protect against context accesses from other CPU. interrupts - * are not masked. This is useful for the PMU interrupt handler - * because we know we will not get PMU concurrency in that code. - */ -#define PROTECT_CTX(c, f) \ - do { \ - DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \ - spin_lock_irqsave(&(c)->ctx_lock, f); \ - DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \ - } while(0) - -#define UNPROTECT_CTX(c, f) \ - do { \ - DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \ - spin_unlock_irqrestore(&(c)->ctx_lock, f); \ - } while(0) - -#define PROTECT_CTX_NOPRINT(c, f) \ - do { \ - spin_lock_irqsave(&(c)->ctx_lock, f); \ - } while(0) - - -#define UNPROTECT_CTX_NOPRINT(c, f) \ - do { \ - spin_unlock_irqrestore(&(c)->ctx_lock, f); \ - } while(0) - - -#define PROTECT_CTX_NOIRQ(c) \ - do { \ - spin_lock(&(c)->ctx_lock); \ - } while(0) - -#define UNPROTECT_CTX_NOIRQ(c) \ - do { \ - spin_unlock(&(c)->ctx_lock); \ - } while(0) - - -#ifdef CONFIG_SMP - -#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) -#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ -#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() - -#else /* !CONFIG_SMP */ -#define SET_ACTIVATION(t) do {} while(0) -#define GET_ACTIVATION(t) do {} while(0) -#define INC_ACTIVATION(t) do {} while(0) -#endif /* CONFIG_SMP */ - -#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) -#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) -#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) - -#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) -#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) - -#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) - -/* - * cmp0 must be the value of pmc0 - */ -#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) - -#define PFMFS_MAGIC 0xa0b4d889 - -/* - * debugging - */ -#define PFM_DEBUGGING 1 -#ifdef PFM_DEBUGGING -#define DPRINT(a) \ - do { \ - if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ - } while (0) - -#define DPRINT_ovfl(a) \ - do { \ - if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ - } while (0) -#endif - -/* - * 64-bit software counter structure - * - * the next_reset_type is applied to the next call to pfm_reset_regs() - */ -typedef struct { - unsigned long val; /* virtual 64bit counter value */ - unsigned long lval; /* last reset value */ - unsigned long long_reset; /* reset value on sampling overflow */ - unsigned long short_reset; /* reset value on overflow */ - unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ - unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ - unsigned long seed; /* seed for random-number generator */ - unsigned long mask; /* mask for random-number generator */ - unsigned int flags; /* notify/do not notify */ - unsigned long eventid; /* overflow event identifier */ -} pfm_counter_t; - -/* - * context flags - */ -typedef struct { - unsigned int block:1; /* when 1, task will blocked on user notifications */ - unsigned int system:1; /* do system wide monitoring */ - unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ - unsigned int is_sampling:1; /* true if using a custom format */ - unsigned int excl_idle:1; /* exclude idle task in system wide session */ - unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ - unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ - unsigned int no_msg:1; /* no message sent on overflow */ - unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ - unsigned int reserved:22; -} pfm_context_flags_t; - -#define PFM_TRAP_REASON_NONE 0x0 /* default value */ -#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ -#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ - - -/* - * perfmon context: encapsulates all the state of a monitoring session - */ - -typedef struct pfm_context { - spinlock_t ctx_lock; /* context protection */ - - pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ - unsigned int ctx_state; /* state: active/inactive (no bitfield) */ - - struct task_struct *ctx_task; /* task to which context is attached */ - - unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ - - struct completion ctx_restart_done; /* use for blocking notification mode */ - - unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ - unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ - unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ - - unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ - unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ - unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ - - unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ - - unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ - unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ - unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ - unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ - - pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ - - u64 ctx_saved_psr_up; /* only contains psr.up value */ - - unsigned long ctx_last_activation; /* context last activation number for last_cpu */ - unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ - unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ - - int ctx_fd; /* file descriptor used my this context */ - pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ - - pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ - void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ - unsigned long ctx_smpl_size; /* size of sampling buffer */ - void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ - - wait_queue_head_t ctx_msgq_wait; - pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; - int ctx_msgq_head; - int ctx_msgq_tail; - struct fasync_struct *ctx_async_queue; - - wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ -} pfm_context_t; - -/* - * magic number used to verify that structure is really - * a perfmon context - */ -#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) - -#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) - -#ifdef CONFIG_SMP -#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) -#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu -#else -#define SET_LAST_CPU(ctx, v) do {} while(0) -#define GET_LAST_CPU(ctx) do {} while(0) -#endif - - -#define ctx_fl_block ctx_flags.block -#define ctx_fl_system ctx_flags.system -#define ctx_fl_using_dbreg ctx_flags.using_dbreg -#define ctx_fl_is_sampling ctx_flags.is_sampling -#define ctx_fl_excl_idle ctx_flags.excl_idle -#define ctx_fl_going_zombie ctx_flags.going_zombie -#define ctx_fl_trap_reason ctx_flags.trap_reason -#define ctx_fl_no_msg ctx_flags.no_msg -#define ctx_fl_can_restart ctx_flags.can_restart - -#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); -#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking - -/* - * global information about all sessions - * mostly used to synchronize between system wide and per-process - */ -typedef struct { - spinlock_t pfs_lock; /* lock the structure */ - - unsigned int pfs_task_sessions; /* number of per task sessions */ - unsigned int pfs_sys_sessions; /* number of per system wide sessions */ - unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ - unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ - struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ -} pfm_session_t; - -/* - * information about a PMC or PMD. - * dep_pmd[]: a bitmask of dependent PMD registers - * dep_pmc[]: a bitmask of dependent PMC registers - */ -typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); -typedef struct { - unsigned int type; - int pm_pos; - unsigned long default_value; /* power-on default value */ - unsigned long reserved_mask; /* bitmask of reserved bits */ - pfm_reg_check_t read_check; - pfm_reg_check_t write_check; - unsigned long dep_pmd[4]; - unsigned long dep_pmc[4]; -} pfm_reg_desc_t; - -/* assume cnum is a valid monitor */ -#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) - -/* - * This structure is initialized at boot time and contains - * a description of the PMU main characteristics. - * - * If the probe function is defined, detection is based - * on its return value: - * - 0 means recognized PMU - * - anything else means not supported - * When the probe function is not defined, then the pmu_family field - * is used and it must match the host CPU family such that: - * - cpu->family & config->pmu_family != 0 - */ -typedef struct { - unsigned long ovfl_val; /* overflow value for counters */ - - pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ - pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ - - unsigned int num_pmcs; /* number of PMCS: computed at init time */ - unsigned int num_pmds; /* number of PMDS: computed at init time */ - unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ - unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ - - char *pmu_name; /* PMU family name */ - unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ - unsigned int flags; /* pmu specific flags */ - unsigned int num_ibrs; /* number of IBRS: computed at init time */ - unsigned int num_dbrs; /* number of DBRS: computed at init time */ - unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ - int (*probe)(void); /* customized probe routine */ - unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ -} pmu_config_t; -/* - * PMU specific flags - */ -#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ - -/* - * debug register related type definitions - */ -typedef struct { - unsigned long ibr_mask:56; - unsigned long ibr_plm:4; - unsigned long ibr_ig:3; - unsigned long ibr_x:1; -} ibr_mask_reg_t; - -typedef struct { - unsigned long dbr_mask:56; - unsigned long dbr_plm:4; - unsigned long dbr_ig:2; - unsigned long dbr_w:1; - unsigned long dbr_r:1; -} dbr_mask_reg_t; - -typedef union { - unsigned long val; - ibr_mask_reg_t ibr; - dbr_mask_reg_t dbr; -} dbreg_t; - - -/* - * perfmon command descriptions - */ -typedef struct { - int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - char *cmd_name; - int cmd_flags; - unsigned int cmd_narg; - size_t cmd_argsize; - int (*cmd_getsize)(void *arg, size_t *sz); -} pfm_cmd_desc_t; - -#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ -#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ -#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ -#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ - - -#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name -#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) -#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) -#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) -#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) - -#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ - -typedef struct { - unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ - unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ - unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ - unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ - unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */ - unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ - unsigned long pfm_smpl_handler_calls; - unsigned long pfm_smpl_handler_cycles; - char pad[SMP_CACHE_BYTES] ____cacheline_aligned; -} pfm_stats_t; - -/* - * perfmon internal variables - */ -static pfm_stats_t pfm_stats[NR_CPUS]; -static pfm_session_t pfm_sessions; /* global sessions information */ - -static DEFINE_SPINLOCK(pfm_alt_install_check); -static pfm_intr_handler_desc_t *pfm_alt_intr_handler; - -static struct proc_dir_entry *perfmon_dir; -static pfm_uuid_t pfm_null_uuid = {0,}; - -static spinlock_t pfm_buffer_fmt_lock; -static LIST_HEAD(pfm_buffer_fmt_list); - -static pmu_config_t *pmu_conf; - -/* sysctl() controls */ -pfm_sysctl_t pfm_sysctl; -EXPORT_SYMBOL(pfm_sysctl); - -static ctl_table pfm_ctl_table[]={ - {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, - {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, - {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,}, - {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,}, - { 0, }, -}; -static ctl_table pfm_sysctl_dir[] = { - {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, }, - {0,}, -}; -static ctl_table pfm_sysctl_root[] = { - {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, }, - {0,}, -}; -static struct ctl_table_header *pfm_sysctl_header; - -static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - -#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) -#define pfm_get_cpu_data(a,b) per_cpu(a, b) - -static inline void -pfm_put_task(struct task_struct *task) -{ - if (task != current) put_task_struct(task); -} - -static inline void -pfm_set_task_notify(struct task_struct *task) -{ - struct thread_info *info; - - info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); - set_bit(TIF_NOTIFY_RESUME, &info->flags); -} - -static inline void -pfm_clear_task_notify(void) -{ - clear_thread_flag(TIF_NOTIFY_RESUME); -} - -static inline void -pfm_reserve_page(unsigned long a) -{ - SetPageReserved(vmalloc_to_page((void *)a)); -} -static inline void -pfm_unreserve_page(unsigned long a) -{ - ClearPageReserved(vmalloc_to_page((void*)a)); -} - -static inline unsigned long -pfm_protect_ctx_ctxsw(pfm_context_t *x) -{ - spin_lock(&(x)->ctx_lock); - return 0UL; -} - -static inline void -pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) -{ - spin_unlock(&(x)->ctx_lock); -} - -static inline unsigned int -pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct) -{ - return do_munmap(mm, addr, len); -} - -static inline unsigned long -pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) -{ - return get_unmapped_area(file, addr, len, pgoff, flags); -} - - -static int -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); -} - -static struct file_system_type pfm_fs_type = { - .name = "pfmfs", - .get_sb = pfmfs_get_sb, - .kill_sb = kill_anon_super, -}; - -DEFINE_PER_CPU(unsigned long, pfm_syst_info); -DEFINE_PER_CPU(struct task_struct *, pmu_owner); -DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); -DEFINE_PER_CPU(unsigned long, pmu_activation_number); -EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); - - -/* forward declaration */ -static struct file_operations pfm_file_ops; - -/* - * forward declarations - */ -#ifndef CONFIG_SMP -static void pfm_lazy_save_regs (struct task_struct *ta); -#endif - -void dump_pmu_state(const char *); -static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - -#include "perfmon_itanium.h" -#include "perfmon_mckinley.h" -#include "perfmon_montecito.h" -#include "perfmon_generic.h" - -static pmu_config_t *pmu_confs[]={ - &pmu_conf_mont, - &pmu_conf_mck, - &pmu_conf_ita, - &pmu_conf_gen, /* must be last */ - NULL -}; - - -static int pfm_end_notify_user(pfm_context_t *ctx); - -static inline void -pfm_clear_psr_pp(void) -{ - ia64_rsm(IA64_PSR_PP); - ia64_srlz_i(); -} - -static inline void -pfm_set_psr_pp(void) -{ - ia64_ssm(IA64_PSR_PP); - ia64_srlz_i(); -} - -static inline void -pfm_clear_psr_up(void) -{ - ia64_rsm(IA64_PSR_UP); - ia64_srlz_i(); -} - -static inline void -pfm_set_psr_up(void) -{ - ia64_ssm(IA64_PSR_UP); - ia64_srlz_i(); -} - -static inline unsigned long -pfm_get_psr(void) -{ - unsigned long tmp; - tmp = ia64_getreg(_IA64_REG_PSR); - ia64_srlz_i(); - return tmp; -} - -static inline void -pfm_set_psr_l(unsigned long val) -{ - ia64_setreg(_IA64_REG_PSR_L, val); - ia64_srlz_i(); -} - -static inline void -pfm_freeze_pmu(void) -{ - ia64_set_pmc(0,1UL); - ia64_srlz_d(); -} - -static inline void -pfm_unfreeze_pmu(void) -{ - ia64_set_pmc(0,0UL); - ia64_srlz_d(); -} - -static inline void -pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) -{ - int i; - - for (i=0; i < nibrs; i++) { - ia64_set_ibr(i, ibrs[i]); - ia64_dv_serialize_instruction(); - } - ia64_srlz_i(); -} - -static inline void -pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) -{ - int i; - - for (i=0; i < ndbrs; i++) { - ia64_set_dbr(i, dbrs[i]); - ia64_dv_serialize_data(); - } - ia64_srlz_d(); -} - -/* - * PMD[i] must be a counter. no check is made - */ -static inline unsigned long -pfm_read_soft_counter(pfm_context_t *ctx, int i) -{ - return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); -} - -/* - * PMD[i] must be a counter. no check is made - */ -static inline void -pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) -{ - unsigned long ovfl_val = pmu_conf->ovfl_val; - - ctx->ctx_pmds[i].val = val & ~ovfl_val; - /* - * writing to unimplemented part is ignore, so we do not need to - * mask off top part - */ - ia64_set_pmd(i, val & ovfl_val); -} - -static pfm_msg_t * -pfm_get_new_msg(pfm_context_t *ctx) -{ - int idx, next; - - next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; - - DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); - if (next == ctx->ctx_msgq_head) return NULL; - - idx = ctx->ctx_msgq_tail; - ctx->ctx_msgq_tail = next; - - DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); - - return ctx->ctx_msgq+idx; -} - -static pfm_msg_t * -pfm_get_next_msg(pfm_context_t *ctx) -{ - pfm_msg_t *msg; - - DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); - - if (PFM_CTXQ_EMPTY(ctx)) return NULL; - - /* - * get oldest message - */ - msg = ctx->ctx_msgq+ctx->ctx_msgq_head; - - /* - * and move forward - */ - ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; - - DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); - - return msg; -} - -static void -pfm_reset_msgq(pfm_context_t *ctx) -{ - ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; - DPRINT(("ctx=%p msgq reset\n", ctx)); -} - -static void * -pfm_rvmalloc(unsigned long size) -{ - void *mem; - unsigned long addr; - - size = PAGE_ALIGN(size); - mem = vmalloc(size); - if (mem) { - //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); - memset(mem, 0, size); - addr = (unsigned long)mem; - while (size > 0) { - pfm_reserve_page(addr); - addr+=PAGE_SIZE; - size-=PAGE_SIZE; - } - } - return mem; -} - -static void -pfm_rvfree(void *mem, unsigned long size) -{ - unsigned long addr; - - if (mem) { - DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); - addr = (unsigned long) mem; - while ((long) size > 0) { - pfm_unreserve_page(addr); - addr+=PAGE_SIZE; - size-=PAGE_SIZE; - } - vfree(mem); - } - return; -} - -static pfm_context_t * -pfm_context_alloc(void) -{ - pfm_context_t *ctx; - - /* - * allocate context descriptor - * must be able to free with interrupts disabled - */ - ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); - if (ctx) { - memset(ctx, 0, sizeof(pfm_context_t)); - DPRINT(("alloc ctx @%p\n", ctx)); - } - return ctx; -} - -static void -pfm_context_free(pfm_context_t *ctx) -{ - if (ctx) { - DPRINT(("free ctx @%p\n", ctx)); - kfree(ctx); - } -} - -static void -pfm_mask_monitoring(struct task_struct *task) -{ - pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; - unsigned long mask, val, ovfl_mask; - int i; - - DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid)); - - ovfl_mask = pmu_conf->ovfl_val; - /* - * monitoring can only be masked as a result of a valid - * counter overflow. In UP, it means that the PMU still - * has an owner. Note that the owner can be different - * from the current task. However the PMU state belongs - * to the owner. - * In SMP, a valid overflow only happens when task is - * current. Therefore if we come here, we know that - * the PMU state belongs to the current task, therefore - * we can access the live registers. - * - * So in both cases, the live register contains the owner's - * state. We can ONLY touch the PMU registers and NOT the PSR. - * - * As a consequence to this call, the thread->pmds[] array - * contains stale information which must be ignored - * when context is reloaded AND monitoring is active (see - * pfm_restart). - */ - mask = ctx->ctx_used_pmds[0]; - for (i = 0; mask; i++, mask>>=1) { - /* skip non used pmds */ - if ((mask & 0x1) == 0) continue; - val = ia64_get_pmd(i); - - if (PMD_IS_COUNTING(i)) { - /* - * we rebuild the full 64 bit value of the counter - */ - ctx->ctx_pmds[i].val += (val & ovfl_mask); - } else { - ctx->ctx_pmds[i].val = val; - } - DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", - i, - ctx->ctx_pmds[i].val, - val & ovfl_mask)); - } - /* - * mask monitoring by setting the privilege level to 0 - * we cannot use psr.pp/psr.up for this, it is controlled by - * the user - * - * if task is current, modify actual registers, otherwise modify - * thread save state, i.e., what will be restored in pfm_load_regs() - */ - mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; - for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0UL) continue; - ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); - th->pmcs[i] &= ~0xfUL; - DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); - } - /* - * make all of this visible - */ - ia64_srlz_d(); -} - -/* - * must always be done with task == current - * - * context must be in MASKED state when calling - */ -static void -pfm_restore_monitoring(struct task_struct *task) -{ - pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; - unsigned long mask, ovfl_mask; - unsigned long psr, val; - int i, is_system; - - is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf->ovfl_val; - - if (task != current) { - printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid); - return; - } - if (ctx->ctx_state != PFM_CTX_MASKED) { - printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, - task->pid, current->pid, ctx->ctx_state); - return; - } - psr = pfm_get_psr(); - /* - * monitoring is masked via the PMC. - * As we restore their value, we do not want each counter to - * restart right away. We stop monitoring using the PSR, - * restore the PMC (and PMD) and then re-establish the psr - * as it was. Note that there can be no pending overflow at - * this point, because monitoring was MASKED. - * - * system-wide session are pinned and self-monitoring - */ - if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { - /* disable dcr pp */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); - pfm_clear_psr_pp(); - } else { - pfm_clear_psr_up(); - } - /* - * first, we restore the PMD - */ - mask = ctx->ctx_used_pmds[0]; - for (i = 0; mask; i++, mask>>=1) { - /* skip non used pmds */ - if ((mask & 0x1) == 0) continue; - - if (PMD_IS_COUNTING(i)) { - /* - * we split the 64bit value according to - * counter width - */ - val = ctx->ctx_pmds[i].val & ovfl_mask; - ctx->ctx_pmds[i].val &= ~ovfl_mask; - } else { - val = ctx->ctx_pmds[i].val; - } - ia64_set_pmd(i, val); - - DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", - i, - ctx->ctx_pmds[i].val, - val)); - } - /* - * restore the PMCs - */ - mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; - for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0UL) continue; - th->pmcs[i] = ctx->ctx_pmcs[i]; - ia64_set_pmc(i, th->pmcs[i]); - DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); - } - ia64_srlz_d(); - - /* - * must restore DBR/IBR because could be modified while masked - * XXX: need to optimize - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - - /* - * now restore PSR - */ - if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { - /* enable dcr pp */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); - ia64_srlz_i(); - } - pfm_set_psr_l(psr); -} - -static inline void -pfm_save_pmds(unsigned long *pmds, unsigned long mask) -{ - int i; - - ia64_srlz_d(); - - for (i=0; mask; i++, mask>>=1) { - if (mask & 0x1) pmds[i] = ia64_get_pmd(i); - } -} - -/* - * reload from thread state (used for ctxw only) - */ -static inline void -pfm_restore_pmds(unsigned long *pmds, unsigned long mask) -{ - int i; - unsigned long val, ovfl_val = pmu_conf->ovfl_val; - - for (i=0; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0) continue; - val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; - ia64_set_pmd(i, val); - } - ia64_srlz_d(); -} - -/* - * propagate PMD from context to thread-state - */ -static inline void -pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) -{ - struct thread_struct *thread = &task->thread; - unsigned long ovfl_val = pmu_conf->ovfl_val; - unsigned long mask = ctx->ctx_all_pmds[0]; - unsigned long val; - int i; - - DPRINT(("mask=0x%lx\n", mask)); - - for (i=0; mask; i++, mask>>=1) { - - val = ctx->ctx_pmds[i].val; - - /* - * We break up the 64 bit value into 2 pieces - * the lower bits go to the machine state in the - * thread (will be reloaded on ctxsw in). - * The upper part stays in the soft-counter. - */ - if (PMD_IS_COUNTING(i)) { - ctx->ctx_pmds[i].val = val & ~ovfl_val; - val &= ovfl_val; - } - thread->pmds[i] = val; - - DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", - i, - thread->pmds[i], - ctx->ctx_pmds[i].val)); - } -} - -/* - * propagate PMC from context to thread-state - */ -static inline void -pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) -{ - struct thread_struct *thread = &task->thread; - unsigned long mask = ctx->ctx_all_pmcs[0]; - int i; - - DPRINT(("mask=0x%lx\n", mask)); - - for (i=0; mask; i++, mask>>=1) { - /* masking 0 with ovfl_val yields 0 */ - thread->pmcs[i] = ctx->ctx_pmcs[i]; - DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); - } -} - - - -static inline void -pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) -{ - int i; - - for (i=0; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0) continue; - ia64_set_pmc(i, pmcs[i]); - } - ia64_srlz_d(); -} - -static inline int -pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) -{ - return memcmp(a, b, sizeof(pfm_uuid_t)); -} - -static inline int -pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) -{ - int ret = 0; - if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); - return ret; -} - -static inline int -pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) -{ - int ret = 0; - if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); - return ret; -} - - -static inline int -pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, - int cpu, void *arg) -{ - int ret = 0; - if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); - return ret; -} - -static inline int -pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, - int cpu, void *arg) -{ - int ret = 0; - if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); - return ret; -} - -static inline int -pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) -{ - int ret = 0; - if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); - return ret; -} - -static inline int -pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) -{ - int ret = 0; - if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); - return ret; -} - -static pfm_buffer_fmt_t * -__pfm_find_buffer_fmt(pfm_uuid_t uuid) -{ - struct list_head * pos; - pfm_buffer_fmt_t * entry; - - list_for_each(pos, &pfm_buffer_fmt_list) { - entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); - if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) - return entry; - } - return NULL; -} - -/* - * find a buffer format based on its uuid - */ -static pfm_buffer_fmt_t * -pfm_find_buffer_fmt(pfm_uuid_t uuid) -{ - pfm_buffer_fmt_t * fmt; - spin_lock(&pfm_buffer_fmt_lock); - fmt = __pfm_find_buffer_fmt(uuid); - spin_unlock(&pfm_buffer_fmt_lock); - return fmt; -} - -int -pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) -{ - int ret = 0; - - /* some sanity checks */ - if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; - - /* we need at least a handler */ - if (fmt->fmt_handler == NULL) return -EINVAL; - - /* - * XXX: need check validity of fmt_arg_size - */ - - spin_lock(&pfm_buffer_fmt_lock); - - if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { - printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); - ret = -EBUSY; - goto out; - } - list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); - printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); - -out: - spin_unlock(&pfm_buffer_fmt_lock); - return ret; -} -EXPORT_SYMBOL(pfm_register_buffer_fmt); - -int -pfm_unregister_buffer_fmt(pfm_uuid_t uuid) -{ - pfm_buffer_fmt_t *fmt; - int ret = 0; - - spin_lock(&pfm_buffer_fmt_lock); - - fmt = __pfm_find_buffer_fmt(uuid); - if (!fmt) { - printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); - ret = -EINVAL; - goto out; - } - list_del_init(&fmt->fmt_list); - printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); - -out: - spin_unlock(&pfm_buffer_fmt_lock); - return ret; - -} -EXPORT_SYMBOL(pfm_unregister_buffer_fmt); - -extern void update_pal_halt_status(int); - -static int -pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) -{ - unsigned long flags; - /* - * validy checks on cpu_mask have been done upstream - */ - LOCK_PFS(flags); - - DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - if (is_syswide) { - /* - * cannot mix system wide and per-task sessions - */ - if (pfm_sessions.pfs_task_sessions > 0UL) { - DPRINT(("system wide not possible, %u conflicting task_sessions\n", - pfm_sessions.pfs_task_sessions)); - goto abort; - } - - if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; - - DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); - - pfm_sessions.pfs_sys_session[cpu] = task; - - pfm_sessions.pfs_sys_sessions++ ; - - } else { - if (pfm_sessions.pfs_sys_sessions) goto abort; - pfm_sessions.pfs_task_sessions++; - } - - DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - /* - * disable default_idle() to go to PAL_HALT - */ - update_pal_halt_status(0); - - UNLOCK_PFS(flags); - - return 0; - -error_conflict: - DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", - pfm_sessions.pfs_sys_session[cpu]->pid, - cpu)); -abort: - UNLOCK_PFS(flags); - - return -EBUSY; - -} - -static int -pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) -{ - unsigned long flags; - /* - * validy checks on cpu_mask have been done upstream - */ - LOCK_PFS(flags); - - DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - - if (is_syswide) { - pfm_sessions.pfs_sys_session[cpu] = NULL; - /* - * would not work with perfmon+more than one bit in cpu_mask - */ - if (ctx && ctx->ctx_fl_using_dbreg) { - if (pfm_sessions.pfs_sys_use_dbregs == 0) { - printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); - } else { - pfm_sessions.pfs_sys_use_dbregs--; - } - } - pfm_sessions.pfs_sys_sessions--; - } else { - pfm_sessions.pfs_task_sessions--; - } - DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - /* - * if possible, enable default_idle() to go into PAL_HALT - */ - if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) - update_pal_halt_status(1); - - UNLOCK_PFS(flags); - - return 0; -} - -/* - * removes virtual mapping of the sampling buffer. - * IMPORTANT: cannot be called with interrupts disable, e.g. inside - * a PROTECT_CTX() section. - */ -static int -pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size) -{ - int r; - - /* sanity checks */ - if (task->mm == NULL || size == 0UL || vaddr == NULL) { - printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm); - return -EINVAL; - } - - DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); - - /* - * does the actual unmapping - */ - down_write(&task->mm->mmap_sem); - - DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size)); - - r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0); - - up_write(&task->mm->mmap_sem); - if (r !=0) { - printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size); - } - - DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); - - return 0; -} - -/* - * free actual physical storage used by sampling buffer - */ -#if 0 -static int -pfm_free_smpl_buffer(pfm_context_t *ctx) -{ - pfm_buffer_fmt_t *fmt; - - if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; - - /* - * we won't use the buffer format anymore - */ - fmt = ctx->ctx_buf_fmt; - - DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", - ctx->ctx_smpl_hdr, - ctx->ctx_smpl_size, - ctx->ctx_smpl_vaddr)); - - pfm_buf_fmt_exit(fmt, current, NULL, NULL); - - /* - * free the buffer - */ - pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size); - - ctx->ctx_smpl_hdr = NULL; - ctx->ctx_smpl_size = 0UL; - - return 0; - -invalid_free: - printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid); - return -EINVAL; -} -#endif - -static inline void -pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) -{ - if (fmt == NULL) return; - - pfm_buf_fmt_exit(fmt, current, NULL, NULL); - -} - -/* - * pfmfs should _never_ be mounted by userland - too much of security hassle, - * no real gain from having the whole whorehouse mounted. So we don't need - * any operations on the root directory. However, we need a non-trivial - * d_name - pfm: will go nicely and kill the special-casing in procfs. - */ -static struct vfsmount *pfmfs_mnt; - -static int __init -init_pfm_fs(void) -{ - int err = register_filesystem(&pfm_fs_type); - if (!err) { - pfmfs_mnt = kern_mount(&pfm_fs_type); - err = PTR_ERR(pfmfs_mnt); - if (IS_ERR(pfmfs_mnt)) - unregister_filesystem(&pfm_fs_type); - else - err = 0; - } - return err; -} - -static void __exit -exit_pfm_fs(void) -{ - unregister_filesystem(&pfm_fs_type); - mntput(pfmfs_mnt); -} - -static ssize_t -pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) -{ - pfm_context_t *ctx; - pfm_msg_t *msg; - ssize_t ret; - unsigned long flags; - DECLARE_WAITQUEUE(wait, current); - XEN_NOT_SUPPORTED_YET; - if (PFM_IS_FILE(filp) == 0) { - printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); - return -EINVAL; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid); - return -EINVAL; - } - - /* - * check even when there is no message - */ - if (size < sizeof(pfm_msg_t)) { - DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); - return -EINVAL; - } - - PROTECT_CTX(ctx, flags); - - /* - * put ourselves on the wait queue - */ - add_wait_queue(&ctx->ctx_msgq_wait, &wait); - - - for(;;) { - /* - * check wait queue - */ - - set_current_state(TASK_INTERRUPTIBLE); - - DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); - - ret = 0; - if(PFM_CTXQ_EMPTY(ctx) == 0) break; - - UNPROTECT_CTX(ctx, flags); - - /* - * check non-blocking read - */ - ret = -EAGAIN; - if(filp->f_flags & O_NONBLOCK) break; - - /* - * check pending signals - */ - if(signal_pending(current)) { - ret = -EINTR; - break; - } - /* - * no message, so wait - */ - schedule(); - - PROTECT_CTX(ctx, flags); - } - DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret)); - set_current_state(TASK_RUNNING); - remove_wait_queue(&ctx->ctx_msgq_wait, &wait); - - if (ret < 0) goto abort; - - ret = -EINVAL; - msg = pfm_get_next_msg(ctx); - if (msg == NULL) { - printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid); - goto abort_locked; - } - - DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); - - ret = -EFAULT; - if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); - -abort_locked: - UNPROTECT_CTX(ctx, flags); -abort: - return ret; -} - -static ssize_t -pfm_write(struct file *file, const char __user *ubuf, - size_t size, loff_t *ppos) -{ - DPRINT(("pfm_write called\n")); - return -EINVAL; -} - -static unsigned int -pfm_poll(struct file *filp, poll_table * wait) -{ - pfm_context_t *ctx; - unsigned long flags; - unsigned int mask = 0; - - if (PFM_IS_FILE(filp) == 0) { - printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); - return 0; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid); - return 0; - } - - - DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); - - poll_wait(filp, &ctx->ctx_msgq_wait, wait); - - PROTECT_CTX(ctx, flags); - - if (PFM_CTXQ_EMPTY(ctx) == 0) - mask = POLLIN | POLLRDNORM; - - UNPROTECT_CTX(ctx, flags); - - DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); - - return mask; -} - -static int -pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) -{ - DPRINT(("pfm_ioctl called\n")); - return -EINVAL; -} - -/* - * interrupt cannot be masked when coming here - */ -static inline int -pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) -{ - int ret; - - ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); - - DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", - current->pid, - fd, - on, - ctx->ctx_async_queue, ret)); - - return ret; -} - -static int -pfm_fasync(int fd, struct file *filp, int on) -{ - pfm_context_t *ctx; - int ret; - - if (PFM_IS_FILE(filp) == 0) { - printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid); - return -EBADF; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid); - return -EBADF; - } - /* - * we cannot mask interrupts during this call because this may - * may go to sleep if memory is not readily avalaible. - * - * We are protected from the conetxt disappearing by the get_fd()/put_fd() - * done in caller. Serialization of this function is ensured by caller. - */ - ret = pfm_do_fasync(fd, filp, ctx, on); - - - DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", - fd, - on, - ctx->ctx_async_queue, ret)); - - return ret; -} - -#ifdef CONFIG_SMP -/* - * this function is exclusively called from pfm_close(). - * The context is not protected at that time, nor are interrupts - * on the remote CPU. That's necessary to avoid deadlocks. - */ -static void -pfm_syswide_force_stop(void *info) -{ - pfm_context_t *ctx = (pfm_context_t *)info; - struct pt_regs *regs = task_pt_regs(current); - struct task_struct *owner; - unsigned long flags; - int ret; - - if (ctx->ctx_cpu != smp_processor_id()) { - printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", - ctx->ctx_cpu, - smp_processor_id()); - return; - } - owner = GET_PMU_OWNER(); - if (owner != ctx->ctx_task) { - printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", - smp_processor_id(), - owner->pid, ctx->ctx_task->pid); - return; - } - if (GET_PMU_CTX() != ctx) { - printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", - smp_processor_id(), - GET_PMU_CTX(), ctx); - return; - } - - DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid)); - /* - * the context is already protected in pfm_close(), we simply - * need to mask interrupts to avoid a PMU interrupt race on - * this CPU - */ - local_irq_save(flags); - - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - DPRINT(("context_unload returned %d\n", ret)); - } - - /* - * unmask interrupts, PMU interrupts are now spurious here - */ - local_irq_restore(flags); -} - -static void -pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) -{ - int ret; - - DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); - ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1); - DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); -} -#endif /* CONFIG_SMP */ - -/* - * called for each close(). Partially free resources. - * When caller is self-monitoring, the context is unloaded. - */ -static int -pfm_flush(struct file *filp, fl_owner_t id) -{ - pfm_context_t *ctx; - struct task_struct *task; - struct pt_regs *regs; - unsigned long flags; - unsigned long smpl_buf_size = 0UL; - void *smpl_buf_vaddr = NULL; - int state, is_system; - - if (PFM_IS_FILE(filp) == 0) { - DPRINT(("bad magic for\n")); - return -EBADF; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid); - return -EBADF; - } - - /* - * remove our file from the async queue, if we use this mode. - * This can be done without the context being protected. We come - * here when the context has become unreacheable by other tasks. - * - * We may still have active monitoring at this point and we may - * end up in pfm_overflow_handler(). However, fasync_helper() - * operates with interrupts disabled and it cleans up the - * queue. If the PMU handler is called prior to entering - * fasync_helper() then it will send a signal. If it is - * invoked after, it will find an empty queue and no - * signal will be sent. In both case, we are safe - */ - if (filp->f_flags & FASYNC) { - DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue)); - pfm_do_fasync (-1, filp, ctx, 0); - } - - PROTECT_CTX(ctx, flags); - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - task = PFM_CTX_TASK(ctx); - regs = task_pt_regs(task); - - DPRINT(("ctx_state=%d is_current=%d\n", - state, - task == current ? 1 : 0)); - - /* - * if state == UNLOADED, then task is NULL - */ - - /* - * we must stop and unload because we are losing access to the context. - */ - if (task == current) { -#ifdef CONFIG_SMP - /* - * the task IS the owner but it migrated to another CPU: that's bad - * but we must handle this cleanly. Unfortunately, the kernel does - * not provide a mechanism to block migration (while the context is loaded). - * - * We need to release the resource on the ORIGINAL cpu. - */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - /* - * keep context protected but unmask interrupt for IPI - */ - local_irq_restore(flags); - - pfm_syswide_cleanup_other_cpu(ctx); - - /* - * restore interrupt masking - */ - local_irq_save(flags); - - /* - * context is unloaded at this point - */ - } else -#endif /* CONFIG_SMP */ - { - - DPRINT(("forcing unload\n")); - /* - * stop and unload, returning with state UNLOADED - * and session unreserved. - */ - pfm_context_unload(ctx, NULL, 0, regs); - - DPRINT(("ctx_state=%d\n", ctx->ctx_state)); - } - } - - /* - * remove virtual mapping, if any, for the calling task. - * cannot reset ctx field until last user is calling close(). - * - * ctx_smpl_vaddr must never be cleared because it is needed - * by every task with access to the context - * - * When called from do_exit(), the mm context is gone already, therefore - * mm is NULL, i.e., the VMA is already gone and we do not have to - * do anything here - */ - if (ctx->ctx_smpl_vaddr && current->mm) { - smpl_buf_vaddr = ctx->ctx_smpl_vaddr; - smpl_buf_size = ctx->ctx_smpl_size; - } - - UNPROTECT_CTX(ctx, flags); - - /* - * if there was a mapping, then we systematically remove it - * at this point. Cannot be done inside critical section - * because some VM function reenables interrupts. - * - */ - if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size); - - return 0; -} -/* - * called either on explicit close() or from exit_files(). - * Only the LAST user of the file gets to this point, i.e., it is - * called only ONCE. - * - * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero - * (fput()),i.e, last task to access the file. Nobody else can access the - * file at this point. - * - * When called from exit_files(), the VMA has been freed because exit_mm() - * is executed before exit_files(). - * - * When called from exit_files(), the current task is not yet ZOMBIE but we - * flush the PMU state to the context. - */ -static int -pfm_close(struct inode *inode, struct file *filp) -{ - pfm_context_t *ctx; - struct task_struct *task; - struct pt_regs *regs; - DECLARE_WAITQUEUE(wait, current); - unsigned long flags; - unsigned long smpl_buf_size = 0UL; - void *smpl_buf_addr = NULL; - int free_possible = 1; - int state, is_system; - - DPRINT(("pfm_close called private=%p\n", filp->private_data)); - - if (PFM_IS_FILE(filp) == 0) { - DPRINT(("bad magic\n")); - return -EBADF; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid); - return -EBADF; - } - - PROTECT_CTX(ctx, flags); - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - task = PFM_CTX_TASK(ctx); - regs = task_pt_regs(task); - - DPRINT(("ctx_state=%d is_current=%d\n", - state, - task == current ? 1 : 0)); - - /* - * if task == current, then pfm_flush() unloaded the context - */ - if (state == PFM_CTX_UNLOADED) goto doit; - - /* - * context is loaded/masked and task != current, we need to - * either force an unload or go zombie - */ - - /* - * The task is currently blocked or will block after an overflow. - * we must force it to wakeup to get out of the - * MASKED state and transition to the unloaded state by itself. - * - * This situation is only possible for per-task mode - */ - if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { - - /* - * set a "partial" zombie state to be checked - * upon return from down() in pfm_handle_work(). - * - * We cannot use the ZOMBIE state, because it is checked - * by pfm_load_regs() which is called upon wakeup from down(). - * In such case, it would free the context and then we would - * return to pfm_handle_work() which would access the - * stale context. Instead, we set a flag invisible to pfm_load_regs() - * but visible to pfm_handle_work(). - * - * For some window of time, we have a zombie context with - * ctx_state = MASKED and not ZOMBIE - */ - ctx->ctx_fl_going_zombie = 1; - - /* - * force task to wake up from MASKED state - */ - complete(&ctx->ctx_restart_done); - - DPRINT(("waking up ctx_state=%d\n", state)); - - /* - * put ourself to sleep waiting for the other - * task to report completion - * - * the context is protected by mutex, therefore there - * is no risk of being notified of completion before - * begin actually on the waitq. - */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ctx->ctx_zombieq, &wait); - - UNPROTECT_CTX(ctx, flags); - - /* - * XXX: check for signals : - * - ok for explicit close - * - not ok when coming from exit_files() - */ - schedule(); - - - PROTECT_CTX(ctx, flags); - - - remove_wait_queue(&ctx->ctx_zombieq, &wait); - set_current_state(TASK_RUNNING); - - /* - * context is unloaded at this point - */ - DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); - } - else if (task != current) { -#ifdef CONFIG_SMP - /* - * switch context to zombie state - */ - ctx->ctx_state = PFM_CTX_ZOMBIE; - - DPRINT(("zombie ctx for [%d]\n", task->pid)); - /* - * cannot free the context on the spot. deferred until - * the task notices the ZOMBIE state - */ - free_possible = 0; -#else - pfm_context_unload(ctx, NULL, 0, regs); -#endif - } - -doit: - /* reload state, may have changed during opening of critical section */ - state = ctx->ctx_state; - - /* - * the context is still attached to a task (possibly current) - * we cannot destroy it right now - */ - - /* - * we must free the sampling buffer right here because - * we cannot rely on it being cleaned up later by the - * monitored task. It is not possible to free vmalloc'ed - * memory in pfm_load_regs(). Instead, we remove the buffer - * now. should there be subsequent PMU overflow originally - * meant for sampling, the will be converted to spurious - * and that's fine because the monitoring tools is gone anyway. - */ - if (ctx->ctx_smpl_hdr) { - smpl_buf_addr = ctx->ctx_smpl_hdr; - smpl_buf_size = ctx->ctx_smpl_size; - /* no more sampling */ - ctx->ctx_smpl_hdr = NULL; - ctx->ctx_fl_is_sampling = 0; - } - - DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n", - state, - free_possible, - smpl_buf_addr, - smpl_buf_size)); - - if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt); - - /* - * UNLOADED that the session has already been unreserved. - */ - if (state == PFM_CTX_ZOMBIE) { - pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu); - } - - /* - * disconnect file descriptor from context must be done - * before we unlock. - */ - filp->private_data = NULL; - - /* - * if we free on the spot, the context is now completely unreacheable - * from the callers side. The monitored task side is also cut, so we - * can freely cut. - * - * If we have a deferred free, only the caller side is disconnected. - */ - UNPROTECT_CTX(ctx, flags); - - /* - * All memory free operations (especially for vmalloc'ed memory) - * MUST be done with interrupts ENABLED. - */ - if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size); - - /* - * return the memory used by the context - */ - if (free_possible) pfm_context_free(ctx); - - if (is_running_on_xen()) { - if (is_xenoprof_primary()) { - int ret = HYPERVISOR_perfmon_op(PFM_DESTROY_CONTEXT, - NULL, 0); - if (ret) - printk("%s:%d PFM_DESTROY_CONTEXT hypercall " - "failed\n", __func__, __LINE__); - } - } - return 0; -} - -static int -pfm_no_open(struct inode *irrelevant, struct file *dontcare) -{ - DPRINT(("pfm_no_open called\n")); - return -ENXIO; -} - - - -static struct file_operations pfm_file_ops = { - .llseek = no_llseek, - .read = pfm_read, - .write = pfm_write, - .poll = pfm_poll, - .ioctl = pfm_ioctl, - .open = pfm_no_open, /* special open code to disallow open via /proc */ - .fasync = pfm_fasync, - .release = pfm_close, - .flush = pfm_flush -}; - -static int -pfmfs_delete_dentry(struct dentry *dentry) -{ - return 1; -} - -static struct dentry_operations pfmfs_dentry_operations = { - .d_delete = pfmfs_delete_dentry, -}; - - -static int -pfm_alloc_fd(struct file **cfile) -{ - int fd, ret = 0; - struct file *file = NULL; - struct inode * inode; - char name[32]; - struct qstr this; - - fd = get_unused_fd(); - if (fd < 0) return -ENFILE; - - ret = -ENFILE; - - file = get_empty_filp(); - if (!file) goto out; - - /* - * allocate a new inode - */ - inode = new_inode(pfmfs_mnt->mnt_sb); - if (!inode) goto out; - - DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); - - inode->i_mode = S_IFCHR|S_IRUGO; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - - sprintf(name, "[%lu]", inode->i_ino); - this.name = name; - this.len = strlen(name); - this.hash = inode->i_ino; - - ret = -ENOMEM; - - /* - * allocate a new dcache entry - */ - file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!file->f_dentry) goto out; - - file->f_dentry->d_op = &pfmfs_dentry_operations; - - d_add(file->f_dentry, inode); - file->f_vfsmnt = mntget(pfmfs_mnt); - file->f_mapping = inode->i_mapping; - - file->f_op = &pfm_file_ops; - file->f_mode = FMODE_READ; - file->f_flags = O_RDONLY; - file->f_pos = 0; - - /* - * may have to delay until context is attached? - */ - fd_install(fd, file); - - /* - * the file structure we will use - */ - *cfile = file; - - return fd; -out: - if (file) put_filp(file); - put_unused_fd(fd); - return ret; -} - -static void -pfm_free_fd(int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - - /* - * there ie no fd_uninstall(), so we do it here - */ - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - rcu_assign_pointer(fdt->fd[fd], NULL); - spin_unlock(&files->file_lock); - - if (file) - put_filp(file); - put_unused_fd(fd); -} - -static int -pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) -{ - DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); - - while (size > 0) { - unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; - - - if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) - return -ENOMEM; - - addr += PAGE_SIZE; - buf += PAGE_SIZE; - size -= PAGE_SIZE; - } - return 0; -} - -/* - * allocate a sampling buffer and remaps it into the user address space of the task - */ -static int -pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma = NULL; - unsigned long size; - void *smpl_buf; - - - /* - * the fixed header + requested size and align to page boundary - */ - size = PAGE_ALIGN(rsize); - - DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); - - /* - * check requested size to avoid Denial-of-service attacks - * XXX: may have to refine this test - * Check against address space limit. - * - * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) - * return -ENOMEM; - */ - if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) - return -ENOMEM; - - /* - * We do the easy to undo allocations first. - * - * pfm_rvmalloc(), clears the buffer, so there is no leak - */ - smpl_buf = pfm_rvmalloc(size); - if (smpl_buf == NULL) { - DPRINT(("Can't allocate sampling buffer\n")); - return -ENOMEM; - } - - DPRINT(("smpl_buf @%p\n", smpl_buf)); - - /* allocate vma */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) { - DPRINT(("Cannot allocate vma\n")); - goto error_kmem; - } - memset(vma, 0, sizeof(*vma)); - - /* - * partially initialize the vma for the sampling buffer - */ - vma->vm_mm = mm; - vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; - vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ - - /* - * Now we have everything we need and we can initialize - * and connect all the data structures - */ - - ctx->ctx_smpl_hdr = smpl_buf; - ctx->ctx_smpl_size = size; /* aligned size */ - - /* - * Let's do the difficult operations next. - * - * now we atomically find some area in the address space and - * remap the buffer in it. - */ - down_write(&task->mm->mmap_sem); - - /* find some free area in address space, must have mmap sem held */ - vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); - if (vma->vm_start == 0UL) { - DPRINT(("Cannot find unmapped area for size %ld\n", size)); - up_write(&task->mm->mmap_sem); - goto error; - } - vma->vm_end = vma->vm_start + size; - vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; - - DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); - - /* can only be applied to current task, need to have the mm semaphore held when called */ - if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { - DPRINT(("Can't remap buffer\n")); - up_write(&task->mm->mmap_sem); - goto error; - } - - /* - * now insert the vma in the vm list for the process, must be - * done with mmap lock held - */ - insert_vm_struct(mm, vma); - - mm->total_vm += size >> PAGE_SHIFT; - vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, - vma_pages(vma)); - up_write(&task->mm->mmap_sem); - - /* - * keep track of user level virtual address - */ - ctx->ctx_smpl_vaddr = (void *)vma->vm_start; - *(unsigned long *)user_vaddr = vma->vm_start; - - return 0; - -error: - kmem_cache_free(vm_area_cachep, vma); -error_kmem: - pfm_rvfree(smpl_buf, size); - - return -ENOMEM; -} - -/* - * XXX: do something better here - */ -static int -pfm_bad_permissions(struct task_struct *task) -{ - /* inspired by ptrace_attach() */ - DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", - current->uid, - current->gid, - task->euid, - task->suid, - task->uid, - task->egid, - task->sgid)); - - return ((current->uid != task->euid) - || (current->uid != task->suid) - || (current->uid != task->uid) - || (current->gid != task->egid) - || (current->gid != task->sgid) - || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE); -} - -static int -pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) -{ - int ctx_flags; - - /* valid signal */ - - ctx_flags = pfx->ctx_flags; - - if (ctx_flags & PFM_FL_SYSTEM_WIDE) { - - /* - * cannot block in this mode - */ - if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { - DPRINT(("cannot use blocking mode when in system wide monitoring\n")); - return -EINVAL; - } - } else { - } - /* probably more to add here */ - - return 0; -} - -static int -pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags, - unsigned int cpu, pfarg_context_t *arg) -{ - pfm_buffer_fmt_t *fmt = NULL; - unsigned long size = 0UL; - void *uaddr = NULL; - void *fmt_arg = NULL; - int ret = 0; -#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) - - /* invoke and lock buffer format, if found */ - fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); - if (fmt == NULL) { - DPRINT(("[%d] cannot find buffer format\n", task->pid)); - return -EINVAL; - } - - /* - * buffer argument MUST be contiguous to pfarg_context_t - */ - if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); - - ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); - - DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret)); - - if (ret) goto error; - - /* link buffer format and context */ - ctx->ctx_buf_fmt = fmt; - - /* - * check if buffer format wants to use perfmon buffer allocation/mapping service - */ - ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); - if (ret) goto error; - - if (size) { - /* - * buffer is always remapped into the caller's address space - */ - ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr); - if (ret) goto error; - - /* keep track of user address of buffer */ - arg->ctx_smpl_vaddr = uaddr; - } - ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); - -error: - return ret; -} - -static void -pfm_reset_pmu_state(pfm_context_t *ctx) -{ - int i; - - /* - * install reset values for PMC. - */ - for (i=1; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_IMPL(i) == 0) continue; - ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); - DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); - } - /* - * PMD registers are set to 0UL when the context in memset() - */ - - /* - * On context switched restore, we must restore ALL pmc and ALL pmd even - * when they are not actively used by the task. In UP, the incoming process - * may otherwise pick up left over PMC, PMD state from the previous process. - * As opposed to PMD, stale PMC can cause harm to the incoming - * process because they may change what is being measured. - * Therefore, we must systematically reinstall the entire - * PMC state. In SMP, the same thing is possible on the - * same CPU but also on between 2 CPUs. - * - * The problem with PMD is information leaking especially - * to user level when psr.sp=0 - * - * There is unfortunately no easy way to avoid this problem - * on either UP or SMP. This definitively slows down the - * pfm_load_regs() function. - */ - - /* - * bitmask of all PMCs accessible to this context - * - * PMC0 is treated differently. - */ - ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; - - /* - * bitmask of all PMDs that are accesible to this context - */ - ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; - - DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); - - /* - * useful in case of re-enable after disable - */ - ctx->ctx_used_ibrs[0] = 0UL; - ctx->ctx_used_dbrs[0] = 0UL; -} - -static int -pfm_ctx_getsize(void *arg, size_t *sz) -{ - pfarg_context_t *req = (pfarg_context_t *)arg; - pfm_buffer_fmt_t *fmt; - - *sz = 0; - - if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; - - fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); - if (fmt == NULL) { - DPRINT(("cannot find buffer format\n")); - return -EINVAL; - } - /* get just enough to copy in user parameters */ - *sz = fmt->fmt_arg_size; - DPRINT(("arg_size=%lu\n", *sz)); - - return 0; -} - - - -/* - * cannot attach if : - * - kernel task - * - task not owned by caller - * - task incompatible with context mode - */ -static int -pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) -{ - /* - * no kernel task or task not owner by caller - */ - if (task->mm == NULL) { - DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid)); - return -EPERM; - } - if (pfm_bad_permissions(task)) { - DPRINT(("no permission to attach to [%d]\n", task->pid)); - return -EPERM; - } - /* - * cannot block in self-monitoring mode - */ - if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { - DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid)); - return -EINVAL; - } - - if (task->exit_state == EXIT_ZOMBIE) { - DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); - return -EBUSY; - } - - /* - * always ok for self - */ - if (task == current) return 0; - - if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { - DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state)); - return -EBUSY; - } - /* - * make sure the task is off any CPU - */ - wait_task_inactive(task); - - /* more to come... */ - - return 0; -} - -static int -pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) -{ - struct task_struct *p = current; - int ret; - - /* XXX: need to add more checks here */ - if (pid < 2) return -EPERM; - - if (pid != current->pid) { - - read_lock(&tasklist_lock); - - p = find_task_by_pid(pid); - - /* make sure task cannot go away while we operate on it */ - if (p) get_task_struct(p); - - read_unlock(&tasklist_lock); - - if (p == NULL) return -ESRCH; - } - - ret = pfm_task_incompatible(ctx, p); - if (ret == 0) { - *task = p; - } else if (p != current) { - pfm_put_task(p); - } - return ret; -} - - - -static int -pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - pfarg_context_t *req = (pfarg_context_t *)arg; - struct file *filp; - int ctx_flags; - int ret; - - /* let's check the arguments first */ - ret = pfarg_is_sane(current, req); - if (ret < 0) return ret; - - ctx_flags = req->ctx_flags; - - ret = -ENOMEM; - - ctx = pfm_context_alloc(); - if (!ctx) goto error; - - ret = pfm_alloc_fd(&filp); - if (ret < 0) goto error_file; - - req->ctx_fd = ctx->ctx_fd = ret; - - /* - * attach context to file - */ - filp->private_data = ctx; - - /* - * does the user want to sample? - */ - if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { - ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req); - if (ret) goto buffer_error; - } - - /* - * init context protection lock - */ - spin_lock_init(&ctx->ctx_lock); - - /* - * context is unloaded - */ - ctx->ctx_state = PFM_CTX_UNLOADED; - - /* - * initialization of context's flags - */ - ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; - ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; - ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */ - ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; - /* - * will move to set properties - * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; - */ - - /* - * init restart semaphore to locked - */ - init_completion(&ctx->ctx_restart_done); - - /* - * activation is used in SMP only - */ - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; - SET_LAST_CPU(ctx, -1); - - /* - * initialize notification message queue - */ - ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; - init_waitqueue_head(&ctx->ctx_msgq_wait); - init_waitqueue_head(&ctx->ctx_zombieq); - - DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n", - ctx, - ctx_flags, - ctx->ctx_fl_system, - ctx->ctx_fl_block, - ctx->ctx_fl_excl_idle, - ctx->ctx_fl_no_msg, - ctx->ctx_fd)); - - /* - * initialize soft PMU state - */ - pfm_reset_pmu_state(ctx); - - if (is_running_on_xen()) { - /* - * kludge to get xenoprof.is_primary. - * XENOPROF_init/ia64 is nop. so it is safe to call it here. - */ - struct xenoprof_init init; - ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init); - if (ret) - goto buffer_error; - init_xenoprof_primary(init.is_primary); - - if (is_xenoprof_primary()) { - ret = HYPERVISOR_perfmon_op(PFM_CREATE_CONTEXT, arg, 0); - if (ret) - goto buffer_error; - } - } - return 0; - -buffer_error: - pfm_free_fd(ctx->ctx_fd, filp); - - if (ctx->ctx_buf_fmt) { - pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); - } -error_file: - pfm_context_free(ctx); - -error: - return ret; -} - -static inline unsigned long -pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) -{ - unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; - unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; - extern unsigned long carta_random32 (unsigned long seed); - - if (reg->flags & PFM_REGFL_RANDOM) { - new_seed = carta_random32(old_seed); - val -= (old_seed & mask); /* counter values are negative numbers! */ - if ((mask >> 32) != 0) - /* construct a full 64-bit random value: */ - new_seed |= carta_random32(old_seed >> 32) << 32; - reg->seed = new_seed; - } - reg->lval = val; - return val; -} - -static void -pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) -{ - unsigned long mask = ovfl_regs[0]; - unsigned long reset_others = 0UL; - unsigned long val; - int i; - - /* - * now restore reset value on sampling overflowed counters - */ - mask >>= PMU_FIRST_COUNTER; - for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { - - if ((mask & 0x1UL) == 0UL) continue; - - ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); - reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; - - DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); - } - - /* - * Now take care of resetting the other registers - */ - for(i = 0; reset_others; i++, reset_others >>= 1) { - - if ((reset_others & 0x1) == 0) continue; - - ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); - - DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", - is_long_reset ? "long" : "short", i, val)); - } -} - -static void -pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) -{ - unsigned long mask = ovfl_regs[0]; - unsigned long reset_others = 0UL; - unsigned long val; - int i; - - DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); - - if (ctx->ctx_state == PFM_CTX_MASKED) { - pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); - return; - } - - /* - * now restore reset value on sampling overflowed counters - */ - mask >>= PMU_FIRST_COUNTER; - for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { - - if ((mask & 0x1UL) == 0UL) continue; - - val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); - reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; - - DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); - - pfm_write_soft_counter(ctx, i, val); - } - - /* - * Now take care of resetting the other registers - */ - for(i = 0; reset_others; i++, reset_others >>= 1) { - - if ((reset_others & 0x1) == 0) continue; - - val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); - - if (PMD_IS_COUNTING(i)) { - pfm_write_soft_counter(ctx, i, val); - } else { - ia64_set_pmd(i, val); - } - DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", - is_long_reset ? "long" : "short", i, val)); - } - ia64_srlz_d(); -} - -static int -pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct thread_struct *thread = NULL; - struct task_struct *task; - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned long value, pmc_pm; - unsigned long smpl_pmds, reset_pmds, impl_pmds; - unsigned int cnum, reg_flags, flags, pmc_type; - int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; - int is_monitor, is_counting, state; - int ret = -EINVAL; - pfm_reg_check_t wr_func; -#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) - - if (is_running_on_xen()) { - if (is_xenoprof_primary()) - return HYPERVISOR_perfmon_op(PFM_WRITE_PMCS, - arg, count); - return 0; - } - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - task = ctx->ctx_task; - impl_pmds = pmu_conf->impl_pmds[0]; - - if (state == PFM_CTX_ZOMBIE) return -EINVAL; - - if (is_loaded) { - thread = &task->thread; - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; - } - expert_mode = pfm_sysctl.expert_mode; - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - reg_flags = req->reg_flags; - value = req->reg_value; - smpl_pmds = req->reg_smpl_pmds[0]; - reset_pmds = req->reg_reset_pmds[0]; - flags = 0; - - - if (cnum >= PMU_MAX_PMCS) { - DPRINT(("pmc%u is invalid\n", cnum)); - goto error; - } - - pmc_type = pmu_conf->pmc_desc[cnum].type; - pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; - is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; - is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; - - /* - * we reject all non implemented PMC as well - * as attempts to modify PMC[0-3] which are used - * as status registers by the PMU - */ - if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { - DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); - goto error; - } - wr_func = pmu_conf->pmc_desc[cnum].write_check; - /* - * If the PMC is a monitor, then if the value is not the default: - * - system-wide session: PMCx.pm=1 (privileged monitor) - * - per-task : PMCx.pm=0 (user monitor) - */ - if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { - DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", - cnum, - pmc_pm, - is_system)); - goto error; - } - - if (is_counting) { - /* - * enforce generation of overflow interrupt. Necessary on all - * CPUs. - */ - value |= 1 << PMU_PMC_OI; - - if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { - flags |= PFM_REGFL_OVFL_NOTIFY; - } - - if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; - - /* verify validity of smpl_pmds */ - if ((smpl_pmds & impl_pmds) != smpl_pmds) { - DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); - goto error; - } - - /* verify validity of reset_pmds */ - if ((reset_pmds & impl_pmds) != reset_pmds) { - DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); - goto error; - } - } else { - if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { - DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); - goto error; - } - /* eventid on non-counting monitors are ignored */ - } - - /* - * execute write checker, if any - */ - if (likely(expert_mode == 0 && wr_func)) { - ret = (*wr_func)(task, ctx, cnum, &value, regs); - if (ret) goto error; - ret = -EINVAL; - } - - /* - * no error on this register - */ - PFM_REG_RETFLAG_SET(req->reg_flags, 0); - - /* - * Now we commit the changes to the software state - */ - - /* - * update overflow information - */ - if (is_counting) { - /* - * full flag update each time a register is programmed - */ - ctx->ctx_pmds[cnum].flags = flags; - - ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; - ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; - ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; - - /* - * Mark all PMDS to be accessed as used. - * - * We do not keep track of PMC because we have to - * systematically restore ALL of them. - * - * We do not update the used_monitors mask, because - * if we have not programmed them, then will be in - * a quiescent state, therefore we will not need to - * mask/restore then when context is MASKED. - */ - CTX_USED_PMD(ctx, reset_pmds); - CTX_USED_PMD(ctx, smpl_pmds); - /* - * make sure we do not try to reset on - * restart because we have established new values - */ - if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; - } - /* - * Needed in case the user does not initialize the equivalent - * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no - * possible leak here. - */ - CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); - - /* - * keep track of the monitor PMC that we are using. - * we save the value of the pmc in ctx_pmcs[] and if - * the monitoring is not stopped for the context we also - * place it in the saved state area so that it will be - * picked up later by the context switch code. - * - * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). - * - * The value in thread->pmcs[] may be modified on overflow, i.e., when - * monitoring needs to be stopped. - */ - if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); - - /* - * update context state - */ - ctx->ctx_pmcs[cnum] = value; - - if (is_loaded) { - /* - * write thread state - */ - if (is_system == 0) thread->pmcs[cnum] = value; - - /* - * write hardware register if we can - */ - if (can_access_pmu) { - ia64_set_pmc(cnum, value); - } -#ifdef CONFIG_SMP - else { - /* - * per-task SMP only here - * - * we are guaranteed that the task is not running on the other CPU, - * we indicate that this PMD will need to be reloaded if the task - * is rescheduled on the CPU it ran last on. - */ - ctx->ctx_reload_pmcs[0] |= 1UL << cnum; - } -#endif - } - - DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", - cnum, - value, - is_loaded, - can_access_pmu, - flags, - ctx->ctx_all_pmcs[0], - ctx->ctx_used_pmds[0], - ctx->ctx_pmds[cnum].eventid, - smpl_pmds, - reset_pmds, - ctx->ctx_reload_pmcs[0], - ctx->ctx_used_monitors[0], - ctx->ctx_ovfl_regs[0])); - } - - /* - * make sure the changes are visible - */ - if (can_access_pmu) ia64_srlz_d(); - - return 0; -error: - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -static int -pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct thread_struct *thread = NULL; - struct task_struct *task; - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned long value, hw_value, ovfl_mask; - unsigned int cnum; - int i, can_access_pmu = 0, state; - int is_counting, is_loaded, is_system, expert_mode; - int ret = -EINVAL; - pfm_reg_check_t wr_func; - - if (is_running_on_xen()) { - if (is_xenoprof_primary()) - return HYPERVISOR_perfmon_op(PFM_WRITE_PMDS, - arg, count); - return 0; - } - - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf->ovfl_val; - task = ctx->ctx_task; - - if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; - - /* - * on both UP and SMP, we can only write to the PMC when the task is - * the owner of the local PMU. - */ - if (likely(is_loaded)) { - thread = &task->thread; - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; - } - expert_mode = pfm_sysctl.expert_mode; - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - value = req->reg_value; - - if (!PMD_IS_IMPL(cnum)) { - DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); - goto abort_mission; - } - is_counting = PMD_IS_COUNTING(cnum); - wr_func = pmu_conf->pmd_desc[cnum].write_check; - - /* - * execute write checker, if any - */ - if (unlikely(expert_mode == 0 && wr_func)) { - unsigned long v = value; - - ret = (*wr_func)(task, ctx, cnum, &v, regs); - if (ret) goto abort_mission; - - value = v; - ret = -EINVAL; - } - - /* - * no error on this register - */ - PFM_REG_RETFLAG_SET(req->reg_flags, 0); - - /* - * now commit changes to software state - */ - hw_value = value; - - /* - * update virtualized (64bits) counter - */ - if (is_counting) { - /* - * write context state - */ - ctx->ctx_pmds[cnum].lval = value; - - /* - * when context is load we use the split value - */ - if (is_loaded) { - hw_value = value & ovfl_mask; - value = value & ~ovfl_mask; - } - } - /* - * update reset values (not just for counters) - */ - ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; - ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; - - /* - * update randomization parameters (not just for counters) - */ - ctx->ctx_pmds[cnum].seed = req->reg_random_seed; - ctx->ctx_pmds[cnum].mask = req->reg_random_mask; - - /* - * update context value - */ - ctx->ctx_pmds[cnum].val = value; - - /* - * Keep track of what we use - * - * We do not keep track of PMC because we have to - * systematically restore ALL of them. - */ - CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); - - /* - * mark this PMD register used as well - */ - CTX_USED_PMD(ctx, RDEP(cnum)); - - /* - * make sure we do not try to reset on - * restart because we have established new values - */ - if (is_counting && state == PFM_CTX_MASKED) { - ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; - } - - if (is_loaded) { - /* - * write thread state - */ - if (is_system == 0) thread->pmds[cnum] = hw_value; - - /* - * write hardware register if we can - */ - if (can_access_pmu) { - ia64_set_pmd(cnum, hw_value); - } else { -#ifdef CONFIG_SMP - /* - * we are guaranteed that the task is not running on the other CPU, - * we indicate that this PMD will need to be reloaded if the task - * is rescheduled on the CPU it ran last on. - */ - ctx->ctx_reload_pmds[0] |= 1UL << cnum; -#endif - } - } - - DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " - "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", - cnum, - value, - is_loaded, - can_access_pmu, - hw_value, - ctx->ctx_pmds[cnum].val, - ctx->ctx_pmds[cnum].short_reset, - ctx->ctx_pmds[cnum].long_reset, - PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', - ctx->ctx_pmds[cnum].seed, - ctx->ctx_pmds[cnum].mask, - ctx->ctx_used_pmds[0], - ctx->ctx_pmds[cnum].reset_pmds[0], - ctx->ctx_reload_pmds[0], - ctx->ctx_all_pmds[0], - ctx->ctx_ovfl_regs[0])); - } - - /* - * make changes visible - */ - if (can_access_pmu) ia64_srlz_d(); - - return 0; - -abort_mission: - /* - * for now, we have only one possibility for error - */ - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -/* - * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. - * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an - * interrupt is delivered during the call, it will be kept pending until we leave, making - * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are - * guaranteed to return consistent data to the user, it may simply be old. It is not - * trivial to treat the overflow while inside the call because you may end up in - * some module sampling buffer code causing deadlocks. - */ -static int -pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct thread_struct *thread = NULL; - struct task_struct *task; - unsigned long val = 0UL, lval, ovfl_mask, sval; - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned int cnum, reg_flags = 0; - int i, can_access_pmu = 0, state; - int is_loaded, is_system, is_counting, expert_mode; - int ret = -EINVAL; - pfm_reg_check_t rd_func; - XEN_NOT_SUPPORTED_YET; - - /* - * access is possible when loaded only for - * self-monitoring tasks or in UP mode - */ - - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf->ovfl_val; - task = ctx->ctx_task; - - if (state == PFM_CTX_ZOMBIE) return -EINVAL; - - if (likely(is_loaded)) { - thread = &task->thread; - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - /* - * this can be true when not self-monitoring only in UP - */ - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; - - if (can_access_pmu) ia64_srlz_d(); - } - expert_mode = pfm_sysctl.expert_mode; - - DPRINT(("ld=%d apmu=%d ctx_state=%d\n", - is_loaded, - can_access_pmu, - state)); - - /* - * on both UP and SMP, we can only read the PMD from the hardware register when - * the task is the owner of the local PMU. - */ - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - reg_flags = req->reg_flags; - - if (unlikely(!PMD_IS_IMPL(cnum))) goto error; - /* - * we can only read the register that we use. That includes - * the one we explicitely initialize AND the one we want included - * in the sampling buffer (smpl_regs). - * - * Having this restriction allows optimization in the ctxsw routine - * without compromising security (leaks) - */ - if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; - - sval = ctx->ctx_pmds[cnum].val; - lval = ctx->ctx_pmds[cnum].lval; - is_counting = PMD_IS_COUNTING(cnum); - - /* - * If the task is not the current one, then we check if the - * PMU state is still in the local live register due to lazy ctxsw. - * If true, then we read directly from the registers. - */ - if (can_access_pmu){ - val = ia64_get_pmd(cnum); - } else { - /* - * context has been saved - * if context is zombie, then task does not exist anymore. - * In this case, we use the full value saved in the context (pfm_flush_regs()). - */ - val = is_loaded ? thread->pmds[cnum] : 0UL; - } - rd_func = pmu_conf->pmd_desc[cnum].read_check; - - if (is_counting) { - /* - * XXX: need to check for overflow when loaded - */ - val &= ovfl_mask; - val += sval; - } - - /* - * execute read checker, if any - */ - if (unlikely(expert_mode == 0 && rd_func)) { - unsigned long v = val; - ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); - if (ret) goto error; - val = v; - ret = -EINVAL; - } - - PFM_REG_RETFLAG_SET(reg_flags, 0); - - DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); - - /* - * update register return value, abort all if problem during copy. - * we only modify the reg_flags field. no check mode is fine because - * access has been verified upfront in sys_perfmonctl(). - */ - req->reg_value = val; - req->reg_flags = reg_flags; - req->reg_last_reset_val = lval; - } - - return 0; - -error: - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -int -pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_write_pmcs(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_write_pmcs); - -int -pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_read_pmds(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_read_pmds); - -/* - * Only call this function when a process it trying to - * write the debug registers (reading is always allowed) - */ -int -pfm_use_debug_registers(struct task_struct *task) -{ - pfm_context_t *ctx = task->thread.pfm_context; - unsigned long flags; - int ret = 0; - - if (pmu_conf->use_rr_dbregs == 0) return 0; - - DPRINT(("called for [%d]\n", task->pid)); - - /* - * do it only once - */ - if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; - - /* - * Even on SMP, we do not need to use an atomic here because - * the only way in is via ptrace() and this is possible only when the - * process is stopped. Even in the case where the ctxsw out is not totally - * completed by the time we come here, there is no way the 'stopped' process - * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. - * So this is always safe. - */ - if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; - - LOCK_PFS(flags); - - /* - * We cannot allow setting breakpoints when system wide monitoring - * sessions are using the debug registers. - */ - if (pfm_sessions.pfs_sys_use_dbregs> 0) - ret = -1; - else - pfm_sessions.pfs_ptrace_use_dbregs++; - - DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", - pfm_sessions.pfs_ptrace_use_dbregs, - pfm_sessions.pfs_sys_use_dbregs, - task->pid, ret)); - - UNLOCK_PFS(flags); - - return ret; -} - -/* - * This function is called for every task that exits with the - * IA64_THREAD_DBG_VALID set. This indicates a task which was - * able to use the debug registers for debugging purposes via - * ptrace(). Therefore we know it was not using them for - * perfmormance monitoring, so we only decrement the number - * of "ptraced" debug register users to keep the count up to date - */ -int -pfm_release_debug_registers(struct task_struct *task) -{ - unsigned long flags; - int ret; - - if (pmu_conf->use_rr_dbregs == 0) return 0; - - LOCK_PFS(flags); - if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { - printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid); - ret = -1; - } else { - pfm_sessions.pfs_ptrace_use_dbregs--; - ret = 0; - } - UNLOCK_PFS(flags); - - return ret; -} - -static int -pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task; - pfm_buffer_fmt_t *fmt; - pfm_ovfl_ctrl_t rst_ctrl; - int state, is_system; - int ret = 0; - XEN_NOT_SUPPORTED_YET; - - state = ctx->ctx_state; - fmt = ctx->ctx_buf_fmt; - is_system = ctx->ctx_fl_system; - task = PFM_CTX_TASK(ctx); - - switch(state) { - case PFM_CTX_MASKED: - break; - case PFM_CTX_LOADED: - if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; - /* fall through */ - case PFM_CTX_UNLOADED: - case PFM_CTX_ZOMBIE: - DPRINT(("invalid state=%d\n", state)); - return -EBUSY; - default: - DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); - return -EINVAL; - } - - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - - /* sanity check */ - if (unlikely(task == NULL)) { - printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid); - return -EINVAL; - } - - if (task == current || is_system) { - - fmt = ctx->ctx_buf_fmt; - - DPRINT(("restarting self %d ovfl=0x%lx\n", - task->pid, - ctx->ctx_ovfl_regs[0])); - - if (CTX_HAS_SMPL(ctx)) { - - prefetch(ctx->ctx_smpl_hdr); - - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 0; - - if (state == PFM_CTX_LOADED) - ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - else - ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - } else { - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 1; - } - - if (ret == 0) { - if (rst_ctrl.bits.reset_ovfl_pmds) - pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); - - if (rst_ctrl.bits.mask_monitoring == 0) { - DPRINT(("resuming monitoring for [%d]\n", task->pid)); - - if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); - } else { - DPRINT(("keeping monitoring stopped for [%d]\n", task->pid)); - - // cannot use pfm_stop_monitoring(task, regs); - } - } - /* - * clear overflowed PMD mask to remove any stale information - */ - ctx->ctx_ovfl_regs[0] = 0UL; - - /* - * back to LOADED state - */ - ctx->ctx_state = PFM_CTX_LOADED; - - /* - * XXX: not really useful for self monitoring - */ - ctx->ctx_fl_can_restart = 0; - - return 0; - } - - /* - * restart another task - */ - - /* - * When PFM_CTX_MASKED, we cannot issue a restart before the previous - * one is seen by the task. - */ - if (state == PFM_CTX_MASKED) { - if (ctx->ctx_fl_can_restart == 0) return -EINVAL; - /* - * will prevent subsequent restart before this one is - * seen by other task - */ - ctx->ctx_fl_can_restart = 0; - } - - /* - * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. - * the task is blocked or on its way to block. That's the normal - * restart path. If the monitoring is not masked, then the task - * can be actively monitoring and we cannot directly intervene. - * Therefore we use the trap mechanism to catch the task and - * force it to reset the buffer/reset PMDs. - * - * if non-blocking, then we ensure that the task will go into - * pfm_handle_work() before returning to user mode. - * - * We cannot explicitely reset another task, it MUST always - * be done by the task itself. This works for system wide because - * the tool that is controlling the session is logically doing - * "self-monitoring". - */ - if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { - DPRINT(("unblocking [%d] \n", task->pid)); - complete(&ctx->ctx_restart_done); - } else { - DPRINT(("[%d] armed exit trap\n", task->pid)); - - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; - - PFM_SET_WORK_PENDING(task, 1); - - pfm_set_task_notify(task); - - /* - * XXX: send reschedule if task runs on another CPU - */ - } - return 0; -} - -static int -pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - unsigned int m = *(unsigned int *)arg; - XEN_NOT_SUPPORTED_YET; - - pfm_sysctl.debug = m == 0 ? 0 : 1; - - printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); - - if (m == 0) { - memset(pfm_stats, 0, sizeof(pfm_stats)); - for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; - } - return 0; -} - -/* - * arg can be NULL and count can be zero for this function - */ -static int -pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct thread_struct *thread = NULL; - struct task_struct *task; - pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; - unsigned long flags; - dbreg_t dbreg; - unsigned int rnum; - int first_time; - int ret = 0, state; - int i, can_access_pmu = 0; - int is_system, is_loaded; - - if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; - - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - task = ctx->ctx_task; - - if (state == PFM_CTX_ZOMBIE) return -EINVAL; - - /* - * on both UP and SMP, we can only write to the PMC when the task is - * the owner of the local PMU. - */ - if (is_loaded) { - thread = &task->thread; - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; - } - - /* - * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w - * ensuring that no real breakpoint can be installed via this call. - * - * IMPORTANT: regs can be NULL in this function - */ - - first_time = ctx->ctx_fl_using_dbreg == 0; - - /* - * don't bother if we are loaded and task is being debugged - */ - if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { - DPRINT(("debug registers already in use for [%d]\n", task->pid)); - return -EBUSY; - } - - /* - * check for debug registers in system wide mode - * - * If though a check is done in pfm_context_load(), - * we must repeat it here, in case the registers are - * written after the context is loaded - */ - if (is_loaded) { - LOCK_PFS(flags); - - if (first_time && is_system) { - if (pfm_sessions.pfs_ptrace_use_dbregs) - ret = -EBUSY; - else - pfm_sessions.pfs_sys_use_dbregs++; - } - UNLOCK_PFS(flags); - } - - if (ret != 0) return ret; - - /* - * mark ourself as user of the debug registers for - * perfmon purposes. - */ - ctx->ctx_fl_using_dbreg = 1; - - /* - * clear hardware registers to make sure we don't - * pick up stale state. - * - * for a system wide session, we do not use - * thread.dbr, thread.ibr because this process - * never leaves the current CPU and the state - * is shared by all processes running on it - */ - if (first_time && can_access_pmu) { - DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid)); - for (i=0; i < pmu_conf->num_ibrs; i++) { - ia64_set_ibr(i, 0UL); - ia64_dv_serialize_instruction(); - } - ia64_srlz_i(); - for (i=0; i < pmu_conf->num_dbrs; i++) { - ia64_set_dbr(i, 0UL); - ia64_dv_serialize_data(); - } - ia64_srlz_d(); - } - - /* - * Now install the values into the registers - */ - for (i = 0; i < count; i++, req++) { - - rnum = req->dbreg_num; - dbreg.val = req->dbreg_value; - - ret = -EINVAL; - - if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { - DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", - rnum, dbreg.val, mode, i, count)); - - goto abort_mission; - } - - /* - * make sure we do not install enabled breakpoint - */ - if (rnum & 0x1) { - if (mode == PFM_CODE_RR) - dbreg.ibr.ibr_x = 0; - else - dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; - } - - PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); - - /* - * Debug registers, just like PMC, can only be modified - * by a kernel call. Moreover, perfmon() access to those - * registers are centralized in this routine. The hardware - * does not modify the value of these registers, therefore, - * if we save them as they are written, we can avoid having - * to save them on context switch out. This is made possible - * by the fact that when perfmon uses debug registers, ptrace() - * won't be able to modify them concurrently. - */ - if (mode == PFM_CODE_RR) { - CTX_USED_IBR(ctx, rnum); - - if (can_access_pmu) { - ia64_set_ibr(rnum, dbreg.val); - ia64_dv_serialize_instruction(); - } - - ctx->ctx_ibrs[rnum] = dbreg.val; - - DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", - rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); - } else { - CTX_USED_DBR(ctx, rnum); - - if (can_access_pmu) { - ia64_set_dbr(rnum, dbreg.val); - ia64_dv_serialize_data(); - } - ctx->ctx_dbrs[rnum] = dbreg.val; - - DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", - rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); - } - } - - return 0; - -abort_mission: - /* - * in case it was our first attempt, we undo the global modifications - */ - if (first_time) { - LOCK_PFS(flags); - if (ctx->ctx_fl_system) { - pfm_sessions.pfs_sys_use_dbregs--; - } - UNLOCK_PFS(flags); - ctx->ctx_fl_using_dbreg = 0; - } - /* - * install error return flag - */ - PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); - - return ret; -} - -static int -pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); -} - -static int -pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); -} - -int -pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_write_ibrs(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_write_ibrs); - -int -pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_write_dbrs(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_write_dbrs); - - -static int -pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - pfarg_features_t *req = (pfarg_features_t *)arg; - - if (is_running_on_xen()) - return HYPERVISOR_perfmon_op(PFM_GET_FEATURES, &arg, 0); - req->ft_version = PFM_VERSION; - return 0; -} - -static int -pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct pt_regs *tregs; - struct task_struct *task = PFM_CTX_TASK(ctx); - int state, is_system; - - if (is_running_on_xen()) { - if (is_xenoprof_primary()) - return HYPERVISOR_perfmon_op(PFM_STOP, NULL, 0); - return 0; - } - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - /* - * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) - */ - if (state == PFM_CTX_UNLOADED) return -EINVAL; - - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - DPRINT(("task [%d] ctx_state=%d is_system=%d\n", - PFM_CTX_TASK(ctx)->pid, - state, - is_system)); - /* - * in system mode, we need to update the PMU directly - * and the user level state of the caller, which may not - * necessarily be the creator of the context. - */ - if (is_system) { - /* - * Update local PMU first - * - * disable dcr pp - */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); - ia64_srlz_i(); - - /* - * update local cpuinfo - */ - PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); - - /* - * stop monitoring, does srlz.i - */ - pfm_clear_psr_pp(); - - /* - * stop monitoring in the caller - */ - ia64_psr(regs)->pp = 0; - - return 0; - } - /* - * per-task mode - */ - - if (task == current) { - /* stop monitoring at kernel level */ - pfm_clear_psr_up(); - - /* - * stop monitoring at the user level - */ - ia64_psr(regs)->up = 0; - } else { - tregs = task_pt_regs(task); - - /* - * stop monitoring at the user level - */ - ia64_psr(tregs)->up = 0; - - /* - * monitoring disabled in kernel at next reschedule - */ - ctx->ctx_saved_psr_up = 0; - DPRINT(("task=[%d]\n", task->pid)); - } - return 0; -} - - -static int -pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct pt_regs *tregs; - int state, is_system; - - if (is_running_on_xen()) { - if (is_xenoprof_primary()) - return HYPERVISOR_perfmon_op(PFM_START, NULL, 0); - return 0; - } - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - if (state != PFM_CTX_LOADED) return -EINVAL; - - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - - /* - * in system mode, we need to update the PMU directly - * and the user level state of the caller, which may not - * necessarily be the creator of the context. - */ - if (is_system) { - - /* - * set user level psr.pp for the caller - */ - ia64_psr(regs)->pp = 1; - - /* - * now update the local PMU and cpuinfo - */ - PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); - - /* - * start monitoring at kernel level - */ - pfm_set_psr_pp(); - - /* enable dcr pp */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); - ia64_srlz_i(); - - return 0; - } - - /* - * per-process mode - */ - - if (ctx->ctx_task == current) { - - /* start monitoring at kernel level */ - pfm_set_psr_up(); - - /* - * activate monitoring at user level - */ - ia64_psr(regs)->up = 1; - - } else { - tregs = task_pt_regs(ctx->ctx_task); - - /* - * start monitoring at the kernel level the next - * time the task is scheduled - */ - ctx->ctx_saved_psr_up = IA64_PSR_UP; - - /* - * activate monitoring at user level - */ - ia64_psr(tregs)->up = 1; - } - return 0; -} - -static int -pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned int cnum; - int i; - int ret = -EINVAL; - XEN_NOT_SUPPORTED_YET; - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - - if (!PMC_IS_IMPL(cnum)) goto abort_mission; - - req->reg_value = PMC_DFL_VAL(cnum); - - PFM_REG_RETFLAG_SET(req->reg_flags, 0); - - DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); - } - return 0; - -abort_mission: - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -static int -pfm_check_task_exist(pfm_context_t *ctx) -{ - struct task_struct *g, *t; - int ret = -ESRCH; - - read_lock(&tasklist_lock); - - do_each_thread (g, t) { - if (t->thread.pfm_context == ctx) { - ret = 0; - break; - } - } while_each_thread (g, t); - - read_unlock(&tasklist_lock); - - DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); - - return ret; -} - -static int -pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task; - struct thread_struct *thread; - struct pfm_context_t *old; - unsigned long flags; -#ifndef CONFIG_SMP - struct task_struct *owner_task = NULL; -#endif - pfarg_load_t *req = (pfarg_load_t *)arg; - unsigned long *pmcs_source, *pmds_source; - int the_cpu; - int ret = 0; - int state, is_system, set_dbregs = 0; - - if (is_running_on_xen()) { - if (is_xenoprof_primary()) - return HYPERVISOR_perfmon_op(PFM_LOAD_CONTEXT, arg, 0); - return 0; - } - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - /* - * can only load from unloaded or terminated state - */ - if (state != PFM_CTX_UNLOADED) { - DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", - req->load_pid, - ctx->ctx_state)); - return -EBUSY; - } - - DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); - - if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { - DPRINT(("cannot use blocking mode on self\n")); - return -EINVAL; - } - - ret = pfm_get_task(ctx, req->load_pid, &task); - if (ret) { - DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); - return ret; - } - - ret = -EINVAL; - - /* - * system wide is self monitoring only - */ - if (is_system && task != current) { - DPRINT(("system wide is self monitoring only load_pid=%d\n", - req->load_pid)); - goto error; - } - - thread = &task->thread; - - ret = 0; - /* - * cannot load a context which is using range restrictions, - * into a task that is being debugged. - */ - if (ctx->ctx_fl_using_dbreg) { - if (thread->flags & IA64_THREAD_DBG_VALID) { - ret = -EBUSY; - DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); - goto error; - } - LOCK_PFS(flags); - - if (is_system) { - if (pfm_sessions.pfs_ptrace_use_dbregs) { - DPRINT(("cannot load [%d] dbregs in use\n", task->pid)); - ret = -EBUSY; - } else { - pfm_sessions.pfs_sys_use_dbregs++; - DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs)); - set_dbregs = 1; - } - } - - UNLOCK_PFS(flags); - - if (ret) goto error; - } - - /* - * SMP system-wide monitoring implies self-monitoring. - * - * The programming model expects the task to - * be pinned on a CPU throughout the session. - * Here we take note of the current CPU at the - * time the context is loaded. No call from - * another CPU will be allowed. - * - * The pinning via shed_setaffinity() - * must be done by the calling task prior - * to this call. - * - * systemwide: keep track of CPU this session is supposed to run on - */ - the_cpu = ctx->ctx_cpu = smp_processor_id(); - - ret = -EBUSY; - /* - * now reserve the session - */ - ret = pfm_reserve_session(current, is_system, the_cpu); - if (ret) goto error; - - /* - * task is necessarily stopped at this point. - * - * If the previous context was zombie, then it got removed in - * pfm_save_regs(). Therefore we should not see it here. - * If we see a context, then this is an active context - * - * XXX: needs to be atomic - */ - DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", - thread->pfm_context, ctx)); - - ret = -EBUSY; - old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); - if (old != NULL) { - DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); - goto error_unres; - } - - pfm_reset_msgq(ctx); - - ctx->ctx_state = PFM_CTX_LOADED; - - /* - * link context to task - */ - ctx->ctx_task = task; - - if (is_system) { - /* - * we load as stopped - */ - PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); - PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); - - if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); - } else { - thread->flags |= IA64_THREAD_PM_VALID; - } - - /* - * propagate into thread-state - */ - pfm_copy_pmds(task, ctx); - pfm_copy_pmcs(task, ctx); - - pmcs_source = thread->pmcs; - pmds_source = thread->pmds; - - /* - * always the case for system-wide - */ - if (task == current) { - - if (is_system == 0) { - - /* allow user level control */ - ia64_psr(regs)->sp = 0; - DPRINT(("clearing psr.sp for [%d]\n", task->pid)); - - SET_LAST_CPU(ctx, smp_processor_id()); - INC_ACTIVATION(); - SET_ACTIVATION(ctx); -#ifndef CONFIG_SMP - /* - * push the other task out, if any - */ - owner_task = GET_PMU_OWNER(); - if (owner_task) pfm_lazy_save_regs(owner_task); -#endif - } - /* - * load all PMD from ctx to PMU (as opposed to thread state) - * restore all PMC from ctx to PMU - */ - pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); - pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); - - ctx->ctx_reload_pmcs[0] = 0UL; - ctx->ctx_reload_pmds[0] = 0UL; - - /* - * guaranteed safe by earlier check against DBG_VALID - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - /* - * set new ownership - */ - SET_PMU_OWNER(task, ctx); - - DPRINT(("context loaded on PMU for [%d]\n", task->pid)); - } else { - /* - * when not current, task MUST be stopped, so this is safe - */ - regs = task_pt_regs(task); - - /* force a full reload */ - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; - SET_LAST_CPU(ctx, -1); - - /* initial saved psr (stopped) */ - ctx->ctx_saved_psr_up = 0UL; - ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; - } - - ret = 0; - -error_unres: - if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); -error: - /* - * we must undo the dbregs setting (for system-wide) - */ - if (ret && set_dbregs) { - LOCK_PFS(flags); - pfm_sessions.pfs_sys_use_dbregs--; - UNLOCK_PFS(flags); - } - /* - * release task, there is now a link with the context - */ - if (is_system == 0 && task != current) { - pfm_put_task(task); - - if (ret == 0) { - ret = pfm_check_task_exist(ctx); - if (ret) { - ctx->ctx_state = PFM_CTX_UNLOADED; - ctx->ctx_task = NULL; - } - } - } - return ret; -} - -/* - * in this function, we do not need to increase the use count - * for the task via get_task_struct(), because we hold the - * context lock. If the task were to disappear while having - * a context attached, it would go through pfm_exit_thread() - * which also grabs the context lock and would therefore be blocked - * until we are here. - */ -static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); - -static int -pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task = PFM_CTX_TASK(ctx); - struct pt_regs *tregs; - int prev_state, is_system; - int ret; - - if (is_running_on_xen()) { - if (is_xenoprof_primary()) - return HYPERVISOR_perfmon_op(PFM_UNLOAD_CONTEXT, - NULL, 0); - return 0; - } - DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1)); - - prev_state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - /* - * unload only when necessary - */ - if (prev_state == PFM_CTX_UNLOADED) { - DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); - return 0; - } - - /* - * clear psr and dcr bits - */ - ret = pfm_stop(ctx, NULL, 0, regs); - if (ret) return ret; - - ctx->ctx_state = PFM_CTX_UNLOADED; - - /* - * in system mode, we need to update the PMU directly - * and the user level state of the caller, which may not - * necessarily be the creator of the context. - */ - if (is_system) { - - /* - * Update cpuinfo - * - * local PMU is taken care of in pfm_stop() - */ - PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); - PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); - - /* - * save PMDs in context - * release ownership - */ - pfm_flush_pmds(current, ctx); - - /* - * at this point we are done with the PMU - * so we can unreserve the resource. - */ - if (prev_state != PFM_CTX_ZOMBIE) - pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); - - /* - * disconnect context from task - */ - task->thread.pfm_context = NULL; - /* - * disconnect task from context - */ - ctx->ctx_task = NULL; - - /* - * There is nothing more to cleanup here. - */ - return 0; - } - - /* - * per-task mode - */ - tregs = task == current ? regs : task_pt_regs(task); - - if (task == current) { - /* - * cancel user level control - */ - ia64_psr(regs)->sp = 1; - - DPRINT(("setting psr.sp for [%d]\n", task->pid)); - } - /* - * save PMDs to context - * release ownership - */ - pfm_flush_pmds(task, ctx); - - /* - * at this point we are done with the PMU - * so we can unreserve the resource. - * - * when state was ZOMBIE, we have already unreserved. - */ - if (prev_state != PFM_CTX_ZOMBIE) - pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); - - /* - * reset activation counter and psr - */ - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; - SET_LAST_CPU(ctx, -1); - - /* - * PMU state will not be restored - */ - task->thread.flags &= ~IA64_THREAD_PM_VALID; - - /* - * break links between context and task - */ - task->thread.pfm_context = NULL; - ctx->ctx_task = NULL; - - PFM_SET_WORK_PENDING(task, 0); - - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; - ctx->ctx_fl_can_restart = 0; - ctx->ctx_fl_going_zombie = 0; - - DPRINT(("disconnected [%d] from context\n", task->pid)); - - return 0; -} - - -/* - * called only from exit_thread(): task == current - * we come here only if current has a context attached (loaded or masked) - */ -void -pfm_exit_thread(struct task_struct *task) -{ - pfm_context_t *ctx; - unsigned long flags; - struct pt_regs *regs = task_pt_regs(task); - int ret, state; - int free_ok = 0; - - ctx = PFM_GET_CTX(task); - - PROTECT_CTX(ctx, flags); - - DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid)); - - state = ctx->ctx_state; - switch(state) { - case PFM_CTX_UNLOADED: - /* - * only comes to thios function if pfm_context is not NULL, i.e., cannot - * be in unloaded state - */ - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid); - break; - case PFM_CTX_LOADED: - case PFM_CTX_MASKED: - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); - } - DPRINT(("ctx unloaded for current state was %d\n", state)); - - pfm_end_notify_user(ctx); - break; - case PFM_CTX_ZOMBIE: - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); - } - free_ok = 1; - break; - default: - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state); - break; - } - UNPROTECT_CTX(ctx, flags); - - { u64 psr = pfm_get_psr(); - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); - BUG_ON(GET_PMU_OWNER()); - BUG_ON(ia64_psr(regs)->up); - BUG_ON(ia64_psr(regs)->pp); - } - - /* - * All memory free operations (especially for vmalloc'ed memory) - * MUST be done with interrupts ENABLED. - */ - if (free_ok) pfm_context_free(ctx); -} - -/* - * functions MUST be listed in the increasing order of their index (see permfon.h) - */ -#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz } -#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL } -#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP) -#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW) -#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL} - -static pfm_cmd_desc_t pfm_cmd_tab[]={ -/* 0 */PFM_CMD_NONE, -/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS), -/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS), -/* 6 */PFM_CMD_NONE, -/* 7 */PFM_CMD_NONE, -/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize), -/* 9 */PFM_CMD_NONE, -/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW), -/* 11 */PFM_CMD_NONE, -/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL), -/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL), -/* 14 */PFM_CMD_NONE, -/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL), -/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS), -/* 18 */PFM_CMD_NONE, -/* 19 */PFM_CMD_NONE, -/* 20 */PFM_CMD_NONE, -/* 21 */PFM_CMD_NONE, -/* 22 */PFM_CMD_NONE, -/* 23 */PFM_CMD_NONE, -/* 24 */PFM_CMD_NONE, -/* 25 */PFM_CMD_NONE, -/* 26 */PFM_CMD_NONE, -/* 27 */PFM_CMD_NONE, -/* 28 */PFM_CMD_NONE, -/* 29 */PFM_CMD_NONE, -/* 30 */PFM_CMD_NONE, -/* 31 */PFM_CMD_NONE, -/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL), -/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL) -}; -#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) - -static int -pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) -{ - struct task_struct *task; - int state, old_state; - -recheck: - state = ctx->ctx_state; - task = ctx->ctx_task; - - if (task == NULL) { - DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); - return 0; - } - - DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", - ctx->ctx_fd, - state, - task->pid, - task->state, PFM_CMD_STOPPED(cmd))); - - /* - * self-monitoring always ok. - * - * for system-wide the caller can either be the creator of the - * context (to one to which the context is attached to) OR - * a task running on the same CPU as the session. - */ - if (task == current || ctx->ctx_fl_system) return 0; - - /* - * we are monitoring another thread - */ - switch(state) { - case PFM_CTX_UNLOADED: - /* - * if context is UNLOADED we are safe to go - */ - return 0; - case PFM_CTX_ZOMBIE: - /* - * no command can operate on a zombie context - */ - DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); - return -EINVAL; - case PFM_CTX_MASKED: - /* - * PMU state has been saved to software even though - * the thread may still be running. - */ - if (cmd != PFM_UNLOAD_CONTEXT) return 0; - } - - /* - * context is LOADED or MASKED. Some commands may need to have - * the task stopped. - * - * We could lift this restriction for UP but it would mean that - * the user has no guarantee the task would not run between - * two successive calls to perfmonctl(). That's probably OK. - * If this user wants to ensure the task does not run, then - * the task must be stopped. - */ - if (PFM_CMD_STOPPED(cmd)) { - if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { - DPRINT(("[%d] task not in stopped state\n", task->pid)); - return -EBUSY; - } - /* - * task is now stopped, wait for ctxsw out - * - * This is an interesting point in the code. - * We need to unprotect the context because - * the pfm_save_regs() routines needs to grab - * the same lock. There are danger in doing - * this because it leaves a window open for - * another task to get access to the context - * and possibly change its state. The one thing - * that is not possible is for the context to disappear - * because we are protected by the VFS layer, i.e., - * get_fd()/put_fd(). - */ - old_state = state; - - UNPROTECT_CTX(ctx, flags); - - wait_task_inactive(task); - - PROTECT_CTX(ctx, flags); - - /* - * we must recheck to verify if state has changed - */ - if (ctx->ctx_state != old_state) { - DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); - goto recheck; - } - } - return 0; -} - -/* - * system-call entry point (must return long) - */ -asmlinkage long -sys_perfmonctl (int fd, int cmd, void __user *arg, int count) -{ - struct file *file = NULL; - pfm_context_t *ctx = NULL; - unsigned long flags = 0UL; - void *args_k = NULL; - long ret; /* will expand int return types */ - size_t base_sz, sz, xtra_sz = 0; - int narg, completed_args = 0, call_made = 0, cmd_flags; - int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - int (*getsize)(void *arg, size_t *sz); -#define PFM_MAX_ARGSIZE 4096 - - /* - * reject any call if perfmon was disabled at initialization - */ - if (unlikely(pmu_conf == NULL)) return -ENOSYS; - - if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { - DPRINT(("invalid cmd=%d\n", cmd)); - return -EINVAL; - } - - func = pfm_cmd_tab[cmd].cmd_func; - narg = pfm_cmd_tab[cmd].cmd_narg; - base_sz = pfm_cmd_tab[cmd].cmd_argsize; - getsize = pfm_cmd_tab[cmd].cmd_getsize; - cmd_flags = pfm_cmd_tab[cmd].cmd_flags; - - if (unlikely(func == NULL)) { - DPRINT(("invalid cmd=%d\n", cmd)); - return -EINVAL; - } - - DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", - PFM_CMD_NAME(cmd), - cmd, - narg, - base_sz, - count)); - - /* - * check if number of arguments matches what the command expects - */ - if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) - return -EINVAL; - -restart_args: - sz = xtra_sz + base_sz*count; - /* - * limit abuse to min page size - */ - if (unlikely(sz > PFM_MAX_ARGSIZE)) { - printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz); - return -E2BIG; - } - - /* - * allocate default-sized argument buffer - */ - if (likely(count && args_k == NULL)) { - args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); - if (args_k == NULL) return -ENOMEM; - } - - ret = -EFAULT; - - /* - * copy arguments - * - * assume sz = 0 for command without parameters - */ - if (sz && copy_from_user(args_k, arg, sz)) { - DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); - goto error_args; - } - - /* - * check if command supports extra parameters - */ - if (completed_args == 0 && getsize) { - /* - * get extra parameters size (based on main argument) - */ - ret = (*getsize)(args_k, &xtra_sz); - if (ret) goto error_args; - - completed_args = 1; - - DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); - - /* retry if necessary */ - if (likely(xtra_sz)) goto restart_args; - } - - if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; - - ret = -EBADF; - - file = fget(fd); - if (unlikely(file == NULL)) { - DPRINT(("invalid fd %d\n", fd)); - goto error_args; - } - if (unlikely(PFM_IS_FILE(file) == 0)) { - DPRINT(("fd %d not related to perfmon\n", fd)); - goto error_args; - } - - ctx = (pfm_context_t *)file->private_data; - if (unlikely(ctx == NULL)) { - DPRINT(("no context for fd %d\n", fd)); - goto error_args; - } - prefetch(&ctx->ctx_state); - - PROTECT_CTX(ctx, flags); - - /* - * check task is stopped - */ - ret = pfm_check_task_state(ctx, cmd, flags); - if (unlikely(ret)) goto abort_locked; - -skip_fd: - ret = (*func)(ctx, args_k, count, task_pt_regs(current)); - - call_made = 1; - -abort_locked: - if (likely(ctx)) { - DPRINT(("context unlocked\n")); - UNPROTECT_CTX(ctx, flags); - } - - /* copy argument back to user, if needed */ - if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; - -error_args: - if (file) - fput(file); - - kfree(args_k); - - DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); - - return ret; -} - -static void -pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) -{ - pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; - pfm_ovfl_ctrl_t rst_ctrl; - int state; - int ret = 0; - - state = ctx->ctx_state; - /* - * Unlock sampling buffer and reset index atomically - * XXX: not really needed when blocking - */ - if (CTX_HAS_SMPL(ctx)) { - - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 0; - - if (state == PFM_CTX_LOADED) - ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - else - ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - } else { - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 1; - } - - if (ret == 0) { - if (rst_ctrl.bits.reset_ovfl_pmds) { - pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); - } - if (rst_ctrl.bits.mask_monitoring == 0) { - DPRINT(("resuming monitoring\n")); - if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); - } else { - DPRINT(("stopping monitoring\n")); - //pfm_stop_monitoring(current, regs); - } - ctx->ctx_state = PFM_CTX_LOADED; - } -} - -/* - * context MUST BE LOCKED when calling - * can only be called for current - */ -static void -pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) -{ - int ret; - - DPRINT(("entering for [%d]\n", current->pid)); - - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret); - } - - /* - * and wakeup controlling task, indicating we are now disconnected - */ - wake_up_interruptible(&ctx->ctx_zombieq); - - /* - * given that context is still locked, the controlling - * task will only get access when we return from - * pfm_handle_work(). - */ -} - -static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); - /* - * pfm_handle_work() can be called with interrupts enabled - * (TIF_NEED_RESCHED) or disabled. The down_interruptible - * call may sleep, therefore we must re-enable interrupts - * to avoid deadlocks. It is safe to do so because this function - * is called ONLY when returning to user level (PUStk=1), in which case - * there is no risk of kernel stack overflow due to deep - * interrupt nesting. - */ -void -pfm_handle_work(void) -{ - pfm_context_t *ctx; - struct pt_regs *regs; - unsigned long flags, dummy_flags; - unsigned long ovfl_regs; - unsigned int reason; - int ret; - - ctx = PFM_GET_CTX(current); - if (ctx == NULL) { - printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid); - return; - } - - PROTECT_CTX(ctx, flags); - - PFM_SET_WORK_PENDING(current, 0); - - pfm_clear_task_notify(); - - regs = task_pt_regs(current); - - /* - * extract reason for being here and clear - */ - reason = ctx->ctx_fl_trap_reason; - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; - ovfl_regs = ctx->ctx_ovfl_regs[0]; - - DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); - - /* - * must be done before we check for simple-reset mode - */ - if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie; - - - //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; - if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking; - - /* - * restore interrupt mask to what it was on entry. - * Could be enabled/diasbled. - */ - UNPROTECT_CTX(ctx, flags); - - /* - * force interrupt enable because of down_interruptible() - */ - local_irq_enable(); - - DPRINT(("before block sleeping\n")); - - /* - * may go through without blocking on SMP systems - * if restart has been received already by the time we call down() - */ - ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); - - DPRINT(("after block sleeping ret=%d\n", ret)); - - /* - * lock context and mask interrupts again - * We save flags into a dummy because we may have - * altered interrupts mask compared to entry in this - * function. - */ - PROTECT_CTX(ctx, dummy_flags); - - /* - * we need to read the ovfl_regs only after wake-up - * because we may have had pfm_write_pmds() in between - * and that can changed PMD values and therefore - * ovfl_regs is reset for these new PMD values. - */ - ovfl_regs = ctx->ctx_ovfl_regs[0]; - - if (ctx->ctx_fl_going_zombie) { -do_zombie: - DPRINT(("context is zombie, bailing out\n")); - pfm_context_force_terminate(ctx, regs); - goto nothing_to_do; - } - /* - * in case of interruption of down() we don't restart anything - */ - if (ret < 0) goto nothing_to_do; - -skip_blocking: - pfm_resume_after_ovfl(ctx, ovfl_regs, regs); - ctx->ctx_ovfl_regs[0] = 0UL; - -nothing_to_do: - /* - * restore flags as they were upon entry - */ - UNPROTECT_CTX(ctx, flags); -} - -static int -pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) -{ - if (ctx->ctx_state == PFM_CTX_ZOMBIE) { - DPRINT(("ignoring overflow notification, owner is zombie\n")); - return 0; - } - - DPRINT(("waking up somebody\n")); - - if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); - - /* - * safe, we are not in intr handler, nor in ctxsw when - * we come here - */ - kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); - - return 0; -} - -static int -pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) -{ - pfm_msg_t *msg = NULL; - - if (ctx->ctx_fl_no_msg == 0) { - msg = pfm_get_new_msg(ctx); - if (msg == NULL) { - printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); - return -1; - } - - msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; - msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; - msg->pfm_ovfl_msg.msg_active_set = 0; - msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; - msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; - msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; - msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; - msg->pfm_ovfl_msg.msg_tstamp = 0UL; - } - - DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", - msg, - ctx->ctx_fl_no_msg, - ctx->ctx_fd, - ovfl_pmds)); - - return pfm_notify_user(ctx, msg); -} - -static int -pfm_end_notify_user(pfm_context_t *ctx) -{ - pfm_msg_t *msg; - - msg = pfm_get_new_msg(ctx); - if (msg == NULL) { - printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); - return -1; - } - /* no leak */ - memset(msg, 0, sizeof(*msg)); - - msg->pfm_end_msg.msg_type = PFM_MSG_END; - msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; - msg->pfm_ovfl_msg.msg_tstamp = 0UL; - - DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", - msg, - ctx->ctx_fl_no_msg, - ctx->ctx_fd)); - - return pfm_notify_user(ctx, msg); -} - -/* - * main overflow processing routine. - * it can be called from the interrupt path or explicitely during the context switch code - */ -static void -pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) -{ - pfm_ovfl_arg_t *ovfl_arg; - unsigned long mask; - unsigned long old_val, ovfl_val, new_val; - unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; - unsigned long tstamp; - pfm_ovfl_ctrl_t ovfl_ctrl; - unsigned int i, has_smpl; - int must_notify = 0; - - if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; - - /* - * sanity test. Should never happen - */ - if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; - - tstamp = ia64_get_itc(); - mask = pmc0 >> PMU_FIRST_COUNTER; - ovfl_val = pmu_conf->ovfl_val; - has_smpl = CTX_HAS_SMPL(ctx); - - DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " - "used_pmds=0x%lx\n", - pmc0, - task ? task->pid: -1, - (regs ? regs->cr_iip : 0), - CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", - ctx->ctx_used_pmds[0])); - - - /* - * first we update the virtual counters - * assume there was a prior ia64_srlz_d() issued - */ - for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { - - /* skip pmd which did not overflow */ - if ((mask & 0x1) == 0) continue; - - /* - * Note that the pmd is not necessarily 0 at this point as qualified events - * may have happened before the PMU was frozen. The residual count is not - * taken into consideration here but will be with any read of the pmd via - * pfm_read_pmds(). - */ - old_val = new_val = ctx->ctx_pmds[i].val; - new_val += 1 + ovfl_val; - ctx->ctx_pmds[i].val = new_val; - - /* - * check for overflow condition - */ - if (likely(old_val > new_val)) { - ovfl_pmds |= 1UL << i; - if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; - } - - DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", - i, - new_val, - old_val, - ia64_get_pmd(i) & ovfl_val, - ovfl_pmds, - ovfl_notify)); - } - - /* - * there was no 64-bit overflow, nothing else to do - */ - if (ovfl_pmds == 0UL) return; - - /* - * reset all control bits - */ - ovfl_ctrl.val = 0; - reset_pmds = 0UL; - - /* - * if a sampling format module exists, then we "cache" the overflow by - * calling the module's handler() routine. - */ - if (has_smpl) { - unsigned long start_cycles, end_cycles; - unsigned long pmd_mask; - int j, k, ret = 0; - int this_cpu = smp_processor_id(); - - pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; - ovfl_arg = &ctx->ctx_ovfl_arg; - - prefetch(ctx->ctx_smpl_hdr); - - for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { - - mask = 1UL << i; - - if ((pmd_mask & 0x1) == 0) continue; - - ovfl_arg->ovfl_pmd = (unsigned char )i; - ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; - ovfl_arg->active_set = 0; - ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ - ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; - - ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; - ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; - ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; - - /* - * copy values of pmds of interest. Sampling format may copy them - * into sampling buffer. - */ - if (smpl_pmds) { - for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { - if ((smpl_pmds & 0x1) == 0) continue; - ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); - DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); - } - } - - pfm_stats[this_cpu].pfm_smpl_handler_calls++; - - start_cycles = ia64_get_itc(); - - /* - * call custom buffer format record (handler) routine - */ - ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); - - end_cycles = ia64_get_itc(); - - /* - * For those controls, we take the union because they have - * an all or nothing behavior. - */ - ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; - ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; - ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; - /* - * build the bitmask of pmds to reset now - */ - if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; - - pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; - } - /* - * when the module cannot handle the rest of the overflows, we abort right here - */ - if (ret && pmd_mask) { - DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", - pmd_mask<<PMU_FIRST_COUNTER)); - } - /* - * remove the pmds we reset now from the set of pmds to reset in pfm_restart() - */ - ovfl_pmds &= ~reset_pmds; - } else { - /* - * when no sampling module is used, then the default - * is to notify on overflow if requested by user - */ - ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; - ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; - ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ - ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; - /* - * if needed, we reset all overflowed pmds - */ - if (ovfl_notify == 0) reset_pmds = ovfl_pmds; - } - - DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); - - /* - * reset the requested PMD registers using the short reset values - */ - if (reset_pmds) { - unsigned long bm = reset_pmds; - pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); - } - - if (ovfl_notify && ovfl_ctrl.bits.notify_user) { - /* - * keep track of what to reset when unblocking - */ - ctx->ctx_ovfl_regs[0] = ovfl_pmds; - - /* - * check for blocking context - */ - if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { - - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; - - /* - * set the perfmon specific checking pending work for the task - */ - PFM_SET_WORK_PENDING(task, 1); - - /* - * when coming from ctxsw, current still points to the - * previous task, therefore we must work with task and not current. - */ - pfm_set_task_notify(task); - } - /* - * defer until state is changed (shorten spin window). the context is locked - * anyway, so the signal receiver would come spin for nothing. - */ - must_notify = 1; - } - - DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", - GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1, - PFM_GET_WORK_PENDING(task), - ctx->ctx_fl_trap_reason, - ovfl_pmds, - ovfl_notify, - ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); - /* - * in case monitoring must be stopped, we toggle the psr bits - */ - if (ovfl_ctrl.bits.mask_monitoring) { - pfm_mask_monitoring(task); - ctx->ctx_state = PFM_CTX_MASKED; - ctx->ctx_fl_can_restart = 1; - } - - /* - * send notification now - */ - if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); - - return; - -sanity_check: - printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", - smp_processor_id(), - task ? task->pid : -1, - pmc0); - return; - -stop_monitoring: - /* - * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). - * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can - * come here as zombie only if the task is the current task. In which case, we - * can access the PMU hardware directly. - * - * Note that zombies do have PM_VALID set. So here we do the minimal. - * - * In case the context was zombified it could not be reclaimed at the time - * the monitoring program exited. At this point, the PMU reservation has been - * returned, the sampiing buffer has been freed. We must convert this call - * into a spurious interrupt. However, we must also avoid infinite overflows - * by stopping monitoring for this task. We can only come here for a per-task - * context. All we need to do is to stop monitoring using the psr bits which - * are always task private. By re-enabling secure montioring, we ensure that - * the monitored task will not be able to re-activate monitoring. - * The task will eventually be context switched out, at which point the context - * will be reclaimed (that includes releasing ownership of the PMU). - * - * So there might be a window of time where the number of per-task session is zero - * yet one PMU might have a owner and get at most one overflow interrupt for a zombie - * context. This is safe because if a per-task session comes in, it will push this one - * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide - * session is force on that CPU, given that we use task pinning, pfm_save_regs() will - * also push our zombie context out. - * - * Overall pretty hairy stuff.... - */ - DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1)); - pfm_clear_psr_up(); - ia64_psr(regs)->up = 0; - ia64_psr(regs)->sp = 1; - return; -} - -static int -pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) -{ - struct task_struct *task; - pfm_context_t *ctx; - unsigned long flags; - u64 pmc0; - int this_cpu = smp_processor_id(); - int retval = 0; - - pfm_stats[this_cpu].pfm_ovfl_intr_count++; - - /* - * srlz.d done before arriving here - */ - pmc0 = ia64_get_pmc(0); - - task = GET_PMU_OWNER(); - ctx = GET_PMU_CTX(); - - /* - * if we have some pending bits set - * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 - */ - if (PMC0_HAS_OVFL(pmc0) && task) { - /* - * we assume that pmc0.fr is always set here - */ - - /* sanity check */ - if (!ctx) goto report_spurious1; - - if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) - goto report_spurious2; - - PROTECT_CTX_NOPRINT(ctx, flags); - - pfm_overflow_handler(task, ctx, pmc0, regs); - - UNPROTECT_CTX_NOPRINT(ctx, flags); - - } else { - pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; - retval = -1; - } - /* - * keep it unfrozen at all times - */ - pfm_unfreeze_pmu(); - - return retval; - -report_spurious1: - printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", - this_cpu, task->pid); - pfm_unfreeze_pmu(); - return -1; -report_spurious2: - printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", - this_cpu, - task->pid); - pfm_unfreeze_pmu(); - return -1; -} - -static irqreturn_t -pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) -{ - unsigned long start_cycles, total_cycles; - unsigned long min, max; - int this_cpu; - int ret; - - this_cpu = get_cpu(); - if (likely(!pfm_alt_intr_handler)) { - min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; - max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; - - start_cycles = ia64_get_itc(); - - ret = pfm_do_interrupt_handler(irq, arg, regs); - - total_cycles = ia64_get_itc(); - - /* - * don't measure spurious interrupts - */ - if (likely(ret == 0)) { - total_cycles -= start_cycles; - - if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; - if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; - - pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; - } - } - else { - (*pfm_alt_intr_handler->handler)(irq, arg, regs); - } - - put_cpu_no_resched(); - return IRQ_HANDLED; -} - -/* - * /proc/perfmon interface, for debug only - */ - -#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1) - -static void * -pfm_proc_start(struct seq_file *m, loff_t *pos) -{ - if (*pos == 0) { - return PFM_PROC_SHOW_HEADER; - } - - while (*pos <= NR_CPUS) { - if (cpu_online(*pos - 1)) { - return (void *)*pos; - } - ++*pos; - } - return NULL; -} - -static void * -pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return pfm_proc_start(m, pos); -} - -static void -pfm_proc_stop(struct seq_file *m, void *v) -{ -} - -static void -pfm_proc_show_header(struct seq_file *m) -{ - struct list_head * pos; - pfm_buffer_fmt_t * entry; - unsigned long flags; - - seq_printf(m, - "perfmon version : %u.%u\n" - "model : %s\n" - "fastctxsw : %s\n" - "expert mode : %s\n" - "ovfl_mask : 0x%lx\n" - "PMU flags : 0x%x\n", - PFM_VERSION_MAJ, PFM_VERSION_MIN, - pmu_conf->pmu_name, - pfm_sysctl.fastctxsw > 0 ? "Yes": "No", - pfm_sysctl.expert_mode > 0 ? "Yes": "No", - pmu_conf->ovfl_val, - pmu_conf->flags); - - LOCK_PFS(flags); - - seq_printf(m, - "proc_sessions : %u\n" - "sys_sessions : %u\n" - "sys_use_dbregs : %u\n" - "ptrace_use_dbregs : %u\n", - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_sys_use_dbregs, - pfm_sessions.pfs_ptrace_use_dbregs); - - UNLOCK_PFS(flags); - - spin_lock(&pfm_buffer_fmt_lock); - - list_for_each(pos, &pfm_buffer_fmt_list) { - entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); - seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", - entry->fmt_uuid[0], - entry->fmt_uuid[1], - entry->fmt_uuid[2], - entry->fmt_uuid[3], - entry->fmt_uuid[4], - entry->fmt_uuid[5], - entry->fmt_uuid[6], - entry->fmt_uuid[7], - entry->fmt_uuid[8], - entry->fmt_uuid[9], - entry->fmt_uuid[10], - entry->fmt_uuid[11], - entry->fmt_uuid[12], - entry->fmt_uuid[13], - entry->fmt_uuid[14], - entry->fmt_uuid[15], - entry->fmt_name); - } - spin_unlock(&pfm_buffer_fmt_lock); - -} - -static int -pfm_proc_show(struct seq_file *m, void *v) -{ - unsigned long psr; - unsigned int i; - int cpu; - - if (v == PFM_PROC_SHOW_HEADER) { - pfm_proc_show_header(m); - return 0; - } - - /* show info for CPU (v - 1) */ - - cpu = (long)v - 1; - seq_printf(m, - "CPU%-2d overflow intrs : %lu\n" - "CPU%-2d overflow cycles : %lu\n" - "CPU%-2d overflow min : %lu\n" - "CPU%-2d overflow max : %lu\n" - "CPU%-2d smpl handler calls : %lu\n" - "CPU%-2d smpl handler cycles : %lu\n" - "CPU%-2d spurious intrs : %lu\n" - "CPU%-2d replay intrs : %lu\n" - "CPU%-2d syst_wide : %d\n" - "CPU%-2d dcr_pp : %d\n" - "CPU%-2d exclude idle : %d\n" - "CPU%-2d owner : %d\n" - "CPU%-2d context : %p\n" - "CPU%-2d activations : %lu\n", - cpu, pfm_stats[cpu].pfm_ovfl_intr_count, - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, - cpu, pfm_stats[cpu].pfm_smpl_handler_calls, - cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, - cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, - cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, - cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, - cpu, pfm_get_cpu_data(pmu_ctx, cpu), - cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); - - if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { - - psr = pfm_get_psr(); - - ia64_srlz_d(); - - seq_printf(m, - "CPU%-2d psr : 0x%lx\n" - "CPU%-2d pmc0 : 0x%lx\n", - cpu, psr, - cpu, ia64_get_pmc(0)); - - for (i=0; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_COUNTING(i) == 0) continue; - seq_printf(m, - "CPU%-2d pmc%u : 0x%lx\n" - "CPU%-2d pmd%u : 0x%lx\n", - cpu, i, ia64_get_pmc(i), - cpu, i, ia64_get_pmd(i)); - } - } - return 0; -} - -struct seq_operations pfm_seq_ops = { - .start = pfm_proc_start, - .next = pfm_proc_next, - .stop = pfm_proc_stop, - .show = pfm_proc_show -}; - -static int -pfm_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &pfm_seq_ops); -} - - -/* - * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens - * during pfm_enable() hence before pfm_start(). We cannot assume monitoring - * is active or inactive based on mode. We must rely on the value in - * local_cpu_data->pfm_syst_info - */ -void -pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) -{ - struct pt_regs *regs; - unsigned long dcr; - unsigned long dcr_pp; - - dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; - - /* - * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 - * on every CPU, so we can rely on the pid to identify the idle task. - */ - if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { - regs = task_pt_regs(task); - ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; - return; - } - /* - * if monitoring has started - */ - if (dcr_pp) { - dcr = ia64_getreg(_IA64_REG_CR_DCR); - /* - * context switching in? - */ - if (is_ctxswin) { - /* mask monitoring for the idle task */ - ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); - pfm_clear_psr_pp(); - ia64_srlz_i(); - return; - } - /* - * context switching out - * restore monitoring for next task - * - * Due to inlining this odd if-then-else construction generates - * better code. - */ - ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); - pfm_set_psr_pp(); - ia64_srlz_i(); - } -} - -#ifdef CONFIG_SMP - -static void -pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) -{ - struct task_struct *task = ctx->ctx_task; - - ia64_psr(regs)->up = 0; - ia64_psr(regs)->sp = 1; - - if (GET_PMU_OWNER() == task) { - DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid)); - SET_PMU_OWNER(NULL, NULL); - } - - /* - * disconnect the task from the context and vice-versa - */ - PFM_SET_WORK_PENDING(task, 0); - - task->thread.pfm_context = NULL; - task->thread.flags &= ~IA64_THREAD_PM_VALID; - - DPRINT(("force cleanup for [%d]\n", task->pid)); -} - - -/* - * in 2.6, interrupts are masked when we come here and the runqueue lock is held - */ -void -pfm_save_regs(struct task_struct *task) -{ - pfm_context_t *ctx; - struct thread_struct *t; - unsigned long flags; - u64 psr; - - - ctx = PFM_GET_CTX(task); - if (ctx == NULL) return; - t = &task->thread; - - /* - * we always come here with interrupts ALREADY disabled by - * the scheduler. So we simply need to protect against concurrent - * access, not CPU concurrency. - */ - flags = pfm_protect_ctx_ctxsw(ctx); - - if (ctx->ctx_state == PFM_CTX_ZOMBIE) { - struct pt_regs *regs = task_pt_regs(task); - - pfm_clear_psr_up(); - - pfm_force_cleanup(ctx, regs); - - BUG_ON(ctx->ctx_smpl_hdr); - - pfm_unprotect_ctx_ctxsw(ctx, flags); - - pfm_context_free(ctx); - return; - } - - /* - * save current PSR: needed because we modify it - */ - ia64_srlz_d(); - psr = pfm_get_psr(); - - BUG_ON(psr & (IA64_PSR_I)); - - /* - * stop monitoring: - * This is the last instruction which may generate an overflow - * - * We do not need to set psr.sp because, it is irrelevant in kernel. - * It will be restored from ipsr when going back to user level - */ - pfm_clear_psr_up(); - - /* - * keep a copy of psr.up (for reload) - */ - ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; - - /* - * release ownership of this PMU. - * PM interrupts are masked, so nothing - * can happen. - */ - SET_PMU_OWNER(NULL, NULL); - - /* - * we systematically save the PMD as we have no - * guarantee we will be schedule at that same - * CPU again. - */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); - - /* - * save pmc0 ia64_srlz_d() done in pfm_save_pmds() - * we will need it on the restore path to check - * for pending overflow. - */ - t->pmcs[0] = ia64_get_pmc(0); - - /* - * unfreeze PMU if had pending overflows - */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); - - /* - * finally, allow context access. - * interrupts will still be masked after this call. - */ - pfm_unprotect_ctx_ctxsw(ctx, flags); -} - -#else /* !CONFIG_SMP */ -void -pfm_save_regs(struct task_struct *task) -{ - pfm_context_t *ctx; - u64 psr; - - ctx = PFM_GET_CTX(task); - if (ctx == NULL) return; - - /* - * save current PSR: needed because we modify it - */ - psr = pfm_get_psr(); - - BUG_ON(psr & (IA64_PSR_I)); - - /* - * stop monitoring: - * This is the last instruction which may generate an overflow - * - * We do not need to set psr.sp because, it is irrelevant in kernel. - * It will be restored from ipsr when going back to user level - */ - pfm_clear_psr_up(); - - /* - * keep a copy of psr.up (for reload) - */ - ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; -} - -static void -pfm_lazy_save_regs (struct task_struct *task) -{ - pfm_context_t *ctx; - struct thread_struct *t; - unsigned long flags; - - { u64 psr = pfm_get_psr(); - BUG_ON(psr & IA64_PSR_UP); - } - - ctx = PFM_GET_CTX(task); - t = &task->thread; - - /* - * we need to mask PMU overflow here to - * make sure that we maintain pmc0 until - * we save it. overflow interrupts are - * treated as spurious if there is no - * owner. - * - * XXX: I don't think this is necessary - */ - PROTECT_CTX(ctx,flags); - - /* - * release ownership of this PMU. - * must be done before we save the registers. - * - * after this call any PMU interrupt is treated - * as spurious. - */ - SET_PMU_OWNER(NULL, NULL); - - /* - * save all the pmds we use - */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); - - /* - * save pmc0 ia64_srlz_d() done in pfm_save_pmds() - * it is needed to check for pended overflow - * on the restore path - */ - t->pmcs[0] = ia64_get_pmc(0); - - /* - * unfreeze PMU if had pending overflows - */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); - - /* - * now get can unmask PMU interrupts, they will - * be treated as purely spurious and we will not - * lose any information - */ - UNPROTECT_CTX(ctx,flags); -} -#endif /* CONFIG_SMP */ - -#ifdef CONFIG_SMP -/* - * in 2.6, interrupts are masked when we come here and the runqueue lock is held - */ -void -pfm_load_regs (struct task_struct *task) -{ - pfm_context_t *ctx; - struct thread_struct *t; - unsigned long pmc_mask = 0UL, pmd_mask = 0UL; - unsigned long flags; - u64 psr, psr_up; - int need_irq_resend; - - ctx = PFM_GET_CTX(task); - if (unlikely(ctx == NULL)) return; - - BUG_ON(GET_PMU_OWNER()); - - t = &task->thread; - /* - * possible on unload - */ - if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return; - - /* - * we always come here with interrupts ALREADY disabled by - * the scheduler. So we simply need to protect against concurrent - * access, not CPU concurrency. - */ - flags = pfm_protect_ctx_ctxsw(ctx); - psr = pfm_get_psr(); - - need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; - - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); - BUG_ON(psr & IA64_PSR_I); - - if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { - struct pt_regs *regs = task_pt_regs(task); - - BUG_ON(ctx->ctx_smpl_hdr); - - pfm_force_cleanup(ctx, regs); - - pfm_unprotect_ctx_ctxsw(ctx, flags); - - /* - * this one (kmalloc'ed) is fine with interrupts disabled - */ - pfm_context_free(ctx); - - return; - } - - /* - * we restore ALL the debug registers to avoid picking up - * stale state. - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - /* - * retrieve saved psr.up - */ - psr_up = ctx->ctx_saved_psr_up; - - /* - * if we were the last user of the PMU on that CPU, - * then nothing to do except restore psr - */ - if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { - - /* - * retrieve partial reload masks (due to user modifications) - */ - pmc_mask = ctx->ctx_reload_pmcs[0]; - pmd_mask = ctx->ctx_reload_pmds[0]; - - } else { - /* - * To avoid leaking information to the user level when psr.sp=0, - * we must reload ALL implemented pmds (even the ones we don't use). - * In the kernel we only allow PFM_READ_PMDS on registers which - * we initialized or requested (sampling) so there is no risk there. - */ - pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; - - /* - * ALL accessible PMCs are systematically reloaded, unused registers - * get their default (from pfm_reset_pmu_state()) values to avoid picking - * up stale configuration. - * - * PMC0 is never in the mask. It is always restored separately. - */ - pmc_mask = ctx->ctx_all_pmcs[0]; - } - /* - * when context is MASKED, we will restore PMC with plm=0 - * and PMD with stale information, but that's ok, nothing - * will be captured. - * - * XXX: optimize here - */ - if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask); - if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask); - - /* - * check for pending overflow at the time the state - * was saved. - */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { - /* - * reload pmc0 with the overflow information - * On McKinley PMU, this will trigger a PMU interrupt - */ - ia64_set_pmc(0, t->pmcs[0]); - ia64_srlz_d(); - t->pmcs[0] = 0UL; - - /* - * will replay the PMU interrupt - */ - if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); - - pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; - } - - /* - * we just did a reload, so we reset the partial reload fields - */ - ctx->ctx_reload_pmcs[0] = 0UL; - ctx->ctx_reload_pmds[0] = 0UL; - - SET_LAST_CPU(ctx, smp_processor_id()); - - /* - * dump activation value for this PMU - */ - INC_ACTIVATION(); - /* - * record current activation for this context - */ - SET_ACTIVATION(ctx); - - /* - * establish new ownership. - */ - SET_PMU_OWNER(task, ctx); - - /* - * restore the psr.up bit. measurement - * is active again. - * no PMU interrupt can happen at this point - * because we still have interrupts disabled. - */ - if (likely(psr_up)) pfm_set_psr_up(); - - /* - * allow concurrent access to context - */ - pfm_unprotect_ctx_ctxsw(ctx, flags); -} -#else /* !CONFIG_SMP */ -/* - * reload PMU state for UP kernels - * in 2.5 we come here with interrupts disabled - */ -void -pfm_load_regs (struct task_struct *task) -{ - struct thread_struct *t; - pfm_context_t *ctx; - struct task_struct *owner; - unsigned long pmd_mask, pmc_mask; - u64 psr, psr_up; - int need_irq_resend; - - owner = GET_PMU_OWNER(); - ctx = PFM_GET_CTX(task); - t = &task->thread; - psr = pfm_get_psr(); - - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); - BUG_ON(psr & IA64_PSR_I); - - /* - * we restore ALL the debug registers to avoid picking up - * stale state. - * - * This must be done even when the task is still the owner - * as the registers may have been modified via ptrace() - * (not perfmon) by the previous task. - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - - /* - * retrieved saved psr.up - */ - psr_up = ctx->ctx_saved_psr_up; - need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; - - /* - * short path, our state is still there, just - * need to restore psr and we go - * - * we do not touch either PMC nor PMD. the psr is not touched - * by the overflow_handler. So we are safe w.r.t. to interrupt - * concurrency even without interrupt masking. - */ - if (likely(owner == task)) { - if (likely(psr_up)) pfm_set_psr_up(); - return; - } - - /* - * someone else is still using the PMU, first push it out and - * then we'll be able to install our stuff ! - * - * Upon return, there will be no owner for the current PMU - */ - if (owner) pfm_lazy_save_regs(owner); - - /* - * To avoid leaking information to the user level when psr.sp=0, - * we must reload ALL implemented pmds (even the ones we don't use). - * In the kernel we only allow PFM_READ_PMDS on registers which - * we initialized or requested (sampling) so there is no risk there. - */ - pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; - - /* - * ALL accessible PMCs are systematically reloaded, unused registers - * get their default (from pfm_reset_pmu_state()) values to avoid picking - * up stale configuration. - * - * PMC0 is never in the mask. It is always restored separately - */ - pmc_mask = ctx->ctx_all_pmcs[0]; - - pfm_restore_pmds(t->pmds, pmd_mask); - pfm_restore_pmcs(t->pmcs, pmc_mask); - - /* - * check for pending overflow at the time the state - * was saved. - */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { - /* - * reload pmc0 with the overflow information - * On McKinley PMU, this will trigger a PMU interrupt - */ - ia64_set_pmc(0, t->pmcs[0]); - ia64_srlz_d(); - - t->pmcs[0] = 0UL; - - /* - * will replay the PMU interrupt - */ - if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); - - pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; - } - - /* - * establish new ownership. - */ - SET_PMU_OWNER(task, ctx); - - /* - * restore the psr.up bit. measurement - * is active again. - * no PMU interrupt can happen at this point - * because we still have interrupts disabled. - */ - if (likely(psr_up)) pfm_set_psr_up(); -} -#endif /* CONFIG_SMP */ - -/* - * this function assumes monitoring is stopped - */ -static void -pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) -{ - u64 pmc0; - unsigned long mask2, val, pmd_val, ovfl_val; - int i, can_access_pmu = 0; - int is_self; - - /* - * is the caller the task being monitored (or which initiated the - * session for system wide measurements) - */ - is_self = ctx->ctx_task == task ? 1 : 0; - - /* - * can access PMU is task is the owner of the PMU state on the current CPU - * or if we are running on the CPU bound to the context in system-wide mode - * (that is not necessarily the task the context is attached to in this mode). - * In system-wide we always have can_access_pmu true because a task running on an - * invalid processor is flagged earlier in the call stack (see pfm_stop). - */ - can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); - if (can_access_pmu) { - /* - * Mark the PMU as not owned - * This will cause the interrupt handler to do nothing in case an overflow - * interrupt was in-flight - * This also guarantees that pmc0 will contain the final state - * It virtually gives us full control on overflow processing from that point - * on. - */ - SET_PMU_OWNER(NULL, NULL); - DPRINT(("releasing ownership\n")); - - /* - * read current overflow status: - * - * we are guaranteed to read the final stable state - */ - ia64_srlz_d(); - pmc0 = ia64_get_pmc(0); /* slow */ - - /* - * reset freeze bit, overflow status information destroyed - */ - pfm_unfreeze_pmu(); - } else { - pmc0 = task->thread.pmcs[0]; - /* - * clear whatever overflow status bits there were - */ - task->thread.pmcs[0] = 0; - } - ovfl_val = pmu_conf->ovfl_val; - /* - * we save all the used pmds - * we take care of overflows for counting PMDs - * - * XXX: sampling situation is not taken into account here - */ - mask2 = ctx->ctx_used_pmds[0]; - - DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); - - for (i = 0; mask2; i++, mask2>>=1) { - - /* skip non used pmds */ - if ((mask2 & 0x1) == 0) continue; - - /* - * can access PMU always true in system wide mode - */ - val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i]; - - if (PMD_IS_COUNTING(i)) { - DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", - task->pid, - i, - ctx->ctx_pmds[i].val, - val & ovfl_val)); - - /* - * we rebuild the full 64 bit value of the counter - */ - val = ctx->ctx_pmds[i].val + (val & ovfl_val); - - /* - * now everything is in ctx_pmds[] and we need - * to clear the saved context from save_regs() such that - * pfm_read_pmds() gets the correct value - */ - pmd_val = 0UL; - - /* - * take care of overflow inline - */ - if (pmc0 & (1UL << i)) { - val += 1 + ovfl_val; - DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i)); - } - } - - DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); - - if (is_self) task->thread.pmds[i] = pmd_val; - - ctx->ctx_pmds[i].val = val; - } -} - -static struct irqaction perfmon_irqaction = { - .handler = pfm_interrupt_handler, - .flags = IRQF_DISABLED, - .name = "perfmon" -}; - -static void -pfm_alt_save_pmu_state(void *data) -{ - struct pt_regs *regs; - - regs = task_pt_regs(current); - - DPRINT(("called\n")); - - /* - * should not be necessary but - * let's take not risk - */ - pfm_clear_psr_up(); - pfm_clear_psr_pp(); - ia64_psr(regs)->pp = 0; - - /* - * This call is required - * May cause a spurious interrupt on some processors - */ - pfm_freeze_pmu(); - - ia64_srlz_d(); -} - -void -pfm_alt_restore_pmu_state(void *data) -{ - struct pt_regs *regs; - - regs = task_pt_regs(current); - - DPRINT(("called\n")); - - /* - * put PMU back in state expected - * by perfmon - */ - pfm_clear_psr_up(); - pfm_clear_psr_pp(); - ia64_psr(regs)->pp = 0; - - /* - * perfmon runs with PMU unfrozen at all times - */ - pfm_unfreeze_pmu(); - - ia64_srlz_d(); -} - -int -pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) -{ - int ret, i; - int reserve_cpu; - - /* some sanity checks */ - if (hdl == NULL || hdl->handler == NULL) return -EINVAL; - - /* do the easy test first */ - if (pfm_alt_intr_handler) return -EBUSY; - - /* one at a time in the install or remove, just fail the others */ - if (!spin_trylock(&pfm_alt_install_check)) { - return -EBUSY; - } - - /* reserve our session */ - for_each_online_cpu(reserve_cpu) { - ret = pfm_reserve_session(NULL, 1, reserve_cpu); - if (ret) goto cleanup_reserve; - } - - /* save the current system wide pmu states */ - ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - goto cleanup_reserve; - } - - /* officially change to the alternate interrupt handler */ - pfm_alt_intr_handler = hdl; - - spin_unlock(&pfm_alt_install_check); - - return 0; - -cleanup_reserve: - for_each_online_cpu(i) { - /* don't unreserve more than we reserved */ - if (i >= reserve_cpu) break; - - pfm_unreserve_session(NULL, 1, i); - } - - spin_unlock(&pfm_alt_install_check); - - return ret; -} -EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); - -int -pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) -{ - int i; - int ret; - - if (hdl == NULL) return -EINVAL; - - /* cannot remove someone else's handler! */ - if (pfm_alt_intr_handler != hdl) return -EINVAL; - - /* one at a time in the install or remove, just fail the others */ - if (!spin_trylock(&pfm_alt_install_check)) { - return -EBUSY; - } - - pfm_alt_intr_handler = NULL; - - ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - } - - for_each_online_cpu(i) { - pfm_unreserve_session(NULL, 1, i); - } - - spin_unlock(&pfm_alt_install_check); - - return 0; -} -EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); - -/* - * perfmon initialization routine, called from the initcall() table - */ -static int init_pfm_fs(void); - -static int __init -pfm_probe_pmu(void) -{ - pmu_config_t **p; - int family; - - family = local_cpu_data->family; - p = pmu_confs; - - while(*p) { - if ((*p)->probe) { - if ((*p)->probe() == 0) goto found; - } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { - goto found; - } - p++; - } - return -1; -found: - pmu_conf = *p; - return 0; -} - -static struct file_operations pfm_proc_fops = { - .open = pfm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -int __init -pfm_init(void) -{ - unsigned int n, n_counters, i; - - printk("perfmon: version %u.%u IRQ %u\n", - PFM_VERSION_MAJ, - PFM_VERSION_MIN, - IA64_PERFMON_VECTOR); - - if (pfm_probe_pmu()) { - printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", - local_cpu_data->family); - return -ENODEV; - } - - /* - * compute the number of implemented PMD/PMC from the - * description tables - */ - n = 0; - for (i=0; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_IMPL(i) == 0) continue; - pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); - n++; - } - pmu_conf->num_pmcs = n; - - n = 0; n_counters = 0; - for (i=0; PMD_IS_LAST(i) == 0; i++) { - if (PMD_IS_IMPL(i) == 0) continue; - pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); - n++; - if (PMD_IS_COUNTING(i)) n_counters++; - } - pmu_conf->num_pmds = n; - pmu_conf->num_counters = n_counters; - - /* - * sanity checks on the number of debug registers - */ - if (pmu_conf->use_rr_dbregs) { - if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { - printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); - pmu_conf = NULL; - return -1; - } - if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { - printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs); - pmu_conf = NULL; - return -1; - } - } - - printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", - pmu_conf->pmu_name, - pmu_conf->num_pmcs, - pmu_conf->num_pmds, - pmu_conf->num_counters, - ffz(pmu_conf->ovfl_val)); - - /* sanity check */ - if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { - printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); - pmu_conf = NULL; - return -1; - } - - /* - * create /proc/perfmon (mostly for debugging purposes) - */ - perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL); - if (perfmon_dir == NULL) { - printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); - pmu_conf = NULL; - return -1; - } - /* - * install customized file operations for /proc/perfmon entry - */ - perfmon_dir->proc_fops = &pfm_proc_fops; - - /* - * create /proc/sys/kernel/perfmon (for debugging purposes) - */ - pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0); - - /* - * initialize all our spinlocks - */ - spin_lock_init(&pfm_sessions.pfs_lock); - spin_lock_init(&pfm_buffer_fmt_lock); - - init_pfm_fs(); - - for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL; - - return 0; -} - -__initcall(pfm_init); - -/* - * this function is called before pfm_init() - */ -void -pfm_init_percpu (void) -{ - static int first_time=1; - /* - * make sure no measurement is active - * (may inherit programmed PMCs from EFI). - */ - pfm_clear_psr_pp(); - pfm_clear_psr_up(); - - /* - * we run with the PMU not frozen at all times - */ - pfm_unfreeze_pmu(); - - if (first_time) { - register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); - first_time=0; - } - - ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); - ia64_srlz_d(); -} - -/* - * used for debug purposes only - */ -void -dump_pmu_state(const char *from) -{ - struct task_struct *task; - struct thread_struct *t; - struct pt_regs *regs; - pfm_context_t *ctx; - unsigned long psr, dcr, info, flags; - int i, this_cpu; - - local_irq_save(flags); - - this_cpu = smp_processor_id(); - regs = task_pt_regs(current); - info = PFM_CPUINFO_GET(); - dcr = ia64_getreg(_IA64_REG_CR_DCR); - - if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) { - local_irq_restore(flags); - return; - } - - printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", - this_cpu, - from, - current->pid, - regs->cr_iip, - current->comm); - - task = GET_PMU_OWNER(); - ctx = GET_PMU_CTX(); - - printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx); - - psr = pfm_get_psr(); - - printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", - this_cpu, - ia64_get_pmc(0), - psr & IA64_PSR_PP ? 1 : 0, - psr & IA64_PSR_UP ? 1 : 0, - dcr & IA64_DCR_PP ? 1 : 0, - info, - ia64_psr(regs)->up, - ia64_psr(regs)->pp); - - ia64_psr(regs)->up = 0; - ia64_psr(regs)->pp = 0; - - t = ¤t->thread; - - for (i=1; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); - } - - for (i=1; PMD_IS_LAST(i) == 0; i++) { - if (PMD_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); - } - - if (ctx) { - printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n", - this_cpu, - ctx->ctx_state, - ctx->ctx_smpl_vaddr, - ctx->ctx_smpl_hdr, - ctx->ctx_msgq_head, - ctx->ctx_msgq_tail, - ctx->ctx_saved_psr_up); - } - local_irq_restore(flags); -} - -/* - * called from process.c:copy_thread(). task is new child. - */ -void -pfm_inherit(struct task_struct *task, struct pt_regs *regs) -{ - struct thread_struct *thread; - - DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid)); - - thread = &task->thread; - - /* - * cut links inherited from parent (current) - */ - thread->pfm_context = NULL; - - PFM_SET_WORK_PENDING(task, 0); - - /* - * the psr bits are already set properly in copy_threads() - */ -} -#else /* !CONFIG_PERFMON */ -asmlinkage long -sys_perfmonctl (int fd, int cmd, void *arg, int count) -{ - return -ENOSYS; -} -#endif /* CONFIG_PERFMON */ diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c deleted file mode 100644 index c7af364b42..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c +++ /dev/null @@ -1,1030 +0,0 @@ -/* - * Architecture-specific setup. - * - * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * Stephane Eranian <eranian@hpl.hp.com> - * Copyright (C) 2000, 2004 Intel Corp - * Rohit Seth <rohit.seth@intel.com> - * Suresh Siddha <suresh.b.siddha@intel.com> - * Gordon Jin <gordon.jin@intel.com> - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> - * - * 12/26/04 S.Siddha, G.Jin, R.Seth - * Add multi-threading and multi-core detection - * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). - * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map - * 03/31/00 R.Seth cpu_initialized and current->processor fixes - * 02/04/00 D.Mosberger some more get_cpuinfo fixes... - * 02/01/00 R.Seth fixed get_cpuinfo for SMP - * 01/07/99 S.Eranian added the support for command line argument - * 06/24/99 W.Drummond added boot_cpu_data. - * 05/28/05 Z. Menyhart Dynamic stride size for "flush_icache_range()" - */ -#include <linux/module.h> -#include <linux/init.h> - -#include <linux/acpi.h> -#include <linux/bootmem.h> -#include <linux/console.h> -#include <linux/delay.h> -#include <linux/kernel.h> -#include <linux/reboot.h> -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/string.h> -#include <linux/threads.h> -#include <linux/screen_info.h> -#include <linux/dmi.h> -#include <linux/serial.h> -#include <linux/serial_core.h> -#include <linux/efi.h> -#include <linux/initrd.h> -#include <linux/pm.h> -#include <linux/cpufreq.h> - -#include <asm/ia32.h> -#include <asm/machvec.h> -#include <asm/mca.h> -#include <asm/meminit.h> -#include <asm/page.h> -#include <asm/patch.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/sal.h> -#include <asm/sections.h> -#include <asm/serial.h> -#include <asm/setup.h> -#include <asm/smp.h> -#include <asm/system.h> -#include <asm/unistd.h> -#include <asm/system.h> -#ifdef CONFIG_XEN -#include <asm/hypervisor.h> -#include <asm/xen/xencomm.h> -#include <xen/xencons.h> -#endif -#include <linux/dma-mapping.h> - -#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) -# error "struct cpuinfo_ia64 too big!" -#endif - -#ifdef CONFIG_SMP -unsigned long __per_cpu_offset[NR_CPUS]; -EXPORT_SYMBOL(__per_cpu_offset); -#endif - -#ifdef CONFIG_XEN -static void -xen_panic_hypercall(struct unw_frame_info *info, void *arg) -{ - current->thread.ksp = (__u64)info->sw - 16; - HYPERVISOR_shutdown(SHUTDOWN_crash); - /* we're never actually going to get here... */ -} - -static int -xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - unw_init_running(xen_panic_hypercall, NULL); - /* we're never actually going to get here... */ - return NOTIFY_DONE; -} - -static struct notifier_block xen_panic_block = { - xen_panic_event, NULL, 0 /* try to go last */ -}; - -void xen_pm_power_off(void) -{ - local_irq_disable(); - HYPERVISOR_shutdown(SHUTDOWN_poweroff); -} -#endif - -extern void ia64_setup_printk_clock(void); - -DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); -DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); -DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); -unsigned long ia64_cycles_per_usec; -struct ia64_boot_param *ia64_boot_param; -struct screen_info screen_info; -unsigned long vga_console_iobase; -unsigned long vga_console_membase; - -static struct resource data_resource = { - .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource code_resource = { - .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; -extern void efi_initialize_iomem_resources(struct resource *, - struct resource *); -extern char _text[], _end[], _etext[]; - -unsigned long ia64_max_cacheline_size; - -int dma_get_cache_alignment(void) -{ - return ia64_max_cacheline_size; -} -EXPORT_SYMBOL(dma_get_cache_alignment); - -unsigned long ia64_iobase; /* virtual address for I/O accesses */ -EXPORT_SYMBOL(ia64_iobase); -struct io_space io_space[MAX_IO_SPACES]; -EXPORT_SYMBOL(io_space); -unsigned int num_io_spaces; - -/* - * "flush_icache_range()" needs to know what processor dependent stride size to use - * when it makes i-cache(s) coherent with d-caches. - */ -#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ -unsigned long ia64_i_cache_stride_shift = ~0; - -/* - * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This - * mask specifies a mask of address bits that must be 0 in order for two buffers to be - * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start - * address of the second buffer must be aligned to (merge_mask+1) in order to be - * mergeable). By default, we assume there is no I/O MMU which can merge physically - * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu - * page-size of 2^64. - */ -unsigned long ia64_max_iommu_merge_mask = ~0UL; -EXPORT_SYMBOL(ia64_max_iommu_merge_mask); - -/* - * We use a special marker for the end of memory and it uses the extra (+1) slot - */ -struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; -int num_rsvd_regions __initdata; - - -/* - * Filter incoming memory segments based on the primitive map created from the boot - * parameters. Segments contained in the map are removed from the memory ranges. A - * caller-specified function is called with the memory ranges that remain after filtering. - * This routine does not assume the incoming segments are sorted. - */ -int __init -filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) -{ - unsigned long range_start, range_end, prev_start; - void (*func)(unsigned long, unsigned long, int); - int i; - -#if IGNORE_PFN0 - if (start == PAGE_OFFSET) { - printk(KERN_WARNING "warning: skipping physical page 0\n"); - start += PAGE_SIZE; - if (start >= end) return 0; - } -#endif - /* - * lowest possible address(walker uses virtual) - */ - prev_start = PAGE_OFFSET; - func = arg; - - for (i = 0; i < num_rsvd_regions; ++i) { - range_start = max(start, prev_start); - range_end = min(end, rsvd_region[i].start); - - if (range_start < range_end) - call_pernode_memory(__pa(range_start), range_end - range_start, func); - - /* nothing more available in this segment */ - if (range_end == end) return 0; - - prev_start = rsvd_region[i].end; - } - /* end of memory marker allows full processing inside loop body */ - return 0; -} - -static void __init -sort_regions (struct rsvd_region *rsvd_region, int max) -{ - int j; - - /* simple bubble sorting */ - while (max--) { - for (j = 0; j < max; ++j) { - if (rsvd_region[j].start > rsvd_region[j+1].start) { - struct rsvd_region tmp; - tmp = rsvd_region[j]; - rsvd_region[j] = rsvd_region[j + 1]; - rsvd_region[j + 1] = tmp; - } - } - } -} - -/* - * Request address space for all standard resources - */ -static int __init register_memory(void) -{ - code_resource.start = ia64_tpa(_text); - code_resource.end = ia64_tpa(_etext) - 1; - data_resource.start = ia64_tpa(_etext); - data_resource.end = ia64_tpa(_end) - 1; - efi_initialize_iomem_resources(&code_resource, &data_resource); - - return 0; -} - -__initcall(register_memory); - -/** - * reserve_memory - setup reserved memory areas - * - * Setup the reserved memory areas set aside for the boot parameters, - * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, - * see include/asm-ia64/meminit.h if you need to define more. - */ -void __init -reserve_memory (void) -{ - int n = 0; - - /* - * none of the entries in this table overlap - */ - rsvd_region[n].start = (unsigned long) ia64_boot_param; - rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); - n++; - - rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); - rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; - n++; - - rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); - rsvd_region[n].end = (rsvd_region[n].start - + strlen(__va(ia64_boot_param->command_line)) + 1); - n++; - - rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); - rsvd_region[n].end = (unsigned long) ia64_imva(_end); - n++; - -#ifdef CONFIG_XEN - if (is_running_on_xen()) { - rsvd_region[n].start = (unsigned long)__va((HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT)); - rsvd_region[n].end = rsvd_region[n].start + PAGE_SIZE; - n++; - } -#endif - -#ifdef CONFIG_BLK_DEV_INITRD - if (ia64_boot_param->initrd_start) { - rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); - rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; - n++; - } -#endif - - efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); - n++; - - /* end of memory marker */ - rsvd_region[n].start = ~0UL; - rsvd_region[n].end = ~0UL; - n++; - - num_rsvd_regions = n; - BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); - - sort_regions(rsvd_region, num_rsvd_regions); -} - -/** - * find_initrd - get initrd parameters from the boot parameter structure - * - * Grab the initrd start and end from the boot parameter struct given us by - * the boot loader. - */ -void __init -find_initrd (void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - if (ia64_boot_param->initrd_start) { - initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); - initrd_end = initrd_start+ia64_boot_param->initrd_size; - - printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n", - initrd_start, ia64_boot_param->initrd_size); - } -#endif -} - -static void __init -io_port_init (void) -{ - unsigned long phys_iobase; - - /* - * Set `iobase' based on the EFI memory map or, failing that, the - * value firmware left in ar.k0. - * - * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute - * the port's virtual address, so ia32_load_state() loads it with a - * user virtual address. But in ia64 mode, glibc uses the - * *physical* address in ar.k0 to mmap the appropriate area from - * /dev/mem, and the inX()/outX() interfaces use MMIO. In both - * cases, user-mode can only use the legacy 0-64K I/O port space. - * - * ar.k0 is not involved in kernel I/O port accesses, which can use - * any of the I/O port spaces and are done via MMIO using the - * virtual mmio_base from the appropriate io_space[]. - */ - phys_iobase = efi_get_iobase(); - if (!phys_iobase) { - phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); - printk(KERN_INFO "No I/O port range found in EFI memory map, " - "falling back to AR.KR0 (0x%lx)\n", phys_iobase); - } - ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); - ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); - - /* setup legacy IO port space */ - io_space[0].mmio_base = ia64_iobase; - io_space[0].sparse = 1; - num_io_spaces = 1; -} - -/** - * early_console_setup - setup debugging console - * - * Consoles started here require little enough setup that we can start using - * them very early in the boot process, either right after the machine - * vector initialization, or even before if the drivers can detect their hw. - * - * Returns non-zero if a console couldn't be setup. - */ -static inline int __init -early_console_setup (char *cmdline) -{ - int earlycons = 0; - -#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE - { - extern int sn_serial_console_early_setup(void); - if (!sn_serial_console_early_setup()) - earlycons++; - } -#endif -#ifdef CONFIG_EFI_PCDP - if (!efi_setup_pcdp_console(cmdline)) - earlycons++; -#endif -#ifdef CONFIG_SERIAL_8250_CONSOLE - if (!early_serial_console_init(cmdline)) - earlycons++; -#endif - - return (earlycons) ? 0 : -1; -} - -static inline void -mark_bsp_online (void) -{ -#ifdef CONFIG_SMP - /* If we register an early console, allow CPU 0 to printk */ - cpu_set(smp_processor_id(), cpu_online_map); -#endif -} - -#ifdef CONFIG_SMP -static void __init -check_for_logical_procs (void) -{ - pal_logical_to_physical_t info; - s64 status; - - status = ia64_pal_logical_to_phys(0, &info); - if (status == -1) { - printk(KERN_INFO "No logical to physical processor mapping " - "available\n"); - return; - } - if (status) { - printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", - status); - return; - } - /* - * Total number of siblings that BSP has. Though not all of them - * may have booted successfully. The correct number of siblings - * booted is in info.overview_num_log. - */ - smp_num_siblings = info.overview_tpc; - smp_num_cpucores = info.overview_cpp; -} -#endif - -static __initdata int nomca; -static __init int setup_nomca(char *s) -{ - nomca = 1; - return 0; -} -early_param("nomca", setup_nomca); - -void __init -setup_arch (char **cmdline_p) -{ - unw_init(); - -#ifdef CONFIG_XEN - if (is_running_on_xen()) { - /* Must be done before any hypercall. */ - xencomm_init(); - - setup_xen_features(); - /* Register a call for panic conditions. */ - atomic_notifier_chain_register(&panic_notifier_list, - &xen_panic_block); - pm_power_off = xen_pm_power_off; - } -#endif - - ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); - - *cmdline_p = __va(ia64_boot_param->command_line); - strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); - - efi_init(); - io_port_init(); - - parse_early_param(); - -#ifdef CONFIG_IA64_GENERIC - machvec_init(NULL); -#endif - - if (early_console_setup(*cmdline_p) == 0) - mark_bsp_online(); - -#ifdef CONFIG_ACPI - /* Initialize the ACPI boot-time table parser */ - acpi_table_init(); -# ifdef CONFIG_ACPI_NUMA - acpi_numa_init(); -# endif -#else -# ifdef CONFIG_SMP - smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */ -# endif -#endif /* CONFIG_APCI_BOOT */ - - find_memory(); - - /* process SAL system table: */ - ia64_sal_init(__va(efi.sal_systab)); - - ia64_setup_printk_clock(); - -#ifdef CONFIG_SMP - cpu_physical_id(0) = hard_smp_processor_id(); - - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); - - check_for_logical_procs(); - if (smp_num_cpucores > 1) - printk(KERN_INFO - "cpu package is Multi-Core capable: number of cores=%d\n", - smp_num_cpucores); - if (smp_num_siblings > 1) - printk(KERN_INFO - "cpu package is Multi-Threading capable: number of siblings=%d\n", - smp_num_siblings); -#endif - - cpu_init(); /* initialize the bootstrap CPU */ - mmu_context_init(); /* initialize context_id bitmap */ - -#ifdef CONFIG_ACPI - acpi_boot_init(); -#endif - -#ifdef CONFIG_VT - if (!conswitchp) { -# if defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -# endif -# if defined(CONFIG_VGA_CONSOLE) - /* - * Non-legacy systems may route legacy VGA MMIO range to system - * memory. vga_con probes the MMIO hole, so memory looks like - * a VGA device to it. The EFI memory map can tell us if it's - * memory so we can avoid this problem. - */ - if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY) - conswitchp = &vga_con; -# endif - } -#ifdef CONFIG_XEN - if (is_running_on_xen()) { - shared_info_t *s = HYPERVISOR_shared_info; - - xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT); - - printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%ld " - "flags=0x%x\n", s->arch.start_info_pfn, - xen_start_info->nr_pages, xen_start_info->flags); - - if (!is_initial_xendomain()) { -#if !defined(CONFIG_VT) || !defined(CONFIG_DUMMY_CONSOLE) - conswitchp = NULL; -#endif - } - - /* - * If a console= is NOT specified, we assume using the - * xencons console is desired. By default, this is ttyS0 - * for dom0 and tty0 for domU. - */ - if (!strstr(*cmdline_p, "console=")) { - char *p, *q, name[5]; - int offset = 0; - - if (is_initial_xendomain()) - strncpy(name, "ttyS", 4); - else - strncpy(name, "tty", 3); - - p = strstr(*cmdline_p, "xencons="); - - if (p) { - p += 8; - if (!strncmp(p, "ttyS", 4)) { - strncpy(name, p, 4); - p += 4; - offset = simple_strtol(p, &q, 10); - if (p == q) - offset = 0; - } else if (!strncmp(p, "tty", 3) || - !strncmp(p, "xvc", 3)) { - strncpy(name, p, 3); - p += 3; - offset = simple_strtol(p, &q, 10); - if (p == q) - offset = 0; - } else if (!strncmp(p, "off", 3)) - offset = -1; - } - - if (offset >= 0) - add_preferred_console(name, offset, NULL); - } - } - xencons_early_setup(); -#endif -#endif - - - /* enable IA-64 Machine Check Abort Handling unless disabled */ -#ifdef CONFIG_XEN - if (is_running_on_xen() && !is_initial_xendomain()) - nomca = 1; -#endif - if (!nomca) - ia64_mca_init(); - - platform_setup(cmdline_p); -#ifdef CONFIG_XEN - if (!is_running_on_xen() && !ia64_platform_is("xen")) { - extern ia64_mv_setup_t xen_setup; - xen_setup(cmdline_p); - } -#endif - paging_init(); -#ifdef CONFIG_XEN - contiguous_bitmap_init(max_pfn); -#endif -} - -/* - * Display cpu info for all cpu's. - */ -static int -show_cpuinfo (struct seq_file *m, void *v) -{ -#ifdef CONFIG_SMP -# define lpj c->loops_per_jiffy -# define cpunum c->cpu -#else -# define lpj loops_per_jiffy -# define cpunum 0 -#endif - static struct { - unsigned long mask; - const char *feature_name; - } feature_bits[] = { - { 1UL << 0, "branchlong" }, - { 1UL << 1, "spontaneous deferral"}, - { 1UL << 2, "16-byte atomic ops" } - }; - char family[32], features[128], *cp, sep; - struct cpuinfo_ia64 *c = v; - unsigned long mask; - unsigned long proc_freq; - int i; - - mask = c->features; - - switch (c->family) { - case 0x07: memcpy(family, "Itanium", 8); break; - case 0x1f: memcpy(family, "Itanium 2", 10); break; - default: sprintf(family, "%u", c->family); break; - } - - /* build the feature string: */ - memcpy(features, " standard", 10); - cp = features; - sep = 0; - for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) { - if (mask & feature_bits[i].mask) { - if (sep) - *cp++ = sep; - sep = ','; - *cp++ = ' '; - strcpy(cp, feature_bits[i].feature_name); - cp += strlen(feature_bits[i].feature_name); - mask &= ~feature_bits[i].mask; - } - } - if (mask) { - /* print unknown features as a hex value: */ - if (sep) - *cp++ = sep; - sprintf(cp, " 0x%lx", mask); - } - - proc_freq = cpufreq_quick_get(cpunum); - if (!proc_freq) - proc_freq = c->proc_freq / 1000; - - seq_printf(m, - "processor : %d\n" - "vendor : %s\n" - "arch : IA-64\n" - "family : %s\n" - "model : %u\n" - "revision : %u\n" - "archrev : %u\n" - "features :%s\n" /* don't change this---it _is_ right! */ - "cpu number : %lu\n" - "cpu regs : %u\n" - "cpu MHz : %lu.%06lu\n" - "itc MHz : %lu.%06lu\n" - "BogoMIPS : %lu.%02lu\n", - cpunum, c->vendor, family, c->model, c->revision, c->archrev, - features, c->ppn, c->number, - proc_freq / 1000, proc_freq % 1000, - c->itc_freq / 1000000, c->itc_freq % 1000000, - lpj*HZ/500000, (lpj*HZ/5000) % 100); -#ifdef CONFIG_SMP - seq_printf(m, "siblings : %u\n", cpus_weight(cpu_core_map[cpunum])); - if (c->threads_per_core > 1 || c->cores_per_socket > 1) - seq_printf(m, - "physical id: %u\n" - "core id : %u\n" - "thread id : %u\n", - c->socket_id, c->core_id, c->thread_id); -#endif - seq_printf(m,"\n"); - - return 0; -} - -static void * -c_start (struct seq_file *m, loff_t *pos) -{ -#ifdef CONFIG_SMP - while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map)) - ++*pos; -#endif - return *pos < NR_CPUS ? cpu_data(*pos) : NULL; -} - -static void * -c_next (struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void -c_stop (struct seq_file *m, void *v) -{ -} - -struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo -}; - -static void __cpuinit -identify_cpu (struct cpuinfo_ia64 *c) -{ - union { - unsigned long bits[5]; - struct { - /* id 0 & 1: */ - char vendor[16]; - - /* id 2 */ - u64 ppn; /* processor serial number */ - - /* id 3: */ - unsigned number : 8; - unsigned revision : 8; - unsigned model : 8; - unsigned family : 8; - unsigned archrev : 8; - unsigned reserved : 24; - - /* id 4: */ - u64 features; - } field; - } cpuid; - pal_vm_info_1_u_t vm1; - pal_vm_info_2_u_t vm2; - pal_status_t status; - unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ - int i; - - for (i = 0; i < 5; ++i) - cpuid.bits[i] = ia64_get_cpuid(i); - - memcpy(c->vendor, cpuid.field.vendor, 16); -#ifdef CONFIG_SMP - c->cpu = smp_processor_id(); - - /* below default values will be overwritten by identify_siblings() - * for Multi-Threading/Multi-Core capable cpu's - */ - c->threads_per_core = c->cores_per_socket = c->num_log = 1; - c->socket_id = -1; - - identify_siblings(c); -#endif - c->ppn = cpuid.field.ppn; - c->number = cpuid.field.number; - c->revision = cpuid.field.revision; - c->model = cpuid.field.model; - c->family = cpuid.field.family; - c->archrev = cpuid.field.archrev; - c->features = cpuid.field.features; - - status = ia64_pal_vm_summary(&vm1, &vm2); - if (status == PAL_STATUS_SUCCESS) { - impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; - phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; - } - c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); - c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); -} - -void -setup_per_cpu_areas (void) -{ - /* start_kernel() requires this... */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU - prefill_possible_map(); -#endif -} - -/* - * Calculate the max. cache line size. - * - * In addition, the minimum of the i-cache stride sizes is calculated for - * "flush_icache_range()". - */ -static void __cpuinit -get_max_cacheline_size (void) -{ - unsigned long line_size, max = 1; - unsigned int cache_size = 0; - u64 l, levels, unique_caches; - pal_cache_config_info_t cci; - s64 status; - - status = ia64_pal_cache_summary(&levels, &unique_caches); - if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", - __FUNCTION__, status); - max = SMP_CACHE_BYTES; - /* Safest setup for "flush_icache_range()" */ - ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT; - goto out; - } - - for (l = 0; l < levels; ++l) { - status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2, - &cci); - if (status != 0) { - printk(KERN_ERR - "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n", - __FUNCTION__, l, status); - max = SMP_CACHE_BYTES; - /* The safest setup for "flush_icache_range()" */ - cci.pcci_stride = I_CACHE_STRIDE_SHIFT; - cci.pcci_unified = 1; - } - line_size = 1 << cci.pcci_line_size; - if (line_size > max) - max = line_size; - if (cache_size < cci.pcci_cache_size) - cache_size = cci.pcci_cache_size; - if (!cci.pcci_unified) { - status = ia64_pal_cache_config_info(l, - /* cache_type (instruction)= */ 1, - &cci); - if (status != 0) { - printk(KERN_ERR - "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n", - __FUNCTION__, l, status); - /* The safest setup for "flush_icache_range()" */ - cci.pcci_stride = I_CACHE_STRIDE_SHIFT; - } - } - if (cci.pcci_stride < ia64_i_cache_stride_shift) - ia64_i_cache_stride_shift = cci.pcci_stride; - } - out: -#ifdef CONFIG_SMP - max_cache_size = max(max_cache_size, cache_size); -#endif - if (max > ia64_max_cacheline_size) - ia64_max_cacheline_size = max; -} - -/* - * cpu_init() initializes state that is per-CPU. This function acts - * as a 'CPU state barrier', nothing should get across. - */ -void __cpuinit -cpu_init (void) -{ - extern void __cpuinit ia64_mmu_init (void *); - unsigned long num_phys_stacked; - pal_vm_info_2_u_t vmi; - unsigned int max_ctx; - struct cpuinfo_ia64 *cpu_info; - void *cpu_data; - - cpu_data = per_cpu_init(); - - /* - * We set ar.k3 so that assembly code in MCA handler can compute - * physical addresses of per cpu variables with a simple: - * phys = ar.k3 + &per_cpu_var - */ - ia64_set_kr(IA64_KR_PER_CPU_DATA, - ia64_tpa(cpu_data) - (long) __per_cpu_start); - - get_max_cacheline_size(); - - /* - * We can't pass "local_cpu_data" to identify_cpu() because we haven't called - * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it - * depends on the data returned by identify_cpu(). We break the dependency by - * accessing cpu_data() through the canonical per-CPU address. - */ - cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); - identify_cpu(cpu_info); - -#ifdef CONFIG_MCKINLEY - { -# define FEATURE_SET 16 - struct ia64_pal_retval iprv; - - if (cpu_info->family == 0x1f) { - PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); - if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) - PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, - (iprv.v1 | 0x80), FEATURE_SET, 0); - } - } -#endif - - /* Clear the stack memory reserved for pt_regs: */ - memset(task_pt_regs(current), 0, sizeof(struct pt_regs)); - - ia64_set_kr(IA64_KR_FPU_OWNER, 0); - - /* - * Initialize the page-table base register to a global - * directory with all zeroes. This ensure that we can handle - * TLB-misses to user address-space even before we created the - * first user address-space. This may happen, e.g., due to - * aggressive use of lfetch.fault. - */ - ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); - - /* - * Initialize default control register to defer speculative faults except - * for those arising from TLB misses, which are not deferred. The - * kernel MUST NOT depend on a particular setting of these bits (in other words, - * the kernel must have recovery code for all speculative accesses). Turn on - * dcr.lc as per recommendation by the architecture team. Most IA-32 apps - * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll - * be fine). - */ - ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR - | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if (current->mm) - BUG(); - - ia64_mmu_init(ia64_imva(cpu_data)); - ia64_mca_cpu_init(ia64_imva(cpu_data)); - -#ifdef CONFIG_IA32_SUPPORT - ia32_cpu_init(); -#endif - - /* Clear ITC to eliminiate sched_clock() overflows in human time. */ - ia64_set_itc(0); - - /* disable all local interrupt sources: */ - ia64_set_itv(1 << 16); - ia64_set_lrr0(1 << 16); - ia64_set_lrr1(1 << 16); - ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); - ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); - - /* clear TPR & XTP to enable all interrupt classes: */ - ia64_setreg(_IA64_REG_CR_TPR, 0); -#ifdef CONFIG_SMP - normal_xtp(); -#endif - - /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ - if (ia64_pal_vm_summary(NULL, &vmi) == 0) - max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; - else { - printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); - max_ctx = (1U << 15) - 1; /* use architected minimum */ - } - while (max_ctx < ia64_ctx.max_ctx) { - unsigned int old = ia64_ctx.max_ctx; - if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) - break; - } - - if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { - printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " - "stacked regs\n"); - num_phys_stacked = 96; - } - /* size of physical stacked register partition plus 8 bytes: */ - __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; - platform_cpu_init(); -#ifdef CONFIG_XEN - if (is_running_on_xen() && !ia64_platform_is("xen")) { - extern ia64_mv_cpu_init_t xen_cpu_init; - xen_cpu_init(); - } -#endif - - pm_idle = default_idle; -} - -/* - * On SMP systems, when the scheduler does migration-cost autodetection, - * it needs a way to flush as much of the CPU's caches as possible. - */ -void sched_cacheflush(void) -{ - ia64_sal_cache_flush(3); -} - -void __init -check_bugs (void) -{ - ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, - (unsigned long) __end___mckinley_e9_bundles); -} - -static int __init run_dmi_scan(void) -{ - dmi_scan_machine(); - return 0; -} -core_initcall(run_dmi_scan); diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/time.c b/linux-2.6-xen-sparse/arch/ia64/kernel/time.c deleted file mode 100644 index b73cffa94f..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/time.c +++ /dev/null @@ -1,500 +0,0 @@ -/* - * linux/arch/ia64/kernel/time.c - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * Stephane Eranian <eranian@hpl.hp.com> - * David Mosberger <davidm@hpl.hp.com> - * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> - * Copyright (C) 1999-2000 VA Linux Systems - * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com> - */ - -#include <linux/cpu.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/profile.h> -#include <linux/sched.h> -#include <linux/time.h> -#include <linux/interrupt.h> -#include <linux/efi.h> -#include <linux/profile.h> -#include <linux/timex.h> - -#include <asm/machvec.h> -#include <asm/delay.h> -#include <asm/hw_irq.h> -#include <asm/ptrace.h> -#include <asm/sal.h> -#include <asm/sections.h> -#include <asm/system.h> - -#ifdef CONFIG_XEN -#include <linux/kernel_stat.h> -#include <linux/posix-timers.h> -#include <xen/interface/vcpu.h> -#include <asm/percpu.h> -#endif - -extern unsigned long wall_jiffies; - -volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ - -#ifdef CONFIG_IA64_DEBUG_IRQ - -unsigned long last_cli_ip; -EXPORT_SYMBOL(last_cli_ip); - -#endif - -#ifdef CONFIG_XEN -DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); -DEFINE_PER_CPU(unsigned long, processed_stolen_time); -DEFINE_PER_CPU(unsigned long, processed_blocked_time); -#define NS_PER_TICK (1000000000LL/HZ) -#endif - -static struct time_interpolator itc_interpolator = { - .shift = 16, - .mask = 0xffffffffffffffffLL, - .source = TIME_SOURCE_CPU -}; - -#ifdef CONFIG_XEN -static unsigned long -consider_steal_time(unsigned long new_itm, struct pt_regs *regs) -{ - unsigned long stolen, blocked, sched_time; - unsigned long delta_itm = 0, stolentick = 0; - int i, cpu = smp_processor_id(); - struct vcpu_runstate_info *runstate; - struct task_struct *p = current; - - runstate = &per_cpu(runstate, smp_processor_id()); - - do { - sched_time = runstate->state_entry_time; - mb(); - stolen = runstate->time[RUNSTATE_runnable] + - runstate->time[RUNSTATE_offline] - - per_cpu(processed_stolen_time, cpu); - blocked = runstate->time[RUNSTATE_blocked] - - per_cpu(processed_blocked_time, cpu); - mb(); - } while (sched_time != runstate->state_entry_time); - - /* - * Check for vcpu migration effect - * In this case, itc value is reversed. - * This causes huge stolen value. - * This function just checks and reject this effect. - */ - if (!time_after_eq(runstate->time[RUNSTATE_blocked], - per_cpu(processed_blocked_time, cpu))) - blocked = 0; - - if (!time_after_eq(runstate->time[RUNSTATE_runnable] + - runstate->time[RUNSTATE_offline], - per_cpu(processed_stolen_time, cpu))) - stolen = 0; - - if (!time_after(delta_itm + new_itm, ia64_get_itc())) - stolentick = ia64_get_itc() - delta_itm - new_itm; - - do_div(stolentick, NS_PER_TICK); - stolentick++; - - do_div(stolen, NS_PER_TICK); - - if (stolen > stolentick) - stolen = stolentick; - - stolentick -= stolen; - do_div(blocked, NS_PER_TICK); - - if (blocked > stolentick) - blocked = stolentick; - - if (stolen > 0 || blocked > 0) { - account_steal_time(NULL, jiffies_to_cputime(stolen)); - account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked)); - run_local_timers(); - - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_mode(regs)); - - scheduler_tick(); - run_posix_cpu_timers(p); - delta_itm += local_cpu_data->itm_delta * (stolen + blocked); - - if (cpu == time_keeper_id) { - write_seqlock(&xtime_lock); - for(i = 0; i < stolen + blocked; i++) - do_timer(regs); - local_cpu_data->itm_next = delta_itm + new_itm; - write_sequnlock(&xtime_lock); - } else { - local_cpu_data->itm_next = delta_itm + new_itm; - } - per_cpu(processed_stolen_time,cpu) += NS_PER_TICK * stolen; - per_cpu(processed_blocked_time,cpu) += NS_PER_TICK * blocked; - } - return delta_itm; -} -#else -#define consider_steal_time(new_itm, regs) (0) -#endif - -static irqreturn_t -timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) -{ - unsigned long new_itm; - unsigned long delta_itm; /* XEN */ - - if (unlikely(cpu_is_offline(smp_processor_id()))) { - return IRQ_HANDLED; - } - - platform_timer_interrupt(irq, dev_id, regs); - - new_itm = local_cpu_data->itm_next; - - if (!time_after(ia64_get_itc(), new_itm)) - printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", - ia64_get_itc(), new_itm); - - profile_tick(CPU_PROFILING, regs); - - if (is_running_on_xen()) { - delta_itm = consider_steal_time(new_itm, regs); - new_itm += delta_itm; - if (time_after(new_itm, ia64_get_itc()) && delta_itm) - goto skip_process_time_accounting; - } - - while (1) { - update_process_times(user_mode(regs)); - - new_itm += local_cpu_data->itm_delta; - - if (smp_processor_id() == time_keeper_id) { - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. - */ - write_seqlock(&xtime_lock); - do_timer(regs); - local_cpu_data->itm_next = new_itm; - write_sequnlock(&xtime_lock); - } else - local_cpu_data->itm_next = new_itm; - - if (time_after(new_itm, ia64_get_itc())) - break; - } - -skip_process_time_accounting: /* XEN */ - - do { - /* - * If we're too close to the next clock tick for - * comfort, we increase the safety margin by - * intentionally dropping the next tick(s). We do NOT - * update itm.next because that would force us to call - * do_timer() which in turn would let our clock run - * too fast (with the potentially devastating effect - * of losing monotony of time). - */ - while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) - new_itm += local_cpu_data->itm_delta; - ia64_set_itm(new_itm); - /* double check, in case we got hit by a (slow) PMI: */ - } while (time_after_eq(ia64_get_itc(), new_itm)); - return IRQ_HANDLED; -} - -/* - * Encapsulate access to the itm structure for SMP. - */ -void -ia64_cpu_local_tick (void) -{ - int cpu = smp_processor_id(); - unsigned long shift = 0, delta; - - /* arrange for the cycle counter to generate a timer interrupt: */ - ia64_set_itv(IA64_TIMER_VECTOR); - - delta = local_cpu_data->itm_delta; - /* - * Stagger the timer tick for each CPU so they don't occur all at (almost) the - * same time: - */ - if (cpu) { - unsigned long hi = 1UL << ia64_fls(cpu); - shift = (2*(cpu - hi) + 1) * delta/hi/2; - } - local_cpu_data->itm_next = ia64_get_itc() + delta + shift; - ia64_set_itm(local_cpu_data->itm_next); -} - -static int nojitter; - -static int __init nojitter_setup(char *str) -{ - nojitter = 1; - printk("Jitter checking for ITC timers disabled\n"); - return 1; -} - -__setup("nojitter", nojitter_setup); - -#ifdef CONFIG_XEN -/* taken from i386/kernel/time-xen.c */ -static void init_missing_ticks_accounting(int cpu) -{ - struct vcpu_register_runstate_memory_area area; - struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu); - int rc; - - memset(runstate, 0, sizeof(*runstate)); - - area.addr.v = runstate; - rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area); - WARN_ON(rc && rc != -ENOSYS); - - per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked]; - per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable] - + runstate->time[RUNSTATE_offline]; -} - -static int xen_ia64_settimefoday_after_resume; - -static int __init __xen_ia64_settimeofday_after_resume(char *str) -{ - xen_ia64_settimefoday_after_resume = 1; - return 1; -} - -__setup("xen_ia64_settimefoday_after_resume", - __xen_ia64_settimeofday_after_resume); - -/* Called after suspend, to resume time. */ -void -time_resume(void) -{ - unsigned int cpu; - - /* Just trigger a tick. */ - ia64_cpu_local_tick(); - - if (xen_ia64_settimefoday_after_resume) { - /* do_settimeofday() resets timer interplator */ - struct timespec xen_time; - int ret; - efi_gettimeofday(&xen_time); - - ret = do_settimeofday(&xen_time); - WARN_ON(ret); - } else { -#if 0 - /* adjust EFI time */ - struct timespec my_time = CURRENT_TIME; - struct timespec xen_time; - static timespec diff; - struct xen_domctl domctl; - int ret; - - efi_gettimeofday(&xen_time); - diff = timespec_sub(&xen_time, &my_time); - domctl.cmd = XEN_DOMCTL_settimeoffset; - domctl.domain = DOMID_SELF; - domctl.u.settimeoffset.timeoffset_seconds = diff.tv_sec; - ret = HYPERVISOR_domctl_op(&domctl); - WARN_ON(ret); -#endif - /* Time interpolator remembers the last timer status. - Forget it */ - write_seqlock_irq(&xtime_lock); - time_interpolator_reset(); - write_sequnlock_irq(&xtime_lock); - } - - for_each_online_cpu(cpu) - init_missing_ticks_accounting(cpu); - - touch_softlockup_watchdog(); -} -#else -#define init_missing_ticks_accounting(cpu) do {} while (0) -#endif - -void __devinit -ia64_init_itm (void) -{ - unsigned long platform_base_freq, itc_freq; - struct pal_freq_ratio itc_ratio, proc_ratio; - long status, platform_base_drift, itc_drift; - - /* - * According to SAL v2.6, we need to use a SAL call to determine the platform base - * frequency and then a PAL call to determine the frequency ratio between the ITC - * and the base frequency. - */ - status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &platform_base_freq, &platform_base_drift); - if (status != 0) { - printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); - } else { - status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio); - if (status != 0) - printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status); - } - if (status != 0) { - /* invent "random" values */ - printk(KERN_ERR - "SAL/PAL failed to obtain frequency info---inventing reasonable values\n"); - platform_base_freq = 100000000; - platform_base_drift = -1; /* no drift info */ - itc_ratio.num = 3; - itc_ratio.den = 1; - } - if (platform_base_freq < 40000000) { - printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n", - platform_base_freq); - platform_base_freq = 75000000; - platform_base_drift = -1; - } - if (!proc_ratio.den) - proc_ratio.den = 1; /* avoid division by zero */ - if (!itc_ratio.den) - itc_ratio.den = 1; /* avoid division by zero */ - - itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; - - local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; - printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, " - "ITC freq=%lu.%03luMHz", smp_processor_id(), - platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, - itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); - - if (platform_base_drift != -1) { - itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den; - printk("+/-%ldppm\n", itc_drift); - } else { - itc_drift = -1; - printk("\n"); - } - - local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; - local_cpu_data->itc_freq = itc_freq; - local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC; - local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT) - + itc_freq/2)/itc_freq; - - if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { - itc_interpolator.frequency = local_cpu_data->itc_freq; - itc_interpolator.drift = itc_drift; -#ifdef CONFIG_SMP - /* On IA64 in an SMP configuration ITCs are never accurately synchronized. - * Jitter compensation requires a cmpxchg which may limit - * the scalability of the syscalls for retrieving time. - * The ITC synchronization is usually successful to within a few - * ITC ticks but this is not a sure thing. If you need to improve - * timer performance in SMP situations then boot the kernel with the - * "nojitter" option. However, doing so may result in time fluctuating (maybe - * even going backward) if the ITC offsets between the individual CPUs - * are too large. - */ - if (!nojitter) itc_interpolator.jitter = 1; -#endif - register_time_interpolator(&itc_interpolator); - } - - if (is_running_on_xen()) - init_missing_ticks_accounting(smp_processor_id()); - - /* avoid softlock up message when cpu is unplug and plugged again. */ - touch_softlockup_watchdog(); - - /* Setup the CPU local timer tick */ - ia64_cpu_local_tick(); -} - -static struct irqaction timer_irqaction = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED, - .name = "timer" -}; - -void __devinit ia64_disable_timer(void) -{ - ia64_set_itv(1 << 16); -} - -void __init -time_init (void) -{ - register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction); - efi_gettimeofday(&xtime); - ia64_init_itm(); - - /* - * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the - * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC). - */ - set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); -} - -/* - * Generic udelay assumes that if preemption is allowed and the thread - * migrates to another CPU, that the ITC values are synchronized across - * all CPUs. - */ -static void -ia64_itc_udelay (unsigned long usecs) -{ - unsigned long start = ia64_get_itc(); - unsigned long end = start + usecs*local_cpu_data->cyc_per_usec; - - while (time_before(ia64_get_itc(), end)) - cpu_relax(); -} - -void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay; - -void -udelay (unsigned long usecs) -{ - (*ia64_udelay)(usecs); -} -EXPORT_SYMBOL(udelay); - -static unsigned long long ia64_itc_printk_clock(void) -{ - if (ia64_get_kr(IA64_KR_PER_CPU_DATA)) - return sched_clock(); - return 0; -} - -static unsigned long long ia64_default_printk_clock(void) -{ - return (unsigned long long)(jiffies_64 - INITIAL_JIFFIES) * - (1000000000/HZ); -} - -unsigned long long (*ia64_printk_clock)(void) = &ia64_default_printk_clock; - -unsigned long long printk_clock(void) -{ - return ia64_printk_clock(); -} - -void __init -ia64_setup_printk_clock(void) -{ - if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) - ia64_printk_clock = ia64_itc_printk_clock; -} diff --git a/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c b/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c deleted file mode 100644 index c14ac662a3..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. - * Bjorn Helgaas <bjorn.helgaas@hp.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/compiler.h> -#include <linux/module.h> -#include <linux/efi.h> -#include <asm/io.h> -#include <asm/meminit.h> - -static inline void __iomem * -__ioremap (unsigned long offset, unsigned long size) -{ - offset = HYPERVISOR_ioremap(offset, size); - if (IS_ERR_VALUE(offset)) - return (void __iomem*)offset; - return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset); -} - -void __iomem * -ioremap (unsigned long offset, unsigned long size) -{ - u64 attr; - unsigned long gran_base, gran_size; - - /* - * For things in kern_memmap, we must use the same attribute - * as the rest of the kernel. For more details, see - * Documentation/ia64/aliasing.txt. - */ - attr = kern_mem_attribute(offset, size); - if (attr & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(offset); - else if (attr & EFI_MEMORY_UC) - return __ioremap(offset, size); - - /* - * Some chipsets don't support UC access to memory. If - * WB is supported for the whole granule, we prefer that. - */ - gran_base = GRANULEROUNDDOWN(offset); - gran_size = GRANULEROUNDUP(offset + size) - gran_base; - if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(offset); - - return __ioremap(offset, size); -} -EXPORT_SYMBOL(ioremap); - -void __iomem * -ioremap_nocache (unsigned long offset, unsigned long size) -{ - if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB) - return NULL; - - return __ioremap(offset, size); -} -EXPORT_SYMBOL(ioremap_nocache); diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile b/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile deleted file mode 100644 index 555d4a9d7a..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -obj-$(CONFIG_OPROFILE) += oprofile.o - -DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ - oprof.o cpu_buffer.o buffer_sync.o \ - event_buffer.o oprofile_files.o \ - oprofilefs.o oprofile_stats.o \ - timer_int.o ) - -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o -oprofile-$(CONFIG_PERFMON) += perfmon.o -ifeq ($(CONFIG_XEN), y) -oprofile-$(CONFIG_PERFMON) += xenoprof.o \ - ../../../drivers/xen/xenoprof/xenoprofile.o -endif diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c deleted file mode 100644 index f218b7eb45..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c +++ /dev/null @@ -1,52 +0,0 @@ -/** - * @file init.c - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon <levon@movementarian.org> - */ - -#include <linux/kernel.h> -#include <linux/oprofile.h> -#include <linux/init.h> -#include <linux/errno.h> -#include "oprofile_perfmon.h" - -extern int perfmon_init(struct oprofile_operations * ops); -extern void perfmon_exit(void); -extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth); - -int __init oprofile_arch_init(struct oprofile_operations * ops) -{ - int ret = -ENODEV; - - if (is_running_on_xen()) { - ret = xen_perfmon_init(); - if (ret) - return ret; - return xenoprofile_init(ops); - } - -#ifdef CONFIG_PERFMON - /* perfmon_init() can fail, but we have no way to report it */ - ret = perfmon_init(ops); -#endif - ops->backtrace = ia64_backtrace; - - return ret; -} - - -void oprofile_arch_exit(void) -{ - if (is_running_on_xen()) { - xenoprofile_exit(); - xen_perfmon_exit(); - return; - } - -#ifdef CONFIG_PERFMON - perfmon_exit(); -#endif -} diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h b/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h deleted file mode 100644 index 6ba1170bd6..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef OPROFILE_PERFMON_H -#define OPROFILE_PERFMON_H - -#ifdef CONFIG_PERFMON -int __perfmon_init(void); -void __perfmon_exit(void); -int perfmon_start(void); -void perfmon_stop(void); -#else -#define __perfmon_init() (-ENOSYS) -#define __perfmon_exit() do {} while (0) -#endif /* CONFIG_PERFMON */ - -#ifdef CONFIG_XEN -#define STATIC_IF_NO_XEN /* nothing */ -#define xen_perfmon_init() __perfmon_init() -#define xen_perfmon_exit() __perfmon_exit() -extern int xenoprofile_init(struct oprofile_operations * ops); -extern void xenoprofile_exit(void); -#else -#define STATIC_IF_NO_XEN static -#define xen_perfmon_init() (-ENOSYS) -#define xen_perfmon_exit() do {} while (0) -#define xenoprofile_init() (-ENOSYS) -#define xenoprofile_exit() do {} while (0) -#endif /* CONFIG_XEN */ - -#endif /* OPROFILE_PERFMON_H */ diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c deleted file mode 100644 index 89dc71f1c4..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c +++ /dev/null @@ -1,118 +0,0 @@ -/** - * @file perfmon.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon <levon@movementarian.org> - */ - -#include <linux/kernel.h> -#include <linux/oprofile.h> -#include <linux/sched.h> -#include <asm/perfmon.h> -#include <asm/ptrace.h> -#include <asm/errno.h> -#include "oprofile_perfmon.h" - -static int allow_ints; - -static int -perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, - struct pt_regs *regs, unsigned long stamp) -{ - int event = arg->pmd_eventid; - - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; - - /* the owner of the oprofile event buffer may have exited - * without perfmon being shutdown (e.g. SIGSEGV) - */ - if (allow_ints) - oprofile_add_sample(regs, event); - return 0; -} - - -STATIC_IF_NO_XEN -int perfmon_start(void) -{ - allow_ints = 1; - return 0; -} - - -STATIC_IF_NO_XEN -void perfmon_stop(void) -{ - allow_ints = 0; -} - - -#define OPROFILE_FMT_UUID { \ - 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c } - -static pfm_buffer_fmt_t oprofile_fmt = { - .fmt_name = "oprofile_format", - .fmt_uuid = OPROFILE_FMT_UUID, - .fmt_handler = perfmon_handler, -}; - - -static char * get_cpu_type(void) -{ - __u8 family = local_cpu_data->family; - - switch (family) { - case 0x07: - return "ia64/itanium"; - case 0x1f: - return "ia64/itanium2"; - default: - return "ia64/ia64"; - } -} - - -/* all the ops are handled via userspace for IA64 perfmon */ - -static int using_perfmon; - -STATIC_IF_NO_XEN -int __perfmon_init(void) -{ - int ret = pfm_register_buffer_fmt(&oprofile_fmt); - if (ret) - return -ENODEV; - - using_perfmon = 1; - return 0; -} - -STATIC_IF_NO_XEN -void __perfmon_exit(void) -{ - if (!using_perfmon) - return; - - pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid); -} - -int perfmon_init(struct oprofile_operations * ops) -{ - int ret = __perfmon_init(); - if (ret) - return -ENODEV; - - ops->cpu_type = get_cpu_type(); - ops->start = perfmon_start; - ops->stop = perfmon_stop; - printk(KERN_INFO "oprofile: using perfmon.\n"); - return 0; -} - - -void perfmon_exit(void) -{ - __perfmon_exit(); -} diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c deleted file mode 100644 index 998be3e66b..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c +++ /dev/null @@ -1,142 +0,0 @@ -/****************************************************************************** - * xenoprof ia64 specific part - * - * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> - * VA Linux Systems Japan K.K. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ -#include <linux/init.h> -#include <linux/oprofile.h> -#include <linux/ioport.h> - -#include <xen/driver_util.h> -#include <xen/interface/xen.h> -#include <xen/interface/xenoprof.h> -#include <xen/xenoprof.h> - -#include "oprofile_perfmon.h" - -void __init xenoprof_arch_init_counter(struct xenoprof_init *init) -{ - init->num_events = 0; /* perfmon manages. */ -} - -void xenoprof_arch_counter(void) -{ - /* nothing. perfmon does. */ -} - -void xenoprof_arch_start(void) -{ - perfmon_start(); -} - -void xenoprof_arch_stop(void) -{ - perfmon_stop(); -} - -/* XXX move them to an appropriate header file. */ -struct resource* xen_ia64_allocate_resource(unsigned long size); -void xen_ia64_release_resource(struct resource* res); -void xen_ia64_unmap_resource(struct resource* res); - -struct resource* -xenoprof_ia64_allocate_resource(int32_t max_samples) -{ - unsigned long bufsize; - - /* XXX add hypercall to get bufsize? */ - /* this value is taken from alloc_xenoprof_struct(). */ -#if 0 - bufsize = NR_CPUS * (sizeof(struct xenoprof_buf) + - (max_samples - 1) * sizeof(struct event_log)); - bufsize = PAGE_ALIGN(bufsize) + PAGE_SIZE; -#else -#define MAX_OPROF_SHARED_PAGES 32 - bufsize = (MAX_OPROF_SHARED_PAGES + 1) * PAGE_SIZE; -#endif - return xen_ia64_allocate_resource(bufsize); -} - -void xenoprof_arch_unmap_shared_buffer(struct xenoprof_shared_buffer* sbuf) -{ - if (sbuf->buffer) { - xen_ia64_unmap_resource(sbuf->arch.res); - sbuf->buffer = NULL; - sbuf->arch.res = NULL; - } -} - -int xenoprof_arch_map_shared_buffer(struct xenoprof_get_buffer* get_buffer, - struct xenoprof_shared_buffer* sbuf) -{ - int ret; - struct resource* res; - - sbuf->buffer = NULL; - sbuf->arch.res = NULL; - - res = xenoprof_ia64_allocate_resource(get_buffer->max_samples); - if (IS_ERR(res)) - return PTR_ERR(res); - - get_buffer->buf_gmaddr = res->start; - - ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, get_buffer); - if (ret) { - xen_ia64_release_resource(res); - return ret; - } - - BUG_ON((res->end - res->start + 1) < - get_buffer->bufsize * get_buffer->nbuf); - - sbuf->buffer = __va(res->start); - sbuf->arch.res = res; - - return ret; -} - -int xenoprof_arch_set_passive(struct xenoprof_passive* pdomain, - struct xenoprof_shared_buffer* sbuf) -{ - int ret; - struct resource* res; - - sbuf->buffer = NULL; - sbuf->arch.res = NULL; - - res = xenoprof_ia64_allocate_resource(pdomain->max_samples); - if (IS_ERR(res)) - return PTR_ERR(res); - - pdomain->buf_gmaddr = res->start; - - ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, pdomain); - if (ret) { - xen_ia64_release_resource(res); - return ret; - } - - BUG_ON((res->end - res->start + 1) < pdomain->bufsize * pdomain->nbuf); - - sbuf->buffer = __va(res->start); - sbuf->arch.res = res; - - return ret; -} diff --git a/linux-2.6-xen-sparse/arch/ia64/pci/pci.c b/linux-2.6-xen-sparse/arch/ia64/pci/pci.c deleted file mode 100644 index 4d3684156a..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/pci/pci.c +++ /dev/null @@ -1,836 +0,0 @@ -/* - * pci.c - Low-Level PCI Access in IA-64 - * - * Derived from bios32.c of i386 tree. - * - * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P. - * David Mosberger-Tang <davidm@hpl.hp.com> - * Bjorn Helgaas <bjorn.helgaas@hp.com> - * Copyright (C) 2004 Silicon Graphics, Inc. - * - * Note: Above list of copyright holders is incomplete... - */ - -#include <linux/acpi.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/slab.h> -#include <linux/smp_lock.h> -#include <linux/spinlock.h> - -#include <asm/machvec.h> -#include <asm/page.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/sal.h> -#include <asm/smp.h> -#include <asm/irq.h> -#include <asm/hw_irq.h> - -/* - * Low-level SAL-based PCI configuration access functions. Note that SAL - * calls are already serialized (via sal_lock), so we don't need another - * synchronization mechanism here. - */ - -#define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \ - (((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg)) - -/* SAL 3.2 adds support for extended config space. */ - -#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \ - (((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg)) - -static int -pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn, - int reg, int len, u32 *value) -{ - u64 addr, data = 0; - int mode, result; - - if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) - return -EINVAL; - - if ((seg | reg) <= 255) { - addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); - mode = 0; - } else { - addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); - mode = 1; - } - result = ia64_sal_pci_config_read(addr, mode, len, &data); - if (result != 0) - return -EINVAL; - - *value = (u32) data; - return 0; -} - -static int -pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn, - int reg, int len, u32 value) -{ - u64 addr; - int mode, result; - - if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) - return -EINVAL; - - if ((seg | reg) <= 255) { - addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); - mode = 0; - } else { - addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); - mode = 1; - } - result = ia64_sal_pci_config_write(addr, mode, len, value); - if (result != 0) - return -EINVAL; - return 0; -} - -static struct pci_raw_ops pci_sal_ops = { - .read = pci_sal_read, - .write = pci_sal_write -}; - -struct pci_raw_ops *raw_pci_ops = &pci_sal_ops; - -static int -pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) -{ - return raw_pci_ops->read(pci_domain_nr(bus), bus->number, - devfn, where, size, value); -} - -static int -pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) -{ - return raw_pci_ops->write(pci_domain_nr(bus), bus->number, - devfn, where, size, value); -} - -struct pci_ops pci_root_ops = { - .read = pci_read, - .write = pci_write, -}; - -/* Called by ACPI when it finds a new root bus. */ - -static struct pci_controller * __devinit -alloc_pci_controller (int seg) -{ - struct pci_controller *controller; - - controller = kmalloc(sizeof(*controller), GFP_KERNEL); - if (!controller) - return NULL; - - memset(controller, 0, sizeof(*controller)); - controller->segment = seg; - controller->node = -1; - return controller; -} - -struct pci_root_info { - struct pci_controller *controller; - char *name; -}; - -static unsigned int -new_space (u64 phys_base, int sparse) -{ - u64 mmio_base; - int i; - - if (phys_base == 0) - return 0; /* legacy I/O port space */ - - mmio_base = (u64) ioremap(phys_base, 0); - for (i = 0; i < num_io_spaces; i++) - if (io_space[i].mmio_base == mmio_base && - io_space[i].sparse == sparse) - return i; - - if (num_io_spaces == MAX_IO_SPACES) { - printk(KERN_ERR "PCI: Too many IO port spaces " - "(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES); - return ~0; - } - - i = num_io_spaces++; - io_space[i].mmio_base = mmio_base; - io_space[i].sparse = sparse; - - return i; -} - -static u64 __devinit -add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr) -{ - struct resource *resource; - char *name; - u64 base, min, max, base_port; - unsigned int sparse = 0, space_nr, len; - - resource = kzalloc(sizeof(*resource), GFP_KERNEL); - if (!resource) { - printk(KERN_ERR "PCI: No memory for %s I/O port space\n", - info->name); - goto out; - } - - len = strlen(info->name) + 32; - name = kzalloc(len, GFP_KERNEL); - if (!name) { - printk(KERN_ERR "PCI: No memory for %s I/O port space name\n", - info->name); - goto free_resource; - } - - min = addr->minimum; - max = min + addr->address_length - 1; - if (addr->info.io.translation_type == ACPI_SPARSE_TRANSLATION) - sparse = 1; - - space_nr = new_space(addr->translation_offset, sparse); - if (space_nr == ~0) - goto free_name; - - base = __pa(io_space[space_nr].mmio_base); - base_port = IO_SPACE_BASE(space_nr); - snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->name, - base_port + min, base_port + max); - - /* - * The SDM guarantees the legacy 0-64K space is sparse, but if the - * mapping is done by the processor (not the bridge), ACPI may not - * mark it as sparse. - */ - if (space_nr == 0) - sparse = 1; - - resource->name = name; - resource->flags = IORESOURCE_MEM; - resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min); - resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max); - insert_resource(&iomem_resource, resource); - - return base_port; - -free_name: - kfree(name); -free_resource: - kfree(resource); -out: - return ~0; -} - -static acpi_status __devinit resource_to_window(struct acpi_resource *resource, - struct acpi_resource_address64 *addr) -{ - acpi_status status; - - /* - * We're only interested in _CRS descriptors that are - * - address space descriptors for memory or I/O space - * - non-zero size - * - producers, i.e., the address space is routed downstream, - * not consumed by the bridge itself - */ - status = acpi_resource_to_address64(resource, addr); - if (ACPI_SUCCESS(status) && - (addr->resource_type == ACPI_MEMORY_RANGE || - addr->resource_type == ACPI_IO_RANGE) && - addr->address_length && - addr->producer_consumer == ACPI_PRODUCER) - return AE_OK; - - return AE_ERROR; -} - -static acpi_status __devinit -count_window (struct acpi_resource *resource, void *data) -{ - unsigned int *windows = (unsigned int *) data; - struct acpi_resource_address64 addr; - acpi_status status; - - status = resource_to_window(resource, &addr); - if (ACPI_SUCCESS(status)) - (*windows)++; - - return AE_OK; -} - -static __devinit acpi_status add_window(struct acpi_resource *res, void *data) -{ - struct pci_root_info *info = data; - struct pci_window *window; - struct acpi_resource_address64 addr; - acpi_status status; - unsigned long flags, offset = 0; - struct resource *root; - - /* Return AE_OK for non-window resources to keep scanning for more */ - status = resource_to_window(res, &addr); - if (!ACPI_SUCCESS(status)) - return AE_OK; - - if (addr.resource_type == ACPI_MEMORY_RANGE) { - flags = IORESOURCE_MEM; - root = &iomem_resource; - offset = addr.translation_offset; - } else if (addr.resource_type == ACPI_IO_RANGE) { - flags = IORESOURCE_IO; - root = &ioport_resource; - offset = add_io_space(info, &addr); - if (offset == ~0) - return AE_OK; - } else - return AE_OK; - - window = &info->controller->window[info->controller->windows++]; - window->resource.name = info->name; - window->resource.flags = flags; - window->resource.start = addr.minimum + offset; - window->resource.end = window->resource.start + addr.address_length - 1; - window->resource.child = NULL; - window->offset = offset; - - if (insert_resource(root, &window->resource)) { - printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n", - window->resource.start, window->resource.end, - root->name, info->name); - } - - return AE_OK; -} - -static void __devinit -pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl) -{ - int i, j; - - j = 0; - for (i = 0; i < ctrl->windows; i++) { - struct resource *res = &ctrl->window[i].resource; - /* HP's firmware has a hack to work around a Windows bug. - * Ignore these tiny memory ranges */ - if ((res->flags & IORESOURCE_MEM) && - (res->end - res->start < 16)) - continue; - if (j >= PCI_BUS_NUM_RESOURCES) { - printk("Ignoring range [%lx-%lx] (%lx)\n", res->start, - res->end, res->flags); - continue; - } - bus->resource[j++] = res; - } -} - -struct pci_bus * __devinit -pci_acpi_scan_root(struct acpi_device *device, int domain, int bus) -{ - struct pci_root_info info; - struct pci_controller *controller; - unsigned int windows = 0; - struct pci_bus *pbus; - char *name; - int pxm; - - controller = alloc_pci_controller(domain); - if (!controller) - goto out1; - - controller->acpi_handle = device->handle; - - pxm = acpi_get_pxm(controller->acpi_handle); -#ifdef CONFIG_NUMA - if (pxm >= 0) - controller->node = pxm_to_node(pxm); -#endif - - acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, - &windows); - controller->window = kmalloc_node(sizeof(*controller->window) * windows, - GFP_KERNEL, controller->node); - if (!controller->window) - goto out2; - - name = kmalloc(16, GFP_KERNEL); - if (!name) - goto out3; - - sprintf(name, "PCI Bus %04x:%02x", domain, bus); - info.controller = controller; - info.name = name; - acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window, - &info); - - pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller); - if (pbus) - pcibios_setup_root_windows(pbus, controller); - - return pbus; - -out3: - kfree(controller->window); -out2: - kfree(controller); -out1: - return NULL; -} - -void pcibios_resource_to_bus(struct pci_dev *dev, - struct pci_bus_region *region, struct resource *res) -{ - struct pci_controller *controller = PCI_CONTROLLER(dev); - unsigned long offset = 0; - int i; - - for (i = 0; i < controller->windows; i++) { - struct pci_window *window = &controller->window[i]; - if (!(window->resource.flags & res->flags)) - continue; - if (window->resource.start > res->start) - continue; - if (window->resource.end < res->end) - continue; - offset = window->offset; - break; - } - - region->start = res->start - offset; - region->end = res->end - offset; -} -EXPORT_SYMBOL(pcibios_resource_to_bus); - -void pcibios_bus_to_resource(struct pci_dev *dev, - struct resource *res, struct pci_bus_region *region) -{ - struct pci_controller *controller = PCI_CONTROLLER(dev); - unsigned long offset = 0; - int i; - - for (i = 0; i < controller->windows; i++) { - struct pci_window *window = &controller->window[i]; - if (!(window->resource.flags & res->flags)) - continue; - if (window->resource.start - window->offset > region->start) - continue; - if (window->resource.end - window->offset < region->end) - continue; - offset = window->offset; - break; - } - - res->start = region->start + offset; - res->end = region->end + offset; -} -EXPORT_SYMBOL(pcibios_bus_to_resource); - -static int __devinit is_valid_resource(struct pci_dev *dev, int idx) -{ - unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM; - struct resource *devr = &dev->resource[idx]; - - if (!dev->bus) - return 0; - for (i=0; i<PCI_BUS_NUM_RESOURCES; i++) { - struct resource *busr = dev->bus->resource[i]; - - if (!busr || ((busr->flags ^ devr->flags) & type_mask)) - continue; - if ((devr->start) && (devr->start >= busr->start) && - (devr->end <= busr->end)) - return 1; - } - return 0; -} - -static void __devinit -pcibios_fixup_resources(struct pci_dev *dev, int start, int limit) -{ - struct pci_bus_region region; - int i; - - for (i = start; i < limit; i++) { - if (!dev->resource[i].flags) - continue; - region.start = dev->resource[i].start; - region.end = dev->resource[i].end; - pcibios_bus_to_resource(dev, &dev->resource[i], ®ion); - if ((is_valid_resource(dev, i))) - pci_claim_resource(dev, i); - } -} - -static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) -{ - pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES); -} - -static void __devinit pcibios_fixup_bridge_resources(struct pci_dev *dev) -{ - pcibios_fixup_resources(dev, PCI_BRIDGE_RESOURCES, PCI_NUM_RESOURCES); -} - -/* - * Called after each bus is probed, but before its children are examined. - */ -void __devinit -pcibios_fixup_bus (struct pci_bus *b) -{ - struct pci_dev *dev; - - if (b->self) { - pci_read_bridge_bases(b); - pcibios_fixup_bridge_resources(b->self); - } - list_for_each_entry(dev, &b->devices, bus_list) - pcibios_fixup_device_resources(dev); - - return; -} - -void __devinit -pcibios_update_irq (struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); - - /* ??? FIXME -- record old value for shutdown. */ -} - -static inline int -pcibios_enable_resources (struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM; - - if (!dev) - return -EINVAL; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx=0; idx<PCI_NUM_RESOURCES; idx++) { - /* Only set up the desired resources. */ - if (!(mask & (1 << idx))) - continue; - - r = &dev->resource[idx]; - if (!(r->flags & type_mask)) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - printk(KERN_ERR - "PCI: Device %s not available because of resource collisions\n", - pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -int -pcibios_enable_device (struct pci_dev *dev, int mask) -{ - int ret; - - ret = pcibios_enable_resources(dev, mask); - if (ret < 0) - return ret; - - return acpi_pci_irq_enable(dev); -} - -void -pcibios_disable_device (struct pci_dev *dev) -{ - acpi_pci_irq_disable(dev); -} - -void -pcibios_align_resource (void *data, struct resource *res, - resource_size_t size, resource_size_t align) -{ -} - -/* - * PCI BIOS setup, always defaults to SAL interface - */ -char * __init -pcibios_setup (char *str) -{ - return str; -} - -int -pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - /* - * I/O space cannot be accessed via normal processor loads and - * stores on this platform. - */ - if (mmap_state == pci_mmap_io) - /* - * XXX we could relax this for I/O spaces for which ACPI - * indicates that the space is 1-to-1 mapped. But at the - * moment, we don't support multiple PCI address spaces and - * the legacy I/O space is not 1-to-1 mapped, so this is moot. - */ - return -EINVAL; - - /* - * Leave vm_pgoff as-is, the PCI space address is the physical - * address on this platform. - */ - if (write_combine && efi_range_is_wc(vma->vm_start, - vma->vm_end - vma->vm_start)) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (is_initial_xendomain()) { - unsigned long addr = vma->vm_pgoff << PAGE_SHIFT; - size_t size = vma->vm_end - vma->vm_start; - unsigned long offset = HYPERVISOR_ioremap(addr, size); - if (IS_ERR_VALUE(offset)) - return offset; - } - - if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} - -/** - * ia64_pci_get_legacy_mem - generic legacy mem routine - * @bus: bus to get legacy memory base address for - * - * Find the base of legacy memory for @bus. This is typically the first - * megabyte of bus address space for @bus or is simply 0 on platforms whose - * chipsets support legacy I/O and memory routing. Returns the base address - * or an error pointer if an error occurred. - * - * This is the ia64 generic version of this routine. Other platforms - * are free to override it with a machine vector. - */ -char *ia64_pci_get_legacy_mem(struct pci_bus *bus) -{ - return (char *)__IA64_UNCACHED_OFFSET; -} - -/** - * pci_mmap_legacy_page_range - map legacy memory space to userland - * @bus: bus whose legacy space we're mapping - * @vma: vma passed in by mmap - * - * Map legacy memory space for this device back to userspace using a machine - * vector to get the base address. - */ -int -pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) -{ - unsigned long size = vma->vm_end - vma->vm_start; - pgprot_t prot; - char *addr; - - /* - * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt - * for more details. - */ - if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) - return -EINVAL; - prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, - vma->vm_page_prot); - if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot))) - return -EINVAL; - - addr = pci_get_legacy_mem(bus); - if (IS_ERR(addr)) - return PTR_ERR(addr); - - vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; - vma->vm_page_prot = prot; - - if (is_initial_xendomain()) { - unsigned long addr = vma->vm_pgoff << PAGE_SHIFT; - size_t size = vma->vm_end - vma->vm_start; - unsigned long offset = HYPERVISOR_ioremap(addr, size); - if (IS_ERR_VALUE(offset)) - return offset; - } - - if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - size, vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} - -/** - * ia64_pci_legacy_read - read from legacy I/O space - * @bus: bus to read - * @port: legacy port value - * @val: caller allocated storage for returned value - * @size: number of bytes to read - * - * Simply reads @size bytes from @port and puts the result in @val. - * - * Again, this (and the write routine) are generic versions that can be - * overridden by the platform. This is necessary on platforms that don't - * support legacy I/O routing or that hard fail on legacy I/O timeouts. - */ -int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) -{ - int ret = size; - - switch (size) { - case 1: - *val = inb(port); - break; - case 2: - *val = inw(port); - break; - case 4: - *val = inl(port); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -/** - * ia64_pci_legacy_write - perform a legacy I/O write - * @bus: bus pointer - * @port: port to write - * @val: value to write - * @size: number of bytes to write from @val - * - * Simply writes @size bytes of @val to @port. - */ -int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) -{ - int ret = size; - - switch (size) { - case 1: - outb(val, port); - break; - case 2: - outw(val, port); - break; - case 4: - outl(val, port); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -/** - * pci_cacheline_size - determine cacheline size for PCI devices - * @dev: void - * - * We want to use the line-size of the outer-most cache. We assume - * that this line-size is the same for all CPUs. - * - * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). - * - * RETURNS: An appropriate -ERRNO error value on eror, or zero for success. - */ -static unsigned long -pci_cacheline_size (void) -{ - u64 levels, unique_caches; - s64 status; - pal_cache_config_info_t cci; - static u8 cacheline_size; - - if (cacheline_size) - return cacheline_size; - - status = ia64_pal_cache_summary(&levels, &unique_caches); - if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", - __FUNCTION__, status); - return SMP_CACHE_BYTES; - } - - status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2, - &cci); - if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n", - __FUNCTION__, status); - return SMP_CACHE_BYTES; - } - cacheline_size = 1 << cci.pcci_line_size; - return cacheline_size; -} - -/** - * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi() - * @dev: the PCI device for which MWI is enabled - * - * For ia64, we can get the cacheline sizes from PAL. - * - * RETURNS: An appropriate -ERRNO error value on eror, or zero for success. - */ -int -pcibios_prep_mwi (struct pci_dev *dev) -{ - unsigned long desired_linesize, current_linesize; - int rc = 0; - u8 pci_linesize; - - desired_linesize = pci_cacheline_size(); - - pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize); - current_linesize = 4 * pci_linesize; - if (desired_linesize != current_linesize) { - printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,", - pci_name(dev), current_linesize); - if (current_linesize > desired_linesize) { - printk(" expected %lu bytes instead\n", desired_linesize); - rc = -EINVAL; - } else { - printk(" correcting to %lu\n", desired_linesize); - pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4); - } - } - return rc; -} - -int pci_vector_resources(int last, int nr_released) -{ - int count = nr_released; - - count += (IA64_LAST_DEVICE_VECTOR - last); - - return count; -} diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile deleted file mode 100644 index 6d19da28df..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for Xen components -# - -obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \ - hypervisor.o util.o xencomm.o xcom_hcall.o xcom_mini.o \ - xcom_privcmd.o mem.o xen_dma.o - -obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S deleted file mode 100644 index dc5977886e..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Support routines for Xen hypercalls - * - * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com> - */ - -#include <asm/processor.h> -#include <asm/asmmacro.h> - -GLOBAL_ENTRY(xen_get_psr) - XEN_HYPER_GET_PSR - br.ret.sptk.many rp - ;; -END(xen_get_psr) - -GLOBAL_ENTRY(xen_get_ivr) - XEN_HYPER_GET_IVR - br.ret.sptk.many rp - ;; -END(xen_get_ivr) - -GLOBAL_ENTRY(xen_get_tpr) - XEN_HYPER_GET_TPR - br.ret.sptk.many rp - ;; -END(xen_get_tpr) - -GLOBAL_ENTRY(xen_set_tpr) - mov r8=r32 - XEN_HYPER_SET_TPR - br.ret.sptk.many rp - ;; -END(xen_set_tpr) - -GLOBAL_ENTRY(xen_eoi) - mov r8=r32 - XEN_HYPER_EOI - br.ret.sptk.many rp - ;; -END(xen_eoi) - -GLOBAL_ENTRY(xen_thash) - mov r8=r32 - XEN_HYPER_THASH - br.ret.sptk.many rp - ;; -END(xen_thash) - -GLOBAL_ENTRY(xen_set_itm) - mov r8=r32 - XEN_HYPER_SET_ITM - br.ret.sptk.many rp - ;; -END(xen_set_itm) - -GLOBAL_ENTRY(xen_ptcga) - mov r8=r32 - mov r9=r33 - XEN_HYPER_PTC_GA - br.ret.sptk.many rp - ;; -END(xen_ptcga) - -GLOBAL_ENTRY(xen_get_rr) - mov r8=r32 - XEN_HYPER_GET_RR - br.ret.sptk.many rp - ;; -END(xen_get_rr) - -GLOBAL_ENTRY(xen_set_rr) - mov r8=r32 - mov r9=r33 - XEN_HYPER_SET_RR - br.ret.sptk.many rp - ;; -END(xen_set_rr) - -GLOBAL_ENTRY(xen_set_kr) - mov r8=r32 - mov r9=r33 - XEN_HYPER_SET_KR - br.ret.sptk.many rp -END(xen_set_kr) - -GLOBAL_ENTRY(xen_fc) - mov r8=r32 - XEN_HYPER_FC - br.ret.sptk.many rp -END(xen_fc) - -GLOBAL_ENTRY(xen_get_cpuid) - mov r8=r32 - XEN_HYPER_GET_CPUID - br.ret.sptk.many rp -END(xen_get_cpuid) - -GLOBAL_ENTRY(xen_get_pmd) - mov r8=r32 - XEN_HYPER_GET_PMD - br.ret.sptk.many rp -END(xen_get_pmd) - -#ifdef CONFIG_IA32_SUPPORT -GLOBAL_ENTRY(xen_get_eflag) - XEN_HYPER_GET_EFLAG - br.ret.sptk.many rp -END(xen_get_eflag) - -// some bits aren't set if pl!=0, see SDM vol1 3.1.8 -GLOBAL_ENTRY(xen_set_eflag) - mov r8=r32 - XEN_HYPER_SET_EFLAG - br.ret.sptk.many rp -END(xen_set_eflag) -#endif - -GLOBAL_ENTRY(xen_send_ipi) - mov r14=r32 - mov r15=r33 - mov r2=0x400 - break 0x1000 - ;; - br.ret.sptk.many rp - ;; -END(xen_send_ipi) - -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -// Those are vdso specialized. -// In fsys mode, call, ret can't be used. - - // see xen_ssm_i() in privop.h - // r22 = &vcpu->vcpu_info->evtchn_upcall_mask - // r23 = &vpsr.ic - // r24 = &vcpu->vcpu_info->evtchn_upcall_pending - // r25 = tmp - // r31 = tmp - // p11 = tmp - // p14 = tmp -#define XEN_SET_PSR_I \ - ld1 r31=[r22]; \ - ld1 r25=[r24]; \ - ;; \ - st1 [r22]=r0; \ - cmp.ne.unc p14,p0=r0,r31; \ - ;; \ -(p14) cmp.ne.unc p11,p0=r0,r25; \ - ;; \ -(p11) st1 [r22]=r20; \ -(p11) XEN_HYPER_SSM_I; - -GLOBAL_ENTRY(xen_ssm_i_0) - XEN_SET_PSR_I - brl.cond.sptk .vdso_ssm_i_0_ret - ;; -END(xen_ssm_i_0) - -GLOBAL_ENTRY(xen_ssm_i_1) - XEN_SET_PSR_I - brl.cond.sptk .vdso_ssm_i_1_ret - ;; -END(xen_ssm_i_1) - -GLOBAL_ENTRY(__hypercall) - mov r2=r37 - break 0x1000 - br.ret.sptk.many b0 - ;; -END(__hypercall) -#endif diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c deleted file mode 100644 index e895ef0d96..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c +++ /dev/null @@ -1,1264 +0,0 @@ -/****************************************************************************** - * include/asm-ia64/shadow.h - * - * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> - * VA Linux Systems Japan K.K. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -//#include <linux/kernel.h> -#include <linux/spinlock.h> -#include <linux/bootmem.h> -#include <linux/module.h> -#include <linux/vmalloc.h> -#include <linux/efi.h> -#include <asm/page.h> -#include <asm/pgalloc.h> -#include <asm/meminit.h> -#include <asm/hypervisor.h> -#include <asm/hypercall.h> -#include <xen/interface/memory.h> -#include <xen/xencons.h> -#include <xen/balloon.h> - -shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE; -EXPORT_SYMBOL(HYPERVISOR_shared_info); - -start_info_t *xen_start_info; -EXPORT_SYMBOL(xen_start_info); - -int running_on_xen; -EXPORT_SYMBOL(running_on_xen); - -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M -static int p2m_expose_init(void); -#else -#define p2m_expose_init() (-ENOSYS) -#define p2m_expose_resume() ((void)0) -#endif - -EXPORT_SYMBOL(__hypercall); - -void __init -xen_setup(char **cmdline_p) -{ - extern void dig_setup(char **cmdline_p); - if (ia64_platform_is("xen")) - dig_setup(cmdline_p); - - if (!is_running_on_xen() || !is_initial_xendomain()) - return; - - if (xen_start_info->console.dom0.info_size >= - sizeof(struct dom0_vga_console_info)) { - const struct dom0_vga_console_info *info = - (struct dom0_vga_console_info *)( - (char *)xen_start_info + - xen_start_info->console.dom0.info_off); - dom0_init_screen_info(info); - } - xen_start_info->console.domU.mfn = 0; - xen_start_info->console.domU.evtchn = 0; -} - -void __cpuinit -xen_cpu_init(void) -{ - extern void xen_smp_intr_init(void); - xen_smp_intr_init(); -} - -//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() -// move those to lib/contiguous_bitmap? -//XXX discontigmem/sparsemem - -/* - * Bitmap is indexed by page number. If bit is set, the page is part of a - * xen_create_contiguous_region() area of memory. - */ -unsigned long *contiguous_bitmap; - -#ifdef CONFIG_VIRTUAL_MEM_MAP -/* Following logic is stolen from create_mem_map_table() for virtual memmap */ -static int -create_contiguous_bitmap(u64 start, u64 end, void *arg) -{ - unsigned long address, start_page, end_page; - unsigned long bitmap_start, bitmap_end; - unsigned char *bitmap; - int node; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - bitmap_start = (unsigned long)contiguous_bitmap + - ((__pa(start) >> PAGE_SHIFT) >> 3); - bitmap_end = (unsigned long)contiguous_bitmap + - (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3); - - start_page = bitmap_start & PAGE_MASK; - end_page = PAGE_ALIGN(bitmap_end); - node = paddr_to_nid(__pa(start)); - - bitmap = alloc_bootmem_pages_node(NODE_DATA(node), - end_page - start_page); - BUG_ON(!bitmap); - memset(bitmap, 0, end_page - start_page); - - for (address = start_page; address < end_page; address += PAGE_SIZE) { - pgd = pgd_offset_k(address); - if (pgd_none(*pgd)) - pgd_populate(&init_mm, pgd, - alloc_bootmem_pages_node(NODE_DATA(node), - PAGE_SIZE)); - pud = pud_offset(pgd, address); - - if (pud_none(*pud)) - pud_populate(&init_mm, pud, - alloc_bootmem_pages_node(NODE_DATA(node), - PAGE_SIZE)); - pmd = pmd_offset(pud, address); - - if (pmd_none(*pmd)) - pmd_populate_kernel(&init_mm, pmd, - alloc_bootmem_pages_node - (NODE_DATA(node), PAGE_SIZE)); - pte = pte_offset_kernel(pmd, address); - - if (pte_none(*pte)) - set_pte(pte, - pfn_pte(__pa(bitmap + (address - start_page)) - >> PAGE_SHIFT, PAGE_KERNEL)); - } - return 0; -} -#endif - -static void -__contiguous_bitmap_init(unsigned long size) -{ - contiguous_bitmap = alloc_bootmem_pages(size); - BUG_ON(!contiguous_bitmap); - memset(contiguous_bitmap, 0, size); -} - -void -contiguous_bitmap_init(unsigned long end_pfn) -{ - unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3; -#ifndef CONFIG_VIRTUAL_MEM_MAP - __contiguous_bitmap_init(size); -#else - unsigned long max_gap = 0; - - efi_memmap_walk(find_largest_hole, (u64*)&max_gap); - if (max_gap < LARGE_GAP) { - __contiguous_bitmap_init(size); - } else { - unsigned long map_size = PAGE_ALIGN(size); - vmalloc_end -= map_size; - contiguous_bitmap = (unsigned long*)vmalloc_end; - efi_memmap_walk(create_contiguous_bitmap, NULL); - } -#endif -} - -#if 0 -int -contiguous_bitmap_test(void* p) -{ - return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap); -} -#endif - -static void contiguous_bitmap_set( - unsigned long first_page, unsigned long nr_pages) -{ - unsigned long start_off, end_off, curr_idx, end_idx; - - curr_idx = first_page / BITS_PER_LONG; - start_off = first_page & (BITS_PER_LONG-1); - end_idx = (first_page + nr_pages) / BITS_PER_LONG; - end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); - - if (curr_idx == end_idx) { - contiguous_bitmap[curr_idx] |= - ((1UL<<end_off)-1) & -(1UL<<start_off); - } else { - contiguous_bitmap[curr_idx] |= -(1UL<<start_off); - while ( ++curr_idx < end_idx ) - contiguous_bitmap[curr_idx] = ~0UL; - contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1; - } -} - -static void contiguous_bitmap_clear( - unsigned long first_page, unsigned long nr_pages) -{ - unsigned long start_off, end_off, curr_idx, end_idx; - - curr_idx = first_page / BITS_PER_LONG; - start_off = first_page & (BITS_PER_LONG-1); - end_idx = (first_page + nr_pages) / BITS_PER_LONG; - end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); - - if (curr_idx == end_idx) { - contiguous_bitmap[curr_idx] &= - -(1UL<<end_off) | ((1UL<<start_off)-1); - } else { - contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1; - while ( ++curr_idx != end_idx ) - contiguous_bitmap[curr_idx] = 0; - contiguous_bitmap[curr_idx] &= -(1UL<<end_off); - } -} - -// __xen_create_contiguous_region(), __xen_destroy_contiguous_region() -// are based on i386 xen_create_contiguous_region(), -// xen_destroy_contiguous_region() - -/* Protected by balloon_lock. */ -#define MAX_CONTIG_ORDER 7 -static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; - -/* Ensure multi-page extents are contiguous in machine memory. */ -int -__xen_create_contiguous_region(unsigned long vstart, - unsigned int order, unsigned int address_bits) -{ - unsigned long error = 0; - unsigned long gphys = __pa(vstart); - unsigned long start_gpfn = gphys >> PAGE_SHIFT; - unsigned long num_gpfn = 1 << order; - unsigned long i; - unsigned long flags; - - unsigned long *in_frames = discontig_frames, out_frame; - int success; - struct xen_memory_exchange exchange = { - .in = { - .nr_extents = num_gpfn, - .extent_order = 0, - .domid = DOMID_SELF - }, - .out = { - .nr_extents = 1, - .extent_order = order, - .address_bits = address_bits, - .domid = DOMID_SELF - }, - .nr_exchanged = 0 - }; - - if (unlikely(order > MAX_CONTIG_ORDER)) - return -ENOMEM; - - set_xen_guest_handle(exchange.in.extent_start, in_frames); - set_xen_guest_handle(exchange.out.extent_start, &out_frame); - - scrub_pages(vstart, num_gpfn); - - balloon_lock(flags); - - /* Get a new contiguous memory extent. */ - for (i = 0; i < num_gpfn; i++) { - in_frames[i] = start_gpfn + i; - } - out_frame = start_gpfn; - error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); - success = (exchange.nr_exchanged == num_gpfn); - BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0))); - BUG_ON(success && (error != 0)); - if (unlikely(error == -ENOSYS)) { - /* Compatibility when XENMEM_exchange is unsupported. */ - error = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &exchange.in); - BUG_ON(error != num_gpfn); - error = HYPERVISOR_memory_op(XENMEM_populate_physmap, - &exchange.out); - if (error != 1) { - /* Couldn't get special memory: fall back to normal. */ - for (i = 0; i < num_gpfn; i++) { - in_frames[i] = start_gpfn + i; - } - error = HYPERVISOR_memory_op(XENMEM_populate_physmap, - &exchange.in); - BUG_ON(error != num_gpfn); - success = 0; - } else - success = 1; - } - if (success) - contiguous_bitmap_set(start_gpfn, num_gpfn); -#if 0 - if (success) { - unsigned long mfn; - unsigned long mfn_prev = ~0UL; - for (i = 0; i < num_gpfn; i++) { - mfn = pfn_to_mfn_for_dma(start_gpfn + i); - if (mfn_prev != ~0UL && mfn != mfn_prev + 1) { - xprintk("\n"); - xprintk("%s:%d order %d " - "start 0x%lx bus 0x%lx " - "machine 0x%lx\n", - __func__, __LINE__, order, - vstart, virt_to_bus((void*)vstart), - phys_to_machine_for_dma(gphys)); - xprintk("mfn: "); - for (i = 0; i < num_gpfn; i++) { - mfn = pfn_to_mfn_for_dma( - start_gpfn + i); - xprintk("0x%lx ", mfn); - } - xprintk("\n"); - break; - } - mfn_prev = mfn; - } - } -#endif - balloon_unlock(flags); - return success? 0: -ENOMEM; -} - -void -__xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) -{ - unsigned long flags; - unsigned long error = 0; - unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT; - unsigned long num_gpfn = 1UL << order; - unsigned long i; - - unsigned long *out_frames = discontig_frames, in_frame; - int success; - struct xen_memory_exchange exchange = { - .in = { - .nr_extents = 1, - .extent_order = order, - .domid = DOMID_SELF - }, - .out = { - .nr_extents = num_gpfn, - .extent_order = 0, - .address_bits = 0, - .domid = DOMID_SELF - }, - .nr_exchanged = 0 - }; - - - if (!test_bit(start_gpfn, contiguous_bitmap)) - return; - - if (unlikely(order > MAX_CONTIG_ORDER)) - return; - - set_xen_guest_handle(exchange.in.extent_start, &in_frame); - set_xen_guest_handle(exchange.out.extent_start, out_frames); - - scrub_pages(vstart, num_gpfn); - - balloon_lock(flags); - - contiguous_bitmap_clear(start_gpfn, num_gpfn); - - /* Do the exchange for non-contiguous MFNs. */ - in_frame = start_gpfn; - for (i = 0; i < num_gpfn; i++) { - out_frames[i] = start_gpfn + i; - } - error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); - success = (exchange.nr_exchanged == 1); - BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0))); - BUG_ON(success && (error != 0)); - if (unlikely(error == -ENOSYS)) { - /* Compatibility when XENMEM_exchange is unsupported. */ - error = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &exchange.in); - BUG_ON(error != 1); - - error = HYPERVISOR_memory_op(XENMEM_populate_physmap, - &exchange.out); - BUG_ON(error != num_gpfn); - } - balloon_unlock(flags); -} - - -/////////////////////////////////////////////////////////////////////////// -// grant table hack -// cmd: GNTTABOP_xxx - -#include <linux/mm.h> -#include <xen/interface/xen.h> -#include <xen/gnttab.h> - -static void -gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop) -{ - uint32_t flags; - - flags = uop->flags; - - if (flags & GNTMAP_host_map) { - if (flags & GNTMAP_application_map) { - xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags); - BUG(); - } - if (flags & GNTMAP_contains_pte) { - xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags); - BUG(); - } - } else if (flags & GNTMAP_device_map) { - xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags); - BUG();//XXX not yet. actually this flag is not used. - } else { - BUG(); - } -} - -int -HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) -{ - if (cmd == GNTTABOP_map_grant_ref) { - unsigned int i; - for (i = 0; i < count; i++) { - gnttab_map_grant_ref_pre( - (struct gnttab_map_grant_ref*)uop + i); - } - } - return xencomm_mini_hypercall_grant_table_op(cmd, uop, count); -} -EXPORT_SYMBOL(HYPERVISOR_grant_table_op); - -/////////////////////////////////////////////////////////////////////////// -// foreign mapping -#include <linux/efi.h> -#include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}() - -static unsigned long privcmd_resource_min = 0; -// Xen/ia64 currently can handle pseudo physical address bits up to -// (PAGE_SHIFT * 3) -static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1); -static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE; - -static unsigned long -md_end_addr(const efi_memory_desc_t *md) -{ - return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); -} - -#define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL) -static int -xen_ia64_privcmd_check_size(unsigned long start, unsigned long end) -{ - return (start < end && - (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE); -} - -static int __init -xen_ia64_privcmd_init(void) -{ - void *efi_map_start, *efi_map_end, *p; - u64 efi_desc_size; - efi_memory_desc_t *md; - unsigned long tmp_min; - unsigned long tmp_max; - unsigned long gap_size; - unsigned long prev_end; - - if (!is_running_on_xen()) - return -1; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - // at first check the used highest address - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - // nothing - } - md = p - efi_desc_size; - privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md)); - if (xen_ia64_privcmd_check_size(privcmd_resource_min, - privcmd_resource_max)) { - goto out; - } - - // the used highest address is too large. try to find the largest gap. - tmp_min = privcmd_resource_max; - tmp_max = 0; - gap_size = 0; - prev_end = 0; - for (p = efi_map_start; - p < efi_map_end - efi_desc_size; - p += efi_desc_size) { - unsigned long end; - efi_memory_desc_t* next; - unsigned long next_start; - - md = p; - end = md_end_addr(md); - if (end > privcmd_resource_max) { - break; - } - if (end < prev_end) { - // work around. - // Xen may pass incompletely sorted memory - // descriptors like - // [x, x + length] - // [x, x] - // this order should be reversed. - continue; - } - next = p + efi_desc_size; - next_start = next->phys_addr; - if (next_start > privcmd_resource_max) { - next_start = privcmd_resource_max; - } - if (end < next_start && gap_size < (next_start - end)) { - tmp_min = end; - tmp_max = next_start; - gap_size = tmp_max - tmp_min; - } - prev_end = end; - } - - privcmd_resource_min = GRANULEROUNDUP(tmp_min); - if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) { - privcmd_resource_max = tmp_max; - goto out; - } - - privcmd_resource_min = tmp_min; - privcmd_resource_max = tmp_max; - if (!xen_ia64_privcmd_check_size(privcmd_resource_min, - privcmd_resource_max)) { - // Any large enough gap isn't found. - // go ahead anyway with the warning hoping that large region - // won't be requested. - printk(KERN_WARNING "xen privcmd: large enough region for privcmd mmap is not found.\n"); - } - -out: - printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n", - privcmd_resource_min, privcmd_resource_max, - (privcmd_resource_max - privcmd_resource_min) >> 20); - BUG_ON(privcmd_resource_min >= privcmd_resource_max); - - // XXX this should be somewhere appropriate - (void)p2m_expose_init(); - - return 0; -} -late_initcall(xen_ia64_privcmd_init); - -struct xen_ia64_privcmd_entry { - atomic_t map_count; -#define INVALID_GPFN (~0UL) - unsigned long gpfn; -}; - -struct xen_ia64_privcmd_range { - atomic_t ref_count; - unsigned long pgoff; // in PAGE_SIZE - struct resource* res; - - unsigned long num_entries; - struct xen_ia64_privcmd_entry entries[0]; -}; - -struct xen_ia64_privcmd_vma { - int is_privcmd_mmapped; - struct xen_ia64_privcmd_range* range; - - unsigned long num_entries; - struct xen_ia64_privcmd_entry* entries; -}; - -static void -xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry) -{ - atomic_set(&entry->map_count, 0); - entry->gpfn = INVALID_GPFN; -} - -static int -xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma, - unsigned long addr, - struct xen_ia64_privcmd_range* privcmd_range, - int i, - unsigned long gmfn, - pgprot_t prot, - domid_t domid) -{ - int error = 0; - struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i]; - unsigned long gpfn; - unsigned long flags; - - if ((addr & ~PAGE_MASK) != 0 || gmfn == INVALID_MFN) { - error = -EINVAL; - goto out; - } - - if (entry->gpfn != INVALID_GPFN) { - error = -EBUSY; - goto out; - } - gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i; - - flags = ASSIGN_writable; - if (pgprot_val(prot) == PROT_READ) { - flags = ASSIGN_readonly; - } - error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn, flags, domid); - if (error != 0) { - goto out; - } - - prot = vma->vm_page_prot; - error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot); - if (error != 0) { - error = HYPERVISOR_zap_physmap(gpfn, 0); - if (error) { - BUG();//XXX - } - } else { - atomic_inc(&entry->map_count); - entry->gpfn = gpfn; - } - -out: - return error; -} - -static void -xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range, - int i) -{ - struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i]; - unsigned long gpfn = entry->gpfn; - //gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + - // (vma->vm_pgoff - privcmd_range->pgoff); - int error; - - error = HYPERVISOR_zap_physmap(gpfn, 0); - if (error) { - BUG();//XXX - } - entry->gpfn = INVALID_GPFN; -} - -static void -xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range, - int i) -{ - struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i]; - if (entry->gpfn != INVALID_GPFN) { - atomic_inc(&entry->map_count); - } else { - BUG_ON(atomic_read(&entry->map_count) != 0); - } -} - -static void -xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range, - int i) -{ - struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i]; - if (entry->gpfn != INVALID_GPFN && - atomic_dec_and_test(&entry->map_count)) { - xen_ia64_privcmd_entry_munmap(privcmd_range, i); - } -} - -static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma); -static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma); - -struct vm_operations_struct xen_ia64_privcmd_vm_ops = { - .open = &xen_ia64_privcmd_vma_open, - .close = &xen_ia64_privcmd_vma_close, -}; - -static void -__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma, - struct xen_ia64_privcmd_vma* privcmd_vma, - struct xen_ia64_privcmd_range* privcmd_range) -{ - unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff; - unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - unsigned long i; - - BUG_ON(entry_offset < 0); - BUG_ON(entry_offset + num_entries > privcmd_range->num_entries); - - privcmd_vma->range = privcmd_range; - privcmd_vma->num_entries = num_entries; - privcmd_vma->entries = &privcmd_range->entries[entry_offset]; - vma->vm_private_data = privcmd_vma; - for (i = 0; i < privcmd_vma->num_entries; i++) { - xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i); - } - - vma->vm_private_data = privcmd_vma; - vma->vm_ops = &xen_ia64_privcmd_vm_ops; -} - -static void -xen_ia64_privcmd_vma_open(struct vm_area_struct* vma) -{ - struct xen_ia64_privcmd_vma* old_privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data; - struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data; - struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range; - - atomic_inc(&privcmd_range->ref_count); - // vm_op->open() can't fail. - privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL); - // copy original value if necessary - privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped; - - __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range); -} - -static void -xen_ia64_privcmd_vma_close(struct vm_area_struct* vma) -{ - struct xen_ia64_privcmd_vma* privcmd_vma = - (struct xen_ia64_privcmd_vma*)vma->vm_private_data; - struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range; - unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff; - unsigned long i; - - for (i = 0; i < privcmd_vma->num_entries; i++) { - xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i); - } - vma->vm_private_data = NULL; - kfree(privcmd_vma); - - if (atomic_dec_and_test(&privcmd_range->ref_count)) { -#if 1 - for (i = 0; i < privcmd_range->num_entries; i++) { - struct xen_ia64_privcmd_entry* entry = - &privcmd_range->entries[i]; - BUG_ON(atomic_read(&entry->map_count) != 0); - BUG_ON(entry->gpfn != INVALID_GPFN); - } -#endif - release_resource(privcmd_range->res); - kfree(privcmd_range->res); - vfree(privcmd_range); - } -} - -int -privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) -{ - struct xen_ia64_privcmd_vma* privcmd_vma = - (struct xen_ia64_privcmd_vma *)vma->vm_private_data; - return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0); -} - -int -privcmd_mmap(struct file * file, struct vm_area_struct * vma) -{ - int error; - unsigned long size = vma->vm_end - vma->vm_start; - unsigned long num_entries = size >> PAGE_SHIFT; - struct xen_ia64_privcmd_range* privcmd_range = NULL; - struct xen_ia64_privcmd_vma* privcmd_vma = NULL; - struct resource* res = NULL; - unsigned long i; - BUG_ON(!is_running_on_xen()); - - BUG_ON(file->private_data != NULL); - - error = -ENOMEM; - privcmd_range = - vmalloc(sizeof(*privcmd_range) + - sizeof(privcmd_range->entries[0]) * num_entries); - if (privcmd_range == NULL) { - goto out_enomem0; - } - privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL); - if (privcmd_vma == NULL) { - goto out_enomem1; - } - privcmd_vma->is_privcmd_mmapped = 0; - - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (res == NULL) { - goto out_enomem1; - } - res->name = "Xen privcmd mmap"; - error = allocate_resource(&iomem_resource, res, size, - privcmd_resource_min, privcmd_resource_max, - privcmd_resource_align, NULL, NULL); - if (error) { - goto out_enomem1; - } - privcmd_range->res = res; - - /* DONTCOPY is essential for Xen as copy_page_range is broken. */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; - - atomic_set(&privcmd_range->ref_count, 1); - privcmd_range->pgoff = vma->vm_pgoff; - privcmd_range->num_entries = num_entries; - for (i = 0; i < privcmd_range->num_entries; i++) { - xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]); - } - - __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range); - return 0; - -out_enomem1: - kfree(res); - kfree(privcmd_vma); -out_enomem0: - vfree(privcmd_range); - return error; -} - -int -direct_remap_pfn_range(struct vm_area_struct *vma, - unsigned long address, // process virtual address - unsigned long gmfn, // gmfn, gmfn + 1, ... gmfn + size/PAGE_SIZE - unsigned long size, - pgprot_t prot, - domid_t domid) // target domain -{ - struct xen_ia64_privcmd_vma* privcmd_vma = - (struct xen_ia64_privcmd_vma*)vma->vm_private_data; - struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range; - unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff; - - unsigned long i; - unsigned long offset; - int error = 0; - BUG_ON(!is_running_on_xen()); - -#if 0 - if (prot != vm->vm_page_prot) { - return -EINVAL; - } -#endif - - i = (address - vma->vm_start) >> PAGE_SHIFT; - for (offset = 0; offset < size; offset += PAGE_SIZE) { - error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, gmfn, prot, domid); - if (error != 0) { - break; - } - - i++; - gmfn++; - } - - return error; -} - - -/////////////////////////////////////////////////////////////////////////// -// expose p2m table -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M -#include <linux/cpu.h> -#include <asm/uaccess.h> - -int p2m_initialized __read_mostly = 0; - -unsigned long p2m_min_low_pfn __read_mostly; -unsigned long p2m_max_low_pfn __read_mostly; -unsigned long p2m_convert_min_pfn __read_mostly; -unsigned long p2m_convert_max_pfn __read_mostly; - -static struct resource p2m_resource = { - .name = "Xen p2m table", - .flags = IORESOURCE_MEM, -}; -static unsigned long p2m_assign_start_pfn __read_mostly; -static unsigned long p2m_assign_end_pfn __read_mostly; -static unsigned long p2m_expose_size; // this is referenced only when resume. - // so __read_mostly doesn't make sense. -volatile const pte_t* p2m_pte __read_mostly; - -#define GRNULE_PFN PTRS_PER_PTE -static unsigned long p2m_granule_pfn __read_mostly = GRNULE_PFN; - -#define ROUNDDOWN(x, y) ((x) & ~((y) - 1)) -#define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1)) - -#define P2M_PREFIX "Xen p2m: " - -static int xen_ia64_p2m_expose __read_mostly = 1; -module_param(xen_ia64_p2m_expose, int, 0); -MODULE_PARM_DESC(xen_ia64_p2m_expose, - "enable/disable xen/ia64 p2m exposure optimization\n"); - -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR -static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1; -module_param(xen_ia64_p2m_expose_use_dtr, int, 0); -MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr, - "use/unuse dtr to map exposed p2m table\n"); - -static const int p2m_page_shifts[] = { - _PAGE_SIZE_4K, - _PAGE_SIZE_8K, - _PAGE_SIZE_16K, - _PAGE_SIZE_64K, - _PAGE_SIZE_256K, - _PAGE_SIZE_1M, - _PAGE_SIZE_4M, - _PAGE_SIZE_16M, - _PAGE_SIZE_64M, - _PAGE_SIZE_256M, -}; - -struct p2m_itr_arg { - unsigned long vaddr; - unsigned long pteval; - unsigned long log_page_size; -}; -static struct p2m_itr_arg p2m_itr_arg __read_mostly; - -// This should be in asm-ia64/kregs.h -#define IA64_TR_P2M_TABLE 3 - -static void -p2m_itr(void* info) -{ - struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info; - ia64_itr(0x2, IA64_TR_P2M_TABLE, - arg->vaddr, arg->pteval, arg->log_page_size); - ia64_srlz_d(); -} - -static int -p2m_expose_dtr_call(struct notifier_block *self, - unsigned long event, void* ptr) -{ - unsigned int cpu = (unsigned int)(long)ptr; - if (event != CPU_ONLINE) - return 0; - if (p2m_initialized && xen_ia64_p2m_expose_use_dtr) { - unsigned int me = get_cpu(); - if (cpu == me) - p2m_itr(&p2m_itr_arg); - else - smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, - 1, 1); - put_cpu(); - } - return 0; -} - -static struct notifier_block p2m_expose_dtr_hotplug_notifier = { - .notifier_call = p2m_expose_dtr_call, - .next = NULL, - .priority = 0 -}; -#endif - -static int -p2m_expose_init(void) -{ - unsigned long num_pfn; - unsigned long p2m_size = 0; - unsigned long align = ~0UL; - int error = 0; -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR - int i; - unsigned long page_size; - unsigned long log_page_size = 0; -#endif - - if (!xen_ia64_p2m_expose) - return -ENOSYS; - if (p2m_initialized) - return 0; - -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR - error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); - if (error < 0) - return error; -#endif - - lock_cpu_hotplug(); - if (p2m_initialized) - goto out; - -#ifdef CONFIG_DISCONTIGMEM - p2m_min_low_pfn = min_low_pfn; - p2m_max_low_pfn = max_low_pfn; -#else - p2m_min_low_pfn = 0; - p2m_max_low_pfn = max_pfn; -#endif - -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR - if (xen_ia64_p2m_expose_use_dtr) { - unsigned long granule_pfn = 0; - p2m_size = p2m_max_low_pfn - p2m_min_low_pfn; - for (i = 0; - i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]); - i++) { - log_page_size = p2m_page_shifts[i]; - page_size = 1UL << log_page_size; - if (page_size < p2m_size) - continue; - - granule_pfn = max(page_size >> PAGE_SHIFT, - p2m_granule_pfn); - p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, - granule_pfn); - p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, - granule_pfn); - num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; - p2m_expose_size = num_pfn << PAGE_SHIFT; - p2m_size = num_pfn / PTRS_PER_PTE; - p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT); - if (p2m_size == page_size) - break; - } - if (p2m_size != page_size) { - printk(KERN_ERR "p2m_size != page_size\n"); - error = -EINVAL; - goto out; - } - align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT); - } else -#endif - { - BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1)); - p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, - p2m_granule_pfn); - p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn); - num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; - p2m_expose_size = num_pfn << PAGE_SHIFT; - p2m_size = num_pfn / PTRS_PER_PTE; - p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT); - align = max(privcmd_resource_align, - p2m_granule_pfn << PAGE_SHIFT); - } - - // use privcmd region - error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size, - privcmd_resource_min, privcmd_resource_max, - align, NULL, NULL); - if (error) { - printk(KERN_ERR P2M_PREFIX - "can't allocate region for p2m exposure " - "[0x%016lx, 0x%016lx) 0x%016lx\n", - p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size); - goto out; - } - - p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT; - p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT; - - error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn, - p2m_assign_start_pfn, - p2m_expose_size, p2m_granule_pfn); - if (error) { - printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n", - error); - printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx " - "expose_size 0x%016lx granule 0x%016lx\n", - p2m_convert_min_pfn, p2m_assign_start_pfn, - p2m_expose_size, p2m_granule_pfn);; - release_resource(&p2m_resource); - goto out; - } - p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn); -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR - if (xen_ia64_p2m_expose_use_dtr) { - p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn - << PAGE_SHIFT); - p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn, - PAGE_KERNEL)); - p2m_itr_arg.log_page_size = log_page_size; - smp_mb(); - smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1); - p2m_itr(&p2m_itr_arg); - } -#endif - smp_mb(); - p2m_initialized = 1; - printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n", - p2m_convert_min_pfn << PAGE_SHIFT, - p2m_convert_max_pfn << PAGE_SHIFT); - printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n", - p2m_assign_start_pfn << PAGE_SHIFT, - p2m_assign_end_pfn << PAGE_SHIFT, - p2m_size / 1024); -out: - unlock_cpu_hotplug(); - return error; -} - -#ifdef notyet -void -p2m_expose_cleanup(void) -{ - BUG_ON(!p2m_initialized); -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR - unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); -#endif - release_resource(&p2m_resource); -} -#endif - -static void -p2m_expose_resume(void) -{ - int error; - - if (!xen_ia64_p2m_expose || !p2m_initialized) - return; - - /* - * We can't call {lock, unlock}_cpu_hotplug() because - * they require process context. - * We don't need them because we're the only one cpu and - * interrupts are masked when resume. - */ - error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn, - p2m_assign_start_pfn, - p2m_expose_size, p2m_granule_pfn); - if (error) { - printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n", - error); - printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx " - "expose_size 0x%016lx granule 0x%016lx\n", - p2m_convert_min_pfn, p2m_assign_start_pfn, - p2m_expose_size, p2m_granule_pfn);; - p2m_initialized = 0; - smp_mb(); - ia64_ptr(0x2, p2m_itr_arg.vaddr, p2m_itr_arg.log_page_size); - - /* - * We can't call those clean up functions because they - * require process context. - */ -#if 0 -#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR - if (xen_ia64_p2m_expose_use_dtr) - unregister_cpu_notifier( - &p2m_expose_dtr_hotplug_notifier); -#endif - release_resource(&p2m_resource); -#endif - } -} - -//XXX inlinize? -unsigned long -p2m_phystomach(unsigned long gpfn) -{ - volatile const pte_t* pte; - unsigned long mfn; - unsigned long pteval; - - if (!p2m_initialized || - gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn - /* || !pfn_valid(gpfn) */) - return INVALID_MFN; - pte = p2m_pte + (gpfn - p2m_convert_min_pfn); - - mfn = INVALID_MFN; - if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 && - pte_present(__pte(pteval)) && - pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT))) - mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT; - - return mfn; -} - -EXPORT_SYMBOL_GPL(p2m_initialized); -EXPORT_SYMBOL_GPL(p2m_min_low_pfn); -EXPORT_SYMBOL_GPL(p2m_max_low_pfn); -EXPORT_SYMBOL_GPL(p2m_convert_min_pfn); -EXPORT_SYMBOL_GPL(p2m_convert_max_pfn); -EXPORT_SYMBOL_GPL(p2m_pte); -EXPORT_SYMBOL_GPL(p2m_phystomach); -#endif - -/////////////////////////////////////////////////////////////////////////// -// for xenoprof - -struct resource* -xen_ia64_allocate_resource(unsigned long size) -{ - struct resource* res; - int error; - - res = kmalloc(sizeof(*res), GFP_KERNEL); - if (res == NULL) - return ERR_PTR(-ENOMEM); - - res->name = "Xen"; - res->flags = IORESOURCE_MEM; - error = allocate_resource(&iomem_resource, res, PAGE_ALIGN(size), - privcmd_resource_min, privcmd_resource_max, - IA64_GRANULE_SIZE, NULL, NULL); - if (error) { - kfree(res); - return ERR_PTR(error); - } - return res; -} -EXPORT_SYMBOL_GPL(xen_ia64_allocate_resource); - -void -xen_ia64_release_resource(struct resource* res) -{ - release_resource(res); - kfree(res); -} -EXPORT_SYMBOL_GPL(xen_ia64_release_resource); - -void -xen_ia64_unmap_resource(struct resource* res) -{ - unsigned long gpfn = res->start >> PAGE_SHIFT; - unsigned long nr_pages = (res->end - res->start) >> PAGE_SHIFT; - unsigned long i; - - for (i = 0; i < nr_pages; i++) { - int error = HYPERVISOR_zap_physmap(gpfn + i, 0); - if (error) - printk(KERN_ERR - "%s:%d zap_phsymap failed %d gpfn %lx\n", - __func__, __LINE__, error, gpfn + i); - } - xen_ia64_release_resource(res); -} -EXPORT_SYMBOL_GPL(xen_ia64_unmap_resource); - -/////////////////////////////////////////////////////////////////////////// -// suspend/resume -void -xen_post_suspend(int suspend_cancelled) -{ - if (suspend_cancelled) - return; - - p2m_expose_resume(); - /* add more if necessary */ -} diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c b/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c deleted file mode 100644 index 4ad588a7c2..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c +++ /dev/null @@ -1,4 +0,0 @@ -#define MACHVEC_PLATFORM_NAME xen -#define MACHVEC_PLATFORM_HEADER <asm/machvec_xen.h> -#include <asm/machvec_init.h> - diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/mem.c b/linux-2.6-xen-sparse/arch/ia64/xen/mem.c deleted file mode 100644 index dc93097c70..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/mem.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Originally from linux/drivers/char/mem.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Added devfs support. - * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu> - * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com> - */ -/* - * taken from - * linux/drivers/char/mem.c and linux-2.6-xen-sparse/drivers/xen/char/mem.c. - * adjusted for IA64 and made transparent. - * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> - * VA Linux Systems Japan K.K. - */ - -#include <linux/mm.h> -#include <linux/efi.h> - -/* - * Architectures vary in how they handle caching for addresses - * outside of main memory. - * - */ -static inline int uncached_access(struct file *file, unsigned long addr) -{ - /* - * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases. - */ - return !(efi_mem_attributes(addr) & EFI_MEMORY_WB); -} - -int xen_mmap_mem(struct file * file, struct vm_area_struct * vma) -{ - unsigned long addr = vma->vm_pgoff << PAGE_SHIFT; - size_t size = vma->vm_end - vma->vm_start; - - -#if 0 - /* - *XXX FIXME: linux-2.6.16.29, linux-2.6.17 - * valid_mmap_phys_addr_range() in linux/arch/ia64/kernel/efi.c - * fails checks. - * linux-2.6.18.1's returns always 1. - * Its comments says - * - * MMIO regions are often missing from the EFI memory map. - * We must allow mmap of them for programs like X, so we - * currently can't do any useful validation. - */ - if (!valid_mmap_phys_addr_range(addr, &size)) - return -EINVAL; - if (size < vma->vm_end - vma->vm_start) - return -EINVAL; -#endif - - if (is_running_on_xen()) { - unsigned long offset = HYPERVISOR_ioremap(addr, size); - if (IS_ERR_VALUE(offset)) - return offset; - } - - if (uncached_access(file, vma->vm_pgoff << PAGE_SHIFT)) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ - if (remap_pfn_range(vma, - vma->vm_start, - vma->vm_pgoff, - size, - vma->vm_page_prot)) - return -EAGAIN; - return 0; -} diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c b/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c deleted file mode 100644 index fbc4664bbd..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c +++ /dev/null @@ -1,882 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * This implementation is for IA-64 and EM64T platforms that do not support - * I/O TLBs (aka DMA address translation hardware). - * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> - * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> - * Copyright (C) 2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. - * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid - * unnecessary i-cache flushing. - * 04/07/.. ak Better overflow handling. Assorted fixes. - * 05/09/10 linville Add support for syncing ranges, support syncing for - * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. - */ - -#include <linux/cache.h> -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ctype.h> - -#include <asm/io.h> -#include <asm/dma.h> -#include <asm/scatterlist.h> - -#include <linux/init.h> -#include <linux/bootmem.h> - -#ifdef CONFIG_XEN -/* - * What DMA mask should Xen use to remap the bounce buffer pool? Most - * reports seem to indicate 30 bits is sufficient, except maybe for old - * sound cards that we probably don't care about anyway. If we need to, - * we could put in some smarts to try to lower, but hopefully it's not - * necessary. - */ -#define DMA_BITS (30) -#endif - -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - -#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) -#define SG_ENT_PHYS_ADDRESS(SG) virt_to_bus(SG_ENT_VIRT_ADDRESS(SG)) - -/* - * Maximum allowable number of contiguous slabs to map, - * must be a power of 2. What is the appropriate value ? - * The complexity of {map,unmap}_single is linearly dependent on this value. - */ -#define IO_TLB_SEGSIZE 128 - -/* - * log of the size of each IO TLB slab. The number of slabs is command line - * controllable. - */ -#define IO_TLB_SHIFT 11 - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) - -/* - * Minimum IO TLB size to bother booting with. Systems with mainly - * 64bit capable cards will only lightly use the swiotlb. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) - -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - -int swiotlb_force; - -/* - * Used to do a quick range check in swiotlb_unmap_single and - * swiotlb_sync_single_*, to see if the memory was in fact allocated by this - * API. - */ -static char *io_tlb_start, *io_tlb_end; - -/* - * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; - -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. - */ -static unsigned long io_tlb_overflow = 32*1024; - -void *io_tlb_overflow_buffer; - -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. - */ -static unsigned char **io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -static DEFINE_SPINLOCK(io_tlb_lock); - -static int __init -setup_io_tlb_npages(char *str) -{ - if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); - /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - if (*str == ',') - ++str; - if (!strcmp(str, "force")) - swiotlb_force = 1; - return 1; -} -__setup("swiotlb=", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? */ - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void -swiotlb_init_with_default_size (size_t default_size) -{ - unsigned long i; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - -#ifdef CONFIG_XEN - if (is_running_on_xen()) - io_tlb_nslabs = roundup_pow_of_two(io_tlb_nslabs); -#endif - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); - io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); - -#ifdef CONFIG_XEN - for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) { - if (xen_create_contiguous_region( - (unsigned long)io_tlb_start + - (i << IO_TLB_SHIFT), - get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), - DMA_BITS)) - panic("Failed to setup Xen contiguous region"); - } -#endif - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); - - /* - * Get the overflow emergency buffer - */ - io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); -#ifdef CONFIG_XEN - if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer, - get_order(io_tlb_overflow), DMA_BITS)) - panic("Failed to setup Xen contiguous region for overflow"); -#endif - printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", - virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); -} - -void -swiotlb_init (void) -{ - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ -} - -/* - * Systems with larger DMA zones (those that don't support ISA) can - * initialize the swiotlb later using the slab allocator if needed. - * This should be just like above, but with some error catching. - */ -int -swiotlb_late_init_with_default_size (size_t default_size) -{ - unsigned long i, req_nslabs = io_tlb_nslabs; - unsigned int order; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - -#ifdef CONFIG_XEN - if (is_running_on_xen()) - io_tlb_nslabs = roundup_pow_of_two(io_tlb_nslabs); -#endif - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); - io_tlb_nslabs = SLABS_PER_PAGE << order; - - while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); - if (io_tlb_start) - break; - order--; - } - - if (!io_tlb_start) - goto cleanup1; - - if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; - } - io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); - memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT)); - -#ifdef CONFIG_XEN - for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) { - if (xen_create_contiguous_region( - (unsigned long)io_tlb_start + - (i << IO_TLB_SHIFT), - get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), - DMA_BITS)) - panic("Failed to setup Xen contiguous region"); - } -#endif - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup2; - - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_index = 0; - - io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(char *))); - if (!io_tlb_orig_addr) - goto cleanup3; - - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *)); - - /* - * Get the overflow emergency buffer - */ - io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!io_tlb_overflow_buffer) - goto cleanup4; - -#ifdef CONFIG_XEN - if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer, - get_order(io_tlb_overflow), DMA_BITS)) - panic("Failed to setup Xen contiguous region for overflow"); -#endif - printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - " - "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20, - virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); - - return 0; - -cleanup4: - free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs * - sizeof(char *))); - io_tlb_orig_addr = NULL; -cleanup3: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; - io_tlb_end = NULL; -cleanup2: - free_pages((unsigned long)io_tlb_start, order); - io_tlb_start = NULL; -cleanup1: - io_tlb_nslabs = req_nslabs; - return -ENOMEM; -} - -static inline int -address_needs_mapping(struct device *hwdev, dma_addr_t addr) -{ - dma_addr_t mask = 0xffffffff; - /* If the device has a mask, use it, otherwise default to 32 bits */ - if (hwdev && hwdev->dma_mask) - mask = *hwdev->dma_mask; - return (addr & ~mask) != 0; -} - -/* - * Allocates bounce buffer and returns its kernel virtual address. - */ -static void * -map_single(struct device *hwdev, char *buffer, size_t size, int dir) -{ - unsigned long flags; - char *dma_addr; - unsigned int nslots, stride, index, wrap; - int i; - - /* - * For mappings greater than a page, we limit the stride (and - * hence alignment) to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size > PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); - - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - wrap = index = ALIGN(io_tlb_index, stride); - - if (index >= io_tlb_nslabs) - wrap = index = 0; - - do { - /* - * If we find a slot that indicates we have 'nslots' - * number of contiguous buffers, we allocate the - * buffers from that slot and mark the entries as '0' - * indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in - * the next round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; - } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); - - spin_unlock_irqrestore(&io_tlb_lock, flags); - return NULL; - } - found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. - */ - io_tlb_orig_addr[index] = buffer; - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - memcpy(dma_addr, buffer, size); - - return dma_addr; -} - -/* - * dma_addr is the kernel virtual address of the bounce buffer to unmap. - */ -static void -unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) -{ - unsigned long flags; - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; - - /* - * First, sync the memory before unmapping the entry - */ - if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - /* - * bounce... copy the data back into the original buffer * and - * delete the bounce buffer. - */ - memcpy(buffer, dma_addr, size); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contigous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) - io_tlb_list[i] = ++count; - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, - int dir, int target) -{ - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; - - switch (target) { - case SYNC_FOR_CPU: - if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - memcpy(buffer, dma_addr, size); - else - BUG_ON(dir != DMA_TO_DEVICE); - break; - case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - memcpy(dma_addr, buffer, size); - else - BUG_ON(dir != DMA_FROM_DEVICE); - break; - default: - BUG(); - } -} - -void * -swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) -{ - unsigned long dev_addr; - void *ret; - int order = get_order(size); - - /* - * XXX fix me: the DMA API should pass us an explicit DMA mask - * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32 - * bit range instead of a 16MB one). - */ - flags |= GFP_DMA; - - ret = (void *)__get_free_pages(flags, order); -#ifdef CONFIG_XEN - if (ret && is_running_on_xen()) { - if (xen_create_contiguous_region((unsigned long)ret, order, - fls64(hwdev->coherent_dma_mask))) { - free_pages((unsigned long)ret, order); - ret = NULL; - } else { - /* - * Short circuit the rest, xen_create_contiguous_region - * should fail if it didn't give us an address within - * the mask requested. - */ - memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); - return ret; - } - } -#endif - if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) { - /* - * The allocated memory isn't reachable by the device. - * Fall back on swiotlb_map_single(). - */ - free_pages((unsigned long) ret, order); - ret = NULL; - } - if (!ret) { - /* - * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on - * swiotlb_map_single(), which will grab memory from - * the lowest available address range. - */ - dma_addr_t handle; - handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE); - if (swiotlb_dma_mapping_error(handle)) - return NULL; - - ret = bus_to_virt(handle); - } - - memset(ret, 0, size); - dev_addr = virt_to_bus(ret); - - /* Confirm address can be DMA'd by device */ - if (address_needs_mapping(hwdev, dev_addr)) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n", - (unsigned long long)*hwdev->dma_mask, dev_addr); - panic("swiotlb_alloc_coherent: allocated memory is out of " - "range for device"); - } - *dma_handle = dev_addr; - return ret; -} - -void -swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - if (!(vaddr >= (void *)io_tlb_start - && vaddr < (void *)io_tlb_end)) { -#ifdef CONFIG_XEN - xen_destroy_contiguous_region((unsigned long)vaddr, - get_order(size)); -#endif - free_pages((unsigned long) vaddr, get_order(size)); - } else - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE); -} - -static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) -{ - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at " - "device %s\n", size, dev ? dev->bus_id : "?"); - - if (size > io_tlb_overflow && do_panic) { - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) - panic("DMA: Memory would be corrupted\n"); - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory would be DMAed\n"); - } -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * physical address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. - */ -dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) -{ - unsigned long dev_addr = virt_to_bus(ptr); - void *map; - - BUG_ON(dir == DMA_NONE); - /* - * If the pointer passed in happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (!range_straddles_page_boundary(ptr, size) && - !address_needs_mapping(hwdev, dev_addr) && !swiotlb_force) - return dev_addr; - - /* - * Oh well, have to allocate and map a bounce buffer. - */ - map = map_single(hwdev, ptr, size, dir); - if (!map) { - swiotlb_full(hwdev, size, dir, 1); - map = io_tlb_overflow_buffer; - } - - dev_addr = virt_to_bus(map); - - /* - * Ensure that the address returned is DMA'ble - */ - if (address_needs_mapping(hwdev, dev_addr)) - panic("map_single: bounce buffer is not DMA'ble"); - - return dev_addr; -} - -/* - * Since DMA is i-cache coherent, any (complete) pages that were written via - * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to - * flush them when they get mapped into an executable vm-area. - */ -static void -mark_clean(void *addr, size_t size) -{ - unsigned long pg_addr, end; - -#ifdef CONFIG_XEN - /* XXX: Bad things happen when starting domUs if this is enabled. */ - if (is_running_on_xen()) - return; -#endif - - pg_addr = PAGE_ALIGN((unsigned long) addr); - end = (unsigned long) addr + size; - while (pg_addr + PAGE_SIZE <= end) { - struct page *page = virt_to_page(pg_addr); - set_bit(PG_arch_1, &page->flags); - pg_addr += PAGE_SIZE; - } -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, - int dir) -{ - char *dma_addr = bus_to_virt(dev_addr); - - BUG_ON(dir == DMA_NONE); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - unmap_single(hwdev, dma_addr, size, dir); - else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_single() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the dma mapping, you must - * call this function before doing so. At the next point you give the dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -static inline void -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, int target) -{ - char *dma_addr = bus_to_virt(dev_addr); - - BUG_ON(dir == DMA_NONE); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - sync_single(hwdev, dma_addr, size, dir, target); - else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); -} - -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); -} - -/* - * Same as above, but for a sub-range of the mapping. - */ -static inline void -swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - int dir, int target) -{ - char *dma_addr = bus_to_virt(dev_addr) + offset; - - BUG_ON(dir == DMA_NONE); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - sync_single(hwdev, dma_addr, size, dir, target); - else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); -} - -void -swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_CPU); -} - -void -swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_DEVICE); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_single - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for swiotlb_map_single are the - * same here. - */ -int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, - int dir) -{ - void *addr; - unsigned long dev_addr; - int i; - - BUG_ON(dir == DMA_NONE); - - for (i = 0; i < nelems; i++, sg++) { - addr = SG_ENT_VIRT_ADDRESS(sg); - dev_addr = virt_to_bus(addr); - if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { - void *map = map_single(hwdev, addr, sg->length, dir); - sg->dma_address = virt_to_bus(map); - if (!map) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ - swiotlb_full(hwdev, sg->length, dir, 0); - swiotlb_unmap_sg(hwdev, sg - i, i, dir); - sg[0].dma_length = 0; - return 0; - } - } else - sg->dma_address = dev_addr; - sg->dma_length = sg->length; - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_single() above. - */ -void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, - int dir) -{ - int i; - - BUG_ON(dir == DMA_NONE); - - for (i = 0; i < nelems; i++, sg++) - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - unmap_single(hwdev, (void *) bus_to_virt(sg->dma_address), sg->dma_length, dir); - else if (dir == DMA_FROM_DEVICE) - mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -static inline void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir, int target) -{ - int i; - - BUG_ON(dir == DMA_NONE); - - for (i = 0; i < nelems; i++, sg++) - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - sync_single(hwdev, (void *) sg->dma_address, - sg->dma_length, dir, target); -} - -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); -} - -int -swiotlb_dma_mapping_error(dma_addr_t dma_addr) -{ - return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); -} - -/* - * Return whether the given device DMA address mask can be supported - * properly. For example, if your device can only drive the low 24-bits - * during bus mastering, then you would pass 0x00ffffff as the mask to - * this function. - */ -int -swiotlb_dma_supported (struct device *hwdev, u64 mask) -{ - return (virt_to_bus(io_tlb_end) - 1) <= mask; -} - -EXPORT_SYMBOL(swiotlb_init); -EXPORT_SYMBOL(swiotlb_map_single); -EXPORT_SYMBOL(swiotlb_unmap_single); -EXPORT_SYMBOL(swiotlb_map_sg); -EXPORT_SYMBOL(swiotlb_unmap_sg); -EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); -EXPORT_SYMBOL(swiotlb_sync_single_for_device); -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); -EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); -EXPORT_SYMBOL(swiotlb_sync_sg_for_device); -EXPORT_SYMBOL(swiotlb_dma_mapping_error); -EXPORT_SYMBOL(swiotlb_alloc_coherent); -EXPORT_SYMBOL(swiotlb_free_coherent); -EXPORT_SYMBOL(swiotlb_dma_supported); diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/util.c b/linux-2.6-xen-sparse/arch/ia64/xen/util.c deleted file mode 100644 index 387a1c3368..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/util.c +++ /dev/null @@ -1,105 +0,0 @@ -/****************************************************************************** - * arch/ia64/xen/util.c - * This file is the ia64 counterpart of drivers/xen/util.c - * - * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> - * VA Linux Systems Japan K.K. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <asm/uaccess.h> -#include <xen/driver_util.h> -#include <xen/interface/memory.h> -#include <asm/hypercall.h> - -struct vm_struct *alloc_vm_area(unsigned long size) -{ - int order; - unsigned long virt; - unsigned long nr_pages; - struct vm_struct* area; - - order = get_order(size); - virt = __get_free_pages(GFP_KERNEL, order); - if (virt == 0) { - goto err0; - } - nr_pages = 1 << order; - scrub_pages(virt, nr_pages); - - area = kmalloc(sizeof(*area), GFP_KERNEL); - if (area == NULL) { - goto err1; - } - - area->flags = VM_IOREMAP;//XXX - area->addr = (void*)virt; - area->size = size; - area->pages = NULL; //XXX - area->nr_pages = nr_pages; - area->phys_addr = 0; /* xenbus_map_ring_valloc uses this field! */ - - return area; - -err1: - free_pages(virt, order); -err0: - return NULL; - -} -EXPORT_SYMBOL_GPL(alloc_vm_area); - -void free_vm_area(struct vm_struct *area) -{ - unsigned int order = get_order(area->size); - unsigned long i; - unsigned long phys_addr = __pa(area->addr); - - // This area is used for foreign page mappping. - // So underlying machine page may not be assigned. - for (i = 0; i < (1 << order); i++) { - unsigned long ret; - unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .address_bits = 0, - .extent_order = 0, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &gpfn); - ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, - &reservation); - BUG_ON(ret != 1); - } - free_pages((unsigned long)area->addr, order); - kfree(area); -} -EXPORT_SYMBOL_GPL(free_vm_area); - -/* - * Local variables: - * c-file-style: "linux" - * indent-tabs-mode: t - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c deleted file mode 100644 index 4c90b5b01e..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Tristan Gingold <tristan.gingold@bull.net> - */ -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/gfp.h> -#include <linux/module.h> -#include <xen/interface/xen.h> -#include <xen/interface/platform.h> -#include <xen/interface/memory.h> -#include <xen/interface/xencomm.h> -#include <xen/interface/version.h> -#include <xen/interface/sched.h> -#include <xen/interface/event_channel.h> -#include <xen/interface/physdev.h> -#include <xen/interface/grant_table.h> -#include <xen/interface/callback.h> -#include <xen/interface/acm_ops.h> -#include <xen/interface/hvm/params.h> -#include <xen/interface/xenoprof.h> -#include <xen/interface/vcpu.h> -#include <asm/hypercall.h> -#include <asm/page.h> -#include <asm/uaccess.h> -#include <asm/xen/xencomm.h> -#include <asm/perfmon.h> - -/* Xencomm notes: - * This file defines hypercalls to be used by xencomm. The hypercalls simply - * create inlines descriptors for pointers and then call the raw arch hypercall - * xencomm_arch_hypercall_XXX - * - * If the arch wants to directly use these hypercalls, simply define macros - * in asm/hypercall.h, eg: - * #define HYPERVISOR_sched_op xencomm_hypercall_sched_op - * - * The arch may also define HYPERVISOR_xxx as a function and do more operations - * before/after doing the hypercall. - * - * Note: because only inline descriptors are created these functions must only - * be called with in kernel memory parameters. - */ - -int -xencomm_hypercall_console_io(int cmd, int count, char *str) -{ - return xencomm_arch_hypercall_console_io - (cmd, count, xencomm_create_inline(str)); -} - -int -xencomm_hypercall_event_channel_op(int cmd, void *op) -{ - return xencomm_arch_hypercall_event_channel_op - (cmd, xencomm_create_inline(op)); -} - -int -xencomm_hypercall_xen_version(int cmd, void *arg) -{ - switch (cmd) { - case XENVER_version: - case XENVER_extraversion: - case XENVER_compile_info: - case XENVER_capabilities: - case XENVER_changeset: - case XENVER_platform_parameters: - case XENVER_pagesize: - case XENVER_get_features: - break; - default: - printk("%s: unknown version cmd %d\n", __func__, cmd); - return -ENOSYS; - } - - return xencomm_arch_hypercall_xen_version - (cmd, xencomm_create_inline(arg)); -} - -int -xencomm_hypercall_physdev_op(int cmd, void *op) -{ - return xencomm_arch_hypercall_physdev_op - (cmd, xencomm_create_inline(op)); -} - -static void * -xencommize_grant_table_op(unsigned int cmd, void *op, unsigned int count) -{ - switch (cmd) { - case GNTTABOP_map_grant_ref: - case GNTTABOP_unmap_grant_ref: - break; - case GNTTABOP_setup_table: - { - struct gnttab_setup_table *setup = op; - struct xencomm_handle *frame_list; - - frame_list = xencomm_create_inline - (xen_guest_handle(setup->frame_list)); - - set_xen_guest_handle(setup->frame_list, (void *)frame_list); - break; - } - case GNTTABOP_dump_table: - case GNTTABOP_transfer: - case GNTTABOP_copy: - break; - default: - printk("%s: unknown grant table op %d\n", __func__, cmd); - BUG(); - } - - return xencomm_create_inline(op); -} - -int -xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, unsigned int count) -{ - void *desc = xencommize_grant_table_op (cmd, op, count); - - return xencomm_arch_hypercall_grant_table_op(cmd, desc, count); -} - -int -xencomm_hypercall_sched_op(int cmd, void *arg) -{ - switch (cmd) { - case SCHEDOP_yield: - case SCHEDOP_block: - case SCHEDOP_shutdown: - case SCHEDOP_remote_shutdown: - break; - case SCHEDOP_poll: - { - sched_poll_t *poll = arg; - struct xencomm_handle *ports; - - ports = xencomm_create_inline(xen_guest_handle(poll->ports)); - - set_xen_guest_handle(poll->ports, (void *)ports); - break; - } - default: - printk("%s: unknown sched op %d\n", __func__, cmd); - return -ENOSYS; - } - - return xencomm_arch_hypercall_sched_op(cmd, xencomm_create_inline(arg)); -} - -int -xencomm_hypercall_multicall(void *call_list, int nr_calls) -{ - int i; - multicall_entry_t *mce; - - for (i = 0; i < nr_calls; i++) { - mce = (multicall_entry_t *)call_list + i; - - switch (mce->op) { - case __HYPERVISOR_update_va_mapping: - case __HYPERVISOR_mmu_update: - /* No-op on ia64. */ - break; - case __HYPERVISOR_grant_table_op: - mce->args[1] = (unsigned long)xencommize_grant_table_op - (mce->args[0], (void *)mce->args[1], - mce->args[2]); - break; - case __HYPERVISOR_memory_op: - default: - printk("%s: unhandled multicall op entry op %lu\n", - __func__, mce->op); - return -ENOSYS; - } - } - - return xencomm_arch_hypercall_multicall - (xencomm_create_inline(call_list), nr_calls); -} - -int -xencomm_hypercall_callback_op(int cmd, void *arg) -{ - switch (cmd) - { - case CALLBACKOP_register: - case CALLBACKOP_unregister: - break; - default: - printk("%s: unknown callback op %d\n", __func__, cmd); - return -ENOSYS; - } - - return xencomm_arch_hypercall_callback_op - (cmd, xencomm_create_inline(arg)); -} - -static void -xencommize_memory_reservation (xen_memory_reservation_t *mop) -{ - struct xencomm_handle *desc; - - desc = xencomm_create_inline(xen_guest_handle(mop->extent_start)); - set_xen_guest_handle(mop->extent_start, (void *)desc); -} - -int -xencomm_hypercall_memory_op(unsigned int cmd, void *arg) -{ - XEN_GUEST_HANDLE(xen_pfn_t) extent_start_va[2]; - xen_memory_reservation_t *xmr = NULL, *xme_in = NULL, *xme_out = NULL; - xen_memory_map_t *memmap = NULL; - XEN_GUEST_HANDLE(void) buffer; - int rc; - - switch (cmd) { - case XENMEM_increase_reservation: - case XENMEM_decrease_reservation: - case XENMEM_populate_physmap: - xmr = (xen_memory_reservation_t *)arg; - xen_guest_handle(extent_start_va[0]) = - xen_guest_handle(xmr->extent_start); - xencommize_memory_reservation((xen_memory_reservation_t *)arg); - break; - - case XENMEM_maximum_ram_page: - break; - - case XENMEM_exchange: - xme_in = &((xen_memory_exchange_t *)arg)->in; - xme_out = &((xen_memory_exchange_t *)arg)->out; - xen_guest_handle(extent_start_va[0]) = - xen_guest_handle(xme_in->extent_start); - xen_guest_handle(extent_start_va[1]) = - xen_guest_handle(xme_out->extent_start); - xencommize_memory_reservation - (&((xen_memory_exchange_t *)arg)->in); - xencommize_memory_reservation - (&((xen_memory_exchange_t *)arg)->out); - break; - - case XENMEM_machine_memory_map: - memmap = (xen_memory_map_t *)arg; - xen_guest_handle(buffer) = xen_guest_handle(memmap->buffer); - set_xen_guest_handle(memmap->buffer, - (void *)xencomm_create_inline( - xen_guest_handle(memmap->buffer))); - break; - - default: - printk("%s: unknown memory op %d\n", __func__, cmd); - return -ENOSYS; - } - - rc = xencomm_arch_hypercall_memory_op(cmd, xencomm_create_inline(arg)); - - switch (cmd) { - case XENMEM_increase_reservation: - case XENMEM_decrease_reservation: - case XENMEM_populate_physmap: - xen_guest_handle(xmr->extent_start) = - xen_guest_handle(extent_start_va[0]); - break; - - case XENMEM_exchange: - xen_guest_handle(xme_in->extent_start) = - xen_guest_handle(extent_start_va[0]); - xen_guest_handle(xme_out->extent_start) = - xen_guest_handle(extent_start_va[1]); - break; - - case XENMEM_machine_memory_map: - xen_guest_handle(memmap->buffer) = xen_guest_handle(buffer); - break; - } - - return rc; -} - -unsigned long -xencomm_hypercall_hvm_op(int cmd, void *arg) -{ - switch (cmd) { - case HVMOP_set_param: - case HVMOP_get_param: - break; - default: - printk("%s: unknown hvm op %d\n", __func__, cmd); - return -ENOSYS; - } - - return xencomm_arch_hypercall_hvm_op(cmd, xencomm_create_inline(arg)); -} - -int -xencomm_hypercall_suspend(unsigned long srec) -{ - struct sched_shutdown arg; - - arg.reason = SHUTDOWN_suspend; - - return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg)); -} - -int -xencomm_hypercall_xenoprof_op(int op, void *arg) -{ - switch (op) { - case XENOPROF_init: - case XENOPROF_set_active: - case XENOPROF_set_passive: - case XENOPROF_counter: - case XENOPROF_get_buffer: - break; - - case XENOPROF_reset_active_list: - case XENOPROF_reset_passive_list: - case XENOPROF_reserve_counters: - case XENOPROF_setup_events: - case XENOPROF_enable_virq: - case XENOPROF_start: - case XENOPROF_stop: - case XENOPROF_disable_virq: - case XENOPROF_release_counters: - case XENOPROF_shutdown: - return xencomm_arch_hypercall_xenoprof_op(op, arg); - break; - - default: - printk("%s: op %d isn't supported\n", __func__, op); - return -ENOSYS; - } - return xencomm_arch_hypercall_xenoprof_op(op, - xencomm_create_inline(arg)); -} - -int -xencomm_hypercall_perfmon_op(unsigned long cmd, void* arg, unsigned long count) -{ - switch (cmd) { - case PFM_GET_FEATURES: - case PFM_CREATE_CONTEXT: - case PFM_WRITE_PMCS: - case PFM_WRITE_PMDS: - case PFM_LOAD_CONTEXT: - break; - - case PFM_DESTROY_CONTEXT: - case PFM_UNLOAD_CONTEXT: - case PFM_START: - case PFM_STOP: - return xencomm_arch_hypercall_perfmon_op(cmd, arg, count); - - default: - printk("%s:%d cmd %ld isn't supported\n", - __func__,__LINE__, cmd); - BUG(); - } - - return xencomm_arch_hypercall_perfmon_op(cmd, - xencomm_create_inline(arg), - count); -} - -long -xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg) -{ - switch (cmd) { - case VCPUOP_register_runstate_memory_area: - xencommize_memory_reservation((xen_memory_reservation_t *)arg); - break; - - default: - printk("%s: unknown vcpu op %d\n", __func__, cmd); - return -ENOSYS; - } - - return xencomm_arch_hypercall_vcpu_op(cmd, cpu, - xencomm_create_inline(arg)); -} diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c deleted file mode 100644 index 3c0baff1f0..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c +++ /dev/null @@ -1,469 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Tristan Gingold <tristan.gingold@bull.net> - */ -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <xen/interface/xen.h> -#include <xen/interface/platform.h> -#include <xen/interface/memory.h> -#include <xen/interface/xencomm.h> -#include <xen/interface/version.h> -#include <xen/interface/event_channel.h> -#include <xen/interface/physdev.h> -#include <xen/interface/grant_table.h> -#include <xen/interface/hvm/params.h> -#include <xen/interface/xenoprof.h> -#ifdef CONFIG_VMX_GUEST -#include <asm/hypervisor.h> -#else -#include <asm/hypercall.h> -#endif -#include <asm/xen/xencomm.h> -#include <asm/perfmon.h> - -int -xencomm_mini_hypercall_event_channel_op(int cmd, void *op) -{ - struct xencomm_mini xc_area[2]; - int nbr_area = 2; - struct xencomm_handle *desc; - int rc; - - rc = xencomm_create_mini(xc_area, &nbr_area, - op, sizeof(evtchn_op_t), &desc); - if (rc) - return rc; - - return xencomm_arch_hypercall_event_channel_op(cmd, desc); -} -EXPORT_SYMBOL(xencomm_mini_hypercall_event_channel_op); - -static int -xencommize_mini_grant_table_op(struct xencomm_mini *xc_area, int *nbr_area, - unsigned int cmd, void *op, unsigned int count, - struct xencomm_handle **desc) -{ - struct xencomm_handle *desc1; - unsigned int argsize; - int rc; - - switch (cmd) { - case GNTTABOP_map_grant_ref: - argsize = sizeof(struct gnttab_map_grant_ref); - break; - case GNTTABOP_unmap_grant_ref: - argsize = sizeof(struct gnttab_unmap_grant_ref); - break; - case GNTTABOP_setup_table: - { - struct gnttab_setup_table *setup = op; - - argsize = sizeof(*setup); - - if (count != 1) - return -EINVAL; - rc = xencomm_create_mini - (xc_area, nbr_area, - xen_guest_handle(setup->frame_list), - setup->nr_frames - * sizeof(*xen_guest_handle(setup->frame_list)), - &desc1); - if (rc) - return rc; - set_xen_guest_handle(setup->frame_list, (void *)desc1); - break; - } - case GNTTABOP_dump_table: - argsize = sizeof(struct gnttab_dump_table); - break; - case GNTTABOP_transfer: - argsize = sizeof(struct gnttab_transfer); - break; - case GNTTABOP_copy: - argsize = sizeof(struct gnttab_copy); - break; - case GNTTABOP_query_size: - argsize = sizeof(struct gnttab_query_size); - break; - default: - printk("%s: unknown mini grant table op %d\n", __func__, cmd); - BUG(); - } - - rc = xencomm_create_mini(xc_area, nbr_area, op, count * argsize, desc); - if (rc) - return rc; - - return 0; -} - -int -xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op, - unsigned int count) -{ - int rc; - struct xencomm_handle *desc; - int nbr_area = 2; - struct xencomm_mini xc_area[2]; - - rc = xencommize_mini_grant_table_op(xc_area, &nbr_area, - cmd, op, count, &desc); - if (rc) - return rc; - - return xencomm_arch_hypercall_grant_table_op(cmd, desc, count); -} -EXPORT_SYMBOL(xencomm_mini_hypercall_grant_table_op); - -int -xencomm_mini_hypercall_multicall(void *call_list, int nr_calls) -{ - int i; - multicall_entry_t *mce; - int nbr_area = 2 + nr_calls * 3; - struct xencomm_mini xc_area[nbr_area]; - struct xencomm_handle *desc; - int rc; - - for (i = 0; i < nr_calls; i++) { - mce = (multicall_entry_t *)call_list + i; - - switch (mce->op) { - case __HYPERVISOR_update_va_mapping: - case __HYPERVISOR_mmu_update: - /* No-op on ia64. */ - break; - case __HYPERVISOR_grant_table_op: - rc = xencommize_mini_grant_table_op - (xc_area, &nbr_area, - mce->args[0], (void *)mce->args[1], - mce->args[2], &desc); - if (rc) - return rc; - mce->args[1] = (unsigned long)desc; - break; - case __HYPERVISOR_memory_op: - default: - printk("%s: unhandled multicall op entry op %lu\n", - __func__, mce->op); - return -ENOSYS; - } - } - - rc = xencomm_create_mini(xc_area, &nbr_area, call_list, - nr_calls * sizeof(multicall_entry_t), &desc); - if (rc) - return rc; - - return xencomm_arch_hypercall_multicall(desc, nr_calls); -} -EXPORT_SYMBOL(xencomm_mini_hypercall_multicall); - -static int -xencommize_mini_memory_reservation(struct xencomm_mini *area, int *nbr_area, - xen_memory_reservation_t *mop) -{ - struct xencomm_handle *desc; - int rc; - - rc = xencomm_create_mini - (area, nbr_area, - xen_guest_handle(mop->extent_start), - mop->nr_extents - * sizeof(*xen_guest_handle(mop->extent_start)), - &desc); - if (rc) - return rc; - - set_xen_guest_handle(mop->extent_start, (void *)desc); - - return 0; -} - -int -xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg) -{ - int nbr_area = 4; - struct xencomm_mini xc_area[4]; - struct xencomm_handle *desc; - int rc; - unsigned int argsize; - - switch (cmd) { - case XENMEM_increase_reservation: - case XENMEM_decrease_reservation: - case XENMEM_populate_physmap: - argsize = sizeof(xen_memory_reservation_t); - rc = xencommize_mini_memory_reservation - (xc_area, &nbr_area, (xen_memory_reservation_t *)arg); - if (rc) - return rc; - break; - - case XENMEM_maximum_ram_page: - argsize = 0; - break; - - case XENMEM_exchange: - argsize = sizeof(xen_memory_exchange_t); - rc = xencommize_mini_memory_reservation - (xc_area, &nbr_area, - &((xen_memory_exchange_t *)arg)->in); - if (rc) - return rc; - rc = xencommize_mini_memory_reservation - (xc_area, &nbr_area, - &((xen_memory_exchange_t *)arg)->out); - if (rc) - return rc; - break; - - case XENMEM_add_to_physmap: - argsize = sizeof (xen_add_to_physmap_t); - break; - - case XENMEM_machine_memory_map: - { - xen_memory_map_t *memmap = (xen_memory_map_t *)arg; - argsize = sizeof(*memmap); - rc = xencomm_create_mini(xc_area, &nbr_area, - xen_guest_handle(memmap->buffer), - memmap->nr_entries, &desc); - if (rc) - return rc; - set_xen_guest_handle(memmap->buffer, (void *)desc); - break; - } - - default: - printk("%s: unknown mini memory op %d\n", __func__, cmd); - return -ENOSYS; - } - - rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); - if (rc) - return rc; - - return xencomm_arch_hypercall_memory_op(cmd, desc); -} -EXPORT_SYMBOL(xencomm_mini_hypercall_memory_op); - -unsigned long -xencomm_mini_hypercall_hvm_op(int cmd, void *arg) -{ - struct xencomm_handle *desc; - int nbr_area = 2; - struct xencomm_mini xc_area[2]; - unsigned int argsize; - int rc; - - switch (cmd) { - case HVMOP_get_param: - case HVMOP_set_param: - argsize = sizeof(xen_hvm_param_t); - break; - default: - printk("%s: unknown HVMOP %d\n", __func__, cmd); - return -EINVAL; - } - - rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); - if (rc) - return rc; - - return xencomm_arch_hypercall_hvm_op(cmd, desc); -} -EXPORT_SYMBOL(xencomm_mini_hypercall_hvm_op); - -int -xencomm_mini_hypercall_xen_version(int cmd, void *arg) -{ - struct xencomm_handle *desc; - int nbr_area = 2; - struct xencomm_mini xc_area[2]; - unsigned int argsize; - int rc; - - switch (cmd) { - case XENVER_version: - /* do not actually pass an argument */ - return xencomm_arch_hypercall_xen_version(cmd, 0); - case XENVER_extraversion: - argsize = sizeof(xen_extraversion_t); - break; - case XENVER_compile_info: - argsize = sizeof(xen_compile_info_t); - break; - case XENVER_capabilities: - argsize = sizeof(xen_capabilities_info_t); - break; - case XENVER_changeset: - argsize = sizeof(xen_changeset_info_t); - break; - case XENVER_platform_parameters: - argsize = sizeof(xen_platform_parameters_t); - break; - case XENVER_pagesize: - argsize = (arg == NULL) ? 0 : sizeof(void *); - break; - case XENVER_get_features: - argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t); - break; - - default: - printk("%s: unknown version op %d\n", __func__, cmd); - return -ENOSYS; - } - - rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); - if (rc) - return rc; - - return xencomm_arch_hypercall_xen_version(cmd, desc); -} -EXPORT_SYMBOL(xencomm_mini_hypercall_xen_version); - -int -xencomm_mini_hypercall_xenoprof_op(int op, void *arg) -{ - unsigned int argsize; - struct xencomm_mini xc_area[2]; - int nbr_area = 2; - struct xencomm_handle *desc; - int rc; - - switch (op) { - case XENOPROF_init: - argsize = sizeof(xenoprof_init_t); - break; - case XENOPROF_set_active: - argsize = sizeof(domid_t); - break; - case XENOPROF_set_passive: - argsize = sizeof(xenoprof_passive_t); - break; - case XENOPROF_counter: - argsize = sizeof(xenoprof_counter_t); - break; - case XENOPROF_get_buffer: - argsize = sizeof(xenoprof_get_buffer_t); - break; - - case XENOPROF_reset_active_list: - case XENOPROF_reset_passive_list: - case XENOPROF_reserve_counters: - case XENOPROF_setup_events: - case XENOPROF_enable_virq: - case XENOPROF_start: - case XENOPROF_stop: - case XENOPROF_disable_virq: - case XENOPROF_release_counters: - case XENOPROF_shutdown: - return xencomm_arch_hypercall_xenoprof_op(op, arg); - - default: - printk("%s: op %d isn't supported\n", __func__, op); - return -ENOSYS; - } - rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); - if (rc) - return rc; - return xencomm_arch_hypercall_xenoprof_op(op, desc); -} -EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_xenoprof_op); - -int -xencomm_mini_hypercall_perfmon_op(unsigned long cmd, void* arg, - unsigned long count) -{ - unsigned int argsize; - struct xencomm_mini xc_area[2]; - int nbr_area = 2; - struct xencomm_handle *desc; - int rc; - - switch (cmd) { - case PFM_GET_FEATURES: - argsize = sizeof(pfarg_features_t); - break; - case PFM_CREATE_CONTEXT: - argsize = sizeof(pfarg_context_t); - break; - case PFM_LOAD_CONTEXT: - argsize = sizeof(pfarg_load_t); - break; - case PFM_WRITE_PMCS: - case PFM_WRITE_PMDS: - argsize = sizeof(pfarg_reg_t) * count; - break; - - case PFM_DESTROY_CONTEXT: - case PFM_UNLOAD_CONTEXT: - case PFM_START: - case PFM_STOP: - return xencomm_arch_hypercall_perfmon_op(cmd, arg, count); - - default: - printk("%s:%d cmd %ld isn't supported\n", - __func__, __LINE__, cmd); - BUG(); - } - - rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); - if (rc) - return rc; - return xencomm_arch_hypercall_perfmon_op(cmd, desc, count); -} -EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_perfmon_op); - -int -xencomm_mini_hypercall_sched_op(int cmd, void *arg) -{ - int rc, nbr_area = 2; - struct xencomm_mini xc_area[2]; - struct xencomm_handle *desc; - unsigned int argsize; - - switch (cmd) { - case SCHEDOP_yield: - case SCHEDOP_block: - argsize = 0; - break; - case SCHEDOP_shutdown: - argsize = sizeof(sched_shutdown_t); - break; - case SCHEDOP_poll: - argsize = sizeof(sched_poll_t); - break; - case SCHEDOP_remote_shutdown: - argsize = sizeof(sched_remote_shutdown_t); - break; - - default: - printk("%s: unknown sched op %d\n", __func__, cmd); - return -ENOSYS; - } - - rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); - if (rc) - return rc; - - return xencomm_arch_hypercall_sched_op(cmd, desc); -} -EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_sched_op); diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c deleted file mode 100644 index 7c67373ccc..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c +++ /dev/null @@ -1,673 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - * Tristan Gingold <tristan.gingold@bull.net> - */ -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/gfp.h> -#include <linux/module.h> -#include <xen/interface/xen.h> -#include <xen/interface/platform.h> -#define __XEN__ -#include <xen/interface/domctl.h> -#include <xen/interface/sysctl.h> -#include <xen/interface/memory.h> -#include <xen/interface/version.h> -#include <xen/interface/event_channel.h> -#include <xen/interface/acm_ops.h> -#include <xen/interface/hvm/params.h> -#include <xen/public/privcmd.h> -#include <asm/hypercall.h> -#include <asm/page.h> -#include <asm/uaccess.h> -#include <asm/xen/xencomm.h> - -#define ROUND_DIV(v,s) (((v) + (s) - 1) / (s)) - -static int -xencomm_privcmd_platform_op(privcmd_hypercall_t *hypercall) -{ - struct xen_platform_op kern_op; - struct xen_platform_op __user *user_op = (struct xen_platform_op __user *)hypercall->arg[0]; - struct xencomm_handle *op_desc; - struct xencomm_handle *desc = NULL; - int ret = 0; - - if (copy_from_user(&kern_op, user_op, sizeof(struct xen_platform_op))) - return -EFAULT; - - if (kern_op.interface_version != XENPF_INTERFACE_VERSION) - return -EACCES; - - op_desc = xencomm_create_inline(&kern_op); - - switch (kern_op.cmd) { - default: - printk("%s: unknown platform cmd %d\n", __func__, kern_op.cmd); - return -ENOSYS; - } - - if (ret) { - /* error mapping the nested pointer */ - return ret; - } - - ret = xencomm_arch_hypercall_platform_op(op_desc); - - /* FIXME: should we restore the handle? */ - if (copy_to_user(user_op, &kern_op, sizeof(struct xen_platform_op))) - ret = -EFAULT; - - if (desc) - xencomm_free(desc); - return ret; -} - -/* - * Temporarily disable the NUMA PHYSINFO code until the rest of the - * changes are upstream. - */ -#undef IA64_NUMA_PHYSINFO - -static int -xencomm_privcmd_sysctl(privcmd_hypercall_t *hypercall) -{ - xen_sysctl_t kern_op; - xen_sysctl_t __user *user_op; - struct xencomm_handle *op_desc; - struct xencomm_handle *desc = NULL; - struct xencomm_handle *desc1 = NULL; - int ret = 0; - - user_op = (xen_sysctl_t __user *)hypercall->arg[0]; - - if (copy_from_user(&kern_op, user_op, sizeof(xen_sysctl_t))) - return -EFAULT; - - if (kern_op.interface_version != XEN_SYSCTL_INTERFACE_VERSION) - return -EACCES; - - op_desc = xencomm_create_inline(&kern_op); - - switch (kern_op.cmd) { - case XEN_SYSCTL_readconsole: - ret = xencomm_create( - xen_guest_handle(kern_op.u.readconsole.buffer), - kern_op.u.readconsole.count, - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.readconsole.buffer, - (void *)desc); - break; - case XEN_SYSCTL_tbuf_op: -#ifndef IA64_NUMA_PHYSINFO - case XEN_SYSCTL_physinfo: -#endif - case XEN_SYSCTL_sched_id: - break; - case XEN_SYSCTL_perfc_op: - { - struct xencomm_handle *tmp_desc; - xen_sysctl_t tmp_op = { - .cmd = XEN_SYSCTL_perfc_op, - .interface_version = XEN_SYSCTL_INTERFACE_VERSION, - .u.perfc_op = { - .cmd = XEN_SYSCTL_PERFCOP_query, - // .desc.p = NULL, - // .val.p = NULL, - }, - }; - - if (xen_guest_handle(kern_op.u.perfc_op.desc) == NULL) { - if (xen_guest_handle(kern_op.u.perfc_op.val) != NULL) - return -EINVAL; - break; - } - - /* query the buffer size for xencomm */ - tmp_desc = xencomm_create_inline(&tmp_op); - ret = xencomm_arch_hypercall_sysctl(tmp_desc); - if (ret) - return ret; - - ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.desc), - tmp_op.u.perfc_op.nr_counters * - sizeof(xen_sysctl_perfc_desc_t), - &desc, GFP_KERNEL); - if (ret) - return ret; - - set_xen_guest_handle(kern_op.u.perfc_op.desc, (void *)desc); - - ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.val), - tmp_op.u.perfc_op.nr_vals * - sizeof(xen_sysctl_perfc_val_t), - &desc1, GFP_KERNEL); - if (ret) - xencomm_free(desc); - - set_xen_guest_handle(kern_op.u.perfc_op.val, (void *)desc1); - break; - } - case XEN_SYSCTL_getdomaininfolist: - ret = xencomm_create( - xen_guest_handle(kern_op.u.getdomaininfolist.buffer), - kern_op.u.getdomaininfolist.max_domains * - sizeof(xen_domctl_getdomaininfo_t), - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.getdomaininfolist.buffer, - (void *)desc); - break; - case XEN_SYSCTL_debug_keys: - ret = xencomm_create( - xen_guest_handle(kern_op.u.debug_keys.keys), - kern_op.u.debug_keys.nr_keys, - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.debug_keys.keys, - (void *)desc); - break; - -#ifdef IA64_NUMA_PHYSINFO - case XEN_SYSCTL_physinfo: - ret = xencomm_create( - xen_guest_handle(kern_op.u.physinfo.memory_chunks), - PUBLIC_MAXCHUNKS * sizeof(node_data_t), - &desc, GFP_KERNEL); - if (ret) - return ret; - set_xen_guest_handle(kern_op.u.physinfo.memory_chunks, - (void *)desc); - - ret = xencomm_create( - xen_guest_handle(kern_op.u.physinfo.cpu_to_node), - PUBLIC_MAX_NUMNODES * sizeof(u64), - &desc1, GFP_KERNEL); - if (ret) - xencomm_free(desc); - set_xen_guest_handle(kern_op.u.physinfo.cpu_to_node, - (void *)desc1); - break; -#endif - default: - printk("%s: unknown sysctl cmd %d\n", __func__, kern_op.cmd); - return -ENOSYS; - } - - if (ret) { - /* error mapping the nested pointer */ - return ret; - } - - ret = xencomm_arch_hypercall_sysctl(op_desc); - - /* FIXME: should we restore the handles? */ - if (copy_to_user(user_op, &kern_op, sizeof(xen_sysctl_t))) - ret = -EFAULT; - - if (desc) - xencomm_free(desc); - if (desc1) - xencomm_free(desc1); - return ret; -} - -static int -xencomm_privcmd_domctl(privcmd_hypercall_t *hypercall) -{ - xen_domctl_t kern_op; - xen_domctl_t __user *user_op; - struct xencomm_handle *op_desc; - struct xencomm_handle *desc = NULL; - int ret = 0; - - user_op = (xen_domctl_t __user *)hypercall->arg[0]; - - if (copy_from_user(&kern_op, user_op, sizeof(xen_domctl_t))) - return -EFAULT; - - if (kern_op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) - return -EACCES; - - op_desc = xencomm_create_inline(&kern_op); - - switch (kern_op.cmd) { - case XEN_DOMCTL_createdomain: - case XEN_DOMCTL_destroydomain: - case XEN_DOMCTL_pausedomain: - case XEN_DOMCTL_unpausedomain: - case XEN_DOMCTL_getdomaininfo: - break; - case XEN_DOMCTL_getmemlist: - { - unsigned long nr_pages = kern_op.u.getmemlist.max_pfns; - - ret = xencomm_create( - xen_guest_handle(kern_op.u.getmemlist.buffer), - nr_pages * sizeof(unsigned long), - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.getmemlist.buffer, - (void *)desc); - break; - } - case XEN_DOMCTL_getpageframeinfo: - break; - case XEN_DOMCTL_getpageframeinfo2: - ret = xencomm_create( - xen_guest_handle(kern_op.u.getpageframeinfo2.array), - kern_op.u.getpageframeinfo2.num, - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.getpageframeinfo2.array, - (void *)desc); - break; - case XEN_DOMCTL_shadow_op: - ret = xencomm_create( - xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap), - ROUND_DIV(kern_op.u.shadow_op.pages, 8), - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap, - (void *)desc); - break; - case XEN_DOMCTL_max_mem: - break; - case XEN_DOMCTL_setvcpucontext: - case XEN_DOMCTL_getvcpucontext: - ret = xencomm_create( - xen_guest_handle(kern_op.u.vcpucontext.ctxt), - sizeof(vcpu_guest_context_t), - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.vcpucontext.ctxt, (void *)desc); - break; - case XEN_DOMCTL_getvcpuinfo: - break; - case XEN_DOMCTL_setvcpuaffinity: - case XEN_DOMCTL_getvcpuaffinity: - ret = xencomm_create( - xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap), - ROUND_DIV(kern_op.u.vcpuaffinity.cpumap.nr_cpus, 8), - &desc, GFP_KERNEL); - set_xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap, - (void *)desc); - break; - case XEN_DOMCTL_max_vcpus: - case XEN_DOMCTL_scheduler_op: - case XEN_DOMCTL_setdomainhandle: - case XEN_DOMCTL_setdebugging: - case XEN_DOMCTL_irq_permission: - case XEN_DOMCTL_iomem_permission: - case XEN_DOMCTL_ioport_permission: - case XEN_DOMCTL_hypercall_init: - case XEN_DOMCTL_arch_setup: - case XEN_DOMCTL_settimeoffset: - case XEN_DOMCTL_sendtrigger: - break; - default: - printk("%s: unknown domctl cmd %d\n", __func__, kern_op.cmd); - return -ENOSYS; - } - - if (ret) { - /* error mapping the nested pointer */ - return ret; - } - - ret = xencomm_arch_hypercall_domctl (op_desc); - - /* FIXME: should we restore the handle? */ - if (copy_to_user(user_op, &kern_op, sizeof(xen_domctl_t))) - ret = -EFAULT; - - if (desc) - xencomm_free(desc); - return ret; -} - -static int -xencomm_privcmd_acm_op(privcmd_hypercall_t *hypercall) -{ - int cmd = hypercall->arg[0]; - void __user *arg = (void __user *)hypercall->arg[1]; - struct xencomm_handle *op_desc; - struct xencomm_handle *desc = NULL; - int ret; - - switch (cmd) { - case ACMOP_getssid: - { - struct acm_getssid kern_arg; - - if (copy_from_user(&kern_arg, arg, sizeof (kern_arg))) - return -EFAULT; - - op_desc = xencomm_create_inline(&kern_arg); - - ret = xencomm_create(xen_guest_handle(kern_arg.ssidbuf), - kern_arg.ssidbuf_size, &desc, GFP_KERNEL); - if (ret) - return ret; - - set_xen_guest_handle(kern_arg.ssidbuf, (void *)desc); - - ret = xencomm_arch_hypercall_acm_op(cmd, op_desc); - - xencomm_free(desc); - - if (copy_to_user(arg, &kern_arg, sizeof (kern_arg))) - return -EFAULT; - - return ret; - } - default: - printk("%s: unknown acm_op cmd %d\n", __func__, cmd); - return -ENOSYS; - } - - return ret; -} - -static int -xencomm_privcmd_memory_op(privcmd_hypercall_t *hypercall) -{ - const unsigned long cmd = hypercall->arg[0]; - int ret = 0; - - switch (cmd) { - case XENMEM_increase_reservation: - case XENMEM_decrease_reservation: - case XENMEM_populate_physmap: - { - xen_memory_reservation_t kern_op; - xen_memory_reservation_t __user *user_op; - struct xencomm_handle *desc = NULL; - struct xencomm_handle *desc_op; - - user_op = (xen_memory_reservation_t __user *)hypercall->arg[1]; - if (copy_from_user(&kern_op, user_op, - sizeof(xen_memory_reservation_t))) - return -EFAULT; - desc_op = xencomm_create_inline(&kern_op); - - if (xen_guest_handle(kern_op.extent_start)) { - void * addr; - - addr = xen_guest_handle(kern_op.extent_start); - ret = xencomm_create - (addr, - kern_op.nr_extents * - sizeof(*xen_guest_handle - (kern_op.extent_start)), - &desc, GFP_KERNEL); - if (ret) - return ret; - set_xen_guest_handle(kern_op.extent_start, - (void *)desc); - } - - ret = xencomm_arch_hypercall_memory_op(cmd, desc_op); - - if (desc) - xencomm_free(desc); - - if (ret != 0) - return ret; - - if (copy_to_user(user_op, &kern_op, - sizeof(xen_memory_reservation_t))) - return -EFAULT; - - return ret; - } - case XENMEM_translate_gpfn_list: - { - xen_translate_gpfn_list_t kern_op; - xen_translate_gpfn_list_t __user *user_op; - struct xencomm_handle *desc_gpfn = NULL; - struct xencomm_handle *desc_mfn = NULL; - struct xencomm_handle *desc_op; - void *addr; - - user_op = (xen_translate_gpfn_list_t __user *) - hypercall->arg[1]; - if (copy_from_user(&kern_op, user_op, - sizeof(xen_translate_gpfn_list_t))) - return -EFAULT; - desc_op = xencomm_create_inline(&kern_op); - - if (kern_op.nr_gpfns) { - /* gpfn_list. */ - addr = xen_guest_handle(kern_op.gpfn_list); - - ret = xencomm_create(addr, kern_op.nr_gpfns * - sizeof(*xen_guest_handle - (kern_op.gpfn_list)), - &desc_gpfn, GFP_KERNEL); - if (ret) - return ret; - set_xen_guest_handle(kern_op.gpfn_list, - (void *)desc_gpfn); - - /* mfn_list. */ - addr = xen_guest_handle(kern_op.mfn_list); - - ret = xencomm_create(addr, kern_op.nr_gpfns * - sizeof(*xen_guest_handle - (kern_op.mfn_list)), - &desc_mfn, GFP_KERNEL); - if (ret) - return ret; - set_xen_guest_handle(kern_op.mfn_list, - (void *)desc_mfn); - } - - ret = xencomm_arch_hypercall_memory_op(cmd, desc_op); - - if (desc_gpfn) - xencomm_free(desc_gpfn); - - if (desc_mfn) - xencomm_free(desc_mfn); - - if (ret != 0) - return ret; - - return ret; - } - default: - printk("%s: unknown memory op %lu\n", __func__, cmd); - ret = -ENOSYS; - } - return ret; -} - -static int -xencomm_privcmd_xen_version(privcmd_hypercall_t *hypercall) -{ - int cmd = hypercall->arg[0]; - void __user *arg = (void __user *)hypercall->arg[1]; - struct xencomm_handle *desc; - size_t argsize; - int rc; - - switch (cmd) { - case XENVER_version: - /* do not actually pass an argument */ - return xencomm_arch_hypercall_xen_version(cmd, 0); - case XENVER_extraversion: - argsize = sizeof(xen_extraversion_t); - break; - case XENVER_compile_info: - argsize = sizeof(xen_compile_info_t); - break; - case XENVER_capabilities: - argsize = sizeof(xen_capabilities_info_t); - break; - case XENVER_changeset: - argsize = sizeof(xen_changeset_info_t); - break; - case XENVER_platform_parameters: - argsize = sizeof(xen_platform_parameters_t); - break; - case XENVER_pagesize: - argsize = (arg == NULL) ? 0 : sizeof(void *); - break; - case XENVER_get_features: - argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t); - break; - - default: - printk("%s: unknown version op %d\n", __func__, cmd); - return -ENOSYS; - } - - rc = xencomm_create(arg, argsize, &desc, GFP_KERNEL); - if (rc) - return rc; - - rc = xencomm_arch_hypercall_xen_version(cmd, desc); - - xencomm_free(desc); - - return rc; -} - -static int -xencomm_privcmd_event_channel_op(privcmd_hypercall_t *hypercall) -{ - int cmd = hypercall->arg[0]; - struct xencomm_handle *desc; - unsigned int argsize; - int ret; - - switch (cmd) { - case EVTCHNOP_alloc_unbound: - argsize = sizeof(evtchn_alloc_unbound_t); - break; - - case EVTCHNOP_status: - argsize = sizeof(evtchn_status_t); - break; - - default: - printk("%s: unknown EVTCHNOP %d\n", __func__, cmd); - return -EINVAL; - } - - ret = xencomm_create((void *)hypercall->arg[1], argsize, - &desc, GFP_KERNEL); - if (ret) - return ret; - - ret = xencomm_arch_hypercall_event_channel_op(cmd, desc); - - xencomm_free(desc); - return ret; -} - -static int -xencomm_privcmd_hvm_op(privcmd_hypercall_t *hypercall) -{ - int cmd = hypercall->arg[0]; - struct xencomm_handle *desc; - unsigned int argsize; - int ret; - - switch (cmd) { - case HVMOP_get_param: - case HVMOP_set_param: - argsize = sizeof(xen_hvm_param_t); - break; - case HVMOP_set_pci_intx_level: - argsize = sizeof(xen_hvm_set_pci_intx_level_t); - break; - case HVMOP_set_isa_irq_level: - argsize = sizeof(xen_hvm_set_isa_irq_level_t); - break; - case HVMOP_set_pci_link_route: - argsize = sizeof(xen_hvm_set_pci_link_route_t); - break; - - default: - printk("%s: unknown HVMOP %d\n", __func__, cmd); - return -EINVAL; - } - - ret = xencomm_create((void *)hypercall->arg[1], argsize, - &desc, GFP_KERNEL); - if (ret) - return ret; - - ret = xencomm_arch_hypercall_hvm_op(cmd, desc); - - xencomm_free(desc); - return ret; -} - -static int -xencomm_privcmd_sched_op(privcmd_hypercall_t *hypercall) -{ - int cmd = hypercall->arg[0]; - struct xencomm_handle *desc; - unsigned int argsize; - int ret; - - switch (cmd) { - case SCHEDOP_remote_shutdown: - argsize = sizeof(sched_remote_shutdown_t); - break; - default: - printk("%s: unknown SCHEDOP %d\n", __func__, cmd); - return -EINVAL; - } - - ret = xencomm_create((void *)hypercall->arg[1], argsize, - &desc, GFP_KERNEL); - if (ret) - return ret; - - ret = xencomm_arch_hypercall_sched_op(cmd, desc); - - xencomm_free(desc); - return ret; -} - -int -privcmd_hypercall(privcmd_hypercall_t *hypercall) -{ - switch (hypercall->op) { - case __HYPERVISOR_platform_op: - return xencomm_privcmd_platform_op(hypercall); - case __HYPERVISOR_domctl: - return xencomm_privcmd_domctl(hypercall); - case __HYPERVISOR_sysctl: - return xencomm_privcmd_sysctl(hypercall); - case __HYPERVISOR_acm_op: - return xencomm_privcmd_acm_op(hypercall); - case __HYPERVISOR_xen_version: - return xencomm_privcmd_xen_version(hypercall); - case __HYPERVISOR_memory_op: - return xencomm_privcmd_memory_op(hypercall); - case __HYPERVISOR_event_channel_op: - return xencomm_privcmd_event_channel_op(hypercall); - case __HYPERVISOR_hvm_op: - return xencomm_privcmd_hvm_op(hypercall); - case __HYPERVISOR_sched_op: - return xencomm_privcmd_sched_op(hypercall); - default: - printk("%s: unknown hcall (%ld)\n", __func__, hypercall->op); - return -ENOSYS; - } -} - diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c b/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c deleted file mode 100644 index 5962e73aa6..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. - * Alex Williamson <alex.williamson@hp.com> - * - * Basic DMA mapping services for Xen guests. - * Based on arch/i386/kernel/pci-dma-xen.c. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <asm/scatterlist.h> - -#define IOMMU_BUG_ON(test) \ -do { \ - if (unlikely(test)) { \ - printk(KERN_ALERT "Fatal DMA error!\n"); \ - BUG(); \ - } \ -} while (0) - - -/* - * This should be broken out of swiotlb and put in a common place - * when merged with upstream Linux. - */ -static inline int -address_needs_mapping(struct device *hwdev, dma_addr_t addr) -{ - dma_addr_t mask = 0xffffffff; - - /* If the device has a mask, use it, otherwise default to 32 bits */ - if (hwdev && hwdev->dma_mask) - mask = *hwdev->dma_mask; - return (addr & ~mask) != 0; -} - -int -xen_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, - int direction) -{ - int i; - - for (i = 0 ; i < nents ; i++) { - sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; - sg[i].dma_length = sg[i].length; - - IOMMU_BUG_ON(address_needs_mapping(hwdev, sg[i].dma_address)); - } - - return nents; -} -EXPORT_SYMBOL(xen_map_sg); - -void -xen_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - int direction) -{ -} -EXPORT_SYMBOL(xen_unmap_sg); - -int -xen_dma_mapping_error(dma_addr_t dma_addr) -{ - return 0; -} -EXPORT_SYMBOL(xen_dma_mapping_error); - -int -xen_dma_supported(struct device *dev, u64 mask) -{ - return 1; -} -EXPORT_SYMBOL(xen_dma_supported); - -void * -xen_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - unsigned long vaddr; - unsigned int order = get_order(size); - - vaddr = __get_free_pages(gfp, order); - - if (!vaddr) - return NULL; - - if (xen_create_contiguous_region(vaddr, order, - dev->coherent_dma_mask)) { - free_pages(vaddr, order); - return NULL; - } - - memset((void *)vaddr, 0, size); - *dma_handle = virt_to_bus((void *)vaddr); - - return (void *)vaddr; -} -EXPORT_SYMBOL(xen_alloc_coherent); - -void -xen_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - unsigned int order = get_order(size); - - xen_destroy_contiguous_region((unsigned long)vaddr, order); - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(xen_free_coherent); - -dma_addr_t -xen_map_single(struct device *dev, void *ptr, size_t size, - int direction) -{ - dma_addr_t dma_addr = virt_to_bus(ptr); - - IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size)); - IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); - - return dma_addr; -} -EXPORT_SYMBOL(xen_map_single); - -void -xen_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - int direction) -{ -} -EXPORT_SYMBOL(xen_unmap_single); diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c b/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c deleted file mode 100644 index 367b6b32de..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/gfp.h> -#include <linux/mm.h> -#include <xen/interface/xen.h> -#include <asm/page.h> - -#ifdef HAVE_XEN_PLATFORM_COMPAT_H -#include <xen/platform-compat.h> -#endif - -#include <asm/xen/xencomm.h> - -static int xencomm_debug = 0; - -static unsigned long kernel_start_pa; - -void -xencomm_init (void) -{ - kernel_start_pa = KERNEL_START - ia64_tpa(KERNEL_START); -} - -/* Translate virtual address to physical address. */ -unsigned long -xencomm_vaddr_to_paddr(unsigned long vaddr) -{ -#ifndef CONFIG_VMX_GUEST - struct page *page; - struct vm_area_struct *vma; -#endif - - if (vaddr == 0) - return 0; - -#ifdef __ia64__ - if (REGION_NUMBER(vaddr) == 5) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep; - - /* On ia64, TASK_SIZE refers to current. It is not initialized - during boot. - Furthermore the kernel is relocatable and __pa() doesn't - work on addresses. */ - if (vaddr >= KERNEL_START - && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE)) { - return vaddr - kernel_start_pa; - } - - /* In kernel area -- virtually mapped. */ - pgd = pgd_offset_k(vaddr); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - return ~0UL; - - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud) || pud_bad(*pud)) - return ~0UL; - - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - return ~0UL; - - ptep = pte_offset_kernel(pmd, vaddr); - if (!ptep) - return ~0UL; - - return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK); - } -#endif - - if (vaddr > TASK_SIZE) { - /* kernel address */ - return __pa(vaddr); - } - - -#ifdef CONFIG_VMX_GUEST - /* No privcmd within vmx guest. */ - return ~0UL; -#else - /* XXX double-check (lack of) locking */ - vma = find_extend_vma(current->mm, vaddr); - if (!vma) - return ~0UL; - - /* We assume the page is modified. */ - page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH); - if (!page) - return ~0UL; - - return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); -#endif -} - -static int -xencomm_init_desc(struct xencomm_desc *desc, void *buffer, unsigned long bytes) -{ - unsigned long recorded = 0; - int i = 0; - - BUG_ON((buffer == NULL) && (bytes > 0)); - - /* record the physical pages used */ - if (buffer == NULL) - desc->nr_addrs = 0; - - while ((recorded < bytes) && (i < desc->nr_addrs)) { - unsigned long vaddr = (unsigned long)buffer + recorded; - unsigned long paddr; - int offset; - int chunksz; - - offset = vaddr % PAGE_SIZE; /* handle partial pages */ - chunksz = min(PAGE_SIZE - offset, bytes - recorded); - - paddr = xencomm_vaddr_to_paddr(vaddr); - if (paddr == ~0UL) { - printk("%s: couldn't translate vaddr %lx\n", - __func__, vaddr); - return -EINVAL; - } - - desc->address[i++] = paddr; - recorded += chunksz; - } - - if (recorded < bytes) { - printk("%s: could only translate %ld of %ld bytes\n", - __func__, recorded, bytes); - return -ENOSPC; - } - - /* mark remaining addresses invalid (just for safety) */ - while (i < desc->nr_addrs) - desc->address[i++] = XENCOMM_INVALID; - - desc->magic = XENCOMM_MAGIC; - - return 0; -} - -static struct xencomm_desc * -xencomm_alloc(gfp_t gfp_mask) -{ - struct xencomm_desc *desc; - - desc = (struct xencomm_desc *)__get_free_page(gfp_mask); - if (desc == NULL) - panic("%s: page allocation failed\n", __func__); - - desc->nr_addrs = (PAGE_SIZE - sizeof(struct xencomm_desc)) / - sizeof(*desc->address); - - return desc; -} - -void -xencomm_free(struct xencomm_handle *desc) -{ - if (desc) - free_page((unsigned long)__va(desc)); -} - -int -xencomm_create(void *buffer, unsigned long bytes, - struct xencomm_handle **ret, gfp_t gfp_mask) -{ - struct xencomm_desc *desc; - struct xencomm_handle *handle; - int rc; - - if (xencomm_debug) - printk("%s: %p[%ld]\n", __func__, buffer, bytes); - - if (buffer == NULL || bytes == 0) { - *ret = (struct xencomm_handle *)NULL; - return 0; - } - - desc = xencomm_alloc(gfp_mask); - if (!desc) { - printk("%s failure\n", "xencomm_alloc"); - return -ENOMEM; - } - handle = (struct xencomm_handle *)__pa(desc); - - rc = xencomm_init_desc(desc, buffer, bytes); - if (rc) { - printk("%s failure: %d\n", "xencomm_init_desc", rc); - xencomm_free(handle); - return rc; - } - - *ret = handle; - return 0; -} - -/* "mini" routines, for stack-based communications: */ - -static void * -xencomm_alloc_mini(struct xencomm_mini *area, int *nbr_area) -{ - unsigned long base; - unsigned int pageoffset; - - while (*nbr_area >= 0) { - /* Allocate an area. */ - (*nbr_area)--; - - base = (unsigned long)(area + *nbr_area); - pageoffset = base % PAGE_SIZE; - - /* If the area does not cross a page, use it. */ - if ((PAGE_SIZE - pageoffset) >= sizeof(struct xencomm_mini)) - return &area[*nbr_area]; - } - /* No more area. */ - return NULL; -} - -int -xencomm_create_mini(struct xencomm_mini *area, int *nbr_area, - void *buffer, unsigned long bytes, - struct xencomm_handle **ret) -{ - struct xencomm_desc *desc; - int rc; - unsigned long res; - - desc = xencomm_alloc_mini(area, nbr_area); - if (!desc) - return -ENOMEM; - desc->nr_addrs = XENCOMM_MINI_ADDRS; - - rc = xencomm_init_desc(desc, buffer, bytes); - if (rc) - return rc; - - res = xencomm_vaddr_to_paddr((unsigned long)desc); - if (res == ~0UL) - return -EINVAL; - - *ret = (struct xencomm_handle*)res; - return 0; -} diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S deleted file mode 100644 index b9394ab050..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S +++ /dev/null @@ -1,931 +0,0 @@ -/* - * ia64/xen/entry.S - * - * Alternate kernel routines for Xen. Heavily leveraged from - * ia64/kernel/entry.S - * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@.hp.com> - */ - -#include <asm/asmmacro.h> -#include <asm/cache.h> -#include <asm/errno.h> -#include <asm/kregs.h> -#include <asm/asm-offsets.h> -#include <asm/pgtable.h> -#include <asm/percpu.h> -#include <asm/processor.h> -#include <asm/thread_info.h> -#include <asm/unistd.h> - -#ifdef CONFIG_XEN -#include "xenminstate.h" -#else -#include "minstate.h" -#endif - -/* - * prev_task <- ia64_switch_to(struct task_struct *next) - * With Ingo's new scheduler, interrupts are disabled when this routine gets - * called. The code starting at .map relies on this. The rest of the code - * doesn't care about the interrupt masking status. - */ -#ifdef CONFIG_XEN -GLOBAL_ENTRY(xen_switch_to) - .prologue - alloc r16=ar.pfs,1,0,0,0 - movl r22=running_on_xen;; - ld4 r22=[r22];; - cmp.eq p7,p0=r22,r0 -(p7) br.cond.sptk.many __ia64_switch_to;; -#else -GLOBAL_ENTRY(ia64_switch_to) - .prologue - alloc r16=ar.pfs,1,0,0,0 -#endif - DO_SAVE_SWITCH_STACK - .body - - adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - movl r25=init_task - mov r27=IA64_KR(CURRENT_STACK) - adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 - dep r20=0,in0,61,3 // physical address of "next" - ;; - st8 [r22]=sp // save kernel stack pointer of old task - shr.u r26=r20,IA64_GRANULE_SHIFT - cmp.eq p7,p6=r25,in0 - ;; - /* - * If we've already mapped this task's page, we can skip doing it again. - */ -(p6) cmp.eq p7,p6=r26,r27 -(p6) br.cond.dpnt .map - ;; -.done: - ld8 sp=[r21] // load kernel stack pointer of new task -#ifdef CONFIG_XEN - // update "current" application register - mov r8=IA64_KR_CURRENT - mov r9=in0;; - XEN_HYPER_SET_KR -#else - mov IA64_KR(CURRENT)=in0 // update "current" application register -#endif - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer - ;; - DO_LOAD_SWITCH_STACK - -#ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.many rp // boogie on out in new context - -.map: -#ifdef CONFIG_XEN - movl r25=XSI_PSR_IC // clear psr.ic - ;; - st4 [r25]=r0 - ;; -#else - rsm psr.ic // interrupts (psr.i) are already disabled here -#endif - movl r25=PAGE_KERNEL - ;; - srlz.d - or r23=r25,r20 // construct PA | page properties - mov r25=IA64_GRANULE_SHIFT<<2 - ;; -#ifdef CONFIG_XEN - movl r8=XSI_ITIR - ;; - st8 [r8]=r25 - ;; - movl r8=XSI_IFA - ;; - st8 [r8]=in0 // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK - // remember last page we mapped... - mov r8=IA64_KR_CURRENT_STACK - mov r9=r26;; - XEN_HYPER_SET_KR;; -#else - mov cr.itir=r25 - mov cr.ifa=in0 // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK - mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped... -#endif - ;; - itr.d dtr[r25]=r23 // wire in new mapping... -#ifdef CONFIG_XEN - ;; - srlz.d - mov r9=1 - movl r8=XSI_PSR_IC - ;; - st4 [r8]=r9 - ;; -#else - ssm psr.ic // reenable the psr.ic bit - ;; - srlz.d -#endif - br.cond.sptk .done -#ifdef CONFIG_XEN -END(xen_switch_to) -#else -END(ia64_switch_to) -#endif - - /* - * Invoke a system call, but do some tracing before and after the call. - * We MUST preserve the current register frame throughout this routine - * because some system calls (such as ia64_execve) directly - * manipulate ar.pfs. - */ -#ifdef CONFIG_XEN -GLOBAL_ENTRY(xen_trace_syscall) - PT_REGS_UNWIND_INFO(0) - movl r16=running_on_xen;; - ld4 r16=[r16];; - cmp.eq p7,p0=r16,r0 -(p7) br.cond.sptk.many __ia64_trace_syscall;; -#else -GLOBAL_ENTRY(ia64_trace_syscall) - PT_REGS_UNWIND_INFO(0) -#endif - /* - * We need to preserve the scratch registers f6-f11 in case the system - * call is sigreturn. - */ - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - ldf.fill f6=[r16],32 - ldf.fill f7=[r17],32 - ;; - ldf.fill f8=[r16],32 - ldf.fill f9=[r17],32 - ;; - ldf.fill f10=[r16] - ldf.fill f11=[r17] - // the syscall number may have changed, so re-load it and re-calculate the - // syscall entry-point: - adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) - ;; - ld8 r15=[r15] - mov r3=NR_syscalls - 1 - ;; - adds r15=-1024,r15 - movl r16=sys_call_table - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 - ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - ;; - mov b6=r20 - br.call.sptk.many rp=b6 // do the syscall -.strace_check_retval: - cmp.lt p6,p0=r8,r0 // syscall failed? - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - ;; // avoid RAW on r10 -.strace_save_retval: -.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 -.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk - br.cond.sptk .work_pending_syscall_end - -strace_error: - ld8 r3=[r2] // load pt_regs.r8 - sub r9=0,r8 // negate return value to get errno value - ;; - cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? - adds r3=16,r2 // r3=&pt_regs.r10 - ;; -(p6) mov r10=-1 -(p6) mov r8=r9 - br.cond.sptk .strace_save_retval -#ifdef CONFIG_XEN -END(xen_trace_syscall) -#else -END(ia64_trace_syscall) -#endif - -#ifdef CONFIG_XEN -GLOBAL_ENTRY(xen_ret_from_clone) - PT_REGS_UNWIND_INFO(0) - movl r16=running_on_xen;; - ld4 r16=[r16];; - cmp.eq p7,p0=r16,r0 -(p7) br.cond.sptk.many __ia64_ret_from_clone;; -#else -GLOBAL_ENTRY(ia64_ret_from_clone) - PT_REGS_UNWIND_INFO(0) -#endif -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - /* - * We need to call schedule_tail() to complete the scheduling process. - * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the - * address of the previously executing task. - */ - br.call.sptk.many rp=ia64_invoke_schedule_tail -} -.ret8: - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] - ;; - mov r8=0 - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 - ;; - cmp.ne p6,p0=r2,r0 -(p6) br.cond.spnt .strace_check_retval - ;; // added stop bits to prevent r8 dependency -#ifdef CONFIG_XEN - br.cond.sptk ia64_ret_from_syscall -END(xen_ret_from_clone) -#else -END(ia64_ret_from_clone) -#endif -/* - * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't - * need to switch to bank 0 and doesn't restore the scratch registers. - * To avoid leaking kernel bits, the scratch registers are set to - * the following known-to-be-safe values: - * - * r1: restored (global pointer) - * r2: cleared - * r3: 1 (when returning to user-level) - * r8-r11: restored (syscall return value(s)) - * r12: restored (user-level stack pointer) - * r13: restored (user-level thread pointer) - * r14: set to __kernel_syscall_via_epc - * r15: restored (syscall #) - * r16-r17: cleared - * r18: user-level b6 - * r19: cleared - * r20: user-level ar.fpsr - * r21: user-level b0 - * r22: cleared - * r23: user-level ar.bspstore - * r24: user-level ar.rnat - * r25: user-level ar.unat - * r26: user-level ar.pfs - * r27: user-level ar.rsc - * r28: user-level ip - * r29: user-level psr - * r30: user-level cfm - * r31: user-level pr - * f6-f11: cleared - * pr: restored (user-level pr) - * b0: restored (user-level rp) - * b6: restored - * b7: set to __kernel_syscall_via_epc - * ar.unat: restored (user-level ar.unat) - * ar.pfs: restored (user-level ar.pfs) - * ar.rsc: restored (user-level ar.rsc) - * ar.rnat: restored (user-level ar.rnat) - * ar.bspstore: restored (user-level ar.bspstore) - * ar.fpsr: restored (user-level ar.fpsr) - * ar.ccv: cleared - * ar.csd: cleared - * ar.ssd: cleared - */ -#ifdef CONFIG_XEN -GLOBAL_ENTRY(xen_leave_syscall) - PT_REGS_UNWIND_INFO(0) - movl r22=running_on_xen;; - ld4 r22=[r22];; - cmp.eq p7,p0=r22,r0 -(p7) br.cond.sptk.many __ia64_leave_syscall;; -#else -ENTRY(ia64_leave_syscall) - PT_REGS_UNWIND_INFO(0) -#endif - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. - */ -#ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else /* !CONFIG_PREEMPT */ -#ifdef CONFIG_XEN - movl r2=XSI_PSR_I_ADDR - mov r18=1 - ;; - ld8 r2=[r2] - ;; -(pUStk) st1 [r2]=r18 -#else -(pUStk) rsm psr.i -#endif - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.work_processed_syscall: - adds r2=PT(LOADRS)+16,r12 - adds r3=PT(AR_BSPSTORE)+16,r12 - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - nop.i 0 - ;; - mov r16=ar.bsp // M2 get existing backing store pointer - ld8 r18=[r2],PT(R9)-PT(B6) // load b6 -(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) -(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? -(p6) br.cond.spnt .work_pending_syscall - ;; - // start restoring the state saved on the kernel stack (struct pt_regs): - ld8 r9=[r2],PT(CR_IPSR)-PT(R9) - ld8 r11=[r3],PT(CR_IIP)-PT(R11) -(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! - ;; - invala // M0|1 invalidate ALAT -#ifdef CONFIG_XEN - movl r28=XSI_PSR_I_ADDR - movl r29=XSI_PSR_IC - ;; - ld8 r28=[r28] - mov r30=1 - ;; - st1 [r28]=r30 - st4 [r29]=r0 // note: clears both vpsr.i and vpsr.ic! - ;; -#else - rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection -#endif - cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs - - ld8 r29=[r2],16 // M0|1 load cr.ipsr - ld8 r28=[r3],16 // M0|1 load cr.iip - mov r22=r0 // A clear r22 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; - ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs -#ifdef CONFIG_XEN -(pKStk) mov r21=r8 -(pKStk) XEN_HYPER_GET_PSR - ;; -(pKStk) mov r22=r8 -(pKStk) mov r8=r21 - ;; -#else -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled -#endif - nop 0 - ;; - ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 - ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc - mov f6=f0 // F clear f6 - ;; - ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) - ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates - mov f7=f0 // F clear f7 - ;; - ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr - ld8.fill r1=[r3],16 // M0|1 load r1 -(pUStk) mov r17=1 // A - ;; -(pUStk) st1 [r14]=r17 // M2|3 - ld8.fill r13=[r3],16 // M0|1 - mov f8=f0 // F clear f8 - ;; - ld8.fill r12=[r2] // M0|1 restore r12 (sp) - ld8.fill r15=[r3] // M0|1 restore r15 - mov b6=r18 // I0 restore b6 - - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A - mov f9=f0 // F clear f9 -(pKStk) br.cond.dpnt.many skip_rbs_switch // B - - srlz.d // M0 ensure interruption collection is off (for cover) - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition -#ifdef CONFIG_XEN - XEN_HYPER_COVER; -#else - cover // B add current frame into dirty partition & set cr.ifs -#endif - ;; -(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 - mov r19=ar.bsp // M2 get new backing store pointer - mov f10=f0 // F clear f10 - - nop.m 0 - movl r14=__kernel_syscall_via_epc // X - ;; - mov.m ar.csd=r0 // M2 clear ar.csd - mov.m ar.ccv=r0 // M2 clear ar.ccv - mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) - - mov.m ar.ssd=r0 // M2 clear ar.ssd - mov f11=f0 // F clear f11 - br.cond.sptk.many rbs_switch // B -#ifdef CONFIG_XEN -END(xen_leave_syscall) -#else -END(ia64_leave_syscall) -#endif - -#ifdef CONFIG_XEN -GLOBAL_ENTRY(xen_leave_kernel) - PT_REGS_UNWIND_INFO(0) - movl r22=running_on_xen;; - ld4 r22=[r22];; - cmp.eq p7,p0=r22,r0 -(p7) br.cond.sptk.many __ia64_leave_kernel;; -#else -GLOBAL_ENTRY(ia64_leave_kernel) - PT_REGS_UNWIND_INFO(0) -#endif - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. - */ -#ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else -#ifdef CONFIG_XEN -(pUStk) movl r17=XSI_PSR_I_ADDR -(pUStk) mov r31=1 - ;; -(pUStk) ld8 r17=[r17] - ;; -(pUStk) st1 [r17]=r31 - ;; -#else -(pUStk) rsm psr.i -#endif - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.work_processed_kernel: - adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r17] // load current_thread_info()->flags - adds r21=PT(PR)+16,r12 - ;; - - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) - adds r30=PT(AR_CCV)+16,r12 -(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv -(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd -(p6) br.cond.spnt .work_pending - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; -#ifdef CONFIG_XEN - movl r23=XSI_PSR_I_ADDR - movl r22=XSI_PSR_IC - ;; - ld8 r23=[r23] - mov r25=1 - ;; - st1 [r23]=r25 - st4 [r22]=r0 // note: clears both vpsr.i and vpsr.ic! - ;; -#else - rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection -#endif - invala // invalidate ALAT - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) - mov ar.ccv=r15 - ;; - ldf.fill f11=[r2] -#ifdef CONFIG_XEN - ;; - // r16-r31 all now hold bank1 values - mov r15=ar.unat - movl r2=XSI_BANK1_R16 - movl r3=XSI_BANK1_R16+8 - ;; -.mem.offset 0,0; st8.spill [r2]=r16,16 -.mem.offset 8,0; st8.spill [r3]=r17,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r18,16 -.mem.offset 8,0; st8.spill [r3]=r19,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r20,16 -.mem.offset 8,0; st8.spill [r3]=r21,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r22,16 -.mem.offset 8,0; st8.spill [r3]=r23,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r24,16 -.mem.offset 8,0; st8.spill [r3]=r25,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r26,16 -.mem.offset 8,0; st8.spill [r3]=r27,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r28,16 -.mem.offset 8,0; st8.spill [r3]=r29,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r30,16 -.mem.offset 8,0; st8.spill [r3]=r31,16 - ;; - mov r3=ar.unat - movl r2=XSI_B1NAT - ;; - st8 [r2]=r3 - mov ar.unat=r15 - movl r2=XSI_BANKNUM;; - st4 [r2]=r0; -#else - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) -#endif - ;; -(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - -#ifdef CONFIG_XEN -(pKStk) mov r29=r8 -(pKStk) XEN_HYPER_GET_PSR - ;; -(pKStk) mov r22=r8 -(pKStk) mov r8=r29 - ;; -#else -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled -#endif - nop.i 0 - nop.i 0 - ;; - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16 // load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r21=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] -(pUStk) mov r17=1 - ;; - ld8.fill r3=[r16] -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - mov r16=ar.bsp // get existing backing store pointer - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 - ;; - ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 -(pKStk) br.cond.dpnt skip_rbs_switch - - /* - * Restore user backing store. - * - * NOTE: alloc, loadrs, and cover can't be predicated. - */ -(pNonSys) br.cond.dpnt dont_preserve_current_frame - -#ifdef CONFIG_XEN - XEN_HYPER_COVER; -#else - cover // add current frame into dirty partition and set cr.ifs -#endif - ;; - mov r19=ar.bsp // get new backing store pointer -rbs_switch: - sub r16=r16,r18 // krbs = old bsp - size of dirty partition - cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs - ;; - sub r19=r19,r16 // calculate total byte size of dirty partition - add r18=64,r18 // don't force in0-in7 into memory... - ;; - shl r19=r19,16 // shift size of dirty partition into loadrs position - ;; -dont_preserve_current_frame: - /* - * To prevent leaking bits between the kernel and user-space, - * we must clear the stacked registers in the "invalid" partition here. - * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, - * 5 registers/cycle on McKinley). - */ -# define pRecurse p6 -# define pReturn p7 -#ifdef CONFIG_ITANIUM -# define Nregs 10 -#else -# define Nregs 14 -#endif - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r17 - mov in1=0 - ;; - TEXT_ALIGN(32) -rse_clear_invalid: -#ifdef CONFIG_ITANIUM - // cycle 0 - { .mii - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 -}{ .mfb - add out1=1,in1 // increment recursion count - nop.f 0 - nop.b 0 // can't do br.call here because of alloc (WAW on CFM) - ;; -}{ .mfi // cycle 1 - mov loc1=0 - nop.f 0 - mov loc2=0 -}{ .mib - mov loc3=0 - mov loc4=0 -(pRecurse) br.call.sptk.many b0=rse_clear_invalid - -}{ .mfi // cycle 2 - mov loc5=0 - nop.f 0 - cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret -}{ .mib - mov loc6=0 - mov loc7=0 -(pReturn) br.ret.sptk.many b0 -} -#else /* !CONFIG_ITANIUM */ - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 -(pRecurse) br.call.dptk.few b0=rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 -(pReturn) br.ret.dptk.many b0 -#endif /* !CONFIG_ITANIUM */ -# undef pRecurse -# undef pReturn - ;; - alloc r17=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; -skip_rbs_switch: - mov ar.unat=r25 // M2 -(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22 -(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise - ;; -(pUStk) mov ar.bspstore=r23 // M2 -(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp -(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise - ;; -#ifdef CONFIG_XEN - movl r25=XSI_IPSR - ;; - st8[r25]=r29,XSI_IFS_OFS-XSI_IPSR_OFS - ;; -#else - mov cr.ipsr=r29 // M2 -#endif - mov ar.pfs=r26 // I0 -(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise - -#ifdef CONFIG_XEN -(p9) st8 [r25]=r30 - ;; - adds r25=XSI_IIP_OFS-XSI_IFS_OFS,r25 - ;; -#else -(p9) mov cr.ifs=r30 // M2 -#endif - mov b0=r21 // I0 -(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise - - mov ar.fpsr=r20 // M2 -#ifdef CONFIG_XEN - st8 [r25]=r28 -#else - mov cr.iip=r28 // M2 -#endif - nop 0 - ;; -(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode - nop 0 -(pLvSys)mov r2=r0 - - mov ar.rsc=r27 // M2 - mov pr=r31,-1 // I0 -#ifdef CONFIG_XEN - ;; - XEN_HYPER_RFI; -#else - rfi // B -#endif - - /* - * On entry: - * r20 = ¤t->thread_info->pre_count (if CONFIG_PREEMPT) - * r31 = current->thread_info->flags - * On exit: - * p6 = TRUE if work-pending-check needs to be redone - */ -.work_pending_syscall: - add r2=-8,r2 - add r3=-8,r3 - ;; - st8 [r2]=r8 - st8 [r3]=r10 -.work_pending: - tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? -(p6) br.cond.sptk.few .notify -#ifdef CONFIG_PREEMPT -(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1 - ;; -(pKStk) st4 [r20]=r21 - ssm psr.i // enable interrupts -#endif - br.call.spnt.many rp=schedule -.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 -#ifdef CONFIG_XEN - movl r2=XSI_PSR_I_ADDR - mov r20=1 - ;; - ld8 r2=[r2] - ;; - st1 [r2]=r20 -#else - rsm psr.i // disable interrupts -#endif - ;; -#ifdef CONFIG_PREEMPT -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; -(pKStk) st4 [r20]=r0 // preempt_count() <- 0 -#endif -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // re-check - -.notify: -(pUStk) br.call.spnt.many rp=notify_resume_user -.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // don't re-check - -.work_pending_syscall_end: - adds r2=PT(R8)+16,r12 - adds r3=PT(R10)+16,r12 - ;; - ld8 r8=[r2] - ld8 r10=[r3] - br.cond.sptk.many .work_processed_syscall // re-check - -#ifdef CONFIG_XEN -END(xen_leave_kernel) -#else -END(ia64_leave_kernel) -#endif diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c b/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c deleted file mode 100644 index 3bc6cdbf7e..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c +++ /dev/null @@ -1,19 +0,0 @@ - -extern unsigned long xen_get_cpuid(int); - -int -running_on_sim(void) -{ - int i; - long cpuid[6]; - - for (i = 0; i < 5; ++i) - cpuid[i] = xen_get_cpuid(i); - if ((cpuid[0] & 0xff) != 'H') return 0; - if ((cpuid[3] & 0xff) != 0x4) return 0; - if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0; - if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0; - if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0; - return 1; -} - diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S deleted file mode 100644 index a411bb3a4a..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S +++ /dev/null @@ -1,2177 +0,0 @@ -/* - * arch/ia64/xen/ivt.S - * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@hp.com> - */ -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. - * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for critical - * interruptions like TLB misses. - * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) - * entry offset ----/ / / / / - * entry number ---------/ / / / - * size of the entry -------------/ / / - * vector name -------------------------------------/ / - * interruptions triggering this vector ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - -#include <asm/asmmacro.h> -#include <asm/break.h> -#include <asm/ia32.h> -#include <asm/kregs.h> -#include <asm/asm-offsets.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/system.h> -#include <asm/thread_info.h> -#include <asm/unistd.h> -#include <asm/errno.h> - -#ifdef CONFIG_XEN -#define ia64_ivt xen_ivt -#endif - -#if 1 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#if 0 - /* - * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't - * needed for something else before enabling this... - */ -# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 -#else -# define DBG_FAULT(i) -#endif - -#define MINSTATE_VIRT /* needed by minstate.h */ -#include "xenminstate.h" - -#define FAULT(n) \ - mov r31=pr; \ - mov r19=n;; /* prepare to save predicates */ \ - br.sptk.many dispatch_to_fault_handler - - .section .text.ivt,"ax" - - .align 32768 // align on 32KB boundary - .global ia64_ivt -ia64_ivt: -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(vhpt_miss) - DBG_FAULT(0) - /* - * The VHPT vector is invoked when the TLB entry for the virtual page table - * is missing. This happens only as a result of a previous - * (the "original") TLB miss, which may either be caused by an instruction - * fetch or a data access (or non-access). - * - * What we do here is normal TLB miss handing for the _original_ miss, - * followed by inserting the TLB entry for the virtual page table page - * that the VHPT walker was attempting to access. The latter gets - * inserted as long as page table entry above pte level have valid - * mappings for the faulting address. The TLB entry for the original - * miss gets inserted only if the pte entry indicates that the page is - * present. - * - * do_page_fault gets invoked in the following cases: - * - the faulting virtual address uses unimplemented address bits - * - the faulting virtual address has no valid page table mapping - */ -#ifdef CONFIG_XEN - movl r16=XSI_IFA - ;; - ld8 r16=[r16] -#ifdef CONFIG_HUGETLB_PAGE - movl r18=PAGE_SHIFT - movl r25=XSI_ITIR - ;; - ld8 r25=[r25] -#endif - ;; -#else - mov r16=cr.ifa // get address that caused the TLB miss -#ifdef CONFIG_HUGETLB_PAGE - movl r18=PAGE_SHIFT - mov r25=cr.itir -#endif -#endif - ;; -#ifdef CONFIG_XEN - XEN_HYPER_RSM_PSR_DT; -#else - rsm psr.dt // use physical addressing for data -#endif - mov r31=pr // save the predicate registers - mov r19=IA64_KR(PT_BASE) // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 - ;; - shr.u r22=r21,3 -#ifdef CONFIG_HUGETLB_PAGE - extr.u r26=r25,2,6 - ;; - cmp.ne p8,p0=r18,r26 - sub r27=r26,r18 - ;; -(p8) dep r25=r18,r25,2,6 -(p8) shr r22=r22,r27 -#endif - ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#ifdef CONFIG_PGTABLE_4 - shr.u r28=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? -#ifdef CONFIG_PGTABLE_4 - dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) - ;; - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -(p7) ld8 r29=[r28] // get *pud (may be 0) - ;; -(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? - dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) -#else - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) -#endif - ;; -(p7) ld8 r20=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? - dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) - ;; -(p7) ld8 r18=[r21] // read *pte -#ifdef CONFIG_XEN - movl r19=XSI_ISR - ;; - ld8 r19=[r19] -#else - mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss -#endif - ;; -(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? -#ifdef CONFIG_XEN - movl r22=XSI_IHA - ;; - ld8 r22=[r22] -#else - mov r22=cr.iha // get the VHPT address that caused the TLB miss -#endif - ;; // avoid RAW on p7 -(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? - dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address - ;; -#ifdef CONFIG_XEN - mov r24=r8 - mov r8=r18 - ;; -(p10) XEN_HYPER_ITC_I - ;; -(p11) XEN_HYPER_ITC_D - ;; - mov r8=r24 - ;; -#else -(p10) itc.i r18 // insert the instruction TLB entry -(p11) itc.d r18 // insert the data TLB entry -#endif -(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) -#ifdef CONFIG_XEN - movl r24=XSI_IFA - ;; - st8 [r24]=r22 - ;; -#else - mov cr.ifa=r22 -#endif - -#ifdef CONFIG_HUGETLB_PAGE -(p8) mov cr.itir=r25 // change to default page-size for VHPT -#endif - - /* - * Now compute and insert the TLB entry for the virtual page table. We never - * execute in a page table page so there is no need to set the exception deferral - * bit. - */ - adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 - ;; -#ifdef CONFIG_XEN -(p7) mov r25=r8 -(p7) mov r8=r24 - ;; -(p7) XEN_HYPER_ITC_D - ;; -(p7) mov r8=r25 - ;; -#else -(p7) itc.d r24 -#endif - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - /* - * Re-check pagetable entry. If they changed, we may have received a ptc.g - * between reading the pagetable and the "itc". If so, flush the entry we - * inserted and retry. At this point, we have: - * - * r28 = equivalent of pud_offset(pgd, ifa) - * r17 = equivalent of pmd_offset(pud, ifa) - * r21 = equivalent of pte_offset(pmd, ifa) - * - * r29 = *pud - * r20 = *pmd - * r18 = *pte - */ - ld8 r25=[r21] // read *pte again - ld8 r26=[r17] // read *pmd again -#ifdef CONFIG_PGTABLE_4 - ld8 r19=[r28] // read *pud again -#endif - cmp.ne p6,p7=r0,r0 - ;; - cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change -#ifdef CONFIG_PGTABLE_4 - cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change -#endif - mov r27=PAGE_SHIFT<<2 - ;; -(p6) ptc.l r22,r27 // purge PTE page translation -(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change - ;; -(p6) ptc.l r16,r27 // purge translation -#endif - - mov pr=r31,-1 // restore predicate registers -#ifdef CONFIG_XEN - XEN_HYPER_RFI - dv_serialize_data -#else - rfi -#endif -END(vhpt_miss) - - .org ia64_ivt+0x400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(itlb_miss) - DBG_FAULT(1) - /* - * The ITLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ -#ifdef CONFIG_XEN - movl r16=XSI_IFA - ;; - ld8 r16=[r16] -#else - mov r16=cr.ifa // get virtual address -#endif - mov r29=b0 // save b0 - mov r31=pr // save predicates -.itlb_fault: -#ifdef CONFIG_XEN - movl r17=XSI_IHA - ;; - ld8 r17=[r17] // get virtual address of L3 PTE -#else - mov r17=cr.iha // get virtual address of PTE -#endif - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? -(p6) br.cond.spnt page_fault - ;; -#ifdef CONFIG_XEN - mov r19=r8 - mov r8=r18 - ;; - XEN_HYPER_ITC_I - ;; - mov r8=r19 -#else - itc.i r18 -#endif - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 -#ifdef CONFIG_XEN - XEN_HYPER_RFI - dv_serialize_data -#else - rfi -#endif -END(itlb_miss) - - .org ia64_ivt+0x0800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(dtlb_miss) - DBG_FAULT(2) - /* - * The DTLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ -#ifdef CONFIG_XEN - movl r16=XSI_IFA - ;; - ld8 r16=[r16] -#else - mov r16=cr.ifa // get virtual address -#endif - mov r29=b0 // save b0 - mov r31=pr // save predicates -dtlb_fault: -#ifdef CONFIG_XEN - movl r17=XSI_IHA - ;; - ld8 r17=[r17] // get virtual address of L3 PTE -#else - mov r17=cr.iha // get virtual address of PTE -#endif - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? -(p6) br.cond.spnt page_fault - ;; -#ifdef CONFIG_XEN - mov r19=r8 - mov r8=r18 - ;; - XEN_HYPER_ITC_D - ;; - mov r8=r19 - ;; -#else - itc.d r18 -#endif - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 -#ifdef CONFIG_XEN - XEN_HYPER_RFI - dv_serialize_data -#else - rfi -#endif -END(dtlb_miss) - - .org ia64_ivt+0x0c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(alt_itlb_miss) - DBG_FAULT(3) -#ifdef CONFIG_XEN - movl r31=XSI_IPSR - ;; - ld8 r21=[r31],XSI_IFA_OFS-XSI_IPSR_OFS // get ipsr, point to ifa - movl r17=PAGE_KERNEL - ;; - ld8 r16=[r31] // get ifa -#else - mov r16=cr.ifa // get address that caused the TLB miss - movl r17=PAGE_KERNEL - mov r21=cr.ipsr -#endif - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r31=pr - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // user mode - ;; -#ifndef CONFIG_XEN -(p8) thash r17=r16 - ;; -(p8) mov cr.iha=r17 -#endif -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk .itlb_fault -#endif - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - shr.u r18=r16,57 // move address bit 61 to bit 4 - ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) - cmp.ne p8,p0=r0,r23 // psr.cpl != 0? - or r19=r17,r19 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p8) br.cond.spnt page_fault - ;; -#ifdef CONFIG_XEN - mov r18=r8 - mov r8=r19 - ;; - XEN_HYPER_ITC_I - ;; - mov r8=r18 - ;; - mov pr=r31,-1 - ;; - XEN_HYPER_RFI; -#else - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi -#endif -END(alt_itlb_miss) - - .org ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) -ENTRY(alt_dtlb_miss) - DBG_FAULT(4) -#ifdef CONFIG_XEN - movl r31=XSI_IPSR - ;; - ld8 r21=[r31],XSI_ISR_OFS-XSI_IPSR_OFS // get ipsr, point to isr - movl r17=PAGE_KERNEL - ;; - ld8 r20=[r31],XSI_IFA_OFS-XSI_ISR_OFS // get isr, point to ifa - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - ld8 r16=[r31] // get ifa -#else - mov r16=cr.ifa // get address that caused the TLB miss - movl r17=PAGE_KERNEL - mov r20=cr.isr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r21=cr.ipsr -#endif - mov r31=pr - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // access to region 0-5 - ;; -#ifndef CONFIG_XEN -(p8) thash r17=r16 - ;; -(p8) mov cr.iha=r17 -#endif -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk dtlb_fault -#endif - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field - tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? - shr.u r18=r16,57 // move address bit 61 to bit 4 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? - ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) - cmp.ne p8,p0=r0,r23 -(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field -(p8) br.cond.spnt page_fault - - dep r21=-1,r21,IA64_PSR_ED_BIT,1 - or r19=r19,r17 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p6) mov cr.ipsr=r21 - ;; -#ifdef CONFIG_XEN -(p7) mov r18=r8 -(p7) mov r8=r19 - ;; -(p7) XEN_HYPER_ITC_D - ;; -(p7) mov r8=r18 - ;; - mov pr=r31,-1 - ;; - XEN_HYPER_RFI; -#else -(p7) itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi -#endif -END(alt_dtlb_miss) - - .org ia64_ivt+0x1400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(nested_dtlb_miss) - /* - * In the absence of kernel bugs, we get here when the virtually mapped linear - * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction - * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page - * table is missing, a nested TLB miss fault is triggered and control is - * transferred to this point. When this happens, we lookup the pte for the - * faulting address by walking the page table in physical mode and return to the - * continuation point passed in register r30 (or call page_fault if the address is - * not mapped). - * - * Input: r16: faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Output: r17: physical address of PTE of faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) - */ -#ifdef CONFIG_XEN - XEN_HYPER_RSM_PSR_DT; -#else - rsm psr.dt // switch to using physical data addressing -#endif - mov r19=IA64_KR(PT_BASE) // get the page table base address - shl r21=r16,3 // shift bit 60 into sign bit -#ifdef CONFIG_XEN - movl r18=XSI_ITIR - ;; - ld8 r18=[r18] -#else - mov r18=cr.itir -#endif - ;; - shr.u r17=r16,61 // get the region number into r17 - extr.u r18=r18,2,6 // get the faulting page size - ;; - cmp.eq p6,p7=5,r17 // is faulting address in region 5? - add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address - add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 - ;; - shr.u r22=r16,r22 - shr.u r18=r16,r18 -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#ifdef CONFIG_PGTABLE_4 - shr.u r18=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) - ;; -#ifdef CONFIG_PGTABLE_4 -(p7) ld8 r17=[r17] // get *pud (may be 0) - shr.u r18=r22,PMD_SHIFT // shift pmd index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) - ;; -#endif -(p7) ld8 r17=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); -(p6) br.cond.spnt page_fault - mov b0=r30 - br.sptk.many b0 // return to continuation point -END(nested_dtlb_miss) - - .org ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(ikey_miss) - DBG_FAULT(6) - FAULT(6) -END(ikey_miss) - - //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) -ENTRY(page_fault) -#ifdef CONFIG_XEN - XEN_HYPER_SSM_PSR_DT -#else - ssm psr.dt - ;; - srlz.i -#endif - ;; - SAVE_MIN_WITH_COVER - alloc r15=ar.pfs,0,0,3,0 -#ifdef CONFIG_XEN - movl r3=XSI_ISR - ;; - ld8 out1=[r3],XSI_IFA_OFS-XSI_ISR_OFS // get vcr.isr, point to ifa - ;; - ld8 out0=[r3] // get vcr.ifa - mov r14=1 - ;; - add r3=XSI_PSR_IC_OFS-XSI_IFA_OFS, r3 // point to vpsr.ic - ;; - st4 [r3]=r14 // vpsr.ic = 1 - adds r3=8,r2 // set up second base pointer - ;; -#else - mov out0=cr.ifa - mov out1=cr.isr - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collectin is on - ;; -#endif -#ifdef CONFIG_XEN - -#define MASK_TO_PEND_OFS (-1) - -(p15) movl r14=XSI_PSR_I_ADDR - ;; -(p15) ld8 r14=[r14] - ;; -(p15) st1 [r14]=r0,MASK_TO_PEND_OFS // if (p15) vpsr.i = 1 - ;; // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0 -(p15) ld1 r14=[r14] // if (vcpu->vcpu_info->evtchn_upcall_pending) - ;; -(p15) cmp.ne p15,p0=r14,r0 - ;; -(p15) XEN_HYPER_SSM_I -#else -(p15) ssm psr.i // restore psr.i -#endif - movl r14=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.many b6=ia64_do_page_fault // ignore return address -END(page_fault) - - .org ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(dkey_miss) - DBG_FAULT(7) - FAULT(7) -END(dkey_miss) - - .org ia64_ivt+0x2000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(dirty_bit) - DBG_FAULT(8) - /* - * What we do here is to simply turn on the dirty bit in the PTE. We need to - * update both the page-table and the TLB entry. To efficiently access the PTE, - * we address it through the virtual page table. Most likely, the TLB entry for - * the relevant virtual page table page is still present in the TLB so we can - * normally do this without additional TLB misses. In case the necessary virtual - * page table TLB entry isn't present, we take a nested TLB miss hit where we look - * up the physical address of the L3 PTE and then continue at label 1 below. - */ -#ifdef CONFIG_XEN - movl r16=XSI_IFA - ;; - ld8 r16=[r16] - ;; -#else - mov r16=cr.ifa // get the address that caused the fault -#endif - movl r30=1f // load continuation point in case of nested fault - ;; -#ifdef CONFIG_XEN - mov r18=r8; - mov r8=r16; - XEN_HYPER_THASH;; - mov r17=r8; - mov r8=r18;; -#else - thash r17=r16 // compute virtual address of L3 PTE -#endif - mov r29=b0 // save b0 in case of nested fault - mov r31=pr // save pr -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present - ;; -#ifdef CONFIG_XEN -(p6) mov r18=r8 -(p6) mov r8=r25 - ;; -(p6) XEN_HYPER_ITC_D - ;; -(p6) mov r8=r18 -#else -(p6) itc.d r25 // install updated PTE -#endif - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#endif - mov pr=r31,-1 // restore pr -#ifdef CONFIG_XEN - XEN_HYPER_RFI - dv_serialize_data -#else - rfi -#endif -END(dirty_bit) - - .org ia64_ivt+0x2400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(iaccess_bit) - DBG_FAULT(9) - // Like Entry 8, except for instruction access -#ifdef CONFIG_XEN - movl r16=XSI_IFA - ;; - ld8 r16=[r16] - ;; -#else - mov r16=cr.ifa // get the address that caused the fault -#endif - movl r30=1f // load continuation point in case of nested fault - mov r31=pr // save predicates -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. - */ - mov r17=cr.ipsr - ;; - mov r18=cr.iip - tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? - ;; -(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa -#endif /* CONFIG_ITANIUM */ - ;; -#ifdef CONFIG_XEN - mov r18=r8; - mov r8=r16; - XEN_HYPER_THASH;; - mov r17=r8; - mov r8=r18;; -#else - thash r17=r16 // compute virtual address of L3 PTE -#endif - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the accessed bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page present - ;; -#ifdef CONFIG_XEN - mov r26=r8 - mov r8=r25 - ;; -(p6) XEN_HYPER_ITC_I - ;; - mov r8=r26 - ;; -#else -(p6) itc.i r25 // install updated PTE -#endif - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else /* !CONFIG_SMP */ - ;; -1: ld8 r18=[r17] - ;; - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.i r18 // install updated PTE -#endif /* !CONFIG_SMP */ - mov pr=r31,-1 -#ifdef CONFIG_XEN - XEN_HYPER_RFI - dv_serialize_data -#else - rfi -#endif -END(iaccess_bit) - - .org ia64_ivt+0x2800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(daccess_bit) - DBG_FAULT(10) - // Like Entry 8, except for data access -#ifdef CONFIG_XEN - movl r16=XSI_IFA - ;; - ld8 r16=[r16] - ;; -#else - mov r16=cr.ifa // get the address that caused the fault -#endif - movl r30=1f // load continuation point in case of nested fault - ;; -#ifdef CONFIG_XEN - mov r18=r8 - mov r8=r16 - XEN_HYPER_THASH - ;; - mov r17=r8 - mov r8=r18 - ;; -#else - thash r17=r16 // compute virtual address of L3 PTE -#endif - mov r31=pr - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the dirty bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page is present - ;; -#ifdef CONFIG_XEN - mov r26=r8 - mov r8=r25 - ;; -(p6) XEN_HYPER_ITC_D - ;; - mov r8=r26 - ;; -#else -(p6) itc.d r25 // install updated PTE -#endif - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - ;; - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_A,r18 // set the accessed bit - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#endif - mov b0=r29 // restore b0 - mov pr=r31,-1 -#ifdef CONFIG_XEN - XEN_HYPER_RFI - dv_serialize_data -#else - rfi -#endif -END(daccess_bit) - - .org ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(break_fault) - /* - * The streamlined system call entry/exit paths only save/restore the initial part - * of pt_regs. This implies that the callers of system-calls must adhere to the - * normal procedure calling conventions. - * - * Registers to be saved & restored: - * CR registers: cr.ipsr, cr.iip, cr.ifs - * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr - * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 - * Registers to be restored only: - * r8-r11: output value from the system call. - * - * During system call exit, scratch registers (including r15) are modified/cleared - * to prevent leaking bits from kernel to user level. - */ - DBG_FAULT(11) - mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) -#ifdef CONFIG_XEN - movl r22=XSI_IPSR - ;; - ld8 r29=[r22],XSI_IIM_OFS-XSI_IPSR_OFS // get ipsr, point to iip -#else - mov r29=cr.ipsr // M2 (12 cyc) -#endif - mov r31=pr // I0 (2 cyc) - -#ifdef CONFIG_XEN - ;; - ld8 r17=[r22],XSI_IIP_OFS-XSI_IIM_OFS -#else - mov r17=cr.iim // M2 (2 cyc) -#endif - mov.m r27=ar.rsc // M2 (12 cyc) - mov r18=__IA64_BREAK_SYSCALL // A - - mov.m ar.rsc=0 // M2 - mov.m r21=ar.fpsr // M2 (12 cyc) - mov r19=b6 // I0 (2 cyc) - ;; - mov.m r23=ar.bspstore // M2 (12 cyc) - mov.m r24=ar.rnat // M2 (5 cyc) - mov.i r26=ar.pfs // I0 (2 cyc) - - invala // M0|1 - nop.m 0 // M - mov r20=r1 // A save r1 - - nop.m 0 - movl r30=sys_call_table // X - -#ifdef CONFIG_XEN - ld8 r28=[r22] -#else - mov r28=cr.iip // M2 (2 cyc) -#endif - cmp.eq p0,p7=r18,r17 // I0 is this a system call? -(p7) br.cond.spnt non_syscall // B no -> - // - // From this point on, we are definitely on the syscall-path - // and we can use (non-banked) scratch registers. - // -/////////////////////////////////////////////////////////////////////// - mov r1=r16 // A move task-pointer to "addl"-addressable reg - mov r2=r16 // A setup r2 for ia64_syscall_setup - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = ¤t_thread_info()->flags - - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - adds r15=-1024,r15 // A subtract 1024 from syscall number - mov r3=NR_syscalls - 1 - ;; - ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag - ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags - extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr - - shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) - addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS - cmp.leu p6,p7=r15,r3 // A syscall number in range? - ;; - - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS -(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point - tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? - - mov.m ar.bspstore=r22 // M2 switch to kernel RBS - cmp.eq p8,p9=2,r8 // A isr.ei==2? - ;; - -(p8) mov r8=0 // A clear ei to 0 -(p7) movl r30=sys_ni_syscall // X - -(p8) adds r28=16,r28 // A switch cr.iip to next bundle -(p9) adds r8=1,r8 // A increment ei to next slot - nop.i 0 - ;; - - mov.m r25=ar.unat // M2 (5 cyc) - dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr - adds r15=1024,r15 // A restore original syscall number - // - // If any of the above loads miss in L1D, we'll stall here until - // the data arrives. - // -/////////////////////////////////////////////////////////////////////// - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag - mov b6=r30 // I0 setup syscall handler branch reg early - cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? - - and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit - mov r18=ar.bsp // M2 (12 cyc) -(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS - ;; -.back_from_break_fixup: -(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack - cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? - br.call.sptk.many b7=ia64_syscall_setup // B -1: - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - nop 0 -#ifdef CONFIG_XEN - mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;; -#else - bsw.1 // B (6 cyc) regs are saved, switch to bank 1 -#endif - ;; - -#ifdef CONFIG_XEN - movl r16=XSI_PSR_IC - mov r3=1 - ;; - st4 [r16]=r3,XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS // vpsr.ic = 1 -#else - ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection -#endif - movl r3=ia64_ret_from_syscall // X - ;; - - srlz.i // M0 ensure interruption collection is on - mov rp=r3 // I0 set the real return addr -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - -#ifdef CONFIG_XEN -(p15) ld8 r16=[r16] // vpsr.i - ;; -(p15) st1 [r16]=r0,MASK_TO_PEND_OFS // if (p15) vpsr.i = 1 - ;; // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0 -(p15) ld1 r2=[r16] // if (vcpu->vcpu_info->evtchn_upcall_pending) - ;; -(p15) cmp.ne.unc p6,p0=r2,r0 - ;; -(p6) XEN_HYPER_SSM_I // do a real ssm psr.i -#else -(p15) ssm psr.i // M2 restore psr.i -#endif -(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) - br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic - // NOT REACHED -/////////////////////////////////////////////////////////////////////// - // On entry, we optimistically assumed that we're coming from user-space. - // For the rare cases where a system-call is done from within the kernel, - // we fix things up at this point: -.break_fixup: - add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure - mov ar.rnat=r24 // M2 restore kernel's AR.RNAT - ;; - mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE - br.cond.sptk .back_from_break_fixup -END(break_fault) - - .org ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(interrupt) - DBG_FAULT(12) - mov r31=pr // prepare to save predicates - ;; - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 -#ifdef CONFIG_XEN - movl r3=XSI_PSR_IC - mov r14=1 - ;; - st4 [r3]=r14 -#else - ssm psr.ic | PSR_DEFAULT_BITS -#endif - ;; - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; - SAVE_REST - ;; - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group -#ifdef CONFIG_XEN - ;; - br.call.sptk.many rp=xen_get_ivr - ;; - mov out0=r8 // pass cr.ivr as first arg -#else - mov out0=cr.ivr // pass cr.ivr as first arg -#endif - add out1=16,sp // pass pointer to pt_regs as second arg - ;; - srlz.d // make sure we see the effect of cr.ivr - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_handle_irq -END(interrupt) - - .org ia64_ivt+0x3400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved - DBG_FAULT(13) - FAULT(13) - - .org ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - DBG_FAULT(14) - FAULT(14) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - * - * ia64_syscall_setup() is a separate subroutine so that it can - * allocate stacked registers so it can safely demine any - * potential NaT values from the input registers. - * - * On entry: - * - executing on bank 0 or bank 1 register set (doesn't matter) - * - r1: stack pointer - * - r2: current task pointer - * - r3: preserved - * - r11: original contents (saved ar.pfs to be saved) - * - r12: original contents (sp to be saved) - * - r13: original contents (tp to be saved) - * - r15: original contents (syscall # to be saved) - * - r18: saved bsp (after switching to kernel stack) - * - r19: saved b6 - * - r20: saved r1 (gp) - * - r21: saved ar.fpsr - * - r22: kernel's register backing store base (krbs_base) - * - r23: saved ar.bspstore - * - r24: saved ar.rnat - * - r25: saved ar.unat - * - r26: saved ar.pfs - * - r27: saved ar.rsc - * - r28: saved cr.iip - * - r29: saved cr.ipsr - * - r31: saved pr - * - b0: original contents (to be saved) - * On exit: - * - p10: TRUE if syscall is invoked with more than 8 out - * registers or r15's Nat is true - * - r1: kernel's gp - * - r3: preserved (same as on entry) - * - r8: -EINVAL if p10 is true - * - r12: points to kernel stack - * - r13: points to current task - * - r14: preserved (same as on entry) - * - p13: preserved - * - p15: TRUE if interrupts need to be re-enabled - * - ar.fpsr: set to kernel settings - * - b6: preserved (same as on entry) - */ -#ifndef CONFIG_XEN -GLOBAL_ENTRY(ia64_syscall_setup) -#if PT(B6) != 0 -# error This code assumes that b6 is the first field in pt_regs. -#endif - st8 [r1]=r19 // save b6 - add r16=PT(CR_IPSR),r1 // initialize first base pointer - add r17=PT(R11),r1 // initialize second base pointer - ;; - alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable - st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr - tnat.nz p8,p0=in0 - - st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 - tnat.nz p9,p0=in1 -(pKStk) mov r18=r0 // make sure r18 isn't NaT - ;; - - st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs - st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip - mov r28=b0 // save b0 (2 cyc) - ;; - - st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat - dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] -(p8) mov in0=-1 - ;; - - st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs - extr.u r11=r19,7,7 // I0 // get sol of ar.pfs - and r8=0x7f,r19 // A // get sof of ar.pfs - - st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc - tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 -(p9) mov in1=-1 - ;; - -(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 - tnat.nz p10,p0=in2 - add r11=8,r11 - ;; -(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field -(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field - tnat.nz p11,p0=in3 - ;; -(p10) mov in2=-1 - tnat.nz p12,p0=in4 // [I0] -(p11) mov in3=-1 - ;; -(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat -(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore - shl r18=r18,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates - st8 [r17]=r28,PT(R1)-PT(B0) // save b0 - tnat.nz p13,p0=in5 // [I0] - ;; - st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" - st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 -(p12) mov in4=-1 - ;; - -.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 -.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 -(p13) mov in5=-1 - ;; - st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr - tnat.nz p13,p0=in6 - cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 - ;; - mov r8=1 -(p9) tnat.nz p10,p0=r15 - adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) - - st8.spill [r17]=r15 // save r15 - tnat.nz p8,p0=in7 - nop.i 0 - - mov r13=r2 // establish `current' - movl r1=__gp // establish kernel global pointer - ;; - st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) -(p13) mov in6=-1 -(p8) mov in7=-1 - - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value -(p10) mov r8=-EINVAL - br.ret.sptk.many b7 -END(ia64_syscall_setup) -#endif - - .org ia64_ivt+0x3c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - DBG_FAULT(15) - FAULT(15) - - /* - * Squatting in this space ... - * - * This special case dispatcher for illegal operation faults allows preserved - * registers to be modified through a callback function (asm only) that is handed - * back from the fault handler in r8. Up to three arguments can be passed to the - * callback function by returning an aggregate with the callback as its first - * element, followed by the arguments. - */ -ENTRY(dispatch_illegal_op_fault) - .prologue - .body - SAVE_MIN_WITH_COVER - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in insn group - mov out0=ar.ec - ;; - SAVE_REST - PT_REGS_UNWIND_INFO(0) - ;; - br.call.sptk.many rp=ia64_illegal_op_fault -.ret0: ;; - alloc r14=ar.pfs,0,0,3,0 // must be first in insn group - mov out0=r9 - mov out1=r10 - mov out2=r11 - movl r15=ia64_leave_kernel - ;; - mov rp=r15 - mov b6=r8 - ;; - cmp.ne p6,p0=0,r8 -(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel - br.sptk.many ia64_leave_kernel -END(dispatch_illegal_op_fault) - - .org ia64_ivt+0x4000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - DBG_FAULT(16) - FAULT(16) - - .org ia64_ivt+0x4400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - DBG_FAULT(17) - FAULT(17) - -ENTRY(non_syscall) - mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER - ;; - SAVE_MIN_WITH_COVER - - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... - - alloc r14=ar.pfs,0,0,2,0 - mov out0=cr.iim - add out1=16,sp - adds r3=8,r2 // set up second base pointer for SAVE_REST - - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - movl r15=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r15 - ;; - br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr -END(non_syscall) - - .org ia64_ivt+0x4800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - DBG_FAULT(18) - FAULT(18) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_unaligned_handler) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) - mov out0=cr.ifa - adds out1=16,sp - - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_unaligned -END(dispatch_unaligned_handler) - - .org ia64_ivt+0x4c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - DBG_FAULT(19) - FAULT(19) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_to_fault_handler) - /* - * Input: - * psr.ic: off - * r19: fault vector number (e.g., 24 for General Exception) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=r15 -#ifdef CONFIG_XEN - movl out1=XSI_ISR - ;; - adds out2=XSI_IFA-XSI_ISR,out1 - adds out3=XSI_IIM-XSI_ISR,out1 - adds out4=XSI_ITIR-XSI_ISR,out1 - ;; - ld8 out1=[out1] - ld8 out2=[out2] - ld8 out3=[out4] - ld8 out4=[out4] - ;; -#else - mov out1=cr.isr - mov out2=cr.ifa - mov out3=cr.iim - mov out4=cr.itir - ;; -#endif - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_fault -END(dispatch_to_fault_handler) - -// -// --- End of long entries, Beginning of short entries -// - - .org ia64_ivt+0x5000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) -ENTRY(page_not_present) - DBG_FAULT(20) - mov r16=cr.ifa - rsm psr.dt - /* - * The Linux page fault handler doesn't expect non-present pages to be in - * the TLB. Flush the existing entry now, so we meet that expectation. - */ - mov r17=PAGE_SHIFT<<2 - ;; - ptc.l r16,r17 - ;; - mov r31=pr - srlz.d - br.sptk.many page_fault -END(page_not_present) - - .org ia64_ivt+0x5100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) -ENTRY(key_permission) - DBG_FAULT(21) - mov r16=cr.ifa - rsm psr.dt - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(key_permission) - - .org ia64_ivt+0x5200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(iaccess_rights) - DBG_FAULT(22) - mov r16=cr.ifa - rsm psr.dt - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(iaccess_rights) - - .org ia64_ivt+0x5300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) -ENTRY(daccess_rights) - DBG_FAULT(23) -#ifdef CONFIG_XEN - movl r16=XSI_IFA - ;; - ld8 r16=[r16] - ;; - XEN_HYPER_RSM_PSR_DT -#else - mov r16=cr.ifa - rsm psr.dt -#endif - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(daccess_rights) - - .org ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(general_exception) - DBG_FAULT(24) - mov r16=cr.isr - mov r31=pr - ;; - cmp4.eq p6,p0=0,r16 -(p6) br.sptk.many dispatch_illegal_op_fault - ;; - mov r19=24 // fault number - br.sptk.many dispatch_to_fault_handler -END(general_exception) - - .org ia64_ivt+0x5500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(disabled_fp_reg) - DBG_FAULT(25) - rsm psr.dfh // ensure we can access fph - ;; - srlz.d - mov r31=pr - mov r19=25 - br.sptk.many dispatch_to_fault_handler -END(disabled_fp_reg) - - .org ia64_ivt+0x5600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(nat_consumption) - DBG_FAULT(26) - - mov r16=cr.ipsr - mov r17=cr.isr - mov r31=pr // save PR - ;; - and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} - tbit.z p6,p0=r17,IA64_ISR_NA_BIT - ;; - cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 - dep r16=-1,r16,IA64_PSR_ED_BIT,1 -(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) - ;; - mov cr.ipsr=r16 // set cr.ipsr.na - mov pr=r31,-1 - ;; - rfi - -1: mov pr=r31,-1 - ;; - FAULT(26) -END(nat_consumption) - - .org ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(speculation_vector) - DBG_FAULT(27) - /* - * A [f]chk.[as] instruction needs to take the branch to the recovery code but - * this part of the architecture is not implemented in hardware on some CPUs, such - * as Itanium. Thus, in general we need to emulate the behavior. IIM contains - * the relative target (not yet sign extended). So after sign extending it we - * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, - * i.e., the slot to restart into. - * - * cr.imm contains zero_ext(imm21) - */ - mov r18=cr.iim - ;; - mov r17=cr.iip - shl r18=r18,43 // put sign bit in position (43=64-21) - ;; - - mov r16=cr.ipsr - shr r18=r18,39 // sign extend (39=43-4) - ;; - - add r17=r17,r18 // now add the offset - ;; - mov cr.iip=r17 - dep r16=0,r16,41,2 // clear EI - ;; - - mov cr.ipsr=r16 - ;; - -#ifdef CONFIG_XEN - XEN_HYPER_RFI; -#else - rfi // and go back -#endif -END(speculation_vector) - - .org ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - DBG_FAULT(28) - FAULT(28) - - .org ia64_ivt+0x5900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(debug_vector) - DBG_FAULT(29) - FAULT(29) -END(debug_vector) - - .org ia64_ivt+0x5a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(unaligned_access) - DBG_FAULT(30) - mov r31=pr // prepare to save predicates - ;; - br.sptk.many dispatch_unaligned_handler -END(unaligned_access) - - .org ia64_ivt+0x5b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(unsupported_data_reference) - DBG_FAULT(31) - FAULT(31) -END(unsupported_data_reference) - - .org ia64_ivt+0x5c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) -ENTRY(floating_point_fault) - DBG_FAULT(32) - FAULT(32) -END(floating_point_fault) - - .org ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(floating_point_trap) - DBG_FAULT(33) - FAULT(33) -END(floating_point_trap) - - .org ia64_ivt+0x5e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(lower_privilege_trap) - DBG_FAULT(34) - FAULT(34) -END(lower_privilege_trap) - - .org ia64_ivt+0x5f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(taken_branch_trap) - DBG_FAULT(35) - FAULT(35) -END(taken_branch_trap) - - .org ia64_ivt+0x6000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(single_step_trap) - DBG_FAULT(36) - FAULT(36) -END(single_step_trap) - - .org ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Reserved - DBG_FAULT(37) - FAULT(37) - - .org ia64_ivt+0x6200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - DBG_FAULT(38) - FAULT(38) - - .org ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - DBG_FAULT(39) - FAULT(39) - - .org ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - DBG_FAULT(40) - FAULT(40) - - .org ia64_ivt+0x6500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - DBG_FAULT(41) - FAULT(41) - - .org ia64_ivt+0x6600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - DBG_FAULT(42) - FAULT(42) - - .org ia64_ivt+0x6700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - DBG_FAULT(43) - FAULT(43) - - .org ia64_ivt+0x6800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - DBG_FAULT(44) - FAULT(44) - - .org ia64_ivt+0x6900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(ia32_exception) - DBG_FAULT(45) - FAULT(45) -END(ia32_exception) - - .org ia64_ivt+0x6a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(ia32_intercept) - DBG_FAULT(46) -#ifdef CONFIG_IA32_SUPPORT - mov r31=pr - mov r16=cr.isr - ;; - extr.u r17=r16,16,8 // get ISR.code - mov r18=ar.eflag - mov r19=cr.iim // old eflag value - ;; - cmp.ne p6,p0=2,r17 -(p6) br.cond.spnt 1f // not a system flag fault - xor r16=r18,r19 - ;; - extr.u r17=r16,18,1 // get the eflags.ac bit - ;; - cmp.eq p6,p0=0,r17 -(p6) br.cond.spnt 1f // eflags.ac bit didn't change - ;; - mov pr=r31,-1 // restore predicate registers -#ifdef CONFIG_XEN - XEN_HYPER_RFI; -#else - rfi -#endif - -1: -#endif // CONFIG_IA32_SUPPORT - FAULT(46) -END(ia32_intercept) - - .org ia64_ivt+0x6b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) -ENTRY(ia32_interrupt) - DBG_FAULT(47) -#ifdef CONFIG_IA32_SUPPORT - mov r31=pr - br.sptk.many dispatch_to_ia32_handler -#else - FAULT(47) -#endif -END(ia32_interrupt) - - .org ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - DBG_FAULT(48) - FAULT(48) - - .org ia64_ivt+0x6d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - DBG_FAULT(49) - FAULT(49) - - .org ia64_ivt+0x6e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - DBG_FAULT(50) - FAULT(50) - - .org ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - DBG_FAULT(51) - FAULT(51) - - .org ia64_ivt+0x7000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7000 Entry 52 (size 16 bundles) Reserved - DBG_FAULT(52) - FAULT(52) - - .org ia64_ivt+0x7100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - DBG_FAULT(53) - FAULT(53) - - .org ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - DBG_FAULT(54) - FAULT(54) - - .org ia64_ivt+0x7300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - DBG_FAULT(55) - FAULT(55) - - .org ia64_ivt+0x7400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - DBG_FAULT(56) - FAULT(56) - - .org ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - DBG_FAULT(57) - FAULT(57) - - .org ia64_ivt+0x7600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - DBG_FAULT(58) - FAULT(58) - - .org ia64_ivt+0x7700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - DBG_FAULT(59) - FAULT(59) - - .org ia64_ivt+0x7800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - DBG_FAULT(60) - FAULT(60) - - .org ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - DBG_FAULT(61) - FAULT(61) - - .org ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - DBG_FAULT(62) - FAULT(62) - - .org ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - DBG_FAULT(63) - FAULT(63) - - .org ia64_ivt+0x7c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - DBG_FAULT(64) - FAULT(64) - - .org ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - DBG_FAULT(65) - FAULT(65) - - .org ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - DBG_FAULT(66) - FAULT(66) - - .org ia64_ivt+0x7f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - DBG_FAULT(67) - FAULT(67) - -#ifdef CONFIG_IA32_SUPPORT - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - - // IA32 interrupt entry point - -ENTRY(dispatch_to_ia32_handler) - SAVE_MIN - ;; - mov r14=cr.isr - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i - adds r3=8,r2 // Base pointer for SAVE_REST - ;; - SAVE_REST - ;; - mov r15=0x80 - shr r14=r14,16 // Get interrupt number - ;; - cmp.ne p6,p0=r14,r15 -(p6) br.call.dpnt.many b6=non_ia32_syscall - - adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions - adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp - ;; - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - ld8 r8=[r14] // get r8 - ;; - st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP) - ;; - alloc r15=ar.pfs,0,0,6,0 // must first in an insn group - ;; - ld4 r8=[r14],8 // r8 == eax (syscall number) - mov r15=IA32_NR_syscalls - ;; - cmp.ltu.unc p6,p7=r8,r15 - ld4 out1=[r14],8 // r9 == ecx - ;; - ld4 out2=[r14],8 // r10 == edx - ;; - ld4 out0=[r14] // r11 == ebx - adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp - ;; - ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp - ;; - ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 out4=[r14] // r15 == edi - movl r16=ia32_syscall_table - ;; -(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number - ld4 r2=[r2] // r2 = current_thread_info()->flags - ;; - ld8 r16=[r16] - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit - ;; - mov b6=r16 - movl r15=ia32_ret_from_syscall - cmp.eq p8,p0=r2,r0 - ;; - mov rp=r15 -(p8) br.call.sptk.many b6=b6 - br.cond.sptk ia32_trace_syscall - -non_ia32_syscall: - alloc r15=ar.pfs,0,0,2,0 - mov out0=r14 // interrupt # - add out1=16,sp // pointer to pt_regs - ;; // avoid WAW on CFM - br.call.sptk.many rp=ia32_bad_interrupt -.ret1: movl r15=ia64_leave_kernel - ;; - mov rp=r15 - br.ret.sptk.many rp -END(dispatch_to_ia32_handler) -#endif /* CONFIG_IA32_SUPPORT */ - -#ifdef CONFIG_XEN - .section .text,"ax" -GLOBAL_ENTRY(xen_event_callback) - mov r31=pr // prepare to save predicates - ;; - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - ;; - movl r3=XSI_PSR_IC - mov r14=1 - ;; - st4 [r3]=r14 - ;; - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; - SAVE_REST - ;; -1: - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - add out0=16,sp // pass pointer to pt_regs as first arg - ;; - br.call.sptk.many b0=evtchn_do_upcall - ;; - movl r20=XSI_PSR_I_ADDR - ;; - ld8 r20=[r20] - ;; - adds r20=-1,r20 // vcpu_info->evtchn_upcall_pending - ;; - ld1 r20=[r20] - ;; - cmp.ne p6,p0=r20,r0 // if there are pending events, - (p6) br.spnt.few 1b // call evtchn_do_upcall again. - br.sptk.many ia64_leave_kernel -END(xen_event_callback) - - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -GLOBAL_ENTRY(xen_bsw1) - /* FIXME: THIS CODE IS NOT NaT SAFE! */ - mov r14=ar.unat - movl r30=XSI_B1NAT - ;; - ld8 r30=[r30];; - mov ar.unat=r30 - movl r30=XSI_BANKNUM; - mov r31=1;; - st4 [r30]=r31; - movl r30=XSI_BANK1_R16; - movl r31=XSI_BANK1_R16+8;; - ld8.fill r16=[r30],16; ld8.fill r17=[r31],16;; - ld8.fill r18=[r30],16; ld8.fill r19=[r31],16;; - ld8.fill r20=[r30],16; ld8.fill r21=[r31],16;; - ld8.fill r22=[r30],16; ld8.fill r23=[r31],16;; - ld8.fill r24=[r30],16; ld8.fill r25=[r31],16;; - ld8.fill r26=[r30],16; ld8.fill r27=[r31],16;; - ld8.fill r28=[r30],16; ld8.fill r29=[r31],16;; - ld8.fill r30=[r30]; ld8.fill r31=[r31];; - mov ar.unat=r14 - br.ret.sptk.many b0 -END(xen_bsw1) - - -#endif diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h deleted file mode 100644 index 5741b4e75d..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h +++ /dev/null @@ -1,358 +0,0 @@ -#include <asm/cache.h> - -#ifdef CONFIG_XEN -#include "../kernel/entry.h" -#else -#include "entry.h" -#endif - -/* - * For ivt.s we want to access the stack virtually so we don't have to disable translation - * on interrupts. - * - * On entry: - * r1: pointer to current task (ar.k6) - */ -#define MINSTATE_START_SAVE_MIN_VIRT \ -(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ - ;; \ -(pUStk) mov.m r24=ar.rnat; \ -(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ -(pKStk) mov r1=sp; /* get sp */ \ - ;; \ -(pUStk) lfetch.fault.excl.nt1 [r22]; \ -(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ -(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ - ;; \ -(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ -(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ - ;; \ -(pUStk) mov r18=ar.bsp; \ -(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ - -#define MINSTATE_END_SAVE_MIN_VIRT \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ - ;; - -/* - * For mca_asm.S we want to access the stack physically since the state is saved before we - * go virtual and don't want to destroy the iip or ipsr. - */ -#define MINSTATE_START_SAVE_MIN_PHYS \ -(pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \ -(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \ -(pKStk) ld8 r3 = [r3];; \ -(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \ -(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ -(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ -(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ - ;; \ -(pUStk) mov r24=ar.rnat; \ -(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ -(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ -(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \ - ;; \ -(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ -(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ - ;; \ -(pUStk) mov r18=ar.bsp; \ -(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ - -#define MINSTATE_END_SAVE_MIN_PHYS \ - dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \ - ;; - -#ifdef MINSTATE_VIRT -# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT) -# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT -# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT -#endif - -#ifdef MINSTATE_PHYS -# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg -# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS -# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS -#endif - -/* - * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves - * the minimum state necessary that allows us to turn psr.ic back - * on. - * - * Assumed state upon entry: - * psr.ic: off - * r31: contains saved predicates (pr) - * - * Upon exit, the state is as follows: - * psr.ic: off - * r2 = points to &pt_regs.r16 - * r8 = contents of ar.ccv - * r9 = contents of ar.csd - * r10 = contents of ar.ssd - * r11 = FPSR_DEFAULT - * r12 = kernel sp (kernel virtual address) - * r13 = points to current task_struct (kernel virtual address) - * p15 = TRUE if psr.i is set in cr.ipsr - * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: - * preserved - * CONFIG_XEN note: p6/p7 are not preserved - * - * Note that psr.ic is NOT turned on by this macro. This is so that - * we can pass interruption state as arguments to a handler. - */ -#ifdef CONFIG_XEN -#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ - MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ - mov r27=ar.rsc; /* M */ \ - mov r20=r1; /* A */ \ - mov r25=ar.unat; /* M */ \ - /* mov r29=cr.ipsr; /* M */ \ - movl r29=XSI_IPSR;; \ - ld8 r29=[r29];; \ - mov r26=ar.pfs; /* I */ \ - /* mov r28=cr.iip; /* M */ \ - movl r28=XSI_IIP;; \ - ld8 r28=[r28];; \ - mov r21=ar.fpsr; /* M */ \ - COVER; /* B;; (or nothing) */ \ - ;; \ - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ - ;; \ - ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ - st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ - adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ - /* switch from user to kernel RBS: */ \ - ;; \ - invala; /* M */ \ - /* SAVE_IFS; /* see xen special handling below */ \ - cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ - ;; \ - MINSTATE_START_SAVE_MIN \ - adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ - adds r16=PT(CR_IPSR),r1; \ - ;; \ - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ - st8 [r16]=r29; /* save cr.ipsr */ \ - ;; \ - lfetch.fault.excl.nt1 [r17]; \ - tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ - mov r29=b0 \ - ;; \ - adds r16=PT(R8),r1; /* initialize first base pointer */ \ - adds r17=PT(R9),r1; /* initialize second base pointer */ \ -(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r8,16; \ -.mem.offset 8,0; st8.spill [r17]=r9,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r10,24; \ -.mem.offset 8,0; st8.spill [r17]=r11,24; \ - ;; \ - /* xen special handling for possibly lazy cover */ \ - movl r8=XSI_PRECOVER_IFS; \ - ;; \ - ld8 r30=[r8]; \ - ;; \ - st8 [r16]=r28,16; /* save cr.iip */ \ - st8 [r17]=r30,16; /* save cr.ifs */ \ -(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ - mov r8=ar.ccv; \ - mov r9=ar.csd; \ - mov r10=ar.ssd; \ - movl r11=FPSR_DEFAULT; /* L-unit */ \ - ;; \ - st8 [r16]=r25,16; /* save ar.unat */ \ - st8 [r17]=r26,16; /* save ar.pfs */ \ - shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ - ;; \ - st8 [r16]=r27,16; /* save ar.rsc */ \ -(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ -(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ - ;; /* avoid RAW on r16 & r17 */ \ -(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ - st8 [r17]=r31,16; /* save predicates */ \ -(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ - ;; \ - st8 [r16]=r29,16; /* save b0 */ \ - st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ - cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ -.mem.offset 8,0; st8.spill [r17]=r12,16; \ - adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r13,16; \ -.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ - mov r13=IA64_KR(CURRENT); /* establish `current' */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r15,16; \ -.mem.offset 8,0; st8.spill [r17]=r14,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r2,16; \ -.mem.offset 8,0; st8.spill [r17]=r3,16; \ - ;; \ - EXTRA; \ - mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2; \ - adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ - ;; \ - movl r1=__gp; /* establish kernel global pointer */ \ - ;; \ - /* MINSTATE_END_SAVE_MIN */ -#else -#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ - MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ - mov r27=ar.rsc; /* M */ \ - mov r20=r1; /* A */ \ - mov r25=ar.unat; /* M */ \ - mov r29=cr.ipsr; /* M */ \ - mov r26=ar.pfs; /* I */ \ - mov r28=cr.iip; /* M */ \ - mov r21=ar.fpsr; /* M */ \ - COVER; /* B;; (or nothing) */ \ - ;; \ - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ - ;; \ - ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ - st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ - adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ - /* switch from user to kernel RBS: */ \ - ;; \ - invala; /* M */ \ - SAVE_IFS; \ - cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ - ;; \ - MINSTATE_START_SAVE_MIN \ - adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ - adds r16=PT(CR_IPSR),r1; \ - ;; \ - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ - st8 [r16]=r29; /* save cr.ipsr */ \ - ;; \ - lfetch.fault.excl.nt1 [r17]; \ - tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ - mov r29=b0 \ - ;; \ - adds r16=PT(R8),r1; /* initialize first base pointer */ \ - adds r17=PT(R9),r1; /* initialize second base pointer */ \ -(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r8,16; \ -.mem.offset 8,0; st8.spill [r17]=r9,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r10,24; \ -.mem.offset 8,0; st8.spill [r17]=r11,24; \ - ;; \ - st8 [r16]=r28,16; /* save cr.iip */ \ - st8 [r17]=r30,16; /* save cr.ifs */ \ -(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ - mov r8=ar.ccv; \ - mov r9=ar.csd; \ - mov r10=ar.ssd; \ - movl r11=FPSR_DEFAULT; /* L-unit */ \ - ;; \ - st8 [r16]=r25,16; /* save ar.unat */ \ - st8 [r17]=r26,16; /* save ar.pfs */ \ - shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ - ;; \ - st8 [r16]=r27,16; /* save ar.rsc */ \ -(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ -(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ - ;; /* avoid RAW on r16 & r17 */ \ -(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ - st8 [r17]=r31,16; /* save predicates */ \ -(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ - ;; \ - st8 [r16]=r29,16; /* save b0 */ \ - st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ - cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ -.mem.offset 8,0; st8.spill [r17]=r12,16; \ - adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r13,16; \ -.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ - mov r13=IA64_KR(CURRENT); /* establish `current' */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r15,16; \ -.mem.offset 8,0; st8.spill [r17]=r14,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r2,16; \ -.mem.offset 8,0; st8.spill [r17]=r3,16; \ - adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ - ;; \ - EXTRA; \ - movl r1=__gp; /* establish kernel global pointer */ \ - ;; \ - MINSTATE_END_SAVE_MIN -#endif - -/* - * SAVE_REST saves the remainder of pt_regs (with psr.ic on). - * - * Assumed state upon entry: - * psr.ic: on - * r2: points to &pt_regs.r16 - * r3: points to &pt_regs.r17 - * r8: contents of ar.ccv - * r9: contents of ar.csd - * r10: contents of ar.ssd - * r11: FPSR_DEFAULT - * - * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. - */ -#define SAVE_REST \ -.mem.offset 0,0; st8.spill [r2]=r16,16; \ -.mem.offset 8,0; st8.spill [r3]=r17,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r18,16; \ -.mem.offset 8,0; st8.spill [r3]=r19,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r20,16; \ -.mem.offset 8,0; st8.spill [r3]=r21,16; \ - mov r18=b6; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r22,16; \ -.mem.offset 8,0; st8.spill [r3]=r23,16; \ - mov r19=b7; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r24,16; \ -.mem.offset 8,0; st8.spill [r3]=r25,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r26,16; \ -.mem.offset 8,0; st8.spill [r3]=r27,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r28,16; \ -.mem.offset 8,0; st8.spill [r3]=r29,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r30,16; \ -.mem.offset 8,0; st8.spill [r3]=r31,32; \ - ;; \ - mov ar.fpsr=r11; /* M-unit */ \ - st8 [r2]=r8,8; /* ar.ccv */ \ - adds r24=PT(B6)-PT(F7),r3; \ - ;; \ - stf.spill [r2]=f6,32; \ - stf.spill [r3]=f7,32; \ - ;; \ - stf.spill [r2]=f8,32; \ - stf.spill [r3]=f9,32; \ - ;; \ - stf.spill [r2]=f10; \ - stf.spill [r3]=f11; \ - adds r25=PT(B7)-PT(F11),r3; \ - ;; \ - st8 [r24]=r18,16; /* b6 */ \ - st8 [r25]=r19,16; /* b7 */ \ - ;; \ - st8 [r24]=r9; /* ar.csd */ \ - st8 [r25]=r10; /* ar.ssd */ \ - ;; - -#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,) -#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19) -#ifdef CONFIG_XEN -#define SAVE_MIN break 0;; /* FIXME: non-cover version only for ia32 support? */ -#else -#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, ) -#endif diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S deleted file mode 100644 index d8ebfb994a..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S +++ /dev/null @@ -1,85 +0,0 @@ -/* - * ia64/xen/xenpal.S - * - * Alternate PAL routines for Xen. Heavily leveraged from - * ia64/kernel/pal.S - * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@.hp.com> - */ - -#include <asm/asmmacro.h> -#include <asm/processor.h> - -GLOBAL_ENTRY(xen_pal_call_static) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) - alloc loc1 = ar.pfs,5,5,0,0 -#ifdef CONFIG_XEN - movl r22=running_on_xen;; - ld4 r22=[r22];; - cmp.eq p7,p0=r22,r0 -(p7) br.cond.spnt.many __ia64_pal_call_static;; -#endif - movl loc2 = pal_entry_point -1: { - mov r28 = in0 - mov r29 = in1 - mov r8 = ip - } - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - tbit.nz p6,p7 = in4, 0 - adds r8 = 1f-1b,r8 - mov loc4=ar.rsc // save RSE configuration - ;; - mov ar.rsc=0 // put RSE in enforced lazy, LE mode -#ifdef CONFIG_XEN - mov r9 = r8 - XEN_HYPER_GET_PSR - ;; - mov loc3 = r8 - mov r8 = r9 - ;; -#else - mov loc3 = psr -#endif - mov loc0 = rp - .body - mov r30 = in2 - -#ifdef CONFIG_XEN - // this is low priority for paravirtualization, but is called - // from the idle loop so confuses privop counting - movl r31=XSI_PSR_I_ADDR - ;; - ld8 r31=[r31] - mov r22=1 - ;; - st1 [r31]=r22 - ;; -(p6) movl r31=XSI_PSR_IC - ;; -(p6) st4.rel [r31]=r0 - ;; - mov r31 = in3 - mov b7 = loc2 - ;; -#else -(p6) rsm psr.i | psr.ic - mov r31 = in3 - mov b7 = loc2 - -(p7) rsm psr.i - ;; -(p6) srlz.i -#endif - mov rp = r8 - br.cond.sptk.many b7 -1: mov psr.l = loc3 - mov ar.rsc = loc4 // restore RSE configuration - mov ar.pfs = loc1 - mov rp = loc0 - ;; - srlz.d // seralize restoration of psr.l - br.ret.sptk.many b0 -END(xen_pal_call_static) diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S deleted file mode 100644 index 469f39e226..0000000000 --- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Support routines for Xen - * - * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com> - */ - -#include <asm/processor.h> -#include <asm/asmmacro.h> - -#define isBP p3 // are we the Bootstrap Processor? - - .text -GLOBAL_ENTRY(early_xen_setup) - mov r8=ar.rsc // Initialized in head.S -(isBP) movl r9=running_on_xen;; - extr.u r8=r8,2,2;; // Extract pl fields - cmp.eq p7,p0=r8,r0 // p7: !running on xen - mov r8=1 // booleanize. -(p7) br.ret.sptk.many rp;; -(isBP) st4 [r9]=r8 - movl r10=xen_ivt;; - - mov cr.iva=r10 - - /* Set xsi base. */ -#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600 -(isBP) mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA -(isBP) movl r28=XSI_BASE;; -(isBP) break 0x1000;; - - br.ret.sptk.many rp - ;; -END(early_xen_setup) - -#include <xen/interface/xen.h> - -/* Stub for suspend. - Just force the stacked registers to be written in memory. */ -GLOBAL_ENTRY(xencomm_arch_hypercall_suspend) - ;; - alloc r20=ar.pfs,0,0,6,0 - mov r2=__HYPERVISOR_sched_op - ;; - /* We don't want to deal with RSE. */ - flushrs - mov r33=r32 - mov r32=2 // SCHEDOP_shutdown - ;; - break 0x1000 - ;; - br.ret.sptk.many b0 -END(xencomm_arch_hypercall_suspend) |