aboutsummaryrefslogtreecommitdiffstats
path: root/linux-2.6-xen-sparse/arch/ia64
diff options
context:
space:
mode:
Diffstat (limited to 'linux-2.6-xen-sparse/arch/ia64')
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/Kconfig587
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/Makefile106
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c2160
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/Makefile63
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c1010
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c296
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/entry.S1620
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S925
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/gate.S478
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S110
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/head.S1229
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c1253
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c649
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/pal.S303
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/patch.c264
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c6943
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/setup.c1030
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/kernel/time.c500
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c63
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile14
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/oprofile/init.c52
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h28
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c118
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c142
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/pci/pci.c836
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/Makefile9
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S170
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c1264
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/machvec.c4
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/mem.c75
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c882
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/util.c105
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c397
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c469
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c673
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c145
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c263
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S931
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c19
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S2177
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h358
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S85
-rw-r--r--linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S52
43 files changed, 0 insertions, 28857 deletions
diff --git a/linux-2.6-xen-sparse/arch/ia64/Kconfig b/linux-2.6-xen-sparse/arch/ia64/Kconfig
deleted file mode 100644
index 4991dd4a2b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig
+++ /dev/null
@@ -1,587 +0,0 @@
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
-#
-
-mainmenu "IA-64 Linux Kernel Configuration"
-
-source "init/Kconfig"
-
-menu "Processor type and features"
-
-config IA64
- bool
- default y
- help
- The Itanium Processor Family is Intel's 64-bit successor to
- the 32-bit X86 line. The IA-64 Linux project has a home
- page at <http://www.linuxia64.org/> and a mailing list at
- <linux-ia64@vger.kernel.org>.
-
-config 64BIT
- bool
- default y
-
-config MMU
- bool
- default y
-
-config SWIOTLB
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
-config GENERIC_FIND_NEXT_BIT
- bool
- default y
-
-config GENERIC_CALIBRATE_DELAY
- bool
- default y
-
-config TIME_INTERPOLATION
- bool
- default y
-
-config DMI
- bool
- default y
-
-config EFI
- bool
- default y
-
-config GENERIC_IOMAP
- bool
- default y
-
-config XEN
- bool "Xen hypervisor support"
- default y
- help
- Enable Xen hypervisor support. Resulting kernel runs
- both as a guest OS on Xen and natively on hardware.
-
-config XEN_IA64_VDSO_PARAVIRT
- bool
- depends on XEN && !ITANIUM
- default y
- help
- vDSO paravirtualization
-
-config XEN_IA64_EXPOSE_P2M
- bool "Xen/IA64 exposure p2m table"
- depends on XEN
- default y
- help
- expose p2m from xen
-
-config XEN_IA64_EXPOSE_P2M_USE_DTR
- bool "Xen/IA64 map p2m table with dtr"
- depends on XEN_IA64_EXPOSE_P2M
- default y
- help
- use dtr to map the exposed p2m table
-
-config SCHED_NO_NO_OMIT_FRAME_POINTER
- bool
- default y
-
-config IA64_UNCACHED_ALLOCATOR
- bool
- select GENERIC_ALLOCATOR
-
-config DMA_IS_DMA32
- bool
- default y
-
-config DMA_IS_NORMAL
- bool
- depends on IA64_SGI_SN2
- default y
-
-config AUDIT_ARCH
- bool
- default y
-
-choice
- prompt "System type"
- default IA64_GENERIC
-
-config IA64_GENERIC
- bool "generic"
- select ACPI
- select PCI
- select NUMA
- select ACPI_NUMA
- help
- This selects the system type of your hardware. A "generic" kernel
- will run on any supported IA-64 system. However, if you configure
- a kernel for your specific system, it will be faster and smaller.
-
- generic For any supported IA-64 system
- DIG-compliant For DIG ("Developer's Interface Guide") compliant systems
- HP-zx1/sx1000 For HP systems
- HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices.
- SGI-SN2 For SGI Altix systems
- Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
-
- If you don't know what to do, choose "generic".
-
-config IA64_DIG
- bool "DIG-compliant"
-
-config IA64_HP_ZX1
- bool "HP-zx1/sx1000"
- help
- Build a kernel that runs on HP zx1 and sx1000 systems. This adds
- support for the HP I/O MMU.
-
-config IA64_HP_ZX1_SWIOTLB
- bool "HP-zx1/sx1000 with software I/O TLB"
- help
- Build a kernel that runs on HP zx1 and sx1000 systems even when they
- have broken PCI devices which cannot DMA to full 32 bits. Apart
- from support for the HP I/O MMU, this includes support for the software
- I/O TLB, which allows supporting the broken devices at the expense of
- wasting some kernel memory (about 2MB by default).
-
-config IA64_SGI_SN2
- bool "SGI-SN2"
- help
- Selecting this option will optimize the kernel for use on sn2 based
- systems, but the resulting kernel binary will not run on other
- types of ia64 systems. If you have an SGI Altix system, it's safe
- to select this option. If in doubt, select ia64 generic support
- instead.
-
-config IA64_HP_SIM
- bool "Ski-simulator"
-
-config IA64_XEN
- bool "Xen guest"
- depends on XEN
-
-endchoice
-
-choice
- prompt "Processor type"
- default ITANIUM
-
-config ITANIUM
- bool "Itanium"
- help
- Select your IA-64 processor type. The default is Itanium.
- This choice is safe for all IA-64 systems, but may not perform
- optimally on systems with, say, Itanium 2 or newer processors.
-
-config MCKINLEY
- bool "Itanium 2"
- help
- Select this to configure for an Itanium 2 (McKinley) processor.
-
-endchoice
-
-choice
- prompt "Kernel page size"
- default IA64_PAGE_SIZE_16KB
-
-config IA64_PAGE_SIZE_4KB
- bool "4KB"
- help
- This lets you select the page size of the kernel. For best IA-64
- performance, a page size of 8KB or 16KB is recommended. For best
- IA-32 compatibility, a page size of 4KB should be selected (the vast
- majority of IA-32 binaries work perfectly fine with a larger page
- size). For Itanium 2 or newer systems, a page size of 64KB can also
- be selected.
-
- 4KB For best IA-32 compatibility
- 8KB For best IA-64 performance
- 16KB For best IA-64 performance
- 64KB Requires Itanium 2 or newer processor.
-
- If you don't know what to do, choose 16KB.
-
-config IA64_PAGE_SIZE_8KB
- bool "8KB"
-
-config IA64_PAGE_SIZE_16KB
- bool "16KB"
-
-config IA64_PAGE_SIZE_64KB
- depends on !ITANIUM
- bool "64KB"
-
-endchoice
-
-choice
- prompt "Page Table Levels"
- default PGTABLE_3
-
-config PGTABLE_3
- bool "3 Levels"
-
-config PGTABLE_4
- depends on !IA64_PAGE_SIZE_64KB
- bool "4 Levels"
-
-endchoice
-
-source kernel/Kconfig.hz
-
-config IA64_BRL_EMU
- bool
- depends on ITANIUM
- default y
-
-# align cache-sensitive data to 128 bytes
-config IA64_L1_CACHE_SHIFT
- int
- default "7" if MCKINLEY
- default "6" if ITANIUM
-
-config IA64_CYCLONE
- bool "Cyclone (EXA) Time Source support"
- help
- Say Y here to enable support for IBM EXA Cyclone time source.
- If you're unsure, answer N.
-
-config IOSAPIC
- bool
- depends on !IA64_HP_SIM
- default y
-
-config IA64_SGI_SN_XP
- tristate "Support communication between SGI SSIs"
- depends on IA64_GENERIC || IA64_SGI_SN2
- select IA64_UNCACHED_ALLOCATOR
- help
- An SGI machine can be divided into multiple Single System
- Images which act independently of each other and have
- hardware based memory protection from the others. Enabling
- this feature will allow for direct communication between SSIs
- based on a network adapter and DMA messaging.
-
-config FORCE_MAX_ZONEORDER
- int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE
- range 11 17 if !HUGETLB_PAGE
- default "17" if HUGETLB_PAGE
- default "11"
-
-config SMP
- bool "Symmetric multi-processing support"
- help
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, say N. If you have a system with more
- than one CPU, say Y.
-
- If you say N here, the kernel will run on single and multiprocessor
- systems, but will use only one CPU of a multiprocessor system. If
- you say Y here, the kernel will run on many, but not all,
- single processor systems. On a single processor system, the kernel
- will run faster if you say N here.
-
- See also the <file:Documentation/smp.txt> and the SMP-HOWTO
- available at <http://www.tldp.org/docs.html#howto>.
-
- If you don't know what to do here, say N.
-
-config NR_CPUS
- int "Maximum number of CPUs (2-1024)"
- range 2 1024
- depends on SMP
- default "1024"
- help
- You should set this to the number of CPUs in your system, but
- keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
- only use 2 CPUs on a >2 CPU system. Setting this to a value larger
- than 64 will cause the use of a CPU mask array, causing a small
- performance hit.
-
-config HOTPLUG_CPU
- bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
- depends on SMP && EXPERIMENTAL
- select HOTPLUG
- default n
- ---help---
- Say Y here to experiment with turning CPUs off and on. CPUs
- can be controlled through /sys/devices/system/cpu/cpu#.
- Say N if you want to disable CPU hotplug.
-
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
-
-config SCHED_SMT
- bool "SMT scheduler support"
- depends on SMP
- help
- Improves the CPU scheduler's decision making when dealing with
- Intel IA64 chips with MultiThreading at a cost of slightly increased
- overhead in some places. If unsure say N here.
-
-config PERMIT_BSP_REMOVE
- bool "Support removal of Bootstrap Processor"
- depends on HOTPLUG_CPU
- default n
- ---help---
- Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU
- support.
-
-config FORCE_CPEI_RETARGET
- bool "Force assumption that CPEI can be re-targetted"
- depends on PERMIT_BSP_REMOVE
- default n
- ---help---
- Say Y if you need to force the assumption that CPEI can be re-targetted to
- any cpu in the system. This hint is available via ACPI 3.0 specifications.
- Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
- This option is useful to enable this feature on older BIOSes as well.
- You can also enable this by using boot command line option force_cpei=1.
-
-config PREEMPT
- bool "Preemptible Kernel"
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
- This allows applications to run more reliably even when the system is
- under load.
-
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
-
-source "mm/Kconfig"
-
-config ARCH_SELECT_MEMORY_MODEL
- def_bool y
-
-config ARCH_DISCONTIGMEM_ENABLE
- def_bool y
- help
- Say Y to support efficient handling of discontiguous physical memory,
- for architectures which are either NUMA (Non-Uniform Memory Access)
- or have huge holes in the physical address space for other reasons.
- See <file:Documentation/vm/numa> for more.
-
-config ARCH_FLATMEM_ENABLE
- def_bool y
-
-config ARCH_SPARSEMEM_ENABLE
- def_bool y
- depends on ARCH_DISCONTIGMEM_ENABLE
-
-config ARCH_DISCONTIGMEM_DEFAULT
- def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB)
- depends on ARCH_DISCONTIGMEM_ENABLE
-
-config NUMA
- bool "NUMA support"
- depends on !IA64_HP_SIM && !FLATMEM
- default y if IA64_SGI_SN2
- help
- Say Y to compile the kernel to support NUMA (Non-Uniform Memory
- Access). This option is for configuring high-end multiprocessor
- server systems. If in doubt, say N.
-
-config NODES_SHIFT
- int "Max num nodes shift(3-10)"
- range 3 10
- default "10"
- depends on NEED_MULTIPLE_NODES
- help
- This option specifies the maximum number of nodes in your SSI system.
- MAX_NUMNODES will be 2^(This value).
- If in doubt, use the default.
-
-# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
-# VIRTUAL_MEM_MAP has been retained for historical reasons.
-config VIRTUAL_MEM_MAP
- bool "Virtual mem map"
- depends on !SPARSEMEM
- default y if !IA64_HP_SIM
- help
- Say Y to compile the kernel with support for a virtual mem map.
- This code also only takes effect if a memory hole of greater than
- 1 Gb is found during boot. You must turn this option on if you
- require the DISCONTIGMEM option for your machine. If you are
- unsure, say Y.
-
-config HOLES_IN_ZONE
- bool
- default y if VIRTUAL_MEM_MAP
-
-config HAVE_ARCH_EARLY_PFN_TO_NID
- def_bool y
- depends on NEED_MULTIPLE_NODES
-
-config HAVE_ARCH_NODEDATA_EXTENSION
- def_bool y
- depends on NUMA
-
-config IA32_SUPPORT
- bool "Support for Linux/x86 binaries"
- help
- IA-64 processors can execute IA-32 (X86) instructions. By
- saying Y here, the kernel will include IA-32 system call
- emulation support which makes it possible to transparently
- run IA-32 Linux binaries on an IA-64 Linux system.
- If in doubt, say Y.
-
-config COMPAT
- bool
- depends on IA32_SUPPORT
- default y
-
-config IA64_MCA_RECOVERY
- tristate "MCA recovery from errors other than TLB."
-
-config PERFMON
- bool "Performance monitor support"
- help
- Selects whether support for the IA-64 performance monitor hardware
- is included in the kernel. This makes some kernel data-structures a
- little bigger and slows down execution a bit, but it is generally
- a good idea to turn this on. If you're unsure, say Y.
-
-config IA64_PALINFO
- tristate "/proc/pal support"
- help
- If you say Y here, you are able to get PAL (Processor Abstraction
- Layer) information in /proc/pal. This contains useful information
- about the processors in your systems, such as cache and TLB sizes
- and the PAL firmware version in use.
-
- To use this option, you have to ensure that the "/proc file system
- support" (CONFIG_PROC_FS) is enabled, too.
-
-config SGI_SN
- def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
-
-source "drivers/sn/Kconfig"
-
-source "drivers/firmware/Kconfig"
-
-source "fs/Kconfig.binfmt"
-
-endmenu
-
-menu "Power management and ACPI"
-
-source "kernel/power/Kconfig"
-
-source "drivers/acpi/Kconfig"
-
-if PM
-
-source "arch/ia64/kernel/cpufreq/Kconfig"
-
-endif
-
-endmenu
-
-if !IA64_HP_SIM
-
-menu "Bus options (PCI, PCMCIA)"
-
-config PCI
- bool "PCI support"
- help
- Real IA-64 machines all have PCI/PCI-X/PCI Express busses. Say Y
- here unless you are using a simulator without PCI support.
-
-config PCI_DOMAINS
- bool
- default PCI
-
-config XEN_PCIDEV_FRONTEND
- bool "Xen PCI Frontend"
- depends on PCI && XEN
- default y
- help
- The PCI device frontend driver allows the kernel to import arbitrary
- PCI devices from a PCI backend to support PCI driver domains.
-
-config XEN_PCIDEV_FE_DEBUG
- bool "Xen PCI Frontend Debugging"
- depends on XEN_PCIDEV_FRONTEND
- default n
- help
- Enables some debug statements within the PCI Frontend.
-
-source "drivers/pci/pcie/Kconfig"
-
-source "drivers/pci/Kconfig"
-
-source "drivers/pci/hotplug/Kconfig"
-
-source "drivers/pcmcia/Kconfig"
-
-endmenu
-
-endif
-
-source "net/Kconfig"
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "lib/Kconfig"
-
-#
-# Use the generic interrupt handling code in kernel/irq/:
-#
-config GENERIC_HARDIRQS
- bool
- default y
-
-config GENERIC_IRQ_PROBE
- bool
- default y
-
-config GENERIC_PENDING_IRQ
- bool
- depends on GENERIC_HARDIRQS && SMP
- default y
-
-config IRQ_PER_CPU
- bool
- default y
-
-source "arch/ia64/hp/sim/Kconfig"
-
-menu "Instrumentation Support"
- depends on EXPERIMENTAL
-
-source "arch/ia64/oprofile/Kconfig"
-
-config KPROBES
- bool "Kprobes (EXPERIMENTAL)"
- depends on EXPERIMENTAL && MODULES
- help
- Kprobes allows you to trap at almost any kernel address and
- execute a callback function. register_kprobe() establishes
- a probepoint and specifies the callback. Kprobes is useful
- for kernel debugging, non-intrusive instrumentation and testing.
- If in doubt, say "N".
-endmenu
-
-source "arch/ia64/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-#
-# override default values of drivers/xen/Kconfig
-#
-if XEN
-config XEN_SMPBOOT
- default n
-endif
-
-source "drivers/xen/Kconfig"
diff --git a/linux-2.6-xen-sparse/arch/ia64/Makefile b/linux-2.6-xen-sparse/arch/ia64/Makefile
deleted file mode 100644
index 9c7c05626b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/Makefile
+++ /dev/null
@@ -1,106 +0,0 @@
-#
-# ia64/Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies.
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License. See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
-#
-
-NM := $(CROSS_COMPILE)nm -B
-READELF := $(CROSS_COMPILE)readelf
-
-export AWK
-
-CHECKFLAGS += -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
-
-OBJCOPYFLAGS := --strip-all
-LDFLAGS_vmlinux := -static
-LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds
-AFLAGS_KERNEL := -mconstant-gp
-EXTRA :=
-
-cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
- -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
-CFLAGS_KERNEL := -mconstant-gp
-
-GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
-CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
-
-ifeq ($(GAS_STATUS),buggy)
-$(error Sorry, you need a newer version of the assember, one that is built from \
- a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \
- static binary of such an assembler at: \
- \
- ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
-endif
-
-ifeq ($(call cc-version),0304)
- cflags-$(CONFIG_ITANIUM) += -mtune=merced
- cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley
-endif
-
-CFLAGS += $(cflags-y)
-
-cppflags-$(CONFIG_XEN) += \
- -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
-
-CPPFLAGS += $(cppflags-y)
-
-head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
-
-libs-y += arch/ia64/lib/
-core-y += arch/ia64/kernel/ arch/ia64/mm/
-core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/
-core-$(CONFIG_IA64_DIG) += arch/ia64/dig/
-core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
-core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
-core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
-core-$(CONFIG_IA64_XEN) += arch/ia64/dig/
-core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
-core-$(CONFIG_XEN) += arch/ia64/xen/
-
-drivers-$(CONFIG_PCI) += arch/ia64/pci/
-drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
-drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
-drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
-drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
-drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
-
-boot := arch/ia64/hp/sim/boot
-
-PHONY += boot compressed check
-
-all: compressed unwcheck
-
-compressed: vmlinux.gz
-
-vmlinuz: vmlinux.gz
-
-vmlinux.gz: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) $@
-
-unwcheck: vmlinux
- -$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $<
-
-archclean:
- $(Q)$(MAKE) $(clean)=$(boot)
-
-CLEAN_FILES += vmlinux.gz bootloader
-
-boot: lib/lib.a vmlinux
- $(Q)$(MAKE) $(build)=$(boot) $@
-
-install:
- -yes | sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)"
-
-define archhelp
- echo '* compressed - Build compressed kernel image'
- echo ' install - Install compressed kernel image'
- echo ' boot - Build vmlinux and bootloader for Ski simulator'
- echo '* unwcheck - Check vmlinux for invalid unwind info'
-endef
diff --git a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c b/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c
deleted file mode 100644
index c0f6eac819..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c
+++ /dev/null
@@ -1,2160 +0,0 @@
-/*
-** IA64 System Bus Adapter (SBA) I/O MMU manager
-**
-** (c) Copyright 2002-2005 Alex Williamson
-** (c) Copyright 2002-2003 Grant Grundler
-** (c) Copyright 2002-2005 Hewlett-Packard Company
-**
-** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
-** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
-**
-** This program is free software; you can redistribute it and/or modify
-** it under the terms of the GNU General Public License as published by
-** the Free Software Foundation; either version 2 of the License, or
-** (at your option) any later version.
-**
-**
-** This module initializes the IOC (I/O Controller) found on HP
-** McKinley machines and their successors.
-**
-*/
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/acpi.h>
-#include <linux/efi.h>
-#include <linux/nodemask.h>
-#include <linux/bitops.h> /* hweight64() */
-
-#include <asm/delay.h> /* ia64_get_itc() */
-#include <asm/io.h>
-#include <asm/page.h> /* PAGE_OFFSET */
-#include <asm/dma.h>
-#include <asm/system.h> /* wmb() */
-
-#include <asm/acpi-ext.h>
-
-#define PFX "IOC: "
-
-/*
-** Enabling timing search of the pdir resource map. Output in /proc.
-** Disabled by default to optimize performance.
-*/
-#undef PDIR_SEARCH_TIMING
-
-/*
-** This option allows cards capable of 64bit DMA to bypass the IOMMU. If
-** not defined, all DMA will be 32bit and go through the TLB.
-** There's potentially a conflict in the bio merge code with us
-** advertising an iommu, but then bypassing it. Since I/O MMU bypassing
-** appears to give more performance than bio-level virtual merging, we'll
-** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to
-** completely restrict DMA to the IOMMU.
-*/
-#define ALLOW_IOV_BYPASS
-
-/*
-** This option specifically allows/disallows bypassing scatterlists with
-** multiple entries. Coalescing these entries can allow better DMA streaming
-** and in some cases shows better performance than entirely bypassing the
-** IOMMU. Performance increase on the order of 1-2% sequential output/input
-** using bonnie++ on a RAID0 MD device (sym2 & mpt).
-*/
-#undef ALLOW_IOV_BYPASS_SG
-
-/*
-** If a device prefetches beyond the end of a valid pdir entry, it will cause
-** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should
-** disconnect on 4k boundaries and prevent such issues. If the device is
-** particularly aggressive, this option will keep the entire pdir valid such
-** that prefetching will hit a valid address. This could severely impact
-** error containment, and is therefore off by default. The page that is
-** used for spill-over is poisoned, so that should help debugging somewhat.
-*/
-#undef FULL_VALID_PDIR
-
-#define ENABLE_MARK_CLEAN
-
-/*
-** The number of debug flags is a clue - this code is fragile. NOTE: since
-** tightening the use of res_lock the resource bitmap and actual pdir are no
-** longer guaranteed to stay in sync. The sanity checking code isn't going to
-** like that.
-*/
-#undef DEBUG_SBA_INIT
-#undef DEBUG_SBA_RUN
-#undef DEBUG_SBA_RUN_SG
-#undef DEBUG_SBA_RESOURCE
-#undef ASSERT_PDIR_SANITY
-#undef DEBUG_LARGE_SG_ENTRIES
-#undef DEBUG_BYPASS
-
-#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
-#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
-#endif
-
-#define SBA_INLINE __inline__
-/* #define SBA_INLINE */
-
-#ifdef DEBUG_SBA_INIT
-#define DBG_INIT(x...) printk(x)
-#else
-#define DBG_INIT(x...)
-#endif
-
-#ifdef DEBUG_SBA_RUN
-#define DBG_RUN(x...) printk(x)
-#else
-#define DBG_RUN(x...)
-#endif
-
-#ifdef DEBUG_SBA_RUN_SG
-#define DBG_RUN_SG(x...) printk(x)
-#else
-#define DBG_RUN_SG(x...)
-#endif
-
-
-#ifdef DEBUG_SBA_RESOURCE
-#define DBG_RES(x...) printk(x)
-#else
-#define DBG_RES(x...)
-#endif
-
-#ifdef DEBUG_BYPASS
-#define DBG_BYPASS(x...) printk(x)
-#else
-#define DBG_BYPASS(x...)
-#endif
-
-#ifdef ASSERT_PDIR_SANITY
-#define ASSERT(expr) \
- if(!(expr)) { \
- printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
- panic(#expr); \
- }
-#else
-#define ASSERT(expr)
-#endif
-
-/*
-** The number of pdir entries to "free" before issuing
-** a read to PCOM register to flush out PCOM writes.
-** Interacts with allocation granularity (ie 4 or 8 entries
-** allocated and free'd/purged at a time might make this
-** less interesting).
-*/
-#define DELAYED_RESOURCE_CNT 64
-
-#define PCI_DEVICE_ID_HP_SX2000_IOC 0x12ec
-
-#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
-#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
-#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
-#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
-#define SX2000_IOC_ID ((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)
-
-#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */
-
-#define IOC_FUNC_ID 0x000
-#define IOC_FCLASS 0x008 /* function class, bist, header, rev... */
-#define IOC_IBASE 0x300 /* IO TLB */
-#define IOC_IMASK 0x308
-#define IOC_PCOM 0x310
-#define IOC_TCNFG 0x318
-#define IOC_PDIR_BASE 0x320
-
-#define IOC_ROPE0_CFG 0x500
-#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */
-
-
-/* AGP GART driver looks for this */
-#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL
-
-/*
-** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
-**
-** Some IOCs (sx1000) can run at the above pages sizes, but are
-** really only supported using the IOC at a 4k page size.
-**
-** iovp_size could only be greater than PAGE_SIZE if we are
-** confident the drivers really only touch the next physical
-** page iff that driver instance owns it.
-*/
-static unsigned long iovp_size;
-static unsigned long iovp_shift;
-static unsigned long iovp_mask;
-
-struct ioc { /* per-IOC state; instances are chained through ->next */
- void __iomem *ioc_hpa; /* I/O MMU base address */
- char *res_map; /* resource map, bit == pdir entry */
- u64 *pdir_base; /* physical base address */
- unsigned long ibase; /* pdir IOV Space base */
- unsigned long imask; /* pdir IOV Space mask */
-
- unsigned long *res_hint; /* next avail IOVP - circular search */
- unsigned long dma_mask; /* NOTE(review): undocumented here; presumably the DMA address mask for this IOC - confirm */
- spinlock_t res_lock; /* protects the resource bitmap, but must be held when */
- /* clearing pdir to prevent races with allocations. */
- unsigned int res_bitshift; /* from the RIGHT! */
- unsigned int res_size; /* size of resource map in bytes */
-#ifdef CONFIG_NUMA
- unsigned int node; /* node where this IOC lives */
-#endif
-#if DELAYED_RESOURCE_CNT > 0
- spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */
- /* than res_lock for bigger systems. */
- int saved_cnt; /* presumably the number of saved[] slots in use - confirm */
- struct sba_dma_pair {
- dma_addr_t iova;
- size_t size;
- } saved[DELAYED_RESOURCE_CNT]; /* (iova, size) pairs; presumably ranges whose release is batched - confirm */
-#endif
-
-#ifdef PDIR_SEARCH_TIMING
-#define SBA_SEARCH_SAMPLE 0x100
- unsigned long avg_search[SBA_SEARCH_SAMPLE]; /* samples kept only when PDIR_SEARCH_TIMING is defined */
- unsigned long avg_idx; /* current index into avg_search */
-#endif
-
- /* Stuff we don't need in performance path */
- struct ioc *next; /* list of IOC's in system */
- acpi_handle handle; /* for multiple IOC's */
- const char *name;
- unsigned int func_id; /* presumably the value read from IOC_FUNC_ID - confirm */
- unsigned int rev; /* HW revision of chip */
- u32 iov_size; /* presumably the size of the IOV space - confirm units */
- unsigned int pdir_size; /* in bytes, determined by IOV Space size */
- struct pci_dev *sac_only_dev; /* NOTE(review): purpose not visible in this part of the file */
-};
-
-static struct ioc *ioc_list;
-static int reserve_sba_gart = 1;
-
-static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
-static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);
-
-#define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset)
-
-#ifdef FULL_VALID_PDIR
-static u64 prefetch_spill_page;
-#endif
-
-#ifdef CONFIG_PCI
-# define GET_IOC(dev) (((dev)->bus == &pci_bus_type) \
- ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
-#else
-# define GET_IOC(dev) NULL
-#endif
-
-/*
-** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
-** (or rather not merge) DMA's into manageable chunks.
-** On parisc, this is more of the software/tuning constraint
-** rather than the HW. I/O MMU allocation algorithms can be
-** faster with smaller sizes (to some degree).
-*/
-#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size)
-
-#define ROUNDUP(x,y) (((x) + ((y)-1)) & ~((y)-1)) /* round x up to a multiple of y; the mask form requires y to be a power of two; both args parenthesized */
-
-/************************************
-** SBA register read and write support
-**
-** BE WARNED: register writes are posted.
-** (ie follow writes which must reach HW with a read)
-**
-*/
-#define READ_REG(addr) __raw_readq(addr)
-#define WRITE_REG(val, addr) __raw_writeq(val, addr)
-
-#ifdef DEBUG_SBA_INIT
-
-/**
- * sba_dump_tlb - debugging only - print IOMMU operating parameters
- * @hpa: base address of the IOMMU
- *
- * Print @hpa and the raw IBASE, IMASK, TCNFG and PDIR_BASE register values via DBG_INIT().
- */
-static void
-sba_dump_tlb(char *hpa) /* only compiled when DEBUG_SBA_INIT is defined */
-{
- DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
- DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE)); /* READ_REG is __raw_readq: one 64-bit read per register */
- DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK));
- DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG));
- DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
- DBG_INIT("\n");
-}
-#endif
-
-
-#ifdef ASSERT_PDIR_SANITY
-
-/**
- * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @msg: text to print on the output line (also printed once more after the dump).
- * @pide: pdir index.
- *
- * Print the res_map word covering @pide and the BITS_PER_LONG pdir entries of its group, marking @pide with an arrow.
- */
-static void
-sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
-{
- /* start printing from lowest pde in rval */
- u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)];
- unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)]; /* byte offset pide/8 rounded down to a whole long */
- uint rcnt;
-
- printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
- msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);
-
- rcnt = 0;
- while (rcnt < BITS_PER_LONG) { /* one output line per pdir entry in the group */
- printk(KERN_DEBUG "%s %2d %p %016Lx\n",
- (rcnt == (pide & (BITS_PER_LONG - 1)))
- ? " -->" : " ",
- rcnt, ptr, (unsigned long long) *ptr );
- rcnt++;
- ptr++;
- }
- printk(KERN_DEBUG "%s", msg);
-}
-
-
-/**
- * sba_check_pdir - debugging only - consistency checker
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @msg: text to print on the output line.
- *
- * Compare every res_map bit with bit 63 of the matching pdir entry; return 1 after dumping the first mismatch, else 0.
- */
-static int
-sba_check_pdir(struct ioc *ioc, char *msg)
-{
- u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
- u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */
- u64 *pptr = ioc->pdir_base; /* pdir ptr */
- uint pide = 0;
-
- while (rptr < rptr_end) {
- u64 rval;
- int rcnt; /* number of bits we might check */
-
- rval = *rptr;
- rcnt = 64;
-
- while (rcnt) {
- /* Take bit 63 of the pdir entry (presumably its valid bit - confirm) */
- u32 pde = ((u32)((*pptr >> (63)) & 0x1));
- if ((rval & 0x1) ^ pde)
- {
- /*
- ** BUMMER! -- res_map != pdir --
- ** Dump rval and matching pdir entries
- */
- sba_dump_pdir_entry(ioc, msg, pide);
- return(1);
- }
- rcnt--;
- rval >>= 1; /* try the next bit */
- pptr++;
- pide++;
- }
- rptr++; /* look at next word of res_map */
- }
- /* It'd be nice if we always got here :^) */
- return 0;
-}
-
-
-/**
- * sba_dump_sg - debugging only - print Scatter-Gather list
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @startsg: head of the SG list
- * @nents: number of entries in SG list
- *
- * print the SG list so we can verify it's correct by hand.
- */
-static void
-sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
-{
-	/* The number printed is the count of entries still to come. */
-	for (; nents > 0; startsg++) {
-		nents--;
-		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
-			startsg->dma_address, startsg->dma_length,
-			sba_sg_address(startsg));
-	}
-}
-
-static void
-sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
-{
-	struct scatterlist *cur;
-	int left;
-
-	/* Dump the whole list each time an entry with a null address is seen. */
-	for (cur = startsg, left = nents; left > 0; left--, cur++) {
-		if (sba_sg_address(cur) == 0x0UL)
-			sba_dump_sg(NULL, startsg, nents);
-	}
-}
-
-#endif /* ASSERT_PDIR_SANITY */
-
-
-
-
-/**************************************************************
-*
-* I/O Pdir Resource Management
-*
-* Bits set in the resource map are in use.
-* Each bit can represent a number of pages.
-* LSbs represent lower addresses (IOVA's).
-*
-***************************************************************/
-#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */
-
-/* Convert from IOVP to IOVA and vice versa. */
-#define SBA_IOVA(ioc,iovp,offset) (((ioc)->ibase) | (iovp) | (offset))
-#define SBA_IOVP(ioc,iova) ((iova) & ~((ioc)->ibase))
-
-/* Size in bytes of one IO PDIR entry. */
-#define PDIR_ENTRY_SIZE sizeof(u64)
-
-/* IO PDIR index of an IOVP. */
-#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift)
-
-/* Mask with the low n bits set. */
-#define RESMAP_MASK(n) (~(~0UL << (n)))
-/* Low bits of a byte index that select a byte within an unsigned long. */
-#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1)
-
-
-/**
- * For most cases the normal get_order is sufficient, however it limits us
- * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
- * It only incurs about 1 clock cycle to use this one with the static variable
- * and makes the code more intuitive.
- */
-static SBA_INLINE int
-get_iovp_order (unsigned long size)
-{
-	/* Value whose floating-point exponent is extracted below. */
-	long double widened = size - 1;
-	long exponent = ia64_getf_exp(widened);
-	/* Subtract the 0xffff offset and express the result in IOV pages. */
-	long order = exponent - iovp_shift - 0xffff + 1;
-
-	/* Negative orders are clamped to zero. */
-	return (order < 0) ? 0 : order;
-}
-
-/**
- * sba_search_bitmap - find free space in IO PDIR resource bitmap
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @bits_wanted: number of entries we need.
- * @use_hint: use res_hint to indicate where to start looking
- *
- * Find consecutive free bits in resource bitmap.
- * Each bit represents one entry in the IO Pdir.
- * Cool perf optimization: search for log2(size) bits at a time.
- */
-static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
-{
-	unsigned long *res_ptr;
-	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
-	/* pide stays ~0UL unless a range is found; callers test for that. */
-	unsigned long flags, pide = ~0UL;
-
-	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
-
-	spin_lock_irqsave(&ioc->res_lock, flags);
-
-	/* Allow caller to force a search through the entire resource space */
-	if (likely(use_hint)) {
-		res_ptr = ioc->res_hint;
-	} else {
-		res_ptr = (ulong *)ioc->res_map;
-		ioc->res_bitshift = 0;
-	}
-
-	/*
-	 * Sanity-check the starting point only once res_ptr has been
-	 * assigned; checking it earlier would read an uninitialized pointer.
-	 */
-	ASSERT(res_ptr < res_end);
-
-	/*
-	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
-	 * if a TLB entry is purged while in use.  sba_mark_invalid()
-	 * purges IOTLB entries in power-of-two sizes, so we also
-	 * allocate IOVA space in power-of-two sizes.
-	 */
-	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);
-
-	if (likely(bits_wanted == 1)) {
-		/* Single entry: take the first zero bit in the first non-full word. */
-		unsigned int bitshiftcnt;
-		for(; res_ptr < res_end ; res_ptr++) {
-			if (likely(*res_ptr != ~0UL)) {
-				bitshiftcnt = ffz(*res_ptr);
-				*res_ptr |= (1UL << bitshiftcnt);
-				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-				pide <<= 3;	/* convert to bit address */
-				pide += bitshiftcnt;
-				ioc->res_bitshift = bitshiftcnt + bits_wanted;
-				goto found_it;
-			}
-		}
-		goto not_found;
-
-	}
-
-	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
-		/*
-		** Search the resource bit map on well-aligned values.
-		** "o" is the alignment.
-		** We need the alignment to invalidate I/O TLB using
-		** SBA HW features in the unmap path.
-		*/
-		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
-		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
-		unsigned long mask, base_mask;
-
-		base_mask = RESMAP_MASK(bits_wanted);
-		mask = base_mask << bitshiftcnt;
-
-		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
-		for(; res_ptr < res_end ; res_ptr++)
-		{
-			DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
-			ASSERT(0 != mask);
-			/* Slide the mask through this word in steps of the alignment. */
-			for (; mask ; mask <<= o, bitshiftcnt += o) {
-				if(0 == ((*res_ptr) & mask)) {
-					*res_ptr |= mask;     /* mark resources busy! */
-					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-					pide <<= 3;	/* convert to bit address */
-					pide += bitshiftcnt;
-					ioc->res_bitshift = bitshiftcnt + bits_wanted;
-					goto found_it;
-				}
-			}
-
-			/* Next word: start again from bit 0. */
-			bitshiftcnt = 0;
-			mask = base_mask;
-
-		}
-
-	} else {
-		/* Large request: needs qwords whole words plus "bits" extra bits. */
-		int qwords, bits, i;
-		unsigned long *end;
-
-		qwords = bits_wanted >> 6; /* /64 */
-		bits = bits_wanted - (qwords * BITS_PER_LONG);
-
-		end = res_end - qwords;
-
-		for (; res_ptr < end; res_ptr++) {
-			for (i = 0 ; i < qwords ; i++) {
-				if (res_ptr[i] != 0)
-					goto next_ptr;
-			}
-			/* The trailing partial word must have its low "bits" bits free. */
-			if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
-				continue;
-
-			/* Found it, mark it */
-			for (i = 0 ; i < qwords ; i++)
-				res_ptr[i] = ~0UL;
-			res_ptr[i] |= RESMAP_MASK(bits);
-
-			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-			pide <<= 3;	/* convert to bit address */
-			res_ptr += qwords;
-			ioc->res_bitshift = bits;
-			goto found_it;
-next_ptr:
-			;
-		}
-	}
-
-not_found:
-	/* Nothing free from the starting point on: reset the hint to the map start. */
-	prefetch(ioc->res_map);
-	ioc->res_hint = (unsigned long *) ioc->res_map;
-	ioc->res_bitshift = 0;
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-	return (pide);
-
-found_it:
-	/* Remember where this search ended so the next one can resume there. */
-	ioc->res_hint = res_ptr;
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-	return (pide);
-}
-
-
-/**
- * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @size: number of bytes to create a mapping for
- *
- * Given a size, find consecutive unmarked and then mark those bits in the
- * resource bit map.
- */
-static int
-sba_alloc_range(struct ioc *ioc, size_t size)
-{
- /* Number of IOV pages requested; size is asserted below to be a whole number of them. */
- unsigned int pages_needed = size >> iovp_shift;
-#ifdef PDIR_SEARCH_TIMING
- unsigned long itc_start;
-#endif
- /* NOTE(review): pide is unsigned long but the function returns int - confirm the range always fits. */
- unsigned long pide;
-
- ASSERT(pages_needed);
- ASSERT(0 == (size & ~iovp_mask));
-
-#ifdef PDIR_SEARCH_TIMING
- itc_start = ia64_get_itc();
-#endif
- /*
- ** "seek and ye shall find"...praying never hurts either...
- */
- /* First attempt starts from the hint; an index at or beyond res_size*8 bits means failure. */
- pide = sba_search_bitmap(ioc, pages_needed, 1);
- if (unlikely(pide >= (ioc->res_size << 3))) {
- /* Second attempt searches the whole map from its start. */
- pide = sba_search_bitmap(ioc, pages_needed, 0);
- if (unlikely(pide >= (ioc->res_size << 3))) {
-#if DELAYED_RESOURCE_CNT > 0
- unsigned long flags;
-
- /*
- ** With delayed resource freeing, we can give this one more shot. We're
- ** getting close to being in trouble here, so do what we can to make this
- ** one count.
- */
- spin_lock_irqsave(&ioc->saved_lock, flags);
- if (ioc->saved_cnt > 0) {
- struct sba_dma_pair *d;
- int cnt = ioc->saved_cnt;
-
- /* Walk the saved array backwards from its last used slot. */
- d = &(ioc->saved[ioc->saved_cnt - 1]);
-
- spin_lock(&ioc->res_lock);
- while (cnt--) {
- sba_mark_invalid(ioc, d->iova, d->size);
- sba_free_range(ioc, d->iova, d->size);
- d--;
- }
- ioc->saved_cnt = 0;
- READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
- spin_unlock(&ioc->res_lock);
- }
- spin_unlock_irqrestore(&ioc->saved_lock, flags);
-
- /* Final attempt after releasing the delayed entries; failure is fatal. */
- pide = sba_search_bitmap(ioc, pages_needed, 0);
- if (unlikely(pide >= (ioc->res_size << 3)))
- panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
- ioc->ioc_hpa);
-#else
- panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
- ioc->ioc_hpa);
-#endif
- }
- }
-
-#ifdef PDIR_SEARCH_TIMING
- /* Record search time per page in a circular sample buffer. */
- ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
- ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
-#endif
-
- prefetchw(&(ioc->pdir_base[pide]));
-
-#ifdef ASSERT_PDIR_SANITY
- /* verify the first enable bit is clear */
- if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
- sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
- }
-#endif
-
- DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
- __FUNCTION__, size, pages_needed, pide,
- (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
- ioc->res_bitshift );
-
- return (pide);
-}
-
-
-/**
- * sba_free_range - unmark bits in IO PDIR resource bitmap
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @iova: IO virtual address which was previously allocated.
- * @size: number of bytes covered by the mapping being released
- *
- * clear bits in the ioc's resource map
- */
-static SBA_INLINE void
-sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
-{
-	unsigned long iovp = SBA_IOVP(ioc, iova);
-	unsigned int pide = PDIR_INDEX(iovp);
-	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
-	/* Resource map word that holds the first bit to clear. */
-	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
-	int bits_not_wanted = size >> iovp_shift;
-	unsigned long m;
-
-	/* Round up to power-of-two size: see AR2305 note above */
-	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
-	for (; bits_not_wanted > 0 ; res_ptr++) {
-
-		/*
-		 * A full word (or more) is cleared in one store.  Using ">="
-		 * keeps an exactly word-sized range out of the branch below,
-		 * where RESMAP_MASK(BITS_PER_LONG) would shift by the full
-		 * width of the type.
-		 */
-		if (unlikely(bits_not_wanted >= BITS_PER_LONG)) {
-
-			/* these mappings start 64bit aligned */
-			*res_ptr = 0UL;
-			bits_not_wanted -= BITS_PER_LONG;
-			pide += BITS_PER_LONG;
-
-		} else {
-
-			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
-			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
-
-			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size,
-				bits_not_wanted, m, pide, res_ptr, *res_ptr);
-
-			ASSERT(m != 0);
-			ASSERT(bits_not_wanted);
-			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
-			*res_ptr &= ~m;
-
-			/*
-			 * Clear the count only after the debug output and the
-			 * assertions above have seen its real value.
-			 */
-			bits_not_wanted = 0;
-		}
-	}
-}
-
-
-/**************************************************************
-*
-* "Dynamic DMA Mapping" support (aka "Coherent I/O")
-*
-***************************************************************/
-
-/**
- * sba_io_pdir_entry - fill in one IO PDIR entry
- * @pdir_ptr: pointer to IO PDIR entry
- * @vba: Virtual CPU address of buffer to map
- *
- * SBA Mapping Routine
- *
- * Given a virtual address (vba, arg1) sba_io_pdir_entry()
- * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
- * Each IO Pdir entry consists of 8 bytes as shown below
- * (LSB == bit 0):
- *
- * 63 40 11 7 0
- * +-+---------------------+----------------------------------+----+--------+
- * |V| U | PPN[39:12] | U | FF |
- * +-+---------------------+----------------------------------+----+--------+
- *
- * V == Valid Bit
- * U == Unused
- * PPN == Physical Page Number
- *
- * The physical address fields are filled with the results of virt_to_phys()
- * on the vba.
- */
-
-#if 1
-/*
- * Macro form (the variant that is compiled in): stores the bus address of
- * vba with its low 12 bits cleared and bit 63 set.
- * NOTE(review): the disabled function form below also sets the low byte
- * (0xFF); this macro does not - confirm which value is intended.
- */
-#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = \
- ((virt_to_bus((void *)vba) & ~0xFFFULL) | 0x8000000000000000ULL)
-#else
-/* Function form, currently compiled out by the "#if 1" above. */
-void SBA_INLINE
-sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
-{
- *pdir_ptr = ((virt_to_bus((void *)vba) & ~0xFFFULL) |
- 0x80000000000000FFULL);
-}
-#endif
-
-#ifdef ENABLE_MARK_CLEAN
-/**
- * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
- * flush them when they get mapped into an executable vm-area.
- */
-static void
-mark_clean (void *addr, size_t size)
-{
-	unsigned long pg_addr, end;
-
-#ifdef CONFIG_XEN
-	/* XXX: Bad things happen starting domUs when this is enabled. */
-	if (is_running_on_xen())
-		return;
-#endif
-
-	end = (unsigned long) addr + size;
-
-	/* Visit only pages that lie entirely inside [addr, addr + size). */
-	for (pg_addr = PAGE_ALIGN((unsigned long) addr);
-	     pg_addr + PAGE_SIZE <= end;
-	     pg_addr += PAGE_SIZE) {
-		struct page *page = virt_to_page((void *)pg_addr);
-
-		set_bit(PG_arch_1, &page->flags);
-	}
-}
-#endif
-
-/**
- * sba_mark_invalid - invalidate one or more IO PDIR entries
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @iova: IO Virtual Address mapped earlier
- * @byte_cnt: number of bytes this mapping covers.
- *
- * Marking the IO PDIR entry(ies) as Invalid and invalidate
- * corresponding IO TLB entry. The PCOM (Purge Command Register)
- * is to purge stale entries in the IO TLB when unmapping entries.
- *
- * The PCOM register supports purging of multiple pages, with a minimum
- * of 1 page and a maximum of 2GB. Hardware requires the address be
- * aligned to the size of the range being purged. The size of the range
- * must be a power of 2. The "Cool perf optimization" in the
- * allocation routine helps keep that true.
- */
-static SBA_INLINE void
-sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
-{
- /* IOVA with the ibase bits masked off, truncated to 32 bits. */
- u32 iovp = (u32) SBA_IOVP(ioc,iova);
-
- /* Index of the first pdir entry covered by this mapping. */
- int off = PDIR_INDEX(iovp);
-
- /* Must be non-zero and rounded up */
- ASSERT(byte_cnt > 0);
- ASSERT(0 == (byte_cnt & ~iovp_mask));
-
-#ifdef ASSERT_PDIR_SANITY
- /* Assert first pdir entry is set */
- /* NOTE(review): this tests the top four bits (>> 60) while the ASSERT in the multi-page path tests bit 63 only - confirm which is intended. */
- if (!(ioc->pdir_base[off] >> 60)) {
- sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
- }
-#endif
-
- /* Single-page case: one pdir entry, purge size is one IOV page. */
- if (byte_cnt <= iovp_size)
- {
- ASSERT(off < ioc->pdir_size);
-
- iovp |= iovp_shift; /* set "size" field for PCOM */
-
-#ifndef FULL_VALID_PDIR
- /*
- ** clear I/O PDIR entry "valid" bit
- ** Do NOT clear the rest - save it for debugging.
- ** We should only clear bits that have previously
- ** been enabled.
- */
- ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
-#else
- /*
- ** If we want to maintain the PDIR as valid, put in
- ** the spill page so devices prefetching won't
- ** cause a hard fail.
- */
- ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
-#endif
- } else {
- /* Multi-page case: the PCOM "size" field is log2 of the byte count. */
- u32 t = get_iovp_order(byte_cnt) + iovp_shift;
-
- iovp |= t;
- ASSERT(t <= 31); /* 2GB! Max value of "size" field */
-
- /* Invalidate (or redirect to the spill page) every entry in the range. */
- do {
- /* verify this pdir entry is enabled */
- ASSERT(ioc->pdir_base[off] >> 63);
-#ifndef FULL_VALID_PDIR
- /* clear I/O Pdir entry "valid" bit first */
- ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
-#else
- ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
-#endif
- off++;
- byte_cnt -= iovp_size;
- } while (byte_cnt > 0);
- }
-
- /* Issue the purge: address plus size field, written to the PCOM register. */
- WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
-}
-
-/**
- * sba_map_single - map one buffer and return IOVA for DMA
- * @dev: instance of PCI owned by the driver that's asking.
- * @addr: driver buffer to map.
- * @size: number of bytes to map in driver buffer.
- * @dir: R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-dma_addr_t
-sba_map_single(struct device *dev, void *addr, size_t size, int dir)
-{
-	struct ioc *ioc;
-	dma_addr_t iovp;
-	dma_addr_t offset;
-	u64 *pdir_start;
-	int pide;
-#ifdef ASSERT_PDIR_SANITY
-	unsigned long flags;
-#endif
-#ifdef ALLOW_IOV_BYPASS
-	unsigned long pci_addr = virt_to_bus(addr);
-
-	ASSERT(to_pci_dev(dev)->dma_mask);
-	/*
-	** Check if the PCI device can DMA to ptr... if so, just return ptr
-	**
-	** The whole comparison sits inside likely() so the branch hint
-	** describes the common (bypass) case.
-	*/
-	if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0) &&
-	    !range_straddles_page_boundary(addr, size)) {
-		/*
-		** Device is capable of DMA'ing to the buffer...
-		** just return the PCI address of ptr
-		*/
-		DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
-			   to_pci_dev(dev)->dma_mask, pci_addr);
-		return pci_addr;
-	}
-#endif
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
-
-	prefetch(ioc->res_hint);
-
-	ASSERT(size > 0);
-	ASSERT(size <= DMA_CHUNK_SIZE);
-
-	/* save offset bits */
-	offset = ((dma_addr_t) (long) addr) & ~iovp_mask;
-
-	/* round up to nearest iovp_size */
-	size = (size + offset + ~iovp_mask) & iovp_mask;
-
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check before sba_map_single()"))
-		panic("Sanity check failed");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-	/* Reserve the pdir entries; pide is the index of the first one. */
-	pide = sba_alloc_range(ioc, size);
-
-	iovp = (dma_addr_t) pide << iovp_shift;
-
-	DBG_RUN("%s() 0x%p -> 0x%lx\n",
-		__FUNCTION__, addr, (long) iovp | offset);
-
-	pdir_start = &(ioc->pdir_base[pide]);
-
-	/* Fill one pdir entry per IOV page of the (rounded-up) buffer. */
-	while (size > 0) {
-		ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
-		sba_io_pdir_entry(pdir_start, (unsigned long) addr);
-
-		DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start);
-
-		addr += iovp_size;
-		size -= iovp_size;
-		pdir_start++;
-	}
-	/* force pdir update */
-	wmb();
-
-	/* form complete address */
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_map_single()");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-	return SBA_IOVA(ioc, iovp, offset);
-}
-
-#ifdef ENABLE_MARK_CLEAN
-static SBA_INLINE void
-sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
-{
-	u32 iovp = (u32) SBA_IOVP(ioc,iova);
-	int idx = PDIR_INDEX(iovp);
-	void *page_va;
-
-	/* A mapping of at most one IOV page needs a single lookup. */
-	if (size <= iovp_size) {
-		page_va = bus_to_virt(ioc->pdir_base[idx] &
-				      ~0xE000000000000FFFULL);
-		mark_clean(page_va, size);
-		return;
-	}
-
-	/* Otherwise recover the address from each pdir entry in turn. */
-	do {
-		page_va = bus_to_virt(ioc->pdir_base[idx] &
-				      ~0xE000000000000FFFULL);
-		mark_clean(page_va, min(size, iovp_size));
-		idx++;
-		size -= iovp_size;
-	} while (size > 0);
-}
-#endif
-
-/**
- * sba_unmap_single - unmap one IOVA and free resources
- * @dev: instance of PCI owned by the driver that's asking.
- * @iova: IOVA of driver buffer previously mapped.
- * @size: number of bytes mapped in driver buffer.
- * @dir: R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
-{
- struct ioc *ioc;
-#if DELAYED_RESOURCE_CNT > 0
- struct sba_dma_pair *d;
-#endif
- unsigned long flags;
- dma_addr_t offset;
-
- ioc = GET_IOC(dev);
- ASSERT(ioc);
-
-#ifdef ALLOW_IOV_BYPASS
- /* An address outside the IOC's IOVA window was never mapped through the pdir. */
- if (likely((iova & ioc->imask) != ioc->ibase)) {
- /*
- ** Address does not fall w/in IOVA, must be bypassing
- */
- DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
-
-#ifdef ENABLE_MARK_CLEAN
- if (dir == DMA_FROM_DEVICE) {
- mark_clean(bus_to_virt(iova), size);
- }
-#endif
- return;
- }
-#endif
- /* Byte offset of the buffer within its first IOV page. */
- offset = iova & ~iovp_mask;
-
- DBG_RUN("%s() iovp 0x%lx/%x\n",
- __FUNCTION__, (long) iova, size);
-
- /* Widen the range to whole IOV pages, as was done when it was mapped. */
- iova ^= offset; /* clear offset bits */
- size += offset;
- size = ROUNDUP(size, iovp_size);
-
-#ifdef ENABLE_MARK_CLEAN
- if (dir == DMA_FROM_DEVICE)
- sba_mark_clean(ioc, iova, size);
-#endif
-
-#if DELAYED_RESOURCE_CNT > 0
- /* Queue the range; the queue is drained once it holds DELAYED_RESOURCE_CNT entries. */
- spin_lock_irqsave(&ioc->saved_lock, flags);
- d = &(ioc->saved[ioc->saved_cnt]);
- d->iova = iova;
- d->size = size;
- if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
- int cnt = ioc->saved_cnt;
- spin_lock(&ioc->res_lock);
- /* d starts at the entry just stored and walks back to the first one. */
- while (cnt--) {
- sba_mark_invalid(ioc, d->iova, d->size);
- sba_free_range(ioc, d->iova, d->size);
- d--;
- }
- ioc->saved_cnt = 0;
- READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
- spin_unlock(&ioc->res_lock);
- }
- spin_unlock_irqrestore(&ioc->saved_lock, flags);
-#else /* DELAYED_RESOURCE_CNT == 0 */
- /* No delayed freeing: invalidate and release the range immediately. */
- spin_lock_irqsave(&ioc->res_lock, flags);
- sba_mark_invalid(ioc, iova, size);
- sba_free_range(ioc, iova, size);
- READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
- spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif /* DELAYED_RESOURCE_CNT == 0 */
-}
-
-
-/**
- * sba_alloc_coherent - allocate/map shared mem for DMA
- * @dev: instance of PCI owned by the driver that's asking.
- * @size: number of bytes mapped in driver buffer.
- * @dma_handle: IOVA of new buffer.
- *
- * See Documentation/DMA-mapping.txt
- */
-void *
-sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
-{
- struct ioc *ioc;
- void *addr;
-
- ioc = GET_IOC(dev);
- ASSERT(ioc);
-
-#ifdef CONFIG_NUMA
- {
- /* Allocate on the IOC's node, or on the current node when ioc->node is MAX_NUMNODES. */
- struct page *page;
- page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
- numa_node_id() : ioc->node, flags,
- get_order(size));
-
- if (unlikely(!page))
- return NULL;
-
- addr = page_address(page);
- }
-#else
- addr = (void *) __get_free_pages(flags, get_order(size));
-#endif
- if (unlikely(!addr))
- return NULL;
-
- /* The buffer is handed out zero-filled. */
- memset(addr, 0, size);
-
-#ifdef ALLOW_IOV_BYPASS
-#ifdef CONFIG_XEN
- /* A non-zero return skips the bypass attempt and goes straight to the IOMMU mapping. */
- if (xen_create_contiguous_region((unsigned long)addr, get_order(size),
- fls64(dev->coherent_dma_mask)))
- goto iommu_map;
-#endif
- *dma_handle = virt_to_bus(addr);
- ASSERT(dev->coherent_dma_mask);
- /*
- ** Check if the PCI device can DMA to ptr... if so, just return ptr
- */
- if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
- DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
- dev->coherent_dma_mask, *dma_handle);
-
- return addr;
- }
-#ifdef CONFIG_XEN
-iommu_map:
-#endif
-#endif
-
- /*
- * If device can't bypass or bypass is disabled, pass the 32bit fake
- * device to map single to get an iova mapping.
- */
- *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
-
- return addr;
-}
-
-
-/**
- * sba_free_coherent - free/unmap shared mem for DMA
- * @dev: instance of PCI owned by the driver that's asking.
- * @size: number of bytes mapped in driver buffer.
- * @vaddr: kernel virtual address of the "consistent" buffer.
- * @dma_handle: IO virtual address of "consistent" buffer.
- *
- * See Documentation/DMA-mapping.txt
- */
-void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
-{
-	/* Page order of the buffer, used by both release calls below. */
-	unsigned int order = get_order(size);
-#if defined(ALLOW_IOV_BYPASS) && defined(CONFIG_XEN)
-	struct ioc *ioc = GET_IOC(dev);
-
-	/*
-	 * A handle outside the IOC's IOVA window was handed out by the
-	 * bypass path, so undo the contiguous region that path created.
-	 * NOTE(review): a buffer that was made contiguous in
-	 * sba_alloc_coherent() but then mapped through the IOMMU does not
-	 * reach xen_destroy_contiguous_region() here - confirm this is
-	 * intended.
-	 */
-	if (likely((dma_handle & ioc->imask) != ioc->ibase))
-		xen_destroy_contiguous_region((unsigned long)vaddr, order);
-#endif
-	sba_unmap_single(dev, dma_handle, size, 0);
-	free_pages((unsigned long) vaddr, order);
-}
-
-
-/*
-** Since 0 is a valid pdir_base index value, can't use that
-** to determine if a value is valid or not. Use a flag to indicate
-** the SG list entry contains a valid pdir index.
-*/
-#define PIDE_FLAG 0x1UL
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
-int dump_run_sg = 0;
-#endif
-
-
-/**
- * sba_fill_pdir - write allocated SG entries into IO PDIR
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @startsg: list of IOVA/size pairs
- * @nents: number of entries in startsg list
- *
- * Take preprocessed SG list and write corresponding entries
- * in the IO PDIR.
- */
-
-static SBA_INLINE int
-sba_fill_pdir(
- struct ioc *ioc,
- struct scatterlist *startsg,
- int nents)
-{
- struct scatterlist *dma_sg = startsg; /* pointer to current DMA */
- int n_mappings = 0;
- u64 *pdirp = NULL;
- unsigned long dma_offset = 0;
-
- /* Start one entry before the list; dma_sg is advanced each time a new stream begins. */
- dma_sg--;
- while (nents-- > 0) {
- /* Take the chunk length left by the coalescing pass and clear the field for reuse as output. */
- int cnt = startsg->dma_length;
- startsg->dma_length = 0;
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
- if (dump_run_sg)
- printk(" %2d : %08lx/%05x %p\n",
- nents, startsg->dma_address, cnt,
- sba_sg_address(startsg));
-#else
- DBG_RUN_SG(" %d : %08lx/%05x %p\n",
- nents, startsg->dma_address, cnt,
- sba_sg_address(startsg));
-#endif
- /*
- ** Look for the start of a new DMA stream
- */
- if (startsg->dma_address & PIDE_FLAG) {
- /* With the flag removed, the value holds the stream's IOVP plus its offset in the first page. */
- u32 pide = startsg->dma_address & ~PIDE_FLAG;
- dma_offset = (unsigned long) pide & ~iovp_mask;
- startsg->dma_address = 0;
- dma_sg++;
- dma_sg->dma_address = pide | ioc->ibase;
- pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
- n_mappings++;
- }
-
- /*
- ** Look for a VCONTIG chunk
- */
- if (cnt) {
- unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
- ASSERT(pdirp);
-
- /* Since multiple Vcontig blocks could make up
- ** one DMA stream, *add* cnt to dma_len.
- */
- dma_sg->dma_length += cnt;
- cnt += dma_offset;
- dma_offset=0; /* only want offset on first chunk */
- cnt = ROUNDUP(cnt, iovp_size);
- /* Write one pdir entry per IOV page of this chunk. */
- do {
- sba_io_pdir_entry(pdirp, vaddr);
- vaddr += iovp_size;
- cnt -= iovp_size;
- pdirp++;
- } while (cnt > 0);
- }
- startsg++;
- }
- /* force pdir update */
- wmb();
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
- dump_run_sg = 0;
-#endif
- /* Number of DMA streams written into the head of the list. */
- return(n_mappings);
-}
-
-
-/*
-** Two address ranges are DMA contiguous *iff* "end of prev" and
-** "start of next" are both on an IOV page boundary.
-**
-** (shift left is a quick trick to mask off upper bits)
-*/
-#define DMA_CONTIG(__X, __Y) \
-	(((((unsigned long) (__X)) | ((unsigned long) (__Y))) << (BITS_PER_LONG - iovp_shift)) == 0UL)
-
-
-/**
- * sba_coalesce_chunks - preprocess the SG list
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @startsg: list of IOVA/size pairs
- * @nents: number of entries in startsg list
- *
- * First pass is to walk the SG list and determine where the breaks are
- * in the DMA stream. Allocates PDIR entries but does not fill them.
- * Returns the number of DMA chunks.
- *
- * Doing the fill separate from the coalescing/allocation keeps the
- * code simpler. Future enhancement could make one pass through
- * the sglist do both.
- */
-static SBA_INLINE int
-sba_coalesce_chunks( struct ioc *ioc,
-	struct scatterlist *startsg,
-	int nents)
-{
-	struct scatterlist *vcontig_sg;    /* VCONTIG chunk head */
-	unsigned long vcontig_len;         /* len of VCONTIG chunk */
-	unsigned long vcontig_end;
-	struct scatterlist *dma_sg;        /* next DMA stream head */
-	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
-	int n_mappings = 0;
-
-	while (nents > 0) {
-		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
-
-		/*
-		** Prepare for first/next DMA stream
-		*/
-		dma_sg = vcontig_sg = startsg;
-		dma_len = vcontig_len = vcontig_end = startsg->length;
-		vcontig_end += vaddr;
-		dma_offset = vaddr & ~iovp_mask;
-
-		/* PARANOID: clear entries */
-		startsg->dma_address = startsg->dma_length = 0;
-
-		/*
-		** This loop terminates one iteration "early" since
-		** it's always looking one "ahead".
-		*/
-		while (--nents > 0) {
-			/* Address of the entry being considered for merging. */
-			unsigned long next_vaddr;
-
-			startsg++;
-
-			/* PARANOID */
-			startsg->dma_address = startsg->dma_length = 0;
-
-			/* catch brokenness in SCSI layer */
-			ASSERT(startsg->length <= DMA_CHUNK_SIZE);
-
-			/*
-			** First make sure current dma stream won't
-			** exceed DMA_CHUNK_SIZE if we coalesce the
-			** next entry.
-			*/
-			if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
-			    > DMA_CHUNK_SIZE)
-				break;
-
-			/*
-			** Then look for virtually contiguous blocks.
-			**
-			** append the next transaction?
-			*/
-			next_vaddr = (unsigned long) sba_sg_address(startsg);
-			if (vcontig_end == next_vaddr)
-			{
-				vcontig_len += startsg->length;
-				vcontig_end += startsg->length;
-				dma_len     += startsg->length;
-				continue;
-			}
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
-			dump_run_sg = (vcontig_len > iovp_size);
-#endif
-
-			/*
-			** Not virtually contiguous.
-			** Terminate prev chunk.
-			** Start a new chunk.
-			**
-			** Once we start a new VCONTIG chunk, dma_offset
-			** can't change. And we need the offset from the first
-			** chunk - not the last one. Ergo Successive chunks
-			** must start on page boundaries and dove tail
-			** with its predecessor.
-			*/
-			vcontig_sg->dma_length = vcontig_len;
-
-			vcontig_sg = startsg;
-			vcontig_len = startsg->length;
-
-			/*
-			** 3) do the entries end/start on page boundaries?
-			**    Don't update vcontig_end until we've checked.
-			*/
-			if (DMA_CONTIG(vcontig_end, next_vaddr))
-			{
-				vcontig_end = vcontig_len + next_vaddr;
-				dma_len += vcontig_len;
-				continue;
-			} else {
-				break;
-			}
-		}
-
-		/*
-		** End of DMA Stream
-		** Terminate last VCONTIG block.
-		** Allocate space for DMA stream.
-		*/
-		vcontig_sg->dma_length = vcontig_len;
-		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
-		ASSERT(dma_len <= DMA_CHUNK_SIZE);
-		/*
-		** Widen the pdir index before shifting, as sba_map_single()
-		** does, so the shift is not carried out in "int".
-		*/
-		dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
-			| ((dma_addr_t) sba_alloc_range(ioc, dma_len) << iovp_shift)
-			| dma_offset);
-		n_mappings++;
-	}
-
-	return n_mappings;
-}
-
-
-/**
- * sba_map_sg - map Scatter/Gather list
- * @dev: instance of PCI owned by the driver that's asking.
- * @sglist: array of buffer/length pairs
- * @nents: number of entries in list
- * @dir: R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
-{
- struct ioc *ioc;
- int coalesced, filled = 0;
-#ifdef ASSERT_PDIR_SANITY
- unsigned long flags;
-#endif
-#ifdef ALLOW_IOV_BYPASS_SG
- struct scatterlist *sg;
-#endif
-
- DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
- ioc = GET_IOC(dev);
- ASSERT(ioc);
-
-#ifdef ALLOW_IOV_BYPASS_SG
- ASSERT(to_pci_dev(dev)->dma_mask);
- /* Bypass: when the device mask covers ioc->dma_mask, every entry gets its bus address directly. */
- if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
- for (sg = sglist ; filled < nents ; filled++, sg++){
- sg->dma_length = sg->length;
- sg->dma_address = virt_to_bus(sba_sg_address(sg));
- }
- return filled;
- }
-#endif
- /* Fast path single entry scatterlists. */
- if (nents == 1) {
- sglist->dma_length = sglist->length;
- sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
- return 1;
- }
-
-#ifdef ASSERT_PDIR_SANITY
- spin_lock_irqsave(&ioc->res_lock, flags);
- if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
- {
- sba_dump_sg(ioc, sglist, nents);
- panic("Check before sba_map_sg()");
- }
- spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
- prefetch(ioc->res_hint);
-
- /*
- ** First coalesce the chunks and allocate I/O pdir space
- **
- ** If this is one DMA stream, we can properly map using the
- ** correct virtual address associated with each DMA page.
- ** w/o this association, we wouldn't have coherent DMA!
- ** Access to the virtual address is what forces a two pass algorithm.
- */
- coalesced = sba_coalesce_chunks(ioc, sglist, nents);
-
- /*
- ** Program the I/O Pdir
- **
- ** map the virtual addresses to the I/O Pdir
- ** o dma_address will contain the pdir index
- ** o dma_len will contain the number of bytes to map
- ** o address contains the virtual address.
- */
- filled = sba_fill_pdir(ioc, sglist, nents);
-
-#ifdef ASSERT_PDIR_SANITY
- spin_lock_irqsave(&ioc->res_lock, flags);
- if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
- {
- sba_dump_sg(ioc, sglist, nents);
- panic("Check after sba_map_sg()\n");
- }
- spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
- /* Both passes must agree on the number of DMA streams. */
- ASSERT(coalesced == filled);
- DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);
-
- return filled;
-}
-
-
-/**
- * sba_unmap_sg - unmap Scatter/Gather list
- * @dev: instance of PCI owned by the driver that's asking.
- * @sglist: array of buffer/length pairs
- * @nents: number of entries in list
- * @dir: R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
-{
-#ifdef ASSERT_PDIR_SANITY
-	struct ioc *ioc;
-	unsigned long flags;
-#endif
-
-	DBG_RUN_SG("%s() START %d entries, %p,%x\n",
-		__FUNCTION__, nents, sba_sg_address(sglist), sglist->length);
-
-#ifdef ASSERT_PDIR_SANITY
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
-
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check before sba_unmap_sg()");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-	/*
-	 * Release mappings from the head of the list, stopping at the
-	 * first entry whose dma_length is zero or when nents runs out.
-	 */
-	for (; nents && sglist->dma_length; sglist++, nents--)
-		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
-
-	DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__, nents);
-
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_unmap_sg()");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-}
-
-/**************************************************************
-*
-* Initialization and claim
-*
-***************************************************************/
-
-/*
- * ioc_iova_init - set up the I/O virtual address space of one IOC.
- * @ioc: IOC whose registers (ioc->ioc_hpa) are already mapped.
- *
- * Reads the firmware-programmed IOVA window, programs the IOTLB page size,
- * allocates and zeroes the I/O page directory (pdir), optionally reserves
- * half of the window for AGP GART, purges the I/O TLB and finally enables
- * translation.  Panics on an unsupported page size or allocation failure.
- */
-static void __init
-ioc_iova_init(struct ioc *ioc)
-{
-	int tcnfg;
-	int agp_found = 0;
-	struct pci_dev *device = NULL;
-#ifdef FULL_VALID_PDIR
-	unsigned long index;
-#endif
-
-	/*
-	** Firmware programs the base and size of a "safe IOVA space"
-	** (one that doesn't overlap memory or LMMIO space) in the
-	** IBASE and IMASK registers.
-	*/
-	ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
-	ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;
-
-	ioc->iov_size = ~ioc->imask + 1;
-
-	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
-		__FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
-		ioc->iov_size >> 20);
-
-	/* Translate the IOMMU page size into the TCNFG register encoding. */
-	switch (iovp_size) {
-		case 4*1024: tcnfg = 0; break;
-		case 8*1024: tcnfg = 1; break;
-		case 16*1024: tcnfg = 2; break;
-		case 64*1024: tcnfg = 3; break;
-		default:
-			panic(PFX "Unsupported IOTLB page size %ldK",
-				iovp_size >> 10);
-			break;
-	}
-	WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);
-
-	/* One pdir entry per IOMMU page in the IOVA window. */
-	ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
-	ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
-						   get_order(ioc->pdir_size));
-	if (!ioc->pdir_base)
-		panic(PFX "Couldn't allocate I/O Page Table\n");
-
-#ifdef CONFIG_XEN
-	/* The page table needs to be pinned in Xen memory */
-	if (xen_create_contiguous_region((unsigned long)ioc->pdir_base,
-					 get_order(ioc->pdir_size), 0))
-		panic(PFX "Couldn't contiguously map I/O Page Table\n");
-#endif
-	memset(ioc->pdir_base, 0, ioc->pdir_size);
-
-	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__,
-		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);
-
-	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
-	WRITE_REG(virt_to_bus(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);
-
-	/*
-	** If an AGP device is present, only use half of the IOV space
-	** for PCI DMA.  Unfortunately we can't know ahead of time
-	** whether GART support will actually be used, for now we
-	** can just key on an AGP device found in the system.
-	** We program the next pdir index after we stop w/ a key for
-	** the GART code to handshake on.
-	*/
-	for_each_pci_dev(device)
-		agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);
-
-	if (agp_found && reserve_sba_gart) {
-		printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
-			ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
-		ioc->pdir_size /= 2;
-		((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
-	}
-#ifdef FULL_VALID_PDIR
-	/*
-	** Check to see if the spill page has been allocated, we don't need more than
-	** one across multiple SBAs.
-	*/
-	if (!prefetch_spill_page) {
-		char *spill_poison = "SBAIOMMU POISON";
-		int poison_size = 16;
-		void *poison_addr, *addr;
-
-		addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
-		if (!addr)
-			panic(PFX "Couldn't allocate PDIR spill page\n");
-
-		/*
-		** Fill the whole spill page with the 16-byte poison pattern.
-		** Compare pointer against pointer; the bound used to be
-		** checked by comparing a u64 cast with a void pointer.
-		*/
-		poison_addr = addr;
-		for ( ; poison_addr < addr + iovp_size; poison_addr += poison_size)
-			memcpy(poison_addr, spill_poison, poison_size);
-
-		prefetch_spill_page = virt_to_bus(addr);
-
-		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page);
-	}
-	/*
-	** Set all the PDIR entries valid w/ the spill page as the target
-	*/
-	for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
-		((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
-#endif
-
-	/* Clear I/O TLB of any possible entries */
-	WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
-	READ_REG(ioc->ioc_hpa + IOC_PCOM);
-
-	/* Enable IOVA translation */
-	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
-	READ_REG(ioc->ioc_hpa + IOC_IBASE);
-}
-
-/*
- * Initialise the IOC locks and allocate the zeroed resource bitmap that
- * tracks pdir entry usage (one bit per pdir entry).  Panics when the bitmap
- * cannot be allocated.
- */
-static void __init
-ioc_resource_init(struct ioc *ioc)
-{
- spin_lock_init(&ioc->res_lock);
-#if DELAYED_RESOURCE_CNT > 0
- spin_lock_init(&ioc->saved_lock);
-#endif
-
- /* resource map size dictated by pdir_size */
- ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
- ioc->res_size >>= 3; /* convert bit count to byte count */
- DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size);
-
- ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
- get_order(ioc->res_size));
- if (!ioc->res_map)
- panic(PFX "Couldn't allocate resource map\n");
-
- memset(ioc->res_map, 0, ioc->res_size);
- /* next available IOVP - circular search */
- ioc->res_hint = (unsigned long *) ioc->res_map;
-
-#ifdef ASSERT_PDIR_SANITY
- /* Mark first bit busy - ie no IOVA 0 */
- ioc->res_map[0] = 0x1;
- /* Matching pdir entry 0 gets the cookie with the top bit set. */
- ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
-#endif
-#ifdef FULL_VALID_PDIR
- /* Mark the last resource used so we don't prefetch beyond IOVA space */
- ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */
- /* Point the final pdir entry at the spill page as well. */
- ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
- | prefetch_spill_page);
-#endif
-
- DBG_INIT("%s() res_map %x %p\n", __FUNCTION__,
- ioc->res_size, (void *) ioc->res_map);
-}
-
-/*
- * Build the zeroed pseudo PCI device (32-bit DMA mask, bound to @ioc via a
- * private pci_controller) stored in ioc->sac_only_dev.  Panics if either
- * allocation fails.
- */
-static void __init
-ioc_sac_init(struct ioc *ioc)
-{
-	struct pci_dev *pseudo_dev;
-	struct pci_controller *ctrl;
-
-	/*
-	 * pci_alloc_coherent() must return a DMA address which is
-	 * SAC (single address cycle) addressable, so allocate a
-	 * pseudo-device to enforce that.
-	 */
-	pseudo_dev = kmalloc(sizeof(*pseudo_dev), GFP_KERNEL);
-	if (pseudo_dev == NULL)
-		panic(PFX "Couldn't allocate struct pci_dev");
-	memset(pseudo_dev, 0, sizeof(*pseudo_dev));
-
-	ctrl = kmalloc(sizeof(*ctrl), GFP_KERNEL);
-	if (ctrl == NULL)
-		panic(PFX "Couldn't allocate struct pci_controller");
-	memset(ctrl, 0, sizeof(*ctrl));
-
-	/* Wire controller -> IOC and device -> controller. */
-	ctrl->iommu = ioc;
-	pseudo_dev->sysdata = ctrl;
-	pseudo_dev->dma_mask = 0xFFFFFFFFUL;
-#ifdef CONFIG_PCI
-	pseudo_dev->dev.bus = &pci_bus_type;
-#endif
-	ioc->sac_only_dev = pseudo_dev;
-}
-
-/*
- * zx1-specific setup: require IOC rev 2.0+, set the 39-bit DMA mask and
- * clear the AO bit in each of the eight ROPE config registers.
- */
-static void __init
-ioc_zx1_init(struct ioc *ioc)
-{
-	unsigned long cfg;
-	unsigned int rope;
-
-	if (ioc->rev < 0x20)
-		panic(PFX "IOC 2.0 or later required for IOMMU support\n");
-
-	/* 38 bit memory controller + extra bit for range displaced by MMIO */
-	ioc->dma_mask = (0x1UL << 39) - 1;
-
-	/*
-	** Clear ROPE(N)_CONFIG AO bit.
-	** Disables "NT Ordering" (~= !"Relaxed Ordering")
-	** Overrides bit 1 in DMA Hint Sets.
-	** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
-	*/
-	for (rope = 0; rope < 8; rope++) {
-		/* ROPE config registers are 8 bytes apart. */
-		unsigned int off = rope * 8;
-
-		cfg = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + off);
-		cfg &= ~IOC_ROPE_AO;
-		WRITE_REG(cfg, ioc->ioc_hpa + IOC_ROPE0_CFG + off);
-	}
-}
-
-/* Signature of an optional chip-specific init hook. */
-typedef void (initfunc)(struct ioc *);
-
-/* Associates an IOC function ID with a display name and an init hook. */
-struct ioc_iommu {
- u32 func_id;
- char *name;
- initfunc *init;
-};
-
-/* Known IOC types; a NULL init hook means no chip-specific setup is run. */
-static struct ioc_iommu ioc_iommu_info[] __initdata = {
- { ZX1_IOC_ID, "zx1", ioc_zx1_init },
- { ZX2_IOC_ID, "zx2", NULL },
- { SX1000_IOC_ID, "sx1000", NULL },
- { SX2000_IOC_ID, "sx2000", NULL },
-};
-
-/*
- * ioc_init - allocate and bring up one IOC.
- * @hpa:    physical address of the IOC register block.
- * @handle: opaque handle stored in ioc->handle (matched later against
- *          ACPI handles when buses are connected).
- *
- * Returns the new ioc, already linked at the head of ioc_list, or NULL if
- * the descriptor could not be allocated or its registers could not be
- * mapped.
- */
-static struct ioc * __init
-ioc_init(u64 hpa, void *handle)
-{
-	struct ioc *ioc;
-	struct ioc_iommu *info;
-
-	ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
-	if (!ioc)
-		return NULL;
-
-	memset(ioc, 0, sizeof(*ioc));
-
-	ioc->handle = handle;
-	ioc->ioc_hpa = ioremap(hpa, 0x1000);
-	if (!ioc->ioc_hpa) {
-		/* Never publish an IOC whose registers we cannot reach. */
-		kfree(ioc);
-		return NULL;
-	}
-
-	/* Only link into the global list once the mapping is known good. */
-	ioc->next = ioc_list;
-	ioc_list = ioc;
-
-	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
-	ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
-	ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL;	/* conservative */
-
-	/* Look up the chip by function ID and run its init hook, if any. */
-	for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
-		if (ioc->func_id == info->func_id) {
-			ioc->name = info->name;
-			if (info->init)
-				(info->init)(ioc);
-			break;	/* one table entry per function ID */
-		}
-	}
-
-	iovp_size = (1 << iovp_shift);
-	iovp_mask = ~(iovp_size - 1);
-
-	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__,
-		PAGE_SIZE >> 10, iovp_size >> 10);
-
-	if (!ioc->name) {
-		/* "Unknown (xxxx:xxxx)" needs 20 bytes including the NUL. */
-		ioc->name = kmalloc(24, GFP_KERNEL);
-		if (ioc->name)
-			sprintf((char *) ioc->name, "Unknown (%04x:%04x)",
-				ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
-		else
-			ioc->name = "Unknown";
-	}
-
-	ioc_iova_init(ioc);
-	ioc_resource_init(ioc);
-	ioc_sac_init(ioc);
-
-	/* Raise the global merge mask if this IOC's page mask is larger. */
-	if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
-		ia64_max_iommu_merge_mask = ~iovp_mask;
-
-	printk(KERN_INFO PFX
-		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
-		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
-		hpa, ioc->iov_size >> 20, ioc->ibase);
-
-	return ioc;
-}
-
-
-
-/**************************************************************************
-**
-** SBA initialization code (HW and SW)
-**
-** o identify SBA chip itself
-** o FIXME: initialize DMA hints for reasonable defaults
-**
-**************************************************************************/
-
-#ifdef CONFIG_PROC_FS
-/* seq_file start: return the ioc at index *pos in ioc_list, or NULL. */
-static void *
-ioc_start(struct seq_file *s, loff_t *pos)
-{
-	struct ioc *cur = ioc_list;
-	loff_t skip = *pos;
-
-	while (cur) {
-		if (!skip--)
-			return cur;
-		cur = cur->next;
-	}
-
-	return NULL;
-}
-
-/* seq_file next: bump the position and step to the following ioc. */
-static void *
-ioc_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	struct ioc *cur = v;
-
-	*pos += 1;
-	return cur->next;
-}
-
-/* seq_file stop: intentionally empty, ioc_start() acquires nothing. */
-static void
-ioc_stop(struct seq_file *s, void *v)
-{
-}
-
-/*
- * seq_file show: print name, revision, IOVA/pdir sizes and the number of
- * pdir entries currently in use for the ioc passed in @v.  Always returns 0.
- */
-static int
-ioc_show(struct seq_file *s, void *v)
-{
- struct ioc *ioc = v;
- unsigned long *res_ptr = (unsigned long *)ioc->res_map;
- int i, used = 0;
-
- seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
- ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
-#ifdef CONFIG_NUMA
- if (ioc->node != MAX_NUMNODES)
- seq_printf(s, "NUMA node : %d\n", ioc->node);
-#endif
- /* pdir_size >> 3 is the pdir entry count; each entry maps one IOMMU page */
- seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
- seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024);
-
- /* Count set bits in the resource bitmap, one 64-bit word at a time. */
- for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
- used += hweight64(*res_ptr);
-
- seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3);
- seq_printf(s, "PDIR used : %d entries\n", used);
-
-#ifdef PDIR_SEARCH_TIMING
- {
- /* min/avg/max over the SBA_SEARCH_SAMPLE recorded search times */
- unsigned long i = 0, avg = 0, min, max;
- min = max = ioc->avg_search[0];
- for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
- avg += ioc->avg_search[i];
- if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
- if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
- }
- avg /= SBA_SEARCH_SAMPLE;
- seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
- min, avg, max);
- }
-#endif
-#ifndef ALLOW_IOV_BYPASS
- seq_printf(s, "IOVA bypass disabled\n");
-#endif
- return 0;
-}
-
-static struct seq_operations ioc_seq_ops = {
- .start = ioc_start,
- .next = ioc_next,
- .stop = ioc_stop,
- .show = ioc_show
-};
-
-static int
-ioc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ioc_seq_ops);
-}
-
-static struct file_operations ioc_fops = {
- .open = ioc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
-/*
- * Create the bus/mckinley proc directory and one entry in it, named after
- * the ioc at the head of ioc_list and served by ioc_fops.  Failures are
- * silently ignored.
- */
-static void __init
-ioc_proc_init(void)
-{
-	struct proc_dir_entry *parent, *node;
-
-	parent = proc_mkdir("bus/mckinley", NULL);
-	if (parent) {
-		node = create_proc_entry(ioc_list->name, 0, parent);
-		if (node)
-			node->proc_fops = &ioc_fops;
-	}
-}
-#endif
-
-/*
- * Bind @bus to the IOC that encloses it in the ACPI namespace by storing
- * the ioc in the bus controller's iommu field.  Panics if the bus has no
- * controller data; warns if no enclosing IOC is found.
- */
-static void
-sba_connect_bus(struct pci_bus *bus)
-{
-	acpi_handle cur, up;
-	acpi_status rc;
-	struct ioc *candidate;
-
-	if (!PCI_CONTROLLER(bus))
-		panic(PFX "no sysdata on bus %d!\n", bus->number);
-
-	/* Already bound. */
-	if (PCI_CONTROLLER(bus)->iommu)
-		return;
-
-	cur = PCI_CONTROLLER(bus)->acpi_handle;
-	if (!cur)
-		return;
-
-	/*
-	 * The IOC scope encloses PCI root bridges in the ACPI
-	 * namespace, so work our way out until we find an IOC we
-	 * claimed previously.
-	 */
-	for (;;) {
-		for (candidate = ioc_list; candidate; candidate = candidate->next) {
-			if (candidate->handle == cur) {
-				PCI_CONTROLLER(bus)->iommu = candidate;
-				return;
-			}
-		}
-
-		rc = acpi_get_parent(cur, &up);
-		if (ACPI_FAILURE(rc))
-			break;
-		cur = up;
-	}
-
-	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
-}
-
-#ifdef CONFIG_NUMA
-/*
- * Record the NUMA node of @ioc in ioc->node.  MAX_NUMNODES is stored when
- * no proximity domain is available or the node is out of range or offline.
- */
-static void __init
-sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
-{
-	unsigned int nid;
-	int pxm;
-
-	ioc->node = MAX_NUMNODES;
-
-	pxm = acpi_get_pxm(handle);
-	if (pxm >= 0) {
-		nid = pxm_to_node(pxm);
-		if (nid < MAX_NUMNODES && node_online(nid))
-			ioc->node = nid;
-	}
-}
-#else
-#define sba_map_ioc_to_node(ioc, handle)
-#endif
-
-/*
- * ACPI .add callback: locate the IOC register space for @device, pick the
- * IOMMU page size if none was chosen yet, and initialise the IOC.
- * Returns 0 on success and 1 on any failure.
- */
-static int __init
-acpi_sba_ioc_add(struct acpi_device *device)
-{
- struct ioc *ioc;
- acpi_status status;
- u64 hpa, length;
- struct acpi_buffer buffer;
- struct acpi_device_info *dev_info;
-
- status = hp_acpi_csr_space(device->handle, &hpa, &length);
- if (ACPI_FAILURE(status))
- return 1;
-
- /* The object info buffer is released with kfree() below. */
- buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
- status = acpi_get_object_info(device->handle, &buffer);
- if (ACPI_FAILURE(status))
- return 1;
- dev_info = buffer.pointer;
-
- /*
- * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI
- * root bridges, and its CSR space includes the IOC function.
- */
- if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
- hpa += ZX1_IOC_OFFSET;
- /* zx1 based systems default to kernel page size iommu pages */
- if (!iovp_shift)
- iovp_shift = min(PAGE_SHIFT, 16);
- }
- kfree(dev_info);
-
- /*
- * default anything not caught above or specified on cmdline to 4k
- * iommu page size
- */
- if (!iovp_shift)
- iovp_shift = 12;
-
- ioc = ioc_init(hpa, device->handle);
- if (!ioc)
- return 1;
-
- /* setup NUMA node association */
- sba_map_ioc_to_node(ioc, device->handle);
- return 0;
-}
-
-static struct acpi_driver acpi_sba_ioc_driver = {
- .name = "IOC IOMMU Driver",
- .ids = "HWP0001,HWP0004",
- .ops = {
- .add = acpi_sba_ioc_add,
- },
-};
-
-/*
- * Subsystem init: register the ACPI IOC driver, fall back to swiotlb/"dig"
- * (or panic) when no IOC was claimed, then connect every PCI bus to its IOC
- * and create the proc entry.  Always returns 0.
- */
-static int __init
-sba_init(void)
-{
- /* Only run on the hpzx1 and hpzx1_swiotlb platforms. */
- if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
- return 0;
-
- /* ioc_list stays NULL unless acpi_sba_ioc_add() created an IOC. */
- acpi_bus_register_driver(&acpi_sba_ioc_driver);
- if (!ioc_list) {
-#ifdef CONFIG_IA64_GENERIC
- extern int swiotlb_late_init_with_default_size (size_t size);
-
- /*
- * If we didn't find something sba_iommu can claim, we
- * need to setup the swiotlb and switch to the dig machvec.
- */
- if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
- panic("Unable to find SBA IOMMU or initialize "
- "software I/O TLB: Try machvec=dig boot option");
- machvec_init("dig");
-#else
- panic("Unable to find SBA IOMMU: Try a generic or DIG kernel");
-#endif
- return 0;
- }
-
-#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
- /*
- * hpzx1_swiotlb needs to have a fairly small swiotlb bounce
- * buffer setup to support devices with smaller DMA masks than
- * sba_iommu can handle.
- */
- if (ia64_platform_is("hpzx1_swiotlb")) {
- extern void hwsw_init(void);
-
- hwsw_init();
- }
-#endif
-
-#ifdef CONFIG_PCI
- {
- /* Attach each known PCI bus to the IOC that encloses it. */
- struct pci_bus *b = NULL;
- while ((b = pci_find_next_bus(b)) != NULL)
- sba_connect_bus(b);
- }
-#endif
-
-#ifdef CONFIG_PROC_FS
- ioc_proc_init();
-#endif
- return 0;
-}
-
-subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */
-
-static int __init
-nosbagart(char *str)
-{
- reserve_sba_gart = 0;
- return 1;
-}
-
-int
-sba_dma_supported (struct device *dev, u64 mask)
-{
- /* make sure it's at least 32bit capable */
- return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
-}
-
-/* Always returns 0: no DMA address is ever reported as a mapping error. */
-int
-sba_dma_mapping_error (dma_addr_t dma_addr)
-{
- return 0;
-}
-
-__setup("nosbagart", nosbagart);
-
-/*
- * "sbapagesize=" boot option handler: accept 4k, 8k, 16k or 64k and store
- * the matching shift in iovp_shift; any other value is reported and ignored.
- * Always returns 1.
- */
-static int __init
-sba_page_override(char *str)
-{
-	unsigned long page_size = memparse(str, &str);
-
-	if (page_size == 4096 || page_size == 8192 ||
-	    page_size == 16384 || page_size == 65536)
-		iovp_shift = ffs(page_size) - 1;
-	else
-		printk("%s: unknown/unsupported iommu page size %ld\n",
-		       __FUNCTION__, page_size);
-
-	return 1;
-}
-
-__setup("sbapagesize=",sba_page_override);
-
-EXPORT_SYMBOL(sba_dma_mapping_error);
-EXPORT_SYMBOL(sba_map_single);
-EXPORT_SYMBOL(sba_unmap_single);
-EXPORT_SYMBOL(sba_map_sg);
-EXPORT_SYMBOL(sba_unmap_sg);
-EXPORT_SYMBOL(sba_dma_supported);
-EXPORT_SYMBOL(sba_alloc_coherent);
-EXPORT_SYMBOL(sba_free_coherent);
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile b/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile
deleted file mode 100644
index ad8215a3c5..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile
+++ /dev/null
@@ -1,63 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head.o init_task.o vmlinux.lds
-
-obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
- irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
- salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
- unwind.o mca.o mca_asm.o topology.o
-
-obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
-obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += acpi-processor.o
-endif
-
-obj-$(CONFIG_IA64_PALINFO) += palinfo.o
-obj-$(CONFIG_IOSAPIC) += iosapic.o
-obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_SMP) += smp.o smpboot.o
-obj-$(CONFIG_NUMA) += numa.o
-obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
-obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
-obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
-obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
-obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
-obj-$(CONFIG_AUDIT) += audit.o
-mca_recovery-y += mca_drv.o mca_drv_asm.o
-
-# The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o
-
-extra-y += gate.so gate-syms.o gate.lds gate.o
-
-# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
-CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
-
-CPPFLAGS_gate.lds := -P -C -U$(ARCH)
-
-quiet_cmd_gate = GATE $@
- cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
-$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
- $(call if_changed,gate)
-
-$(obj)/built-in.o: $(obj)/gate-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
-
-GATECFLAGS_gate-syms.o = -r
-$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
- $(call if_changed,gate)
-
-# gate-data.o contains the gate DSO image as data in section .data.gate.
-# We must build gate.so before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c b/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c
deleted file mode 100644
index 54e2fb8ab7..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c
+++ /dev/null
@@ -1,1010 +0,0 @@
-/*
- * acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
- * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
- * David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 2000 Intel Corp.
- * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
- * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
- * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
- * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
- * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/irq.h>
-#include <linux/acpi.h>
-#include <linux/efi.h>
-#include <linux/mmzone.h>
-#include <linux/nodemask.h>
-#include <asm/io.h>
-#include <asm/iosapic.h>
-#include <asm/machvec.h>
-#include <asm/page.h>
-#include <asm/system.h>
-#include <asm/numa.h>
-#include <asm/sal.h>
-#include <asm/cyclone.h>
-
-#define BAD_MADT_ENTRY(entry, end) ( \
- (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
- ((acpi_table_entry_header *)entry)->length < sizeof(*entry))
-
-#define PREFIX "ACPI: "
-
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
-void (*pm_power_off) (void);
-EXPORT_SYMBOL(pm_power_off);
-
-unsigned char acpi_kbd_controller_present = 1;
-unsigned char acpi_legacy_devices;
-
-unsigned int acpi_cpei_override;
-unsigned int acpi_cpei_phys_cpuid;
-
-#define MAX_SAPICS 256
-u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = {[0 ... MAX_SAPICS - 1] = -1 };
-
-EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
-
-/*
- * Return the platform name.  On generic kernels it is derived from the
- * XSDT OEM ID and defaults to "dig"; otherwise it is fixed at build time by
- * the configured platform.
- */
-const char *acpi_get_sysname(void)
-{
-#ifdef CONFIG_IA64_GENERIC
- unsigned long rsdp_phys;
- struct acpi20_table_rsdp *rsdp;
- struct acpi_table_xsdt *xsdt;
- struct acpi_table_header *hdr;
-
- rsdp_phys = acpi_find_rsdp();
- if (!rsdp_phys) {
- printk(KERN_ERR
- "ACPI 2.0 RSDP not found, default to \"dig\"\n");
- return "dig";
- }
-
- /* Validate the RSDP signature before following it to the XSDT. */
- rsdp = (struct acpi20_table_rsdp *)__va(rsdp_phys);
- if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
- printk(KERN_ERR
- "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
- return "dig";
- }
-
- xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address);
- hdr = &xsdt->header;
- if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
- printk(KERN_ERR
- "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
- return "dig";
- }
-
- /* NOTE(review): strcmp() assumes oem_id is NUL-terminated - confirm. */
- if (!strcmp(hdr->oem_id, "HP")) {
- return "hpzx1";
- } else if (!strcmp(hdr->oem_id, "SGI")) {
- return "sn2";
-#ifdef CONFIG_XEN
- } else if (is_running_on_xen() && !strcmp(hdr->oem_id, "XEN")) {
- return "xen";
-#endif
- }
-
- return "dig";
-#else
-# if defined (CONFIG_IA64_HP_SIM)
- return "hpsim";
-# elif defined (CONFIG_IA64_HP_ZX1)
- return "hpzx1";
-# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
- return "hpzx1_swiotlb";
-# elif defined (CONFIG_IA64_SGI_SN2)
- return "sn2";
-# elif defined (CONFIG_IA64_DIG)
- return "dig";
-# elif defined (CONFIG_IA64_XEN)
- return "xen";
-# else
-# error Unknown platform. Fix acpi.c.
-# endif
-#endif
-}
-
-#ifdef CONFIG_ACPI
-
-#define ACPI_MAX_PLATFORM_INTERRUPTS 256
-
-/* Array to record platform interrupt vectors for generic interrupt routing. */
-int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
- [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
-};
-
-enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
-
-/*
- * Interrupt routing API for device drivers. Provides interrupt vector for
- * a generic platform event. Currently only CPEI is implemented.
- */
-int acpi_request_vector(u32 int_type)
-{
-	/* Reject types that fall outside the platform interrupt table. */
-	if (int_type >= ACPI_MAX_PLATFORM_INTERRUPTS) {
-		printk(KERN_ERR
-		       "acpi_request_vector(): invalid interrupt type\n");
-		return -1;
-	}
-
-	/* corrected platform error interrupt */
-	return platform_intr_list[int_type];
-}
-
-char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
- return __va(phys_addr);
-}
-
-/* --------------------------------------------------------------------------
- Boot-time Table Parsing
- -------------------------------------------------------------------------- */
-
-static int total_cpus __initdata;
-static int available_cpus __initdata;
-struct acpi_table_madt *acpi_madt __initdata;
-static u8 has_8259;
-
-/*
- * MADT handler: when the entry carries a non-zero address, remap the IPI
- * base to it.  Returns -EINVAL for a malformed entry, 0 otherwise.
- */
-static int __init
-acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
-			  const unsigned long end)
-{
-	struct acpi_table_lapic_addr_ovr *ovr =
-	    (struct acpi_table_lapic_addr_ovr *)header;
-
-	if (BAD_MADT_ENTRY(ovr, end))
-		return -EINVAL;
-
-	if (!ovr->address)
-		return 0;
-
-	iounmap(ipi_base_addr);
-	ipi_base_addr = ioremap(ovr->address, 0);
-	return 0;
-}
-
-/*
- * MADT handler for a local SAPIC entry: count it and, when enabled, record
- * its physical id ((id << 8) | eid).  Returns -EINVAL for a malformed entry.
- */
-static int __init
-acpi_parse_lsapic(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_lsapic *lsapic;
-
- lsapic = (struct acpi_table_lsapic *)header;
-
- if (BAD_MADT_ENTRY(lsapic, end))
- return -EINVAL;
-
- if (lsapic->flags.enabled) {
-#ifdef CONFIG_SMP
- /*
-  * NOTE(review): available_cpus is not bounds-checked here; confirm
-  * cpu_phys_id[] cannot be overrun when firmware lists many CPUs.
-  */
- smp_boot_data.cpu_phys_id[available_cpus] =
- (lsapic->id << 8) | lsapic->eid;
-#endif
- ia64_acpiid_to_sapicid[lsapic->acpi_id] =
- (lsapic->id << 8) | lsapic->eid;
- ++available_cpus;
- }
-
- total_cpus++;
- return 0;
-}
-
-static int __init
-acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_lapic_nmi *lacpi_nmi;
-
- lacpi_nmi = (struct acpi_table_lapic_nmi *)header;
-
- if (BAD_MADT_ENTRY(lacpi_nmi, end))
- return -EINVAL;
-
- /* TBD: Support lapic_nmi entries */
- return 0;
-}
-
-static int __init
-acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_iosapic *iosapic;
-
- iosapic = (struct acpi_table_iosapic *)header;
-
- if (BAD_MADT_ENTRY(iosapic, end))
- return -EINVAL;
-
- return iosapic_init(iosapic->address, iosapic->global_irq_base);
-}
-
-static unsigned int __initdata acpi_madt_rev;
-
-/*
- * MADT handler for a platform interrupt source: register it with the
- * IOSAPIC, remember the resulting vector and the CPEI target CPU.
- * Returns -EINVAL for a malformed entry, 0 otherwise.
- */
-static int __init
-acpi_parse_plat_int_src(acpi_table_entry_header * header,
- const unsigned long end)
-{
- struct acpi_table_plat_int_src *plintsrc;
- int vector;
-
- plintsrc = (struct acpi_table_plat_int_src *)header;
-
- if (BAD_MADT_ENTRY(plintsrc, end))
- return -EINVAL;
-
- /*
- * Get vector assignment for this interrupt, set attributes,
- * and program the IOSAPIC routing table.
- */
- vector = iosapic_register_platform_intr(plintsrc->type,
- plintsrc->global_irq,
- plintsrc->iosapic_vector,
- plintsrc->eid,
- plintsrc->id,
- (plintsrc->flags.polarity ==
- 1) ? IOSAPIC_POL_HIGH :
- IOSAPIC_POL_LOW,
- (plintsrc->flags.trigger ==
- 1) ? IOSAPIC_EDGE :
- IOSAPIC_LEVEL);
-
- platform_intr_list[plintsrc->type] = vector;
- /* The CPEI override flag is only read for MADT revisions above 1. */
- if (acpi_madt_rev > 1) {
- acpi_cpei_override = plintsrc->plint_flags.cpei_override_flag;
- }
-
- /*
- * Save the physical id, so we can check when it's being removed
- */
- acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff;
-
- return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-unsigned int can_cpei_retarget(void)
-{
-	extern int cpe_vector;
-	extern unsigned int force_cpei_retarget;
-
-	/*
-	 * Without a CPEI vector we are in polling mode, which is always
-	 * re-targetable.
-	 */
-	if (cpe_vector <= 0)
-		return 1;
-
-	/*
-	 * With CPEI in use, re-targeting needs either the firmware
-	 * override flag or the explicit force option.
-	 */
-	return (acpi_cpei_override || force_cpei_retarget) ? 1 : 0;
-}
-
-/* Return 1 if logical CPU @cpu is the recorded CPEI target, else 0. */
-unsigned int is_cpu_cpei_target(unsigned int cpu)
-{
-	unsigned int target = cpu_logical_id(acpi_cpei_phys_cpuid);
-
-	return (target == cpu) ? 1 : 0;
-}
-
-void set_cpei_target_cpu(unsigned int cpu)
-{
- acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
-}
-#endif
-
-unsigned int get_cpei_target_cpu(void)
-{
- return acpi_cpei_phys_cpuid;
-}
-
-static int __init
-acpi_parse_int_src_ovr(acpi_table_entry_header * header,
- const unsigned long end)
-{
- struct acpi_table_int_src_ovr *p;
-
- p = (struct acpi_table_int_src_ovr *)header;
-
- if (BAD_MADT_ENTRY(p, end))
- return -EINVAL;
-
- iosapic_override_isa_irq(p->bus_irq, p->global_irq,
- (p->flags.polarity ==
- 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
- (p->flags.trigger ==
- 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
- return 0;
-}
-
-/*
- * MADT handler for NMI source entries: only validates the entry.
- * Returns -EINVAL for a malformed entry, 0 otherwise.
- */
-static int __init
-acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_nmi_src *nmi_src;
-
- nmi_src = (struct acpi_table_nmi_src *)header;
-
- if (BAD_MADT_ENTRY(nmi_src, end))
- return -EINVAL;
-
- /* TBD: Support nmi_src entries */
- return 0;
-}
-
-/* Apply the IBM "SERMOW" quirk: flag ITC drift and set up cyclone. */
-static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-	if (strncmp(oem_id, "IBM", 3) || strncmp(oem_table_id, "SERMOW", 6))
-		return;
-
-	/*
-	 * Unfortunately ITC_DRIFT is not yet part of the
-	 * official SAL spec, so the ITC_DRIFT bit is not
-	 * set by the BIOS on this hardware.
-	 */
-	sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
-
-	cyclone_setup();
-}
-
-/*
- * Record the MADT location and revision, initialise the IOSAPIC layer with
- * the 8259 presence flag, map the IPI block and apply OEM quirks.
- * Returns -EINVAL when @phys_addr or @size is zero, 0 otherwise.
- */
-static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
-{
- if (!phys_addr || !size)
- return -EINVAL;
-
- acpi_madt = (struct acpi_table_madt *)__va(phys_addr);
-
- acpi_madt_rev = acpi_madt->header.revision;
-
- /* remember the value for reference after free_initmem() */
-#ifdef CONFIG_ITANIUM
- has_8259 = 1; /* Firmware on old Itanium systems is broken */
-#else
- has_8259 = acpi_madt->flags.pcat_compat;
-#endif
- iosapic_system_init(has_8259);
-
- /* Get base address of IPI Message Block */
-
- if (acpi_madt->lapic_address)
- ipi_base_addr = ioremap(acpi_madt->lapic_address, 0);
-
- printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
-
- acpi_madt_oem_check(acpi_madt->header.oem_id,
- acpi_madt->header.oem_table_id);
-
- return 0;
-}
-
-#ifdef CONFIG_ACPI_NUMA
-
-#undef SLIT_DEBUG
-
-#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
-
-static int __initdata srat_num_cpus; /* number of cpus */
-static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
-#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
-#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
-static struct acpi_table_slit __initdata *slit_table;
-
-/*
- * Proximity domain of a processor affinity entry; on "sn2" the first
- * reserved byte supplies bits 8 and up.
- */
-static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa)
-{
-	int upper = ia64_platform_is("sn2") ? (pa->reserved[0] << 8) : 0;
-
-	return pa->proximity_domain + upper;
-}
-
-/*
- * Proximity domain of a memory affinity entry; on "sn2" the first
- * reserved1 byte supplies bits 8 and up.
- */
-static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma)
-{
-	int upper = ia64_platform_is("sn2") ? (ma->reserved1[0] << 8) : 0;
-
-	return ma->proximity_domain + upper;
-}
-
-/*
- * ACPI 2.0 SLIT (System Locality Information Table)
- * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
- */
-/*
- * Validate the SLIT length (header + 8 bytes + localities^2 entry bytes).
- * On mismatch fill numa_slit with the value 10 and ignore the table;
- * otherwise remember the table in slit_table for later use.
- */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
- u32 len;
-
- len = sizeof(struct acpi_table_header) + 8
- + slit->localities * slit->localities;
- if (slit->header.length != len) {
- printk(KERN_ERR
- "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
- len, slit->header.length);
- memset(numa_slit, 10, sizeof(numa_slit));
- return;
- }
- slit_table = slit;
-}
-
-/*
- * Record one enabled SRAT processor affinity entry: mark its proximity
- * domain as present and append (physical id, pxm) to node_cpuid[].
- * Entries beyond the capacity of node_cpuid[] are dropped with a warning
- * instead of being written past the end of the array.
- */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
-{
-	int pxm;
-
-	if (!pa->flags.enabled)
-		return;
-
-	/* Refuse to overrun node_cpuid[] when firmware lists too many CPUs. */
-	if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) {
-		printk(KERN_WARNING
-		       "node_cpuid[%ld] is too small, may not be able to use all cpus\n",
-		       (long) ARRAY_SIZE(node_cpuid));
-		return;
-	}
-
-	pxm = get_processor_proximity_domain(pa);
-
-	/* record this node in proximity bitmap */
-	pxm_bit_set(pxm);
-
-	node_cpuid[srat_num_cpus].phys_id =
-	    (pa->apic_id << 8) | (pa->lsapic_eid);
-	/* nid should be overridden as logical node id later */
-	node_cpuid[srat_num_cpus].nid = pxm;
-	srat_num_cpus++;
-}
-
-/*
- * Record one enabled SRAT memory affinity entry: mark its proximity domain
- * as present and insert the (base, size, pxm) chunk into node_memblk[],
- * which is kept sorted by base address.  Entries beyond the capacity of
- * node_memblk[] are dropped with a warning, because the insertion shifts
- * existing elements one slot upwards.
- */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
-{
-	unsigned long paddr, size;
-	int pxm;
-	struct node_memblk_s *p, *q, *pend;
-
-	/* Ignore disabled entries */
-	if (!ma->flags.enabled)
-		return;
-
-	/* The shift below writes node_memblk[num_node_memblks]. */
-	if (num_node_memblks >= ARRAY_SIZE(node_memblk)) {
-		printk(KERN_WARNING
-		       "node_memblk[%ld] is too small, ignoring SRAT memory entry\n",
-		       (long) ARRAY_SIZE(node_memblk));
-		return;
-	}
-
-	pxm = get_memory_proximity_domain(ma);
-
-	/* fill node memory chunk structure */
-	paddr = ma->base_addr_hi;
-	paddr = (paddr << 32) | ma->base_addr_lo;
-	size = ma->length_hi;
-	size = (size << 32) | ma->length_lo;
-
-	/* record this node in proximity bitmap */
-	pxm_bit_set(pxm);
-
-	/* Insertion sort based on base address */
-	pend = &node_memblk[num_node_memblks];
-	for (p = &node_memblk[0]; p < pend; p++) {
-		if (paddr < p->start_paddr)
-			break;
-	}
-	if (p < pend) {
-		/* Open a slot at p by moving the tail up by one element. */
-		for (q = pend - 1; q >= p; q--)
-			*(q + 1) = *q;
-	}
-	p->start_paddr = paddr;
-	p->size = size;
-	p->nid = pxm;
-	num_node_memblks++;
-}
-
-/*
- * Finalise the NUMA description once SRAT/SLIT data has been collected:
- * bring nodes online, convert the proximity domains stored in node_memblk[]
- * and node_cpuid[] into logical node ids, number the memory banks on each
- * node, and (when a SLIT was saved in slit_table) fill in node distances.
- */
-void __init acpi_numa_arch_fixup(void)
-{
- int i, j, node_from, node_to;
-
- /* If there's no SRAT, fix the phys_id and mark node 0 online */
- if (srat_num_cpus == 0) {
- node_set_online(0);
- node_cpuid[0].phys_id = hard_smp_processor_id();
- return;
- }
-
- /*
- * MCD - This can probably be dropped now. No need for pxm ID to node ID
- * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
- */
- nodes_clear(node_online_map);
- for (i = 0; i < MAX_PXM_DOMAINS; i++) {
- if (pxm_bit_test(i)) {
- int nid = acpi_map_pxm_to_node(i);
- node_set_online(nid);
- }
- }
-
- /* set logical node id in memory chunk structure */
- for (i = 0; i < num_node_memblks; i++)
- node_memblk[i].nid = pxm_to_node(node_memblk[i].nid);
-
- /* assign memory bank numbers for each chunk on each node */
- for_each_online_node(i) {
- int bank;
-
- bank = 0;
- for (j = 0; j < num_node_memblks; j++)
- if (node_memblk[j].nid == i)
- node_memblk[j].bank = bank++;
- }
-
- /* set logical node id in cpu structure */
- for (i = 0; i < srat_num_cpus; i++)
- node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
-
- printk(KERN_INFO "Number of logical nodes in system = %d\n",
- num_online_nodes());
- printk(KERN_INFO "Number of memory chunks in system = %d\n",
- num_node_memblks);
-
- /* without a SLIT the distance table is left untouched */
- if (!slit_table)
- return;
- /* every byte of numa_slit becomes 0xff before known distances are filled in */
- memset(numa_slit, -1, sizeof(numa_slit));
- /*
- * NOTE(review): i and j run up to slit_table->localities and are fed to
- * pxm_bit_test(); confirm localities cannot exceed MAX_PXM_DOMAINS.
- */
- for (i = 0; i < slit_table->localities; i++) {
- if (!pxm_bit_test(i))
- continue;
- node_from = pxm_to_node(i);
- for (j = 0; j < slit_table->localities; j++) {
- if (!pxm_bit_test(j))
- continue;
- node_to = pxm_to_node(j);
- /* SLIT entries are indexed as a row-major localities x localities matrix */
- node_distance(node_from, node_to) =
- slit_table->entry[i * slit_table->localities + j];
- }
- }
-
-#ifdef SLIT_DEBUG
- printk("ACPI 2.0 SLIT locality table:\n");
- for_each_online_node(i) {
- for_each_online_node(j)
- printk("%03d ", node_distance(i, j));
- printk("\n");
- }
-#endif
-}
-#endif /* CONFIG_ACPI_NUMA */
-
-/*
- * Register a Global System Interrupt.
- *
- * Legacy GSIs (below 16, when an 8259 is present) are translated through
- * the ISA vector table; everything else is handed to the IOSAPIC layer with
- * the ACPI polarity/trigger encodings converted to IOSAPIC ones.
- *
- * Returns the IRQ number (>= 0) on success, a negative value on failure.
- */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
-{
-	if (!has_8259 || gsi >= 16)
-		return iosapic_register_intr(gsi,
-					     (polarity == ACPI_ACTIVE_HIGH) ?
-					     IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-					     (triggering == ACPI_EDGE_SENSITIVE) ?
-					     IOSAPIC_EDGE : IOSAPIC_LEVEL);
-
-	/* legacy interrupt behind the 8259 */
-	return isa_irq_to_vector(gsi);
-}
-
-EXPORT_SYMBOL(acpi_register_gsi);
-
-/*
- * Undo acpi_register_gsi() for @gsi by passing it straight to
- * iosapic_unregister_intr(); no 8259/legacy special case is applied here.
- */
-void acpi_unregister_gsi(u32 gsi)
-{
- iosapic_unregister_intr(gsi);
-}
-
-EXPORT_SYMBOL(acpi_unregister_gsi);
-
-/*
- * Table handler for the FADT.
- *
- * Accepts only revision 3 tables, derives the keyboard-controller and
- * legacy-device flags from iapc_boot_arch, and registers the SCI interrupt
- * as level-sensitive, active-low.
- *
- * Returns 0 on success, -EINVAL for a missing address/size, -ENODEV for an
- * unsupported table revision.
- */
-static int __init acpi_parse_fadt(unsigned long phys_addr, unsigned long size)
-{
- struct acpi_table_header *fadt_header;
- struct fadt_descriptor *fadt;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- fadt_header = (struct acpi_table_header *)__va(phys_addr);
- if (fadt_header->revision != 3)
- return -ENODEV; /* Only deal with ACPI 2.0 FADT */
-
- fadt = (struct fadt_descriptor *)fadt_header;
-
- /* the flag is only ever cleared here, never set */
- if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
- acpi_kbd_controller_present = 0;
-
- if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
- acpi_legacy_devices = 1;
-
- /* NOTE(review): the result of acpi_register_gsi() is discarded - confirm that is intended */
- acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
- return 0;
-}
-
-/*
- * Locate the RSDP via the EFI configuration tables.
- *
- * Returns the ACPI 2.0 table address when EFI supplied one, otherwise 0.
- * A system that only offers the old ACPI 1.0 table gets a warning and 0.
- */
-unsigned long __init acpi_find_rsdp(void)
-{
-	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
-		return efi.acpi20;
-
-	if (efi.acpi != EFI_INVALID_TABLE_ADDR)
-		printk(KERN_WARNING PREFIX
-		       "v1.0/r0.71 tables no longer supported\n");
-
-	return 0;
-}
-
-/*
- * Boot-time ACPI table walk: parse the MADT sub-tables (LSAPIC, IOSAPIC,
- * platform interrupt sources, overrides, NMI sources), then the FADT, and
- * finally set up the SMP boot data and, with NUMA, the cpu-to-node map.
- *
- * Parse failures are logged but not propagated; the function always
- * returns 0.
- */
-int __init acpi_boot_init(void)
-{
-
- /*
- * MADT
- * ----
- * Parse the Multiple APIC Description Table (MADT), if exists.
- * Note that this table provides platform SMP configuration
- * information -- the successor to MPS tables.
- */
-
- if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
- printk(KERN_ERR PREFIX "Can't find MADT\n");
- goto skip_madt;
- }
-
- /* Local APIC */
-
- if (acpi_table_parse_madt
- (ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
- printk(KERN_ERR PREFIX
- "Error parsing LAPIC address override entry\n");
-
- if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS)
- < 1)
- printk(KERN_ERR PREFIX
- "Error parsing MADT - no LAPIC entries\n");
-
- if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0)
- < 0)
- printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
-
- /* I/O APIC */
-
- if (acpi_table_parse_madt
- (ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
- printk(KERN_ERR PREFIX
- "Error parsing MADT - no IOSAPIC entries\n");
-
- /* System-Level Interrupt Routing */
-
- if (acpi_table_parse_madt
- (ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src,
- ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
- printk(KERN_ERR PREFIX
- "Error parsing platform interrupt source entry\n");
-
- if (acpi_table_parse_madt
- (ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
- printk(KERN_ERR PREFIX
- "Error parsing interrupt source overrides entry\n");
-
- if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
- printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
- skip_madt:
-
- /*
- * FADT says whether a legacy keyboard controller is present.
- * The FADT also contains an SCI_INT line, by which the system
- * gets interrupts such as power and sleep buttons. If it's not
- * on a Legacy interrupt, it needs to be setup.
- */
- if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
- printk(KERN_ERR PREFIX "Can't find FADT\n");
-
-#ifdef CONFIG_SMP
- /* no CPU was reported as available: fall back to the boot CPU alone */
- if (available_cpus == 0) {
- printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
- printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
- smp_boot_data.cpu_phys_id[available_cpus] =
- hard_smp_processor_id();
- available_cpus = 1; /* We've got at least one of these, no? */
- }
- smp_boot_data.cpu_count = available_cpus;
-
- smp_build_cpu_map();
-# ifdef CONFIG_ACPI_NUMA
- /*
- * No SRAT processor entries: fill node_cpuid[1..] with the physical ids
- * of every CPU other than the one executing this code.
- */
- if (srat_num_cpus == 0) {
- int cpu, i = 1;
- for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
- if (smp_boot_data.cpu_phys_id[cpu] !=
- hard_smp_processor_id())
- node_cpuid[i++].phys_id =
- smp_boot_data.cpu_phys_id[cpu];
- }
-# endif
-#endif
-#ifdef CONFIG_ACPI_NUMA
- build_cpu_to_node_map();
-#endif
- /* Make boot-up look pretty */
- printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
- total_cpus);
- return 0;
-}
-
-/*
- * Translate a GSI into an IRQ number without registering anything.
- *
- * On success *irq is written and 0 is returned.  If the GSI has no vector
- * (gsi_to_vector() yields -1) the function returns -1 and leaves *irq
- * untouched.
- */
-int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
-{
-	int vector;
-
-	/* legacy interrupt routed through the 8259 */
-	if (has_8259 && gsi < 16) {
-		*irq = isa_irq_to_vector(gsi);
-		return 0;
-	}
-
-	vector = gsi_to_vector(gsi);
-	if (vector == -1)
-		return -1;
-
-	*irq = vector;
-	return 0;
-}
-
-/*
- * ACPI based hotplug CPU support
- */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-/*
- * Record the node and physical id of a hot-added CPU in node_cpuid[].
- *
- * With CONFIG_ACPI_NUMA the node comes from the handle's proximity domain
- * (node 0 when acpi_get_pxm() reports a negative value); without it the
- * function does nothing.  Always returns 0.
- */
-static
-int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
-{
-#ifdef CONFIG_ACPI_NUMA
- int pxm_id;
-
- pxm_id = acpi_get_pxm(handle);
-
- /*
- * Assuming that the container driver would have set the proximity
- * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag
- */
- node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id);
-
- node_cpuid[cpu].phys_id = physid;
-#endif
- return (0);
-}
-
-/* Number of extra hotplug CPU slots to reserve; -1 means "not specified". */
-int additional_cpus __initdata = -1;
-
-/*
- * Handler for the "additional_cpus=" early parameter.  The value is parsed
- * with base auto-detection; a missing argument leaves the default in place.
- */
-static __init int setup_additional_cpus(char *s)
-{
-	if (!s)
-		return 0;
-
-	additional_cpus = simple_strtol(s, NULL, 0);
-	return 0;
-}
-
-early_param("additional_cpus", setup_additional_cpus);
-
-/*
- * cpu_possible_map should be static; it cannot change as CPUs
- * are onlined or offlined. The reason is that per-cpu data structures
- * are allocated by some modules at init time, and they don't expect to
- * do this dynamically on CPU arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- */
-/*
- * Populate cpu_possible_map with the CPUs that are available now plus the
- * slots reserved for hotplug, capped at NR_CPUS.  When additional_cpus was
- * not given on the command line it defaults to the number of disabled CPUs
- * (total_cpus - available_cpus), or 0 if that is not positive.
- */
-__init void prefill_possible_map(void)
-{
-	int cpu;
-	int possible;
-	int disabled_cpus = total_cpus - available_cpus;
-
-	if (additional_cpus == -1)
-		additional_cpus = (disabled_cpus > 0) ? disabled_cpus : 0;
-
-	possible = available_cpus + additional_cpus;
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
-
-	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
-	       possible, max((possible - available_cpus), 0));
-
-	for (cpu = 0; cpu < possible; cpu++)
-		cpu_set(cpu, cpu_possible_map);
-}
-
-/*
- * Map a hot-added processor object to a logical CPU.
- *
- * Evaluates the object's _MAT method, requires the result to be an enabled
- * Local SAPIC entry, picks the first CPU number not yet in cpu_present_map,
- * records its node/physical id and SAPIC id mappings, and marks it present.
- *
- * @handle: ACPI handle of the processor object
- * @pcpu:   receives the logical CPU number on success
- *
- * Returns 0 on success, -EINVAL if _MAT is missing or malformed, the entry
- * is disabled, or no free CPU number is left.
- */
-int acpi_map_lsapic(acpi_handle handle, int *pcpu)
-{
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	struct acpi_table_lsapic *lsapic;
-	cpumask_t tmp_map;
-	long physid;
-	int acpi_id;
-	int cpu;
-
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-		return -EINVAL;
-
-	if (!buffer.length || !buffer.pointer)
-		return -EINVAL;
-
-	obj = buffer.pointer;
-	if (obj->type != ACPI_TYPE_BUFFER ||
-	    obj->buffer.length < sizeof(*lsapic)) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer;
-
-	if ((lsapic->header.type != ACPI_MADT_LSAPIC) ||
-	    (!lsapic->flags.enabled)) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	/*
-	 * Copy out everything still needed from the _MAT result before it is
-	 * released: lsapic refers to data owned by the returned buffer and
-	 * must not be dereferenced after the kfree() below.
-	 */
-	physid = ((lsapic->id << 8) | (lsapic->eid));
-	acpi_id = lsapic->acpi_id;
-
-	kfree(buffer.pointer);
-	buffer.length = ACPI_ALLOCATE_BUFFER;
-	buffer.pointer = NULL;
-
-	/* first CPU number that is not currently present */
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-	if (cpu >= NR_CPUS)
-		return -EINVAL;
-
-	acpi_map_cpu2node(handle, cpu, physid);
-
-	cpu_set(cpu, cpu_present_map);
-	ia64_cpu_to_sapicid[cpu] = physid;
-	ia64_acpiid_to_sapicid[acpi_id] = ia64_cpu_to_sapicid[cpu];
-
-	*pcpu = cpu;
-	return (0);
-}
-
-EXPORT_SYMBOL(acpi_map_lsapic);
-
-/*
- * Reverse acpi_map_lsapic() for @cpu: invalidate the first ACPI-id slot
- * that maps to this CPU's SAPIC id, invalidate the CPU's own SAPIC id and
- * clear it from cpu_present_map.  Always returns 0.
- */
-int acpi_unmap_lsapic(int cpu)
-{
-	int slot;
-
-	for (slot = 0; slot < MAX_SAPICS; slot++) {
-		if (ia64_acpiid_to_sapicid[slot] != ia64_cpu_to_sapicid[cpu])
-			continue;
-
-		/* only the first matching slot is reset */
-		ia64_acpiid_to_sapicid[slot] = -1;
-		break;
-	}
-
-	ia64_cpu_to_sapicid[cpu] = -1;
-	cpu_clear(cpu, cpu_present_map);
-
-#ifdef CONFIG_ACPI_NUMA
-	/* NUMA-specific cleanup would go here; nothing is done at present */
-#endif
-
-	return (0);
-}
-
-EXPORT_SYMBOL(acpi_unmap_lsapic);
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
-#ifdef CONFIG_ACPI_NUMA
-/*
- * Namespace-walk callback: if @handle has a _MAT method that returns an
- * IOSAPIC MADT entry and the object also carries a usable _PXM, associate
- * the IOSAPIC's GSI base with the corresponding node.
- *
- * @depth, @context and @ret are not used.  Every path returns AE_OK, so
- * objects that do not qualify are silently skipped.
- */
-static acpi_status __devinit
-acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
-{
- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *obj;
- struct acpi_table_iosapic *iosapic;
- unsigned int gsi_base;
- int pxm, node;
-
- /* Only care about objects w/ a method that returns the MADT */
- if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
- return AE_OK;
-
- if (!buffer.length || !buffer.pointer)
- return AE_OK;
-
- obj = buffer.pointer;
- if (obj->type != ACPI_TYPE_BUFFER ||
- obj->buffer.length < sizeof(*iosapic)) {
- kfree(buffer.pointer);
- return AE_OK;
- }
-
- iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
-
- if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
- kfree(buffer.pointer);
- return AE_OK;
- }
-
- /* copy the only field needed so the _MAT result can be freed right away */
- gsi_base = iosapic->global_irq_base;
-
- kfree(buffer.pointer);
-
- /*
- * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
- * us which node to associate this with.
- */
- pxm = acpi_get_pxm(handle);
- if (pxm < 0)
- return AE_OK;
-
- node = pxm_to_node(pxm);
-
- /* skip nodes that are out of range, offline, or have no CPUs */
- if (node >= MAX_NUMNODES || !node_online(node) ||
- cpus_empty(node_to_cpumask(node)))
- return AE_OK;
-
- /* We know a gsi to node mapping! */
- map_iosapic_to_node(gsi_base, node);
- return AE_OK;
-}
-
-/*
- * Initcall: run acpi_map_iosapic() over the devices enumerated by
- * acpi_get_devices().  The walk's status is ignored; always returns 0.
- */
-static int __init
-acpi_map_iosapics (void)
-{
- acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
- return 0;
-}
-
-fs_initcall(acpi_map_iosapics);
-#endif /* CONFIG_ACPI_NUMA */
-
-/*
- * Register a hot-added IOSAPIC at @phys_addr serving GSIs from @gsi_base.
- *
- * Returns 0 on success or the error reported by iosapic_init().  With
- * CONFIG_ACPI_NUMA the IOSAPIC is additionally offered to
- * acpi_map_iosapic() for node association.
- */
-int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
-{
-	int err = iosapic_init(phys_addr, gsi_base);
-
-	if (err)
-		return err;
-
-#ifdef CONFIG_ACPI_NUMA
-	acpi_map_iosapic(handle, 0, NULL, NULL);
-#endif /* CONFIG_ACPI_NUMA */
-
-	return 0;
-}
-
-EXPORT_SYMBOL(acpi_register_ioapic);
-
-/*
- * Remove the IOSAPIC identified by @gsi_base; @handle is not used.
- * Returns whatever iosapic_remove() returns.
- */
-int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
-{
- return iosapic_remove(gsi_base);
-}
-
-EXPORT_SYMBOL(acpi_unregister_ioapic);
-
-#endif /* CONFIG_ACPI */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
deleted file mode 100644
index 2aa8c101aa..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#define ASM_OFFSETS_C 1
-
-#include <linux/sched.h>
-
-#include <asm-ia64/processor.h>
-#include <asm-ia64/ptrace.h>
-#include <asm-ia64/siginfo.h>
-#include <asm-ia64/sigcontext.h>
-#include <asm-ia64/mca.h>
-
-#include "../kernel/sigframe.h"
-
-/*
- * DEFINE(sym, val) emits a marker line of the form "->sym <value> val" into
- * the generated assembly; val must be a compile-time constant ("i"
- * constraint).  BLANK() emits a bare "->" marker.
- */
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-/*
- * Never called: this function exists only so the compiler evaluates the
- * sizeof/offsetof expressions below and writes them out via DEFINE().
- */
-void foo(void)
-{
- /* structure sizes */
- DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
- DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
- DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
- DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
- DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
- DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
- DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
- DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
-
- BLANK();
-
- /* struct thread_info */
- DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
- DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-
- BLANK();
-
- /* struct task_struct */
- DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
- DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
- DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
- DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
- DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
- DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
- DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
- DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
- DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
- DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
- DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
-
- BLANK();
-
- DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
-
- BLANK();
-
- DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
- group_stop_count));
- DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
-
- BLANK();
-
- /* struct pt_regs */
- DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
- DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
- DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
- DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
- DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
- DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
- DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
- DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
- DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
- DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
- DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
- DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
- DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
- DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
- DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
-
- DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
- DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
- DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
- DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
- DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
- DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
- DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
- DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
- DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
- DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
- DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
- DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
- DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
- DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
- DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
- DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
- DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
- DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
- DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
- DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
- DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
- DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
- DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
- DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
- DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
- DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
- DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
- DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
- DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
- DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
- DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
- DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
- DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
- DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
- DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
-
- BLANK();
-
- /* struct switch_stack */
- DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
- DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
- DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
- DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
- DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
- DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
- DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
- DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
- DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
- DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
- DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
- DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
- DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
- DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
- DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
- DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
- DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
- DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
- DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
- DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
- DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
- DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
- DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
- DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
- DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
- DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
- DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
- DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
- DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
- DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
- DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
- DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
- DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
- DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
- DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
- DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
- DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
- DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
- DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
- DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
- DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
- DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
-
- BLANK();
-
- /* struct sigcontext */
- DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
- DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
- DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
- DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
- DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
- DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
- DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
- DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
- DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
- DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
- DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
- DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
- DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
-
- BLANK();
-
- DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
-
- BLANK();
-
- /* struct sigframe */
- DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
- DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
- DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
- DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
- DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
- BLANK();
- /* for assembly files which can't include sched.h: */
- DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
- DEFINE(IA64_CLONE_VM, CLONE_VM);
-
- BLANK();
- /* struct cpuinfo_ia64 */
- DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
- offsetof (struct cpuinfo_ia64, nsec_per_cyc));
- DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
- offsetof (struct cpuinfo_ia64, ptce_base));
- DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
- offsetof (struct cpuinfo_ia64, ptce_count));
- DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
- offsetof (struct cpuinfo_ia64, ptce_stride));
- BLANK();
- DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
- offsetof (struct timespec, tv_nsec));
-
- /* the bit number is hard-coded; the #if below fails the build if it drifts */
- DEFINE(CLONE_SETTLS_BIT, 19);
-#if CLONE_SETTLS != (1<<19)
-# error "CLONE_SETTLS_BIT incorrect, please fix"
-#endif
-
- BLANK();
- /* struct ia64_mca_cpu */
- DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
- offsetof (struct ia64_mca_cpu, mca_stack));
- DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
- offsetof (struct ia64_mca_cpu, init_stack));
- BLANK();
- /* struct ia64_sal_os_state */
- DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
- offsetof (struct ia64_sal_os_state, os_gp));
- DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
- offsetof (struct ia64_sal_os_state, proc_state_param));
- DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET,
- offsetof (struct ia64_sal_os_state, sal_ra));
- DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET,
- offsetof (struct ia64_sal_os_state, sal_gp));
- DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
- offsetof (struct ia64_sal_os_state, pal_min_state));
- DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET,
- offsetof (struct ia64_sal_os_state, os_status));
- DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET,
- offsetof (struct ia64_sal_os_state, context));
- DEFINE(IA64_SAL_OS_STATE_SIZE,
- sizeof (struct ia64_sal_os_state));
- BLANK();
-
- /* struct pal_min_state_area_s */
- DEFINE(IA64_PMSA_GR_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_gr));
- DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
- DEFINE(IA64_PMSA_PR_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_pr));
- DEFINE(IA64_PMSA_BR0_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_br0));
- DEFINE(IA64_PMSA_RSC_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_rsc));
- DEFINE(IA64_PMSA_IIP_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_iip));
- DEFINE(IA64_PMSA_IPSR_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_ipsr));
- DEFINE(IA64_PMSA_IFS_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_ifs));
- DEFINE(IA64_PMSA_XIP_OFFSET,
- offsetof (struct pal_min_state_area_s, pmsa_xip));
- BLANK();
-
- /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
- DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
- DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
- DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
- DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
- DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
- DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
- DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
- DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
- DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
- DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
- DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
- DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
- /* NOTE(review): IA64_TIMESPEC_TV_NSEC_OFFSET is already emitted earlier in this function; this repeat looks redundant */
- DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
-
-#ifdef CONFIG_XEN
- BLANK();
-
-/* offset of a mapped_regs_t field, biased by XMAPPEDREGS_OFS */
-#define DEFINE_MAPPED_REG_OFS(sym, field) \
- DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(mapped_regs_t, field)))
-
- DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr);
- DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr);
- DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip);
- DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs);
- DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs);
- DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr);
- DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa);
- DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa);
- DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim);
- DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
- DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
- DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
- DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
- DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
- DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
- DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
- DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
-#endif /* CONFIG_XEN */
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
deleted file mode 100644
index f46bdcf401..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
+++ /dev/null
@@ -1,1620 +0,0 @@
-/*
- * ia64/kernel/entry.S
- *
- * Kernel entry points.
- *
- * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 1999, 2002-2003
- * Asit Mallick <Asit.K.Mallick@intel.com>
- * Don Dugger <Don.Dugger@intel.com>
- * Suresh Siddha <suresh.b.siddha@intel.com>
- * Fenghua Yu <fenghua.yu@intel.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- */
-/*
- * ia64_switch_to now places correct virtual mapping in TR2 for
- * kernel stack. This allows us to handle interrupts without changing
- * to physical mode.
- *
- * Jonathan Nicklin <nicklin@missioncriticallinux.com>
- * Patrick O'Rourke <orourke@missioncriticallinux.com>
- * 11/07/2000
- */
-/*
- * Global (preserved) predicate usage on syscall entry/exit path:
- *
- * pKStk: See entry.h.
- * pUStk: See entry.h.
- * pSys: See entry.h.
- * pNonSys: !pSys
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/kregs.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
-#include <asm/percpu.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-
-#include "minstate.h"
-
- /*
- * execve() is special because in case of success, we need to
- * setup a null register window frame.
- */
-ENTRY(ia64_execve)
- /*
- * Allocate 8 input registers since ptrace() may clobber them
- */
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
- alloc loc1=ar.pfs,8,2,4,0 // 8 inputs, 2 locals, 4 outputs; loc1 = caller's ar.pfs
- mov loc0=rp // loc0 = return pointer
- .body
- mov out0=in0 // filename
- ;; // stop bit between alloc and call
- mov out1=in1 // argv
- mov out2=in2 // envp
- add out3=16,sp // regs
- br.call.sptk.many rp=sys_execve // result is examined in r8 below
-.ret0:
-#ifdef CONFIG_IA32_SUPPORT
- /*
- * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
- * from pt_regs.
- */
- adds r16=PT(CR_IPSR)+16,sp // r16 = &pt_regs.cr_ipsr
- ;;
- ld8 r16=[r16] // r16 = saved ipsr; IA64_PSR_IS_BIT is tested further down
-#endif
- cmp4.ge p6,p7=r8,r0 // p6 = success (32-bit r8 >= 0), p7 = failure
- mov ar.pfs=loc1 // restore ar.pfs
- sxt4 r8=r8 // return 64-bit result
- ;;
- stf.spill [sp]=f0 // park f0 at [sp]; the ldf.fill instructions below reload it
-(p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode...
- mov rp=loc0 // restore return pointer
-(p6) mov ar.pfs=r0 // clear ar.pfs on success
-(p7) br.ret.sptk.many rp // failure: return to the caller without zapping state
-
- /*
- * In theory, we'd have to zap this state only to prevent leaking of
- * security sensitive state (e.g., if current->mm->dumpable is zero). However,
- * this executes in less than 20 cycles even on Itanium, so it's not worth
- * optimizing for...).
- */
- mov ar.unat=0; mov ar.lc=0 // success path only from here on (p7 returned above)
- mov r4=0; mov f2=f0; mov b1=r0
- mov r5=0; mov f3=f0; mov b2=r0
- mov r6=0; mov f4=f0; mov b3=r0
- mov r7=0; mov f5=f0; mov b4=r0
- ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 // ldf.fill reloads the f0 image spilled at [sp] above
- ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0
- ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0
- ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0
- ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0
- ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0
- ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0
-#ifdef CONFIG_IA32_SUPPORT
- tbit.nz p6,p0=r16, IA64_PSR_IS_BIT // p6 = IS bit set in the saved ipsr
- movl loc0=ia64_ret_from_ia32_execve
- ;;
-(p6) mov rp=loc0 // IS set: return through ia64_ret_from_ia32_execve instead
-#endif
- br.ret.sptk.many rp
-END(ia64_execve)
-
-/*
- * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
- * u64 tls)
- */
-GLOBAL_ENTRY(sys_clone2)
- /*
- * Allocate 8 input registers since ptrace() may clobber them
- */
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
- alloc r16=ar.pfs,8,2,6,0 // 8 inputs, 2 locals, 6 outputs; r16 = ar.pfs
- DO_SAVE_SWITCH_STACK
- adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp // r2 = &regs->r16 (regs sits above the switch stack, see out2 below)
- mov loc0=rp // loc0 = return pointer
- mov loc1=r16 // save ar.pfs across do_fork
- .body
- mov out1=in1 // out1 = ustack_base
- mov out3=in2 // out3 = ustack_size
- tbit.nz p6,p0=in0,CLONE_SETTLS_BIT // p6 = CLONE_SETTLS bit set in flags
- mov out4=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
- ;;
-(p6) st8 [r2]=in5 // store TLS in the pt_regs slot for r16, for copy_thread()
- mov out5=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
- adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
- mov out0=in0 // out0 = clone_flags
- br.call.sptk.many rp=do_fork
-.ret1: .restore sp
- adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
- mov ar.pfs=loc1 // restore ar.pfs
- mov rp=loc0 // restore return pointer
- br.ret.sptk.many rp
-END(sys_clone2)
-
-/*
- * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
- * Deprecated. Use sys_clone2() instead.
- */
-GLOBAL_ENTRY(sys_clone)
- /*
- * Allocate 8 input registers since ptrace() may clobber them
- */
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
- alloc r16=ar.pfs,8,2,6,0 // 8 inputs, 2 locals, 6 outputs; r16 = ar.pfs
- DO_SAVE_SWITCH_STACK
- adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp // r2 = &regs->r16 (regs sits above the switch stack, see out2 below)
- mov loc0=rp // loc0 = return pointer
- mov loc1=r16 // save ar.pfs across do_fork
- .body
- mov out1=in1 // out1 = ustack_base
- mov out3=16 // stacksize (compensates for 16-byte scratch area)
- tbit.nz p6,p0=in0,CLONE_SETTLS_BIT // p6 = CLONE_SETTLS bit set in flags
- mov out4=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
- ;;
-(p6) st8 [r2]=in4 // store TLS in the pt_regs slot for r16 (same slot sys_clone2 uses; presumably consumed by copy_thread() to set the child's tp)
- mov out5=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
- adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
- mov out0=in0 // out0 = clone_flags
- br.call.sptk.many rp=do_fork
-.ret2: .restore sp
- adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
- mov ar.pfs=loc1 // restore ar.pfs
- mov rp=loc0 // restore return pointer
- br.ret.sptk.many rp
-END(sys_clone)
-
-/*
- * prev_task <- ia64_switch_to(struct task_struct *next)
- * With Ingo's new scheduler, interrupts are disabled when this routine gets
- * called. The code starting at .map relies on this. The rest of the code
- * doesn't care about the interrupt masking status.
- */
-GLOBAL_ENTRY(__ia64_switch_to)
- .prologue
- alloc r16=ar.pfs,1,0,0,0 // 1 input (in0 = next), no locals or outputs
- DO_SAVE_SWITCH_STACK
- .body
-
- adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 // r22 = address of the old task's saved-ksp field (r13 = current)
- movl r25=init_task
- mov r27=IA64_KR(CURRENT_STACK) // r27 = value recorded by the last pass through .map
- adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 // r21 = address of next's saved-ksp field
- dep r20=0,in0,61,3 // physical address of "next"
- ;;
- st8 [r22]=sp // save kernel stack pointer of old task
- shr.u r26=r20,IA64_GRANULE_SHIFT // r26 = physical address of next in granule units
- cmp.eq p7,p6=r25,in0 // p7 = next is init_task, p6 = it is not
- ;;
- /*
- * If we've already mapped this task's page, we can skip doing it again.
- */
-(p6) cmp.eq p7,p6=r26,r27 // p6 = next's granule differs from the recorded one
-(p6) br.cond.dpnt .map // mapping must be (re)installed first
- ;;
-.done:
- ld8 sp=[r21] // load kernel stack pointer of new task
- mov IA64_KR(CURRENT)=in0 // update "current" application register
- mov r8=r13 // return pointer to previously running task
- mov r13=in0 // set "current" pointer
- ;;
- DO_LOAD_SWITCH_STACK
-
-#ifdef CONFIG_SMP
- sync.i // ensure "fc"s done by this CPU are visible on other CPUs
-#endif
- br.ret.sptk.many rp // boogie on out in new context
-
-.map:
- rsm psr.ic // interrupts (psr.i) are already disabled here
- movl r25=PAGE_KERNEL
- ;;
- srlz.d
- or r23=r25,r20 // construct PA | page properties
- mov r25=IA64_GRANULE_SHIFT<<2 // granule size, shifted into position for cr.itir
- ;;
- mov cr.itir=r25
- mov cr.ifa=in0 // VA of next task...
- ;;
- mov r25=IA64_TR_CURRENT_STACK // translation-register index used by itr.d below
- mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ssm psr.ic // reenable the psr.ic bit
- ;;
- srlz.d
- br.cond.sptk .done // mapping installed: continue with the switch
-END(__ia64_switch_to)
-
-/*
- * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This
- * means that we may get an interrupt with "sp" pointing to the new kernel stack while
- * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc,
- * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
- * problem. Also, we don't need to specify unwind information for preserved registers
- * that are not modified in save_switch_stack as the right unwind information is already
- * specified at the call-site of save_switch_stack.
- */
-
-/*
- * save_switch_stack:
- * - r16 holds ar.pfs
- * - b7 holds address to return to
- * - rp (b0) holds return address to save
- */
-GLOBAL_ENTRY(save_switch_stack)
- .prologue
- .altrp b7
- flushrs // flush dirty regs to backing store (must be first in insn group)
- .save @priunat,r17
- mov r17=ar.unat // preserve caller's
- .body
-#ifdef CONFIG_ITANIUM
- adds r2=16+128,sp
- adds r3=16+64,sp
- adds r14=SW(R4)+16,sp
- ;;
- st8.spill [r14]=r4,16 // spill r4
- lfetch.fault.excl.nt1 [r3],128
- ;;
- lfetch.fault.excl.nt1 [r2],128
- lfetch.fault.excl.nt1 [r3],128
- ;;
- lfetch.fault.excl [r2]
- lfetch.fault.excl [r3]
- adds r15=SW(R5)+16,sp
-#else
- add r2=16+3*128,sp
- add r3=16,sp
- add r14=SW(R4)+16,sp
- ;;
- st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0
- lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010
- ;;
- lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090
- lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190
- ;;
- lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110
- lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210
- adds r15=SW(R5)+16,sp
-#endif
- ;;
- st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5
- mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0
- add r2=SW(F2)+16,sp // r2 = &sw->f2
- ;;
- st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6
- mov.m r18=ar.fpsr // preserve fpsr
- add r3=SW(F3)+16,sp // r3 = &sw->f3
- ;;
- stf.spill [r2]=f2,32
- mov.m r19=ar.rnat
- mov r21=b0
-
- stf.spill [r3]=f3,32
- st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7
- mov r22=b1
- ;;
- // since we're done with the spills, read and save ar.unat:
- mov.m r29=ar.unat
- mov.m r20=ar.bspstore
- mov r23=b2
- stf.spill [r2]=f4,32
- stf.spill [r3]=f5,32
- mov r24=b3
- ;;
- st8 [r14]=r21,SW(B1)-SW(B0) // save b0
- st8 [r15]=r23,SW(B3)-SW(B2) // save b2
- mov r25=b4
- mov r26=b5
- ;;
- st8 [r14]=r22,SW(B4)-SW(B1) // save b1
- st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3
- mov r21=ar.lc // I-unit
- stf.spill [r2]=f12,32
- stf.spill [r3]=f13,32
- ;;
- st8 [r14]=r25,SW(B5)-SW(B4) // save b4
- st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs
- stf.spill [r2]=f14,32
- stf.spill [r3]=f15,32
- ;;
- st8 [r14]=r26 // save b5
- st8 [r15]=r21 // save ar.lc
- stf.spill [r2]=f16,32
- stf.spill [r3]=f17,32
- ;;
- stf.spill [r2]=f18,32
- stf.spill [r3]=f19,32
- ;;
- stf.spill [r2]=f20,32
- stf.spill [r3]=f21,32
- ;;
- stf.spill [r2]=f22,32
- stf.spill [r3]=f23,32
- ;;
- stf.spill [r2]=f24,32
- stf.spill [r3]=f25,32
- ;;
- stf.spill [r2]=f26,32
- stf.spill [r3]=f27,32
- ;;
- stf.spill [r2]=f28,32
- stf.spill [r3]=f29,32
- ;;
- stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)
- stf.spill [r3]=f31,SW(PR)-SW(F31)
- add r14=SW(CALLER_UNAT)+16,sp
- ;;
- st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat
- st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
- mov r21=pr
- ;;
- st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
- st8 [r3]=r21 // save predicate registers
- ;;
- st8 [r2]=r20 // save ar.bspstore
- st8 [r14]=r18 // save fpsr
- mov ar.rsc=3 // put RSE back into eager mode, pl 0
- br.cond.sptk.many b7
-END(save_switch_stack)
-
-/*
- * load_switch_stack:
- * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
- * - b7 holds address to return to
- * - must not touch r8-r11
- */
-GLOBAL_ENTRY(load_switch_stack)
- .prologue
- .altrp b7
-
- .body
- lfetch.fault.nt1 [sp]
- adds r2=SW(AR_BSPSTORE)+16,sp
- adds r3=SW(AR_UNAT)+16,sp
- mov ar.rsc=0 // put RSE into enforced lazy mode
- adds r14=SW(CALLER_UNAT)+16,sp
- adds r15=SW(AR_FPSR)+16,sp
- ;;
- ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore
- ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat
- ;;
- ld8 r21=[r2],16 // restore b0
- ld8 r22=[r3],16 // restore b1
- ;;
- ld8 r23=[r2],16 // restore b2
- ld8 r24=[r3],16 // restore b3
- ;;
- ld8 r25=[r2],16 // restore b4
- ld8 r26=[r3],16 // restore b5
- ;;
- ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs
- ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc
- ;;
- ld8 r28=[r2] // restore pr
- ld8 r30=[r3] // restore rnat
- ;;
- ld8 r18=[r14],16 // restore caller's unat
- ld8 r19=[r15],24 // restore fpsr
- ;;
- ldf.fill f2=[r14],32
- ldf.fill f3=[r15],32
- ;;
- ldf.fill f4=[r14],32
- ldf.fill f5=[r15],32
- ;;
- ldf.fill f12=[r14],32
- ldf.fill f13=[r15],32
- ;;
- ldf.fill f14=[r14],32
- ldf.fill f15=[r15],32
- ;;
- ldf.fill f16=[r14],32
- ldf.fill f17=[r15],32
- ;;
- ldf.fill f18=[r14],32
- ldf.fill f19=[r15],32
- mov b0=r21
- ;;
- ldf.fill f20=[r14],32
- ldf.fill f21=[r15],32
- mov b1=r22
- ;;
- ldf.fill f22=[r14],32
- ldf.fill f23=[r15],32
- mov b2=r23
- ;;
- mov ar.bspstore=r27
- mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7
- mov b3=r24
- ;;
- ldf.fill f24=[r14],32
- ldf.fill f25=[r15],32
- mov b4=r25
- ;;
- ldf.fill f26=[r14],32
- ldf.fill f27=[r15],32
- mov b5=r26
- ;;
- ldf.fill f28=[r14],32
- ldf.fill f29=[r15],32
- mov ar.pfs=r16
- ;;
- ldf.fill f30=[r14],32
- ldf.fill f31=[r15],24
- mov ar.lc=r17
- ;;
- ld8.fill r4=[r14],16
- ld8.fill r5=[r15],16
- mov pr=r28,-1
- ;;
- ld8.fill r6=[r14],16
- ld8.fill r7=[r15],16
-
- mov ar.unat=r18 // restore caller's unat
- mov ar.rnat=r30 // must restore after bspstore but before rsc!
- mov ar.fpsr=r19 // restore fpsr
- mov ar.rsc=3 // put RSE back into eager mode, pl 0
- br.cond.sptk.many b7
-END(load_switch_stack)
-
-GLOBAL_ENTRY(prefetch_stack)
- add r14 = -IA64_SWITCH_STACK_SIZE, sp // r14 = sp - IA64_SWITCH_STACK_SIZE (area just below the current sp)
- add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 // r15 = address of the saved-ksp field of the task in in0
- ;;
- ld8 r16 = [r15] // load next's stack pointer
- lfetch.fault.excl [r14], 128 // exclusive prefetch at r14, then r14 += 128
- ;;
- lfetch.fault.excl [r14], 128
- lfetch.fault [r16], 128 // prefetch at next's stack pointer, then r16 += 128
- ;;
- lfetch.fault.excl [r14], 128
- lfetch.fault [r16], 128
- ;;
- lfetch.fault.excl [r14], 128
- lfetch.fault [r16], 128
- ;;
- lfetch.fault.excl [r14], 128 // 5th and last line below the current sp
- lfetch.fault [r16], 128
- ;;
- lfetch.fault [r16], 128 // 5th and last line of next's stack
- br.ret.sptk.many rp
-END(prefetch_stack)
-
-GLOBAL_ENTRY(execve)
- mov r15=__NR_execve // put syscall number in place
- break __BREAK_SYSCALL // issue the syscall via the break instruction
- br.ret.sptk.many rp // return to the caller once the break returns
-END(execve)
-
-GLOBAL_ENTRY(clone)
- mov r15=__NR_clone // put syscall number in place
- break __BREAK_SYSCALL // issue the syscall via the break instruction
- br.ret.sptk.many rp // return to the caller once the break returns
-END(clone)
-
- /*
- * Invoke a system call, but do some tracing before and after the call.
- * We MUST preserve the current register frame throughout this routine
- * because some system calls (such as ia64_execve) directly
- * manipulate ar.pfs.
- */
-GLOBAL_ENTRY(__ia64_trace_syscall)
- PT_REGS_UNWIND_INFO(0)
- /*
- * We need to preserve the scratch registers f6-f11 in case the system
- * call is sigreturn.
- */
- adds r16=PT(F6)+16,sp // r16 = &pt_regs.f6
- adds r17=PT(F7)+16,sp // r17 = &pt_regs.f7
- ;;
- stf.spill [r16]=f6,32
- stf.spill [r17]=f7,32
- ;;
- stf.spill [r16]=f8,32
- stf.spill [r17]=f9,32
- ;;
- stf.spill [r16]=f10
- stf.spill [r17]=f11
- br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
- adds r16=PT(F6)+16,sp // recompute the slot addresses and reload f6-f11
- adds r17=PT(F7)+16,sp
- ;;
- ldf.fill f6=[r16],32
- ldf.fill f7=[r17],32
- ;;
- ldf.fill f8=[r16],32
- ldf.fill f9=[r17],32
- ;;
- ldf.fill f10=[r16]
- ldf.fill f11=[r17]
- // the syscall number may have changed, so re-load it and re-calculate the
- // syscall entry-point:
- adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #)
- ;;
- ld8 r15=[r15] // r15 = syscall # as stored in pt_regs
- mov r3=NR_syscalls - 1 // r3 = largest valid table index
- ;;
- adds r15=-1024,r15 // r15 = syscall # - 1024 (index into sys_call_table)
- movl r16=sys_call_table
- ;;
- shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
- cmp.leu p6,p7=r15,r3 // p6 = index in range (unsigned compare), p7 = out of range
- ;;
-(p6) ld8 r20=[r20] // load address of syscall entry point
-(p7) movl r20=sys_ni_syscall // out of range: call sys_ni_syscall instead
- ;;
- mov b6=r20
- br.call.sptk.many rp=b6 // do the syscall
-.strace_check_retval:
- cmp.lt p6,p0=r8,r0 // syscall failed?
- adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
- adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
- mov r10=0 // default: no error indication
-(p6) br.cond.sptk strace_error // syscall failed ->
- ;; // avoid RAW on r10
-.strace_save_retval:
-.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8
-.mem.offset 8,0; st8.spill [r3]=r10 // store error indication in slot for r10 (0, or -1 when set by strace_error)
- br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-.ret3:
-(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
- br.cond.sptk .work_pending_syscall_end // reloads r8/r10 from pt_regs, then continues at .work_processed_syscall
-
-strace_error:
- ld8 r3=[r2] // load pt_regs.r8
- sub r9=0,r8 // negate return value to get errno value
- ;;
- cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0?
- adds r3=16,r2 // r3=&pt_regs.r10
- ;;
-(p6) mov r10=-1 // pt_regs.r8 != 0: set the error indication
-(p6) mov r8=r9 // ... and return the negated value in r8
- br.cond.sptk .strace_save_retval
-END(__ia64_trace_syscall)
-
- /*
- * When traced and returning from sigreturn, we invoke syscall_trace but then
- * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
- */
-GLOBAL_ENTRY(ia64_strace_leave_kernel)
- PT_REGS_UNWIND_INFO(0)
-{ /*
- * Some versions of gas generate bad unwind info if the first instruction of a
- * procedure doesn't go into the first slot of a bundle. This is a workaround.
- */
- nop.m 0 // explicit filler so the bundle starts at the procedure entry
- nop.i 0
- br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-}
-.ret4: br.cond.sptk ia64_leave_kernel // exit via ia64_leave_kernel, not ia64_leave_syscall
-END(ia64_strace_leave_kernel)
-
-GLOBAL_ENTRY(__ia64_ret_from_clone)
- PT_REGS_UNWIND_INFO(0)
-{ /*
- * Some versions of gas generate bad unwind info if the first instruction of a
- * procedure doesn't go into the first slot of a bundle. This is a workaround.
- */
- nop.m 0
- nop.i 0
- /*
- * We need to call schedule_tail() to complete the scheduling process.
- * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the
- * address of the previously executing task.
- */
- br.call.sptk.many rp=ia64_invoke_schedule_tail
-}
-.ret8:
- adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 // r2 = &current_thread_info()->flags
- ;;
- ld4 r2=[r2] // r2 = thread-info flags
- ;;
- mov r8=0 // r8 = 0 is the return value seen by the code we branch or fall into
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // keep only the _TIF_SYSCALL_TRACEAUDIT bits
- ;;
- cmp.ne p6,p0=r2,r0 // p6 = at least one of those bits is set
-(p6) br.cond.spnt .strace_check_retval // continue in __ia64_trace_syscall's return path
- ;; // added stop bits to prevent r8 dependency
-END(__ia64_ret_from_clone)
- // fall through
-GLOBAL_ENTRY(ia64_ret_from_syscall)
- PT_REGS_UNWIND_INFO(0)
- cmp.ge p6,p7=r8,r0 // syscall executed successfully? (p6 = r8 >= 0, p7 = r8 < 0)
- adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 (read by handle_syscall_error)
- mov r10=r0 // clear error indication in r10
-(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure
- ;;
- // don't fall through, ia64_leave_syscall may be #define'd
- br.cond.sptk.few ia64_leave_syscall // success path: leave with r8 as is and r10 = 0
- ;;
-END(ia64_ret_from_syscall)
-/*
- * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
- * need to switch to bank 0 and doesn't restore the scratch registers.
- * To avoid leaking kernel bits, the scratch registers are set to
- * the following known-to-be-safe values:
- *
- * r1: restored (global pointer)
- * r2: cleared
- * r3: 1 (when returning to user-level)
- * r8-r11: restored (syscall return value(s))
- * r12: restored (user-level stack pointer)
- * r13: restored (user-level thread pointer)
- * r14: set to __kernel_syscall_via_epc
- * r15: restored (syscall #)
- * r16-r17: cleared
- * r18: user-level b6
- * r19: cleared
- * r20: user-level ar.fpsr
- * r21: user-level b0
- * r22: cleared
- * r23: user-level ar.bspstore
- * r24: user-level ar.rnat
- * r25: user-level ar.unat
- * r26: user-level ar.pfs
- * r27: user-level ar.rsc
- * r28: user-level ip
- * r29: user-level psr
- * r30: user-level cfm
- * r31: user-level pr
- * f6-f11: cleared
- * pr: restored (user-level pr)
- * b0: restored (user-level rp)
- * b6: restored
- * b7: set to __kernel_syscall_via_epc
- * ar.unat: restored (user-level ar.unat)
- * ar.pfs: restored (user-level ar.pfs)
- * ar.rsc: restored (user-level ar.rsc)
- * ar.rnat: restored (user-level ar.rnat)
- * ar.bspstore: restored (user-level ar.bspstore)
- * ar.fpsr: restored (user-level ar.fpsr)
- * ar.ccv: cleared
- * ar.csd: cleared
- * ar.ssd: cleared
- */
-GLOBAL_ENTRY(__ia64_leave_syscall)
- PT_REGS_UNWIND_INFO(0)
- /*
- * work.need_resched etc. mustn't get changed by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on.
- *
- * p6 controls whether current_thread_info()->flags needs to be checked for
- * extra work. We always check for extra work when returning to user-level.
- * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
- * is 0. After extra work processing has been completed, execution
- * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
- * needs to be redone.
- */
-#ifdef CONFIG_PREEMPT
- rsm psr.i // disable interrupts
- cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
- .pred.rel.mutex pUStk,pKStk
-(pKStk) ld4 r21=[r20] // r21 <- preempt_count
-(pUStk) mov r21=0 // r21 <- 0
- ;;
- cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
-#else /* !CONFIG_PREEMPT */
-(pUStk) rsm psr.i
- cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
-(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
-#endif
-.work_processed_syscall:
- adds r2=PT(LOADRS)+16,r12
- adds r3=PT(AR_BSPSTORE)+16,r12
- adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-(p6) ld4 r31=[r18] // load current_thread_info()->flags
- ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
- nop.i 0
- ;;
- mov r16=ar.bsp // M2 get existing backing store pointer
- ld8 r18=[r2],PT(R9)-PT(B6) // load b6
-(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
- ;;
- ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
-(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
-(p6) br.cond.spnt .work_pending_syscall
- ;;
- // start restoring the state saved on the kernel stack (struct pt_regs):
- ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
- ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE!
- ;;
- invala // M0|1 invalidate ALAT
- rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection
- cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs
-
- ld8 r29=[r2],16 // M0|1 load cr.ipsr
- ld8 r28=[r3],16 // M0|1 load cr.iip
- mov r22=r0 // A clear r22
- ;;
- ld8 r30=[r2],16 // M0|1 load cr.ifs
- ld8 r25=[r3],16 // M0|1 load ar.unat
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
- ;;
- ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
- nop 0
- ;;
- ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
- ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc
- mov f6=f0 // F clear f6
- ;;
- ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage)
- ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates
- mov f7=f0 // F clear f7
- ;;
- ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr
- ld8.fill r1=[r3],16 // M0|1 load r1
-(pUStk) mov r17=1 // A
- ;;
-(pUStk) st1 [r14]=r17 // M2|3
- ld8.fill r13=[r3],16 // M0|1
- mov f8=f0 // F clear f8
- ;;
- ld8.fill r12=[r2] // M0|1 restore r12 (sp)
- ld8.fill r15=[r3] // M0|1 restore r15
- mov b6=r18 // I0 restore b6
-
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
- mov f9=f0 // F clear f9
-(pKStk) br.cond.dpnt.many skip_rbs_switch // B
-
- srlz.d // M0 ensure interruption collection is off (for cover)
- shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
- cover // B add current frame into dirty partition & set cr.ifs
- ;;
-(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
- mov r19=ar.bsp // M2 get new backing store pointer
- mov f10=f0 // F clear f10
-
- nop.m 0
- movl r14=__kernel_syscall_via_epc // X
- ;;
- mov.m ar.csd=r0 // M2 clear ar.csd
- mov.m ar.ccv=r0 // M2 clear ar.ccv
- mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
-
- mov.m ar.ssd=r0 // M2 clear ar.ssd
- mov f11=f0 // F clear f11
- br.cond.sptk.many rbs_switch // B
-END(__ia64_leave_syscall)
-
-#ifdef CONFIG_IA32_SUPPORT
-GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
- PT_REGS_UNWIND_INFO(0)
- adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
- adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
- ;;
- .mem.offset 0,0
- st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit
- .mem.offset 8,0
- st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit
- ;;
- // don't fall through, ia64_leave_kernel may be #define'd
- br.cond.sptk.few ia64_leave_kernel // exit via ia64_leave_kernel rather than ia64_leave_syscall
- ;;
-END(ia64_ret_from_ia32_execve)
-#endif /* CONFIG_IA32_SUPPORT */
-GLOBAL_ENTRY(__ia64_leave_kernel)
- PT_REGS_UNWIND_INFO(0)
- /*
- * work.need_resched etc. mustn't get changed by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on.
- *
- * p6 controls whether current_thread_info()->flags needs to be checked for
- * extra work. We always check for extra work when returning to user-level.
- * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
- * is 0. After extra work processing has been completed, execution
- * resumes at .work_processed_kernel with p6 set to 1 if the extra-work-check
- * needs to be redone.
- */
-#ifdef CONFIG_PREEMPT
- rsm psr.i // disable interrupts
- cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
- .pred.rel.mutex pUStk,pKStk
-(pKStk) ld4 r21=[r20] // r21 <- preempt_count
-(pUStk) mov r21=0 // r21 <- 0
- ;;
- cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
-#else
-(pUStk) rsm psr.i
- cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
-(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
-#endif
-.work_processed_kernel:
- adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-(p6) ld4 r31=[r17] // load current_thread_info()->flags
- adds r21=PT(PR)+16,r12
- ;;
-
- lfetch [r21],PT(CR_IPSR)-PT(PR)
- adds r2=PT(B6)+16,r12
- adds r3=PT(R16)+16,r12
- ;;
- lfetch [r21]
- ld8 r28=[r2],8 // load b6
- adds r29=PT(R24)+16,r12
-
- ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
- adds r30=PT(AR_CCV)+16,r12
-(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
- ;;
- ld8.fill r24=[r29]
- ld8 r15=[r30] // load ar.ccv
-(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending?
- ;;
- ld8 r29=[r2],16 // load b7
- ld8 r30=[r3],16 // load ar.csd
-(p6) br.cond.spnt .work_pending
- ;;
- ld8 r31=[r2],16 // load ar.ssd
- ld8.fill r8=[r3],16
- ;;
- ld8.fill r9=[r2],16
- ld8.fill r10=[r3],PT(R17)-PT(R10)
- ;;
- ld8.fill r11=[r2],PT(R18)-PT(R11)
- ld8.fill r17=[r3],16
- ;;
- ld8.fill r18=[r2],16
- ld8.fill r19=[r3],16
- ;;
- ld8.fill r20=[r2],16
- ld8.fill r21=[r3],16
- mov ar.csd=r30
- mov ar.ssd=r31
- ;;
- rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
- invala // invalidate ALAT
- ;;
- ld8.fill r22=[r2],24
- ld8.fill r23=[r3],24
- mov b6=r28
- ;;
- ld8.fill r25=[r2],16
- ld8.fill r26=[r3],16
- mov b7=r29
- ;;
- ld8.fill r27=[r2],16
- ld8.fill r28=[r3],16
- ;;
- ld8.fill r29=[r2],16
- ld8.fill r30=[r3],24
- ;;
- ld8.fill r31=[r2],PT(F9)-PT(R31)
- adds r3=PT(F10)-PT(F6),r3
- ;;
- ldf.fill f9=[r2],PT(F6)-PT(F9)
- ldf.fill f10=[r3],PT(F8)-PT(F10)
- ;;
- ldf.fill f6=[r2],PT(F7)-PT(F6)
- ;;
- ldf.fill f7=[r2],PT(F11)-PT(F7)
- ldf.fill f8=[r3],32
- ;;
- srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned)
- mov ar.ccv=r15
- ;;
- ldf.fill f11=[r2]
- bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
- ;;
-(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
- adds r16=PT(CR_IPSR)+16,r12
- adds r17=PT(CR_IIP)+16,r12
-
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
- nop.i 0
- nop.i 0
- ;;
- ld8 r29=[r16],16 // load cr.ipsr
- ld8 r28=[r17],16 // load cr.iip
- ;;
- ld8 r30=[r16],16 // load cr.ifs
- ld8 r25=[r17],16 // load ar.unat
- ;;
- ld8 r26=[r16],16 // load ar.pfs
- ld8 r27=[r17],16 // load ar.rsc
- cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
- ;;
- ld8 r24=[r16],16 // load ar.rnat (may be garbage)
- ld8 r23=[r17],16 // load ar.bspstore (may be garbage)
- ;;
- ld8 r31=[r16],16 // load predicates
- ld8 r21=[r17],16 // load b0
- ;;
- ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
- ld8.fill r1=[r17],16 // load r1
- ;;
- ld8.fill r12=[r16],16
- ld8.fill r13=[r17],16
-(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
- ;;
- ld8 r20=[r16],16 // ar.fpsr
- ld8.fill r15=[r17],16
- ;;
- ld8.fill r14=[r16],16
- ld8.fill r2=[r17]
-(pUStk) mov r17=1
- ;;
- ld8.fill r3=[r16]
-(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
- shr.u r18=r19,16 // get byte size of existing "dirty" partition
- ;;
- mov r16=ar.bsp // get existing backing store pointer
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
- ;;
- ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
-(pKStk) br.cond.dpnt skip_rbs_switch
-
- /*
- * Restore user backing store.
- *
- * NOTE: alloc, loadrs, and cover can't be predicated.
- */
-(pNonSys) br.cond.dpnt dont_preserve_current_frame
- cover // add current frame into dirty partition and set cr.ifs
- ;;
- mov r19=ar.bsp // get new backing store pointer
-rbs_switch:
- sub r16=r16,r18 // krbs = old bsp - size of dirty partition
- cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
- ;;
- sub r19=r19,r16 // calculate total byte size of dirty partition
- add r18=64,r18 // don't force in0-in7 into memory...
- ;;
- shl r19=r19,16 // shift size of dirty partition into loadrs position
- ;;
-dont_preserve_current_frame:
- /*
- * To prevent leaking bits between the kernel and user-space,
- * we must clear the stacked registers in the "invalid" partition here.
- * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
- * 5 registers/cycle on McKinley).
- */
-# define pRecurse p6
-# define pReturn p7
-#ifdef CONFIG_ITANIUM
-# define Nregs 10
-#else
-# define Nregs 14
-#endif
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
- sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize
- ;;
- mov ar.rsc=r19 // load ar.rsc to be used for "loadrs"
- shladd in0=loc1,3,r17
- mov in1=0
- ;;
- TEXT_ALIGN(32)
-rse_clear_invalid:
-#ifdef CONFIG_ITANIUM
- // cycle 0
- { .mii
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
-}{ .mfb
- add out1=1,in1 // increment recursion count
- nop.f 0
- nop.b 0 // can't do br.call here because of alloc (WAW on CFM)
- ;;
-}{ .mfi // cycle 1
- mov loc1=0
- nop.f 0
- mov loc2=0
-}{ .mib
- mov loc3=0
- mov loc4=0
-(pRecurse) br.call.sptk.many b0=rse_clear_invalid
-
-}{ .mfi // cycle 2
- mov loc5=0
- nop.f 0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
-}{ .mib
- mov loc6=0
- mov loc7=0
-(pReturn) br.ret.sptk.many b0
-}
-#else /* !CONFIG_ITANIUM */
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
- add out1=1,in1 // increment recursion count
- mov loc1=0
- mov loc2=0
- ;;
- mov loc3=0
- mov loc4=0
- mov loc5=0
- mov loc6=0
- mov loc7=0
-(pRecurse) br.call.dptk.few b0=rse_clear_invalid
- ;;
- mov loc8=0
- mov loc9=0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
- mov loc10=0
- mov loc11=0
-(pReturn) br.ret.dptk.many b0
-#endif /* !CONFIG_ITANIUM */
-# undef pRecurse
-# undef pReturn
- ;;
- alloc r17=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- loadrs
- ;;
-skip_rbs_switch:
- mov ar.unat=r25 // M2
-(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22
-(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise
- ;;
-(pUStk) mov ar.bspstore=r23 // M2
-(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
-(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
- ;;
- mov cr.ipsr=r29 // M2
- mov ar.pfs=r26 // I0
-(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise
-
-(p9) mov cr.ifs=r30 // M2
- mov b0=r21 // I0
-(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise
-
- mov ar.fpsr=r20 // M2
- mov cr.iip=r28 // M2
- nop 0
- ;;
-(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
- nop 0
-(pLvSys)mov r2=r0
-
- mov ar.rsc=r27 // M2
- mov pr=r31,-1 // I0
- rfi // B
-
- /*
- * On entry:
- * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
- * r31 = current->thread_info->flags
- * On exit:
- * p6 = TRUE if work-pending-check needs to be redone
- */
-.work_pending_syscall:
- add r2=-8,r2
- add r3=-8,r3
- ;;
- st8 [r2]=r8
- st8 [r3]=r10
-.work_pending:
- tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
-(p6) br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
- ;;
-(pKStk) st4 [r20]=r21
- ssm psr.i // enable interrupts
-#endif
- br.call.spnt.many rp=schedule
-.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
- rsm psr.i // disable interrupts
- ;;
-#ifdef CONFIG_PREEMPT
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
-(pKStk) st4 [r20]=r0 // preempt_count() <- 0
-#endif
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
-.notify:
-(pUStk) br.call.spnt.many rp=notify_resume_user
-.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // don't re-check
-
-.work_pending_syscall_end:
- adds r2=PT(R8)+16,r12
- adds r3=PT(R10)+16,r12
- ;;
- ld8 r8=[r2]
- ld8 r10=[r3]
- br.cond.sptk.many .work_processed_syscall // re-check
-
-END(__ia64_leave_kernel)
-
-ENTRY(handle_syscall_error)
- /*
- * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
- * lead us to mistake a negative return value as a failed syscall. Those syscalls
- * must deposit a non-zero value in pt_regs.r8 to indicate an error. If
- * pt_regs.r8 is zero, we assume that the call completed successfully.
- *
- * On entry r2 must hold the address of pt_regs.r8 (it is dereferenced below) and
- * r8 holds the raw return value of the system call.
- */
- PT_REGS_UNWIND_INFO(0)
- ld8 r3=[r2] // load pt_regs.r8
- ;;
- cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? (p7 <- an error was flagged)
- ;;
-(p7) mov r10=-1 // error case only: r10 = -1
-(p7) sub r8=0,r8 // negate return value to get errno
- br.cond.sptk ia64_leave_syscall // success and error both continue at ia64_leave_syscall
-END(handle_syscall_error)
-
- /*
- * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
- * in case a system call gets restarted.
- */
-GLOBAL_ENTRY(ia64_invoke_schedule_tail)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
- alloc loc1=ar.pfs,8,2,1,0 // 8 inputs (kept intact), 2 locals, 1 output; loc1 = caller's ar.pfs
- mov loc0=rp // save return address across the call
- mov out0=r8 // Address of previous task
- ;;
- br.call.sptk.many rp=schedule_tail
-.ret11: mov ar.pfs=loc1 // restore caller's ar.pfs
- mov rp=loc0 // restore return address
- br.ret.sptk.many rp
-END(ia64_invoke_schedule_tail)
-
- /*
- * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to
- * be set up by the caller. We declare 8 input registers so the system call
- * args get preserved, in case we need to restart a system call.
- */
-GLOBAL_ENTRY(notify_resume_user)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
- alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
- mov r9=ar.unat // r9 = current ar.unat, spilled to the stack below
- mov loc0=rp // save return address
- mov out0=0 // there is no "oldset"
- adds out1=8,sp // out1=&sigscratch->ar_pfs
-(pSys) mov out2=1 // out2==1 => we're in a syscall
- ;;
-(pNonSys) mov out2=0 // out2==0 => not a syscall
- .fframe 16
- .spillsp ar.unat, 16
- st8 [sp]=r9,-16 // allocate space for ar.unat and save it
- st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch
- .body
- br.call.sptk.many rp=do_notify_resume_user // args: out0=0, out1=&sigscratch, out2=in-syscall flag
-.ret15: .restore sp
- adds sp=16,sp // pop scratch stack space
- ;;
- ld8 r9=[sp] // load new unat from sigscratch->scratch_unat
- mov rp=loc0 // restore return address
- ;;
- mov ar.unat=r9 // install the (possibly updated) unat value
- mov ar.pfs=loc1 // restore caller's ar.pfs
- br.ret.sptk.many rp
-END(notify_resume_user)
-
-GLOBAL_ENTRY(sys_rt_sigsuspend)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
- alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
- mov r9=ar.unat // r9 = current ar.unat, spilled to the stack below
- mov loc0=rp // save return address
- mov out0=in0 // mask
- mov out1=in1 // sigsetsize
- adds out2=8,sp // out2=&sigscratch->ar_pfs
- ;;
- .fframe 16
- .spillsp ar.unat, 16
- st8 [sp]=r9,-16 // allocate space for ar.unat and save it
- st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch
- .body
- br.call.sptk.many rp=ia64_rt_sigsuspend // args: out0=mask, out1=sigsetsize, out2=&sigscratch
-.ret17: .restore sp
- adds sp=16,sp // pop scratch stack space
- ;;
- ld8 r9=[sp] // load new unat from the slot saved above (start of sigscratch; NOTE(review): comment used to say sw->caller_unat)
- mov rp=loc0 // restore return address
- ;;
- mov ar.unat=r9 // install the (possibly updated) unat value
- mov ar.pfs=loc1 // restore caller's ar.pfs
- br.ret.sptk.many rp
-END(sys_rt_sigsuspend)
-
-ENTRY(sys_rt_sigreturn)
- PT_REGS_UNWIND_INFO(0)
- /*
- * Allocate 8 input registers since ptrace() may clobber them
- */
- alloc r2=ar.pfs,8,0,1,0
- .prologue
- PT_REGS_SAVES(16)
- adds sp=-16,sp // allocate 16 bytes of scratch space below pt_regs
- .body
- cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall...
- ;;
- /*
- * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
- * syscall-entry path does not save them we save them here instead. Note: we
- * don't need to save any other registers that are not saved by the stream-lined
- * syscall path, because restore_sigcontext() restores them.
- */
- adds r16=PT(F6)+32,sp // r16 = &pt_regs.f6 (offset is +32 because sp was just lowered by 16)
- adds r17=PT(F7)+32,sp // r17 = &pt_regs.f7
- ;;
- stf.spill [r16]=f6,32 // r16 handles f6/f8/f10, stepping by 32 bytes
- stf.spill [r17]=f7,32 // r17 handles f7/f9/f11, stepping by 32 bytes
- ;;
- stf.spill [r16]=f8,32
- stf.spill [r17]=f9,32
- ;;
- stf.spill [r16]=f10
- stf.spill [r17]=f11
- adds out0=16,sp // out0 = &sigscratch
- br.call.sptk.many rp=ia64_rt_sigreturn
-.ret19: .restore sp,0
- adds sp=16,sp // pop the scratch space
- ;;
- ld8 r9=[sp] // load new ar.unat
- mov.sptk b7=r8,__ia64_leave_kernel // b7 = address returned in r8; second operand is a prediction hint
- ;;
- mov ar.unat=r9
- br.many b7 // continue at the address returned by ia64_rt_sigreturn
-END(sys_rt_sigreturn)
-
-GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
- .prologue
- /*
- * Wrap the call to ia64_handle_unaligned() in a switch_stack save/restore.
- * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
- */
- mov r16=r0
- DO_SAVE_SWITCH_STACK // macro defined outside this chunk
- br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt
-.ret21: .body
- DO_LOAD_SWITCH_STACK // macro defined outside this chunk
- br.cond.sptk.many rp // goes to ia64_leave_kernel
-END(ia64_prepare_handle_unaligned)
-
- //
- // unw_init_running(void (*callback)(info, arg), void *arg)
- //
-# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15)
-
-GLOBAL_ENTRY(unw_init_running)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
- alloc loc1=ar.pfs,2,3,3,0 // in0 = callback, in1 = arg; loc1 = caller's ar.pfs
- ;;
- ld8 loc2=[in0],8 // loc2 = first word at *in0 (moved to b6 below); in0 += 8
- mov loc0=rp // save return address
- mov r16=loc1 // r16 = ar.pfs value, set up before DO_SAVE_SWITCH_STACK
- DO_SAVE_SWITCH_STACK
- .body
-
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
- .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
- SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
- adds sp=-EXTRA_FRAME_SIZE,sp // reserve stack space for the frame info
- .body
- ;;
- adds out0=16,sp // &info
- mov out1=r13 // current
- adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack
- br.call.sptk.many rp=unw_init_frame_info
-1: adds out0=16,sp // &info
- mov b6=loc2 // b6 = callback address loaded at entry
- mov loc2=gp // save gp across indirect function call
- ;;
- ld8 gp=[in0] // gp = second word at the callback pointer (in0 was advanced by 8)
- mov out1=in1 // arg
- br.call.sptk.many rp=b6 // invoke the callback function
-1: mov gp=loc2 // restore gp
-
- // For now, we don't allow changing registers from within
- // unw_init_running; if we ever want to allow that, we'd
- // have to do a load_switch_stack here:
- .restore sp
- adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp // drop switch_stack and frame info in one step
-
- mov ar.pfs=loc1 // restore caller's ar.pfs
- mov rp=loc0 // restore return address
- br.ret.sptk.many rp
-END(unw_init_running)
-
- .rodata
- .align 8
- .globl sys_call_table
-sys_call_table: // one 8-byte entry per syscall; entry 0 corresponds to syscall number 1024
- data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S.
- data8 sys_exit // 1025
- data8 sys_read
- data8 sys_write
- data8 sys_open
- data8 sys_close
- data8 sys_creat // 1030
- data8 sys_link
- data8 sys_unlink
- data8 ia64_execve
- data8 sys_chdir
- data8 sys_fchdir // 1035
- data8 sys_utimes
- data8 sys_mknod
- data8 sys_chmod
- data8 sys_chown
- data8 sys_lseek // 1040
- data8 sys_getpid
- data8 sys_getppid
- data8 sys_mount
- data8 sys_umount
- data8 sys_setuid // 1045
- data8 sys_getuid
- data8 sys_geteuid
- data8 sys_ptrace
- data8 sys_access
- data8 sys_sync // 1050
- data8 sys_fsync
- data8 sys_fdatasync
- data8 sys_kill
- data8 sys_rename
- data8 sys_mkdir // 1055
- data8 sys_rmdir
- data8 sys_dup
- data8 sys_pipe
- data8 sys_times
- data8 ia64_brk // 1060
- data8 sys_setgid
- data8 sys_getgid
- data8 sys_getegid
- data8 sys_acct
- data8 sys_ioctl // 1065
- data8 sys_fcntl
- data8 sys_umask
- data8 sys_chroot
- data8 sys_ustat
- data8 sys_dup2 // 1070
- data8 sys_setreuid
- data8 sys_setregid
- data8 sys_getresuid
- data8 sys_setresuid
- data8 sys_getresgid // 1075
- data8 sys_setresgid
- data8 sys_getgroups
- data8 sys_setgroups
- data8 sys_getpgid
- data8 sys_setpgid // 1080
- data8 sys_setsid
- data8 sys_getsid
- data8 sys_sethostname
- data8 sys_setrlimit
- data8 sys_getrlimit // 1085
- data8 sys_getrusage
- data8 sys_gettimeofday
- data8 sys_settimeofday
- data8 sys_select
- data8 sys_poll // 1090
- data8 sys_symlink
- data8 sys_readlink
- data8 sys_uselib
- data8 sys_swapon
- data8 sys_swapoff // 1095
- data8 sys_reboot
- data8 sys_truncate
- data8 sys_ftruncate
- data8 sys_fchmod
- data8 sys_fchown // 1100
- data8 ia64_getpriority
- data8 sys_setpriority
- data8 sys_statfs
- data8 sys_fstatfs
- data8 sys_gettid // 1105
- data8 sys_semget
- data8 sys_semop
- data8 sys_semctl
- data8 sys_msgget
- data8 sys_msgsnd // 1110
- data8 sys_msgrcv
- data8 sys_msgctl
- data8 sys_shmget
- data8 sys_shmat
- data8 sys_shmdt // 1115
- data8 sys_shmctl
- data8 sys_syslog
- data8 sys_setitimer
- data8 sys_getitimer
- data8 sys_ni_syscall // 1120 /* was: ia64_oldstat */
- data8 sys_ni_syscall /* was: ia64_oldlstat */
- data8 sys_ni_syscall /* was: ia64_oldfstat */
- data8 sys_vhangup
- data8 sys_lchown
- data8 sys_remap_file_pages // 1125
- data8 sys_wait4
- data8 sys_sysinfo
- data8 sys_clone
- data8 sys_setdomainname
- data8 sys_newuname // 1130
- data8 sys_adjtimex
- data8 sys_ni_syscall /* was: ia64_create_module */
- data8 sys_init_module
- data8 sys_delete_module
- data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */
- data8 sys_ni_syscall /* was: sys_query_module */
- data8 sys_quotactl
- data8 sys_bdflush
- data8 sys_sysfs
- data8 sys_personality // 1140
- data8 sys_ni_syscall // sys_afs_syscall
- data8 sys_setfsuid
- data8 sys_setfsgid
- data8 sys_getdents
- data8 sys_flock // 1145
- data8 sys_readv
- data8 sys_writev
- data8 sys_pread64
- data8 sys_pwrite64
- data8 sys_sysctl // 1150
- data8 sys_mmap
- data8 sys_munmap
- data8 sys_mlock
- data8 sys_mlockall
- data8 sys_mprotect // 1155
- data8 ia64_mremap
- data8 sys_msync
- data8 sys_munlock
- data8 sys_munlockall
- data8 sys_sched_getparam // 1160
- data8 sys_sched_setparam
- data8 sys_sched_getscheduler
- data8 sys_sched_setscheduler
- data8 sys_sched_yield
- data8 sys_sched_get_priority_max // 1165
- data8 sys_sched_get_priority_min
- data8 sys_sched_rr_get_interval
- data8 sys_nanosleep
- data8 sys_nfsservctl
- data8 sys_prctl // 1170
- data8 sys_getpagesize
- data8 sys_mmap2
- data8 sys_pciconfig_read
- data8 sys_pciconfig_write
- data8 sys_perfmonctl // 1175
- data8 sys_sigaltstack
- data8 sys_rt_sigaction
- data8 sys_rt_sigpending
- data8 sys_rt_sigprocmask
- data8 sys_rt_sigqueueinfo // 1180
- data8 sys_rt_sigreturn
- data8 sys_rt_sigsuspend
- data8 sys_rt_sigtimedwait
- data8 sys_getcwd
- data8 sys_capget // 1185
- data8 sys_capset
- data8 sys_sendfile64
- data8 sys_ni_syscall // sys_getpmsg (STREAMS)
- data8 sys_ni_syscall // sys_putpmsg (STREAMS)
- data8 sys_socket // 1190
- data8 sys_bind
- data8 sys_connect
- data8 sys_listen
- data8 sys_accept
- data8 sys_getsockname // 1195
- data8 sys_getpeername
- data8 sys_socketpair
- data8 sys_send
- data8 sys_sendto
- data8 sys_recv // 1200
- data8 sys_recvfrom
- data8 sys_shutdown
- data8 sys_setsockopt
- data8 sys_getsockopt
- data8 sys_sendmsg // 1205
- data8 sys_recvmsg
- data8 sys_pivot_root
- data8 sys_mincore
- data8 sys_madvise
- data8 sys_newstat // 1210
- data8 sys_newlstat
- data8 sys_newfstat
- data8 sys_clone2
- data8 sys_getdents64
- data8 sys_getunwind // 1215
- data8 sys_readahead
- data8 sys_setxattr
- data8 sys_lsetxattr
- data8 sys_fsetxattr
- data8 sys_getxattr // 1220
- data8 sys_lgetxattr
- data8 sys_fgetxattr
- data8 sys_listxattr
- data8 sys_llistxattr
- data8 sys_flistxattr // 1225
- data8 sys_removexattr
- data8 sys_lremovexattr
- data8 sys_fremovexattr
- data8 sys_tkill
- data8 sys_futex // 1230
- data8 sys_sched_setaffinity
- data8 sys_sched_getaffinity
- data8 sys_set_tid_address
- data8 sys_fadvise64_64
- data8 sys_tgkill // 1235
- data8 sys_exit_group
- data8 sys_lookup_dcookie
- data8 sys_io_setup
- data8 sys_io_destroy
- data8 sys_io_getevents // 1240
- data8 sys_io_submit
- data8 sys_io_cancel
- data8 sys_epoll_create
- data8 sys_epoll_ctl
- data8 sys_epoll_wait // 1245
- data8 sys_restart_syscall
- data8 sys_semtimedop
- data8 sys_timer_create
- data8 sys_timer_settime
- data8 sys_timer_gettime // 1250
- data8 sys_timer_getoverrun
- data8 sys_timer_delete
- data8 sys_clock_settime
- data8 sys_clock_gettime
- data8 sys_clock_getres // 1255
- data8 sys_clock_nanosleep
- data8 sys_fstatfs64
- data8 sys_statfs64
- data8 sys_mbind
- data8 sys_get_mempolicy // 1260
- data8 sys_set_mempolicy
- data8 sys_mq_open
- data8 sys_mq_unlink
- data8 sys_mq_timedsend
- data8 sys_mq_timedreceive // 1265
- data8 sys_mq_notify
- data8 sys_mq_getsetattr
- data8 sys_ni_syscall // reserved for kexec_load
- data8 sys_ni_syscall // reserved for vserver
- data8 sys_waitid // 1270
- data8 sys_add_key
- data8 sys_request_key
- data8 sys_keyctl
- data8 sys_ioprio_set
- data8 sys_ioprio_get // 1275
- data8 sys_move_pages
- data8 sys_inotify_init
- data8 sys_inotify_add_watch
- data8 sys_inotify_rm_watch
- data8 sys_migrate_pages // 1280
- data8 sys_openat
- data8 sys_mkdirat
- data8 sys_mknodat
- data8 sys_fchownat
- data8 sys_futimesat // 1285
- data8 sys_newfstatat
- data8 sys_unlinkat
- data8 sys_renameat
- data8 sys_linkat
- data8 sys_symlinkat // 1290
- data8 sys_readlinkat
- data8 sys_fchmodat
- data8 sys_faccessat
- data8 sys_ni_syscall // reserved for pselect
- data8 sys_ni_syscall // 1295 reserved for ppoll
- data8 sys_unshare
- data8 sys_splice
- data8 sys_ni_syscall // reserved for set_robust_list
- data8 sys_ni_syscall // reserved for get_robust_list
- data8 sys_sync_file_range // 1300
- data8 sys_tee
- data8 sys_vmsplice // 1302
-
- .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls when entries are added
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S b/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S
deleted file mode 100644
index 98b5d15855..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S
+++ /dev/null
@@ -1,925 +0,0 @@
-/*
- * This file contains the light-weight system call handlers (fsyscall-handlers).
- *
- * Copyright (C) 2003 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
- * 18-Feb-03 louisk Implement fsys_gettimeofday().
- * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
- * probably broke it along the way... ;-)
- * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
- * it capable of using memory based clocks without falling back to C code.
- */
-
-#include <asm/asmmacro.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/percpu.h>
-#include <asm/thread_info.h>
-#include <asm/sal.h>
-#include <asm/signal.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-
-#include "entry.h"
-
-/*
- * See Documentation/ia64/fsys.txt for details on fsyscalls.
- *
- * On entry to an fsyscall handler:
- * r10 = 0 (i.e., defaults to "successful syscall return")
- * r11 = saved ar.pfs (a user-level value)
- * r15 = system call number
- * r16 = "current" task pointer (in normal kernel-mode, this is in r13)
- * r32-r39 = system call arguments
- * b6 = return address (a user-level value)
- * ar.pfs = previous frame-state (a user-level value)
- * PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
- * all other registers may contain values passed in from user-mode
- *
- * On return from an fsyscall handler:
- * r11 = saved ar.pfs (as passed into the fsyscall handler)
- * r15 = system call number (as passed into the fsyscall handler)
- * r32-r39 = system call arguments (as passed into the fsyscall handler)
- * b6 = return address (as passed into the fsyscall handler)
- * ar.pfs = previous frame-state (as passed into the fsyscall handler)
- */
-
-ENTRY(fsys_ni_syscall)
- .prologue
- .altrp b6 // return address lives in b6 for fsyscall handlers
- .body
- mov r8=ENOSYS // r8 = error code
- mov r10=-1 // r10 = -1 marks the return as a failure (r10 is 0 on entry)
- FSYS_RETURN
-END(fsys_ni_syscall)
-
-ENTRY(fsys_getpid)
- .prologue
- .altrp b6
- .body
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word
- ;;
- ld4 r9=[r9] // r9 = thread_info flags
- add r8=IA64_TASK_TGID_OFFSET,r16 // r8 = &current->tgid
- ;;
- and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
- ld4 r8=[r8] // r8 = current->tgid
- ;;
- cmp.ne p8,p0=0,r9 // p8 <- some work is pending
-(p8) br.spnt.many fsys_fallback_syscall // pending work: take the heavy-weight path
- FSYS_RETURN
-END(fsys_getpid)
-
-ENTRY(fsys_getppid)
- .prologue
- .altrp b6
- .body
- add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
- ;;
- ld8 r17=[r17] // r17 = current->group_leader
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word
- ;;
-
- ld4 r9=[r9] // r9 = thread_info flags
- add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
- ;;
- and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
-
-1: ld8 r18=[r17] // r18 = current->group_leader->real_parent
- ;;
- cmp.ne p8,p0=0,r9 // p8 <- some work is pending
- add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid
- ;;
-
- /*
- * The .acq is needed to ensure that the read of tgid has returned its data before
- * we re-check "real_parent".
- */
- ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid
-#ifdef CONFIG_SMP
- /*
- * Re-read current->group_leader->real_parent.
- */
- ld8 r19=[r17] // r19 = current->group_leader->real_parent
-(p8) br.spnt.many fsys_fallback_syscall // pending work: take the heavy-weight path
- ;;
- cmp.ne p6,p0=r18,r19 // did real_parent change?
- mov r19=0 // i must not leak kernel bits...
-(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check
- ;;
- mov r17=0 // i must not leak kernel bits...
- mov r18=0 // i must not leak kernel bits...
-#else
- mov r17=0 // i must not leak kernel bits...
- mov r18=0 // i must not leak kernel bits...
- mov r19=0 // i must not leak kernel bits...
-#endif
- FSYS_RETURN
-END(fsys_getppid)
-
-ENTRY(fsys_set_tid_address)
- .prologue
- .altrp b6
- .body
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word
- ;;
- ld4 r9=[r9] // r9 = thread_info flags
- tnat.z p6,p7=r32 // check argument register for being NaT
- ;;
- and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
- add r8=IA64_TASK_PID_OFFSET,r16 // r8 = &current->pid
- add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 // r18 = &current->clear_child_tid
- ;;
- ld4 r8=[r8] // r8 = current->pid (the return value)
- cmp.ne p8,p0=0,r9 // p8 <- some work is pending
- mov r17=-1 // value stored when the argument is NaT
- ;;
-(p6) st8 [r18]=r32 // argument is not NaT: store it
-(p7) st8 [r18]=r17 // argument is NaT: store -1 instead
-(p8) br.spnt.many fsys_fallback_syscall // pending work: take the heavy-weight path
- ;;
- mov r17=0 // i must not leak kernel bits...
- mov r18=0 // i must not leak kernel bits...
- FSYS_RETURN
-END(fsys_set_tid_address)
-
-/*
- * Ensure that the time interpolator structure is compatible with the asm code
- */
-#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
- || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
-#error fsys_gettimeofday incompatible with changes to struct time_interpolator
-#endif
-#define CLOCK_REALTIME 0
-#define CLOCK_MONOTONIC 1
-#define CLOCK_DIVIDE_BY_1000 0x4000
-#define CLOCK_ADD_MONOTONIC 0x8000
-
-ENTRY(fsys_gettimeofday)
- .prologue
- .altrp b6
- .body
- mov r31 = r32 // r31 = first argument: where the result is stored
- tnat.nz p6,p0 = r33 // guard against NaT argument
-(p6) br.cond.spnt.few .fail_einval
- mov r30 = CLOCK_DIVIDE_BY_1000 // flags: divide the nsec part by 1000 (becomes p14 below)
- ;;
-.gettime: // shared entry point; fsys_clock_gettime branches here with its own r30/r31
- // Register map
- // Incoming r31 = pointer to address where to place result
- // r30 = flags determining how time is processed
- // r2,r3 = temp r4-r7 preserved
- // r8 = result nanoseconds
- // r9 = result seconds
- // r10 = temporary storage for clock difference
- // r11 = preserved: saved ar.pfs
- // r12 = preserved: memory stack
- // r13 = preserved: thread pointer
- // r14 = address of mask / mask
- // r15 = preserved: system call number
- // r16 = preserved: current task pointer
- // r17 = wall to monotonic use
- // r18 = time_interpolator->offset
- // r19 = address of wall_to_monotonic
- // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
- // r21 = shift factor
- // r22 = address of time_interpolator->last_counter
- // r23 = address of time_interpolator->last_cycle
- // r24 = address of time_interpolator->offset
- // r25 = last_cycle value
- // r26 = last_counter value
- // r27 = pointer to xtime
- // r28 = sequence number at the beginning of critical section
- // r29 = address of seqlock
- // r30 = time processing flags / memory address
- // r31 = pointer to result
- // Predicates
- // p6,p7 short term use
- // p8 = timesource ar.itc
- // p9 = timesource mmio64
- // p10 = timesource mmio32
- // p11 = timesource not to be handled by asm code
- // p12 = memory time source ( = p9 | p10)
- // p13 = do cmpxchg with time_interpolator_last_cycle
- // p14 = Divide by 1000
- // p15 = Add monotonic
- //
- // Note that instructions are optimized for McKinley. McKinley can process two
- // bundles simultaneously and therefore we continuously try to feed the CPU
- // two bundles and then a stop.
- tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
- mov pr = r30,0xc000 // Set predicates according to function (mask 0xc000 selects p14 and p15)
- add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
- movl r20 = time_interpolator
- ;;
- ld8 r20 = [r20] // get pointer to time_interpolator structure
- movl r29 = xtime_lock
- ld4 r2 = [r2] // process work pending flags
- movl r27 = xtime
- ;; // only one bundle here
- ld8 r21 = [r20] // first quad with control information
- and r2 = TIF_ALLWORK_MASK,r2
-(p6) br.cond.spnt.few .fail_einval // deferred branch
- ;;
- add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
- extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
- extr r8 = r21,0,16 // time_interpolator->source
- cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
-(p6) br.cond.spnt.many fsys_fallback_syscall
- ;;
- cmp.eq p8,p12 = 0,r8 // Check for cpu timer
- cmp.eq p9,p0 = 1,r8 // MMIO64 ?
- extr r2 = r21,24,8 // time_interpolator->jitter
- cmp.eq p10,p0 = 2,r8 // MMIO32 ?
- cmp.ltu p11,p0 = 2,r8 // function or other clock
-(p11) br.cond.spnt.many fsys_fallback_syscall
- ;;
- setf.sig f7 = r3 // Setup for scaling of counter
-(p15) movl r19 = wall_to_monotonic
-(p12) ld8 r30 = [r10] // r30 = time_interpolator->address (memory-mapped sources only)
- cmp.ne p13,p0 = r2,r0 // need jitter compensation?
- extr r21 = r21,16,8 // shift factor
- ;;
-.time_redo: // retry point when the sequence number or the cmpxchg check fails
- .pred.rel.mutex p8,p9,p10
- ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
-(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
- add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
-(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
-(p10) ld4 r2 = [r30] // 32-bit read of ti->address (mmio32 source)
-(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
- ;; // could be removed by moving the last add upward
- ld8 r26 = [r22] // time_interpolator->last_counter
-(p13) ld8 r25 = [r23] // time_interpolator->last_cycle
- add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
-(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
- ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
- add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
- ;;
- ld8 r18 = [r24] // time_interpolator->offset
- ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
-(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
- ;;
- ld8 r14 = [r14] // time_interpolator->mask
-(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
- sub r10 = r2,r26 // current_counter - last_counter
- ;;
-(p6) sub r10 = r25,r26 // time we got was less than last_cycle
-(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
- ;;
- and r10 = r10,r14 // Apply mask
- ;;
- setf.sig f8 = r10
- nop.i 123
- ;;
-(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
-EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
- xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
-(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
- ;;
-(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
-(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
- // simulate tbit.nz.or p7,p0 = r28,0
- and r28 = ~1,r28 // Make sequence even to force retry if odd
- getf.sig r2 = f8
- mf
- add r8 = r8,r18 // Add time interpolator offset
- ;;
- ld4 r10 = [r29] // xtime_lock.sequence
-(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs
- shr.u r2 = r2,r21
- ;; // overloaded 3 bundles!
- // End critical section.
- add r8 = r8,r2 // Add xtime.nsecs
- cmp4.ne.or p7,p0 = r28,r10
-(p7) br.cond.dpnt.few .time_redo // sequence number changed ?
- // Now r8=tv->tv_nsec and r9=tv->tv_sec
- mov r10 = r0
- movl r2 = 1000000000 // nanoseconds per second, used by the normalization loop
- add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
-(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
- ;;
-.time_normalize: // loop: while r8 >= 1000000000, subtract it and carry one second into r9
- mov r21 = r8
- cmp.ge p6,p0 = r8,r2
-(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time
- ;;
-(p14) setf.sig f8 = r20
-(p6) sub r8 = r8,r2
-(p6) add r9 = 1,r9 // two nops before the branch.
-(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
-(p6) br.cond.dpnt.few .time_normalize
- ;;
- // Divided by 8 through shift. Now divide by 125
- // The compiler was able to do that with a multiply
- // and a shift and we do the same
-EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
-(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it...
- ;;
- mov r8 = r0 // return value 0
-(p14) getf.sig r2 = f8
- ;;
-(p14) shr.u r21 = r2, 4
- ;;
-EX(.fail_efault, st8 [r31] = r9) // store the seconds field
-EX(.fail_efault, st8 [r23] = r21) // store the second field (nsec, or nsec/1000 when p14 is set)
- FSYS_RETURN
-.fail_einval:
- mov r8 = EINVAL
- mov r10 = -1
- FSYS_RETURN
-.fail_efault:
- mov r8 = EFAULT
- mov r10 = -1
- FSYS_RETURN
-END(fsys_gettimeofday)
-
-ENTRY(fsys_clock_gettime)
- .prologue
- .altrp b6
- .body
- cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 // p6 <- clock id (r32) is above CLOCK_MONOTONIC
- // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
-(p6) br.spnt.few fsys_fallback_syscall
- mov r31 = r33 // r31 = second argument: where the result is stored
- shl r30 = r32,15 // flags: CLOCK_MONOTONIC (1) becomes CLOCK_ADD_MONOTONIC (0x8000), CLOCK_REALTIME gives 0
- br.many .gettime // continue in the shared code inside fsys_gettimeofday
-END(fsys_clock_gettime)
-
-/*
- * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
- */
-#if _NSIG_WORDS != 1
-# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
-#endif
-ENTRY(fsys_rt_sigprocmask)
- .prologue
- .altrp b6
- .body
-
- add r2=IA64_TASK_BLOCKED_OFFSET,r16 // r2 = &current->blocked
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word
- cmp4.ltu p6,p0=SIG_SETMASK,r32 // p6 <- "how" (r32) is above SIG_SETMASK
-
- cmp.ne p15,p0=r0,r34 // oset != NULL?
- tnat.nz p8,p0=r34 // p8 <- oset is NaT
- add r31=IA64_TASK_SIGHAND_OFFSET,r16
- ;;
- ld8 r3=[r2] // read/prefetch current->blocked
- ld4 r9=[r9] // r9 = thread_info flags
- tnat.nz.or p6,p0=r35 // p6 |= sigsetsize is NaT
-
- cmp.ne.or p6,p0=_NSIG_WORDS*8,r35 // p6 |= sigsetsize has the wrong value
- tnat.nz.or p6,p0=r32 // p6 |= "how" is NaT
-(p6) br.spnt.few .fail_einval // fail with EINVAL
- ;;
-#ifdef CONFIG_SMP
- ld8 r31=[r31] // r31 <- current->sighand
-#endif
- and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
- tnat.nz.or p8,p0=r33 // p8 |= set is NaT
- ;;
- cmp.ne p7,p0=0,r9 // p7 <- some work is pending
- cmp.eq p6,p0=r0,r33 // set == NULL?
- add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
-(p8) br.spnt.few .fail_efault // fail with EFAULT
-(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
-(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
-
- /* Argh, we actually have to do some work and _update_ the signal mask: */
-
-EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
-EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
- mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
- ;;
-
- rsm psr.i // mask interrupt delivery
- mov ar.ccv=0 // compare value for the lock cmpxchg: lock must currently read 0
- andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
-
-#ifdef CONFIG_SMP
- mov r17=1 // value written to the lock word on success
- ;;
- cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
- mov r8=EINVAL // default to EINVAL
- ;;
- ld8 r3=[r2] // re-read current->blocked now that we hold the lock
- cmp4.ne p6,p0=r18,r0 // p6 <- old lock value was non-zero, i.e. acquisition failed
-(p6) br.cond.spnt.many .lock_contention
- ;;
-#else
- ld8 r3=[r2] // re-read current->blocked now that we hold the lock
- mov r8=EINVAL // default to EINVAL
-#endif
- add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
- add r19=IA64_TASK_SIGNAL_OFFSET,r16
- cmp4.eq p6,p0=SIG_BLOCK,r32
- ;;
- ld8 r19=[r19] // r19 <- current->signal
- cmp4.eq p7,p0=SIG_UNBLOCK,r32
- cmp4.eq p8,p0=SIG_SETMASK,r32
- ;;
- ld8 r18=[r18] // r18 <- current->pending.signal
- .pred.rel.mutex p6,p7,p8
-(p6) or r14=r3,r14 // SIG_BLOCK
-(p7) andcm r14=r3,r14 // SIG_UNBLOCK
-
-(p8) mov r14=r14 // SIG_SETMASK
-(p6) mov r8=0 // clear error code
- // recalc_sigpending()
- add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
-
- add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
- ;;
- ld4 r17=[r17] // r17 <- current->signal->group_stop_count
-(p7) mov r8=0 // clear error code
-
- ld8 r19=[r19] // r19 <- current->signal->shared_pending
- ;;
- cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
-(p8) mov r8=0 // clear error code
-
- or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
- ;;
- // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
- andcm r18=r18,r14
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word (r9 held the masked flags until now)
- ;;
-
-(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
- mov r19=0 // i must not leak kernel bits...
-(p6) br.cond.dpnt.many .sig_pending
- ;;
-
-1: ld4 r17=[r9] // r17 <- current->thread_info->flags
- ;;
- mov ar.ccv=r17 // compare value for the flags cmpxchg below
- and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
- ;;
-
- st8 [r2]=r14 // update current->blocked with new mask
- cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
- ;;
- cmp.ne p6,p0=r17,r8 // update failed?
-(p6) br.cond.spnt.few 1b // yes -> retry
-
-#ifdef CONFIG_SMP
- st4.rel [r31]=r0 // release the lock
-#endif
- ssm psr.i // re-enable interrupt delivery
- ;;
-
- srlz.d // ensure psr.i is set again
- mov r18=0 // i must not leak kernel bits...
-
-.store_mask:
-EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
-EX(.fail_efault, (p15) st8 [r34]=r3) // *oset <- r3, the value read from current->blocked before any update
- mov r2=0 // i must not leak kernel bits...
- mov r3=0 // i must not leak kernel bits...
- mov r8=0 // return 0
- mov r9=0 // i must not leak kernel bits...
- mov r14=0 // i must not leak kernel bits...
- mov r17=0 // i must not leak kernel bits...
- mov r31=0 // i must not leak kernel bits...
- FSYS_RETURN
-
-.sig_pending:
-#ifdef CONFIG_SMP
- st4.rel [r31]=r0 // release the lock
-#endif
- ssm psr.i // re-enable interrupt delivery
- ;;
- srlz.d
- br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
-
-#ifdef CONFIG_SMP
-.lock_contention:
- /* Rather than spinning here, fall back on doing a heavy-weight syscall. */
- ssm psr.i // re-enable interrupt delivery
- ;;
- srlz.d
- br.sptk.many fsys_fallback_syscall
-#endif
-END(fsys_rt_sigprocmask)
-
-ENTRY(fsys_fallback_syscall)
- .prologue
- .altrp b6
- .body
- /*
- * We only get here from light-weight syscall handlers. Thus, we already
- * know that r15 contains a valid syscall number. No need to re-check.
- *
- * This code masks interrupts, looks up the heavy-weight handler in
- * sys_call_table and loads r18, r21, r26, r27 and r29 before falling
- * through into fsys_bubble_down.
- */
- adds r17=-1024,r15 // r17 = table index (syscall number - 1024)
- movl r14=sys_call_table
- ;;
-#ifdef CONFIG_XEN
- movl r18=running_on_xen;;
- ld4 r18=[r18];;
- // p14 = running_on_xen
- // p15 = !running_on_xen
- cmp.ne p14,p15=r0,r18
- ;;
-(p14) movl r18=XSI_PSR_I_ADDR;;
-(p14) ld8 r18=[r18] // r18 = pointer stored at XSI_PSR_I_ADDR
-(p14) mov r29=1;;
-(p14) st1 [r18]=r29 // Xen: write 1 to that byte in place of "rsm psr.i" (presumably the virtual interrupt mask -- confirm)
-(p15) rsm psr.i // native: mask interrupts directly
-#else
- rsm psr.i // mask interrupts
-#endif
- shladd r18=r17,3,r14 // r18 = &sys_call_table[r17] (8-byte entries)
- ;;
- ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
-#ifdef CONFIG_XEN
-(p14) mov r27=r8 // Xen: keep r8 safe, the value read below comes back in r8
-(p14) XEN_HYPER_GET_PSR
- ;;
-(p14) mov r29=r8 // r29 = psr value obtained via XEN_HYPER_GET_PSR
-(p14) mov r8=r27 // put the original r8 back
-(p15) mov r29=psr // read psr (12 cyc load latency)
-#else
- mov r29=psr // read psr (12 cyc load latency)
-#endif
- mov r27=ar.rsc
- mov r21=ar.fpsr
- mov r26=ar.pfs
-END(fsys_fallback_syscall)
- /* FALL THROUGH */
-GLOBAL_ENTRY(fsys_bubble_down)
- .prologue
- .altrp b6
- .body
- /*
- * We get here for syscalls that don't have a lightweight
- * handler. For those, we need to bubble down into the kernel
- * and that requires setting up a minimal pt_regs structure,
- * and initializing the CPU state more or less as if an
- * interruption had occurred. To make syscall-restarts work,
- * we setup pt_regs such that cr_iip points to the second
- * instruction in syscall_via_break. Decrementing the IP
- * hence will restart the syscall via break and not
- * decrementing IP will return us to the caller, as usual.
- * Note that we preserve the value of psr.pp rather than
- * initializing it from dcr.pp. This makes it possible to
- * distinguish fsyscall execution from other privileged
- * execution.
- *
- * On entry:
- * - normal fsyscall handler register usage, except
- * that we also have:
- * - r18: address of syscall entry point
- * - r21: ar.fpsr
- * - r26: ar.pfs
- * - r27: ar.rsc
- * - r29: psr
- *
- * We used to clear some PSR bits here but that requires slow
- * serialization. Fortunately, that isn't really necessary.
- * The rationale is as follows: we used to clear bits
- * ~PSR_PRESERVED_BITS in PSR.L. Since
- * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
- * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
- * However,
- *
- * PSR.BE : already is turned off in __kernel_syscall_via_epc()
- * PSR.AC : don't care (kernel normally turns PSR.AC on)
- * PSR.I : already turned off by the time fsys_bubble_down gets
- * invoked
- * PSR.DFL: always 0 (kernel never turns it on)
- * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
- * initiative
- * PSR.DI : always 0 (kernel never turns it on)
- * PSR.SI : always 0 (kernel never turns it on)
- * PSR.DB : don't care --- kernel never enables kernel-level
- * breakpoints
- * PSR.TB : must be 0 already; if it wasn't zero on entry to
- * __kernel_syscall_via_epc, the branch to fsys_bubble_down
- * will trigger a taken branch; the taken-trap-handler then
- * converts the syscall into a break-based system-call.
- */
- /*
- * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
- * The rest we have to synthesize.
- */
-# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
- | (0x1 << IA64_PSR_RI_BIT) \
- | IA64_PSR_BN | IA64_PSR_I)
-
- invala // M0|1
- movl r14=ia64_ret_from_syscall // X
-
- nop.m 0
- movl r28=__kernel_syscall_via_break // X create cr.iip
- ;;
-
- mov r2=r16 // A get task addr to addl-addressable register
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
- mov r31=pr // I0 save pr (2 cyc)
- ;;
- st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
- addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
- add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
- ;;
- ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
- lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
- nop.i 0
- ;;
- mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
- nop.m 0
- nop.i 0
- ;;
- mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
- mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
- nop.i 0
- ;;
- mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
- movl r8=PSR_ONE_BITS // X
- ;;
- mov r25=ar.unat // M2 (5 cyc) save ar.unat
- mov r19=b6 // I0 save b6 (2 cyc)
- mov r20=r1 // A save caller's gp in r20
- ;;
- or r29=r8,r29 // A construct cr.ipsr value to save
- mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
- addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
-
- mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
- cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
- br.call.sptk.many b7=ia64_syscall_setup // B
- ;;
- mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
- mov rp=r14 // I0 set the real return addr
- and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
- ;;
-#ifdef CONFIG_XEN
- movl r14=running_on_xen;;
- ld4 r14=[r14];;
- // p14 = running_on_xen
- // p15 = !running_on_xen
- cmp.ne p14,p15=r0,r14
- ;;
-(p14) movl r28=XSI_PSR_I_ADDR;;
-(p14) ld8 r28=[r28];;
-(p14) adds r28=-1,r28;; // event_pending
-(p14) ld1 r14=[r28];;
-(p14) cmp.ne.unc p13,p14=r14,r0;;
-(p13) XEN_HYPER_SSM_I
-(p14) adds r28=1,r28;; // event_mask
-(p14) st1 [r28]=r0;;
-(p15) ssm psr.i
-#else
- ssm psr.i // M2 we're on kernel stacks now, reenable irqs
-#endif
- cmp.eq p8,p0=r3,r0 // A
-(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
-
- nop.m 0
-(p8) br.call.sptk.many b6=b6 // B (ignore return address)
- br.cond.spnt ia64_trace_syscall // B
-END(fsys_bubble_down)
-
- .rodata
- .align 8
- .globl fsyscall_table
-
- data8 fsys_bubble_down
-fsyscall_table:
- data8 fsys_ni_syscall
- data8 0 // exit // 1025
- data8 0 // read
- data8 0 // write
- data8 0 // open
- data8 0 // close
- data8 0 // creat // 1030
- data8 0 // link
- data8 0 // unlink
- data8 0 // execve
- data8 0 // chdir
- data8 0 // fchdir // 1035
- data8 0 // utimes
- data8 0 // mknod
- data8 0 // chmod
- data8 0 // chown
- data8 0 // lseek // 1040
- data8 fsys_getpid // getpid
- data8 fsys_getppid // getppid
- data8 0 // mount
- data8 0 // umount
- data8 0 // setuid // 1045
- data8 0 // getuid
- data8 0 // geteuid
- data8 0 // ptrace
- data8 0 // access
- data8 0 // sync // 1050
- data8 0 // fsync
- data8 0 // fdatasync
- data8 0 // kill
- data8 0 // rename
- data8 0 // mkdir // 1055
- data8 0 // rmdir
- data8 0 // dup
- data8 0 // pipe
- data8 0 // times
- data8 0 // brk // 1060
- data8 0 // setgid
- data8 0 // getgid
- data8 0 // getegid
- data8 0 // acct
- data8 0 // ioctl // 1065
- data8 0 // fcntl
- data8 0 // umask
- data8 0 // chroot
- data8 0 // ustat
- data8 0 // dup2 // 1070
- data8 0 // setreuid
- data8 0 // setregid
- data8 0 // getresuid
- data8 0 // setresuid
- data8 0 // getresgid // 1075
- data8 0 // setresgid
- data8 0 // getgroups
- data8 0 // setgroups
- data8 0 // getpgid
- data8 0 // setpgid // 1080
- data8 0 // setsid
- data8 0 // getsid
- data8 0 // sethostname
- data8 0 // setrlimit
- data8 0 // getrlimit // 1085
- data8 0 // getrusage
- data8 fsys_gettimeofday // gettimeofday
- data8 0 // settimeofday
- data8 0 // select
- data8 0 // poll // 1090
- data8 0 // symlink
- data8 0 // readlink
- data8 0 // uselib
- data8 0 // swapon
- data8 0 // swapoff // 1095
- data8 0 // reboot
- data8 0 // truncate
- data8 0 // ftruncate
- data8 0 // fchmod
- data8 0 // fchown // 1100
- data8 0 // getpriority
- data8 0 // setpriority
- data8 0 // statfs
- data8 0 // fstatfs
- data8 0 // gettid // 1105
- data8 0 // semget
- data8 0 // semop
- data8 0 // semctl
- data8 0 // msgget
- data8 0 // msgsnd // 1110
- data8 0 // msgrcv
- data8 0 // msgctl
- data8 0 // shmget
- data8 0 // shmat
- data8 0 // shmdt // 1115
- data8 0 // shmctl
- data8 0 // syslog
- data8 0 // setitimer
- data8 0 // getitimer
- data8 0 // 1120
- data8 0
- data8 0
- data8 0 // vhangup
- data8 0 // lchown
- data8 0 // remap_file_pages // 1125
- data8 0 // wait4
- data8 0 // sysinfo
- data8 0 // clone
- data8 0 // setdomainname
- data8 0 // newuname // 1130
- data8 0 // adjtimex
- data8 0
- data8 0 // init_module
- data8 0 // delete_module
- data8 0 // 1135
- data8 0
- data8 0 // quotactl
- data8 0 // bdflush
- data8 0 // sysfs
- data8 0 // personality // 1140
- data8 0 // afs_syscall
- data8 0 // setfsuid
- data8 0 // setfsgid
- data8 0 // getdents
- data8 0 // flock // 1145
- data8 0 // readv
- data8 0 // writev
- data8 0 // pread64
- data8 0 // pwrite64
- data8 0 // sysctl // 1150
- data8 0 // mmap
- data8 0 // munmap
- data8 0 // mlock
- data8 0 // mlockall
- data8 0 // mprotect // 1155
- data8 0 // mremap
- data8 0 // msync
- data8 0 // munlock
- data8 0 // munlockall
- data8 0 // sched_getparam // 1160
- data8 0 // sched_setparam
- data8 0 // sched_getscheduler
- data8 0 // sched_setscheduler
- data8 0 // sched_yield
- data8 0 // sched_get_priority_max // 1165
- data8 0 // sched_get_priority_min
- data8 0 // sched_rr_get_interval
- data8 0 // nanosleep
- data8 0 // nfsservctl
- data8 0 // prctl // 1170
- data8 0 // getpagesize
- data8 0 // mmap2
- data8 0 // pciconfig_read
- data8 0 // pciconfig_write
- data8 0 // perfmonctl // 1175
- data8 0 // sigaltstack
- data8 0 // rt_sigaction
- data8 0 // rt_sigpending
- data8 fsys_rt_sigprocmask // rt_sigprocmask
- data8 0 // rt_sigqueueinfo // 1180
- data8 0 // rt_sigreturn
- data8 0 // rt_sigsuspend
- data8 0 // rt_sigtimedwait
- data8 0 // getcwd
- data8 0 // capget // 1185
- data8 0 // capset
- data8 0 // sendfile
- data8 0
- data8 0
- data8 0 // socket // 1190
- data8 0 // bind
- data8 0 // connect
- data8 0 // listen
- data8 0 // accept
- data8 0 // getsockname // 1195
- data8 0 // getpeername
- data8 0 // socketpair
- data8 0 // send
- data8 0 // sendto
- data8 0 // recv // 1200
- data8 0 // recvfrom
- data8 0 // shutdown
- data8 0 // setsockopt
- data8 0 // getsockopt
- data8 0 // sendmsg // 1205
- data8 0 // recvmsg
- data8 0 // pivot_root
- data8 0 // mincore
- data8 0 // madvise
- data8 0 // newstat // 1210
- data8 0 // newlstat
- data8 0 // newfstat
- data8 0 // clone2
- data8 0 // getdents64
- data8 0 // getunwind // 1215
- data8 0 // readahead
- data8 0 // setxattr
- data8 0 // lsetxattr
- data8 0 // fsetxattr
- data8 0 // getxattr // 1220
- data8 0 // lgetxattr
- data8 0 // fgetxattr
- data8 0 // listxattr
- data8 0 // llistxattr
- data8 0 // flistxattr // 1225
- data8 0 // removexattr
- data8 0 // lremovexattr
- data8 0 // fremovexattr
- data8 0 // tkill
- data8 0 // futex // 1230
- data8 0 // sched_setaffinity
- data8 0 // sched_getaffinity
- data8 fsys_set_tid_address // set_tid_address
- data8 0 // fadvise64_64
- data8 0 // tgkill // 1235
- data8 0 // exit_group
- data8 0 // lookup_dcookie
- data8 0 // io_setup
- data8 0 // io_destroy
- data8 0 // io_getevents // 1240
- data8 0 // io_submit
- data8 0 // io_cancel
- data8 0 // epoll_create
- data8 0 // epoll_ctl
- data8 0 // epoll_wait // 1245
- data8 0 // restart_syscall
- data8 0 // semtimedop
- data8 0 // timer_create
- data8 0 // timer_settime
- data8 0 // timer_gettime // 1250
- data8 0 // timer_getoverrun
- data8 0 // timer_delete
- data8 0 // clock_settime
- data8 fsys_clock_gettime // clock_gettime
-
- // fill in zeros for the remaining entries
- .zero:
- .space fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S
deleted file mode 100644
index e242e36b04..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S
+++ /dev/null
@@ -1,478 +0,0 @@
-/*
- * This file contains the code that gets mapped at the upper end of each task's text
- * region. For now, it contains the signal trampoline code only.
- *
- * Copyright (C) 1999-2003 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/sigcontext.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-# include <asm/privop.h>
-#endif
-
-/*
- * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation,
- * complications with the linker (which likes to create PLT stubs for branches
- * to targets outside the shared object) and to avoid multi-phase kernel builds, we
- * simply create minimalistic "patch lists" in special ELF sections.
- */
- .section ".data.patch.fsyscall_table", "a"
- .previous
-#define LOAD_FSYSCALL_TABLE(reg) \
-[1:] movl reg=0; \
- .xdata4 ".data.patch.fsyscall_table", 1b-.
-
- .section ".data.patch.brl_fsys_bubble_down", "a"
- .previous
-#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \
-[1:](pr)brl.cond.sptk 0; \
- .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-.
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
- // The page in which a hyperprivop lives must be pinned by an ITR.
- // However, the vDSO area isn't pinned, so issuing a hyperprivop
- // from the vDSO page causes the trouble that Kevin pointed out:
- // after vpsr.ic is cleared, the vcpu is pre-empted and the itlb
- // is flushed. When the vcpu gets the cpu again, a tlb miss fault occurs,
- // but it results in a nested dtlb fault because vpsr.ic is off.
- // To avoid such a situation, we jump into the kernel text area,
- // which is pinned, issue the hyperprivop there, and then return
- // to the vDSO page.
- // This is Dan Magenheimer's idea.
-
- // Currently is_running_on_xen() is defined as running_on_xen.
- // If is_running_on_xen() is a real function, we must update
- // according to it.
- .section ".data.patch.running_on_xen", "a"
- .previous
-#define LOAD_RUNNING_ON_XEN(reg) \
-[1:] movl reg=0; \
- .xdata4 ".data.patch.running_on_xen", 1b-.
-
- .section ".data.patch.brl_xen_ssm_i_0", "a"
- .previous
-#define BRL_COND_XEN_SSM_I_0(pr) \
-[1:](pr)brl.cond.sptk 0; \
- .xdata4 ".data.patch.brl_xen_ssm_i_0", 1b-.
-
- .section ".data.patch.brl_xen_ssm_i_1", "a"
- .previous
-#define BRL_COND_XEN_SSM_I_1(pr) \
-[1:](pr)brl.cond.sptk 0; \
- .xdata4 ".data.patch.brl_xen_ssm_i_1", 1b-.
-#endif
-
-GLOBAL_ENTRY(__kernel_syscall_via_break)
- .prologue
- .altrp b6
- .body
- /*
- * Note: for (fast) syscall restart to work, the break instruction must be
- * the first one in the bundle addressed by __kernel_syscall_via_break.
- */
-{ .mib
- break 0x100000
- nop.i 0
- br.ret.sptk.many b6
-}
-END(__kernel_syscall_via_break)
-
-/*
- * On entry:
- * r11 = saved ar.pfs
- * r15 = system call #
- * b0 = saved return address
- * b6 = return address
- * On exit:
- * r11 = saved ar.pfs
- * r15 = system call #
- * b0 = saved return address
- * all other "scratch" registers: undefined
- * all "preserved" registers: same as on entry
- */
-
-GLOBAL_ENTRY(__kernel_syscall_via_epc)
- .prologue
- .altrp b6
- .body
-{
- /*
- * Note: the kernel cannot assume that the first two instructions in this
- * bundle get executed. The remaining code must be safe even if
- * they do not get executed.
- */
- adds r17=-1024,r15 // A
- mov r10=0 // A default to successful syscall execution
- epc // B causes split-issue
-}
- ;;
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
- // r20 = 1
- // r22 = &vcpu->vcpu_info->evtchn_upcall_mask
- // r23 = &vpsr.ic
- // r24 = &vcpu->vcpu_info->evtchn_upcall_pending
- // r25 = tmp
- // r28 = &running_on_xen
- // r30 = running_on_xen
- // r31 = tmp
- // p11 = tmp
- // p12 = running_on_xen
- // p13 = !running_on_xen
- // p14 = tmp
- // p15 = tmp
-#define isXen p12
-#define isRaw p13
- LOAD_RUNNING_ON_XEN(r28)
- movl r22=XSI_PSR_I_ADDR
- ;;
- ld8 r22=[r22]
- ;;
- movl r23=XSI_PSR_IC
- adds r24=-1,r22
- mov r20=1
- ;;
- ld4 r30=[r28]
- ;;
- cmp.ne isXen,isRaw=r0,r30
- ;;
-(isRaw) rsm psr.be | psr.i
-(isXen) st1 [r22]=r20
-(isXen) rum psr.be
- ;;
-#else
- rsm psr.be | psr.i // M2 (5 cyc to srlz.d)
-#endif
- LOAD_FSYSCALL_TABLE(r14) // X
- ;;
- mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
- shladd r18=r17,3,r14 // A
- mov r19=NR_syscalls-1 // A
- ;;
- lfetch [r18] // M0|1
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-(isRaw) mov r29=psr
-(isXen) XEN_HYPER_GET_PSR
- ;;
-(isXen) mov r29=r8
-#else
- mov r29=psr // M2 (12 cyc)
-#endif
- // If r17 is a NaT, p6 will be zero
- cmp.geu p6,p7=r19,r17 // A (sysnr >= 1024 && sysnr < 1024+NR_syscalls)?
- ;;
- mov r21=ar.fpsr // M2 (12 cyc)
- tnat.nz p10,p9=r15 // I0
- mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
- ;;
- srlz.d // M0 (forces split-issue) ensure PSR.BE==0
-(p6) ld8 r18=[r18] // M0|1
- nop.i 0
- ;;
- nop.m 0
-(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
- ;;
- // p14 = running_on_xen && p8
- // p15 = !running_on_xen && p8
-(p8) cmp.ne.unc p14,p15=r0,r30
- ;;
-(p15) ssm psr.i
- BRL_COND_XEN_SSM_I_0(p14)
- .global .vdso_ssm_i_0_ret
-.vdso_ssm_i_0_ret:
-#else
- nop.i 0
- ;;
-(p8) ssm psr.i
-#endif
-(p6) mov b7=r18 // I0
-(p8) br.dptk.many b7 // B
-
- mov r27=ar.rsc // M2 (12 cyc)
-/*
- * brl.cond doesn't work as intended because the linker would convert this branch
- * into a branch to a PLT. Perhaps there will be a way to avoid this with some
- * future version of the linker. In the meantime, we just use an indirect branch
- * instead.
- */
-#ifdef CONFIG_ITANIUM
-(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
- ;;
-(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
- ;;
-(p6) mov b7=r14
-(p6) br.sptk.many b7
-#else
- BRL_COND_FSYS_BUBBLE_DOWN(p6)
-#endif
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-(isRaw) ssm psr.i
- BRL_COND_XEN_SSM_I_1(isXen)
- .global .vdso_ssm_i_1_ret
-.vdso_ssm_i_1_ret:
-#else
- ssm psr.i
-#endif
- mov r10=-1
-(p10) mov r8=EINVAL
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
- dv_serialize_data // shut up gas warning.
- // we know xen_hyper_ssm_i_0 or xen_hyper_ssm_i_1
- // doesn't change p9 and p10
-#endif
-(p9) mov r8=ENOSYS
- FSYS_RETURN
-END(__kernel_syscall_via_epc)
-
-# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
-# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
-# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET)
-# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET)
-# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
-
-# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET
-# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET
-# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET
-# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET
-# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET
-# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET
-# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET
-# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET
-# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET
-# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET
-# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET
-# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET
-# define base0 r2
-# define base1 r3
- /*
- * When we get here, the memory stack looks like this:
- *
- * +===============================+
- * | |
- * // struct sigframe //
- * | |
- * +-------------------------------+ <-- sp+16
- * | 16 byte of scratch |
- * | space |
- * +-------------------------------+ <-- sp
- *
- * The register stack looks _exactly_ the way it looked at the time the signal
- * occurred. In other words, we're treading on a potential mine-field: each
- * incoming general register may be a NaT value (including sp, in which case the
- * process ends up dying with a SIGSEGV).
- *
- * The first thing we need to do is a cover to get the registers onto the backing
- * store. Once that is done, we invoke the signal handler which may modify some
- * of the machine state. After returning from the signal handler, we return
- * control to the previous context by executing a sigreturn system call. A signal
- * handler may call the rt_sigreturn() function to directly return to a given
- * sigcontext. However, the user-level sigreturn() needs to do much more than
- * calling the rt_sigreturn() system call as it needs to unwind the stack to
- * restore preserved registers that may have been saved on the signal handler's
- * call stack.
- */
-
-#define SIGTRAMP_SAVES \
- .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \
- .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \
- .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \
- .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \
- .savesp pr, PR_OFF+SIGCONTEXT_OFF; \
- .savesp rp, RP_OFF+SIGCONTEXT_OFF; \
- .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \
- .vframesp SP_OFF+SIGCONTEXT_OFF
-
-GLOBAL_ENTRY(__kernel_sigtramp)
- // describe the state that is active when we get here:
- .prologue
- SIGTRAMP_SAVES
- .body
-
- .label_state 1
-
- adds base0=SIGHANDLER_OFF,sp
- adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
- br.call.sptk.many rp=1f
-1:
- ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel
- ld8 r15=[base1] // get address of new RBS base (or NULL)
- cover // push args in interrupted frame onto backing store
- ;;
- cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel)
- mov.m r9=ar.bsp // fetch ar.bsp
- .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
-(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20)
-back_from_setup_rbs:
- alloc r8=ar.pfs,0,0,3,0
- ld8 out0=[base0],16 // load arg0 (signum)
- adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
- ;;
- ld8 out1=[base1] // load arg1 (siginfop)
- ld8 r10=[r17],8 // get signal handler entry point
- ;;
- ld8 out2=[base0] // load arg2 (sigcontextp)
- ld8 gp=[r17] // get signal handler's global pointer
- adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
- ;;
- .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
- st8 [base0]=r9 // save sc_ar_bsp
- adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
- adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
- ;;
- stf.spill [base0]=f6,32
- stf.spill [base1]=f7,32
- ;;
- stf.spill [base0]=f8,32
- stf.spill [base1]=f9,32
- mov b6=r10
- ;;
- stf.spill [base0]=f10,32
- stf.spill [base1]=f11,32
- ;;
- stf.spill [base0]=f12,32
- stf.spill [base1]=f13,32
- ;;
- stf.spill [base0]=f14,32
- stf.spill [base1]=f15,32
- br.call.sptk.many rp=b6 // call the signal handler
-.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
- ;;
- ld8 r15=[base0] // fetch sc_ar_bsp
- mov r14=ar.bsp
- ;;
- cmp.ne p1,p0=r14,r15 // do we need to restore the rbs?
-(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7)
- ;;
-back_from_restore_rbs:
- adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
- adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
- ;;
- ldf.fill f6=[base0],32
- ldf.fill f7=[base1],32
- ;;
- ldf.fill f8=[base0],32
- ldf.fill f9=[base1],32
- ;;
- ldf.fill f10=[base0],32
- ldf.fill f11=[base1],32
- ;;
- ldf.fill f12=[base0],32
- ldf.fill f13=[base1],32
- ;;
- ldf.fill f14=[base0],32
- ldf.fill f15=[base1],32
- mov r15=__NR_rt_sigreturn
- .restore sp // pop .prologue
- break __BREAK_SYSCALL
-
- .prologue
- SIGTRAMP_SAVES
-setup_rbs:
- mov ar.rsc=0 // put RSE into enforced lazy mode
- ;;
- .save ar.rnat, r19
- mov r19=ar.rnat // save RNaT before switching backing store area
- adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
-
- mov r18=ar.bspstore
- mov ar.bspstore=r15 // switch over to new register backing store area
- ;;
-
- .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
- st8 [r14]=r19 // save sc_ar_rnat
- .body
- mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16
- adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
- ;;
- invala
- sub r15=r16,r15
- extr.u r20=r18,3,6
- ;;
- mov ar.rsc=0xf // set RSE into eager mode, pl 3
- cmp.eq p8,p0=63,r20
- shl r15=r15,16
- ;;
- st8 [r14]=r15 // save sc_loadrs
-(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now
- .restore sp // pop .prologue
- br.cond.sptk back_from_setup_rbs
-
- .prologue
- SIGTRAMP_SAVES
- .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
- .body
-restore_rbs:
- // On input:
- // r14 = bsp1 (bsp at the time of return from signal handler)
- // r15 = bsp0 (bsp at the time the signal occurred)
- //
- // Here, we need to calculate bspstore0, the value that ar.bspstore needs
- // to be set to, based on bsp0 and the size of the dirty partition on
- // the alternate stack (sc_loadrs >> 16). This can be done with the
- // following algorithm:
- //
- // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
- //
- // This is what the code below does.
- //
- alloc r2=ar.pfs,0,0,0,0 // alloc null frame
- adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
- adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
- ;;
- ld8 r17=[r16]
- ld8 r16=[r18] // get new rnat
- extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0)
- ;;
- mov ar.rsc=r17 // put RSE into enforced lazy mode
- shr.u r17=r17,16
- ;;
- sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
- shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19)
- ;;
- loadrs // restore dirty partition
- extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1)
- ;;
- add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
- ;;
- shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
- ;;
- sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1)
- movl r17=0x8208208208208209
- ;;
- add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
- setf.sig f7=r17
- cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)?
- ;;
-(p7) adds r18=-62,r18 // delta -= 62
- ;;
- setf.sig f6=r18
- ;;
- xmpy.h f6=f6,f7
- ;;
- getf.sig r17=f6
- ;;
- add r17=r17,r18
- shr r18=r18,63
- ;;
- shr r17=r17,5
- ;;
- sub r17=r17,r18 // r17 = delta/63
- ;;
- add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
- ;;
- shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
- ;;
- mov ar.bspstore=r15 // switch back to old register backing store area
- ;;
- mov ar.rnat=r16 // restore RNaT
- mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
- // invala not necessary as that will happen when returning to user-mode
- br.cond.sptk back_from_restore_rbs
-END(__kernel_sigtramp)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
deleted file mode 100644
index 58582ccdfe..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its
- * virtual address, with only one read-only segment and one execute-only segment (both fit
- * in one page). This script controls its layout.
- */
-
-
-#include <asm/system.h>
-
-SECTIONS
-{
- . = GATE_ADDR + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :readable
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
- .dynamic : { *(.dynamic) } :readable :dynamic
-
- /*
- * This linker script is used both with -r and with -shared. For the layouts to match,
- * we need to skip more than enough space for the dynamic symbol table et al. If this
- * amount is insufficient, ld -shared will barf. Just increase it here.
- */
- . = GATE_ADDR + 0x500;
-
- .data.patch : {
- __start_gate_mckinley_e9_patchlist = .;
- *(.data.patch.mckinley_e9)
- __end_gate_mckinley_e9_patchlist = .;
-
- __start_gate_vtop_patchlist = .;
- *(.data.patch.vtop)
- __end_gate_vtop_patchlist = .;
-
- __start_gate_fsyscall_patchlist = .;
- *(.data.patch.fsyscall_table)
- __end_gate_fsyscall_patchlist = .;
-
- __start_gate_brl_fsys_bubble_down_patchlist = .;
- *(.data.patch.brl_fsys_bubble_down)
- __end_gate_brl_fsys_bubble_down_patchlist = .;
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
- __start_gate_running_on_xen_patchlist = .;
- *(.data.patch.running_on_xen)
- __end_gate_running_on_xen_patchlist = .;
-
- __start_gate_brl_xen_ssm_i_0_patchlist = .;
- *(.data.patch.brl_xen_ssm_i_0)
- __end_gate_brl_xen_ssm_i_0_patchlist = .;
-
- __start_gate_brl_xen_ssm_i_1_patchlist = .;
- *(.data.patch.brl_xen_ssm_i_1)
- __end_gate_brl_xen_ssm_i_1_patchlist = .;
-#endif
- } :readable
- .IA_64.unwind_info : { *(.IA_64.unwind_info*) }
- .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind
-#ifdef HAVE_BUGGY_SEGREL
- .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable
-#else
- . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
- .text : { *(.text) *(.text.*) } :epc
-#endif
-
- /DISCARD/ : {
- *(.got.plt) *(.got)
- *(.data .data.* .gnu.linkonce.d.*)
- *(.dynbss)
- *(.bss .bss.* .gnu.linkonce.b.*)
- *(__ex_table)
- *(__mca_table)
- }
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */
-#ifndef HAVE_BUGGY_SEGREL
- epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */
-#endif
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- LINUX_2.5 {
- global:
- __kernel_syscall_via_break;
- __kernel_syscall_via_epc;
- __kernel_sigtramp;
-
- local: *;
- };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value. */
-ENTRY(__kernel_syscall_via_epc)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/head.S b/linux-2.6-xen-sparse/arch/ia64/kernel/head.S
deleted file mode 100644
index dded6f24f1..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/head.S
+++ /dev/null
@@ -1,1229 +0,0 @@
-/*
- * Here is where the ball gets rolling as far as the kernel is concerned.
- * When control is transferred to _start, the bootload has already
- * loaded us to the correct address. All that's left to do here is
- * to set up the kernel's global pointer and jump to the kernel
- * entry point.
- *
- * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- * Stephane Eranian <eranian@hpl.hp.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- * Copyright (C) 1999 Intel Corp.
- * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
- * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
- * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
- * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
- * Support for CPU Hotplug
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/fpu.h>
-#include <asm/kregs.h>
-#include <asm/mmu_context.h>
-#include <asm/asm-offsets.h>
-#include <asm/pal.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/mca_asm.h>
-
-#ifdef CONFIG_HOTPLUG_CPU
-#define SAL_PSR_BITS_TO_SET \
- (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL)
-
-#define SAVE_FROM_REG(src, ptr, dest) \
- mov dest=src;; \
- st8 [ptr]=dest,0x08
-
-#define RESTORE_REG(reg, ptr, _tmp) \
- ld8 _tmp=[ptr],0x08;; \
- mov reg=_tmp
-
-#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\
- mov ar.lc=IA64_NUM_DBG_REGS-1;; \
- mov _idx=0;; \
-1: \
- SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \
- add _idx=1,_idx;; \
- br.cloop.sptk.many 1b
-
-#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\
- mov ar.lc=IA64_NUM_DBG_REGS-1;; \
- mov _idx=0;; \
-_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \
- add _idx=1, _idx;; \
- br.cloop.sptk.many _lbl
-
-#define SAVE_ONE_RR(num, _reg, _tmp) \
- movl _tmp=(num<<61);; \
- mov _reg=rr[_tmp]
-
-#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
- SAVE_ONE_RR(0,_r0, _tmp);; \
- SAVE_ONE_RR(1,_r1, _tmp);; \
- SAVE_ONE_RR(2,_r2, _tmp);; \
- SAVE_ONE_RR(3,_r3, _tmp);; \
- SAVE_ONE_RR(4,_r4, _tmp);; \
- SAVE_ONE_RR(5,_r5, _tmp);; \
- SAVE_ONE_RR(6,_r6, _tmp);; \
- SAVE_ONE_RR(7,_r7, _tmp);;
-
-#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
- st8 [ptr]=_r0, 8;; \
- st8 [ptr]=_r1, 8;; \
- st8 [ptr]=_r2, 8;; \
- st8 [ptr]=_r3, 8;; \
- st8 [ptr]=_r4, 8;; \
- st8 [ptr]=_r5, 8;; \
- st8 [ptr]=_r6, 8;; \
- st8 [ptr]=_r7, 8;;
-
-#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \
- mov ar.lc=0x08-1;; \
- movl _idx1=0x00;; \
-RestRR: \
- dep.z _idx2=_idx1,61,3;; \
- ld8 _tmp=[ptr],8;; \
- mov rr[_idx2]=_tmp;; \
- srlz.d;; \
- add _idx1=1,_idx1;; \
- br.cloop.sptk.few RestRR
-
-#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \
- movl reg1=sal_state_for_booting_cpu;; \
- ld8 reg2=[reg1];;
-
-/*
- * Adjust region registers saved before starting to save
- * break regs and rest of the states that need to be preserved.
- */
-#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \
- SAVE_FROM_REG(b0,_reg1,_reg2);; \
- SAVE_FROM_REG(b1,_reg1,_reg2);; \
- SAVE_FROM_REG(b2,_reg1,_reg2);; \
- SAVE_FROM_REG(b3,_reg1,_reg2);; \
- SAVE_FROM_REG(b4,_reg1,_reg2);; \
- SAVE_FROM_REG(b5,_reg1,_reg2);; \
- st8 [_reg1]=r1,0x08;; \
- st8 [_reg1]=r12,0x08;; \
- st8 [_reg1]=r13,0x08;; \
- SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \
- SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \
- SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \
- SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \
- SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \
- SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \
- st8 [_reg1]=r4,0x08;; \
- st8 [_reg1]=r5,0x08;; \
- st8 [_reg1]=r6,0x08;; \
- st8 [_reg1]=r7,0x08;; \
- st8 [_reg1]=_pred,0x08;; \
- SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \
- stf.spill.nta [_reg1]=f2,16;; \
- stf.spill.nta [_reg1]=f3,16;; \
- stf.spill.nta [_reg1]=f4,16;; \
- stf.spill.nta [_reg1]=f5,16;; \
- stf.spill.nta [_reg1]=f16,16;; \
- stf.spill.nta [_reg1]=f17,16;; \
- stf.spill.nta [_reg1]=f18,16;; \
- stf.spill.nta [_reg1]=f19,16;; \
- stf.spill.nta [_reg1]=f20,16;; \
- stf.spill.nta [_reg1]=f21,16;; \
- stf.spill.nta [_reg1]=f22,16;; \
- stf.spill.nta [_reg1]=f23,16;; \
- stf.spill.nta [_reg1]=f24,16;; \
- stf.spill.nta [_reg1]=f25,16;; \
- stf.spill.nta [_reg1]=f26,16;; \
- stf.spill.nta [_reg1]=f27,16;; \
- stf.spill.nta [_reg1]=f28,16;; \
- stf.spill.nta [_reg1]=f29,16;; \
- stf.spill.nta [_reg1]=f30,16;; \
- stf.spill.nta [_reg1]=f31,16;;
-
-#else
-#define SET_AREA_FOR_BOOTING_CPU(a1, a2)
-#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3)
-#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
-#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
-#endif
-
-#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \
- movl _tmp1=(num << 61);; \
- mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \
- mov rr[_tmp1]=_tmp2
-
- .section __special_page_section,"ax"
-
- .global empty_zero_page
-empty_zero_page:
- .skip PAGE_SIZE
-
- .global swapper_pg_dir
-swapper_pg_dir:
- .skip PAGE_SIZE
-
- .rodata
-halt_msg:
- stringz "Halting kernel\n"
-
- .text
-
- .global start_ap
-
- /*
- * Start the kernel. When the bootloader passes control to _start(), r28
- * points to the address of the boot parameter area. Execution reaches
- * here in physical mode.
- */
-GLOBAL_ENTRY(_start)
-start_ap:
- .prologue
- .save rp, r0 // terminate unwind chain with a NULL rp
- .body
-
- rsm psr.i | psr.ic
- ;;
- srlz.i
- ;;
- {
- flushrs // must be first insn in group
- srlz.i
- }
- ;;
- /*
- * Save the region registers, predicate before they get clobbered
- */
- SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15);
- mov r25=pr;;
-
- /*
- * Initialize kernel region registers:
- * rr[0]: VHPT enabled, page size = PAGE_SHIFT
- * rr[1]: VHPT enabled, page size = PAGE_SHIFT
- * rr[2]: VHPT enabled, page size = PAGE_SHIFT
- * rr[3]: VHPT enabled, page size = PAGE_SHIFT
- * rr[4]: VHPT enabled, page size = PAGE_SHIFT
- * rr[5]: VHPT enabled, page size = PAGE_SHIFT
- * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
- * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
- * We initialize all of them to prevent inadvertently assuming
- * something about the state of address translation early in boot.
- */
- SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);;
- SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);;
- SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);;
- SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);;
- SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);;
- SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);;
- SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);;
- SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);;
- /*
- * Now pin mappings into the TLB for kernel text and data
- */
- mov r18=KERNEL_TR_PAGE_SHIFT<<2
- movl r17=KERNEL_START
- ;;
- mov cr.itir=r18
- mov cr.ifa=r17
- mov r16=IA64_TR_KERNEL
- mov r3=ip
- movl r18=PAGE_KERNEL
- ;;
- dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
- ;;
- or r18=r2,r18
- ;;
- srlz.i
- ;;
- itr.i itr[r16]=r18
- ;;
- itr.d dtr[r16]=r18
- ;;
- srlz.i
-
- /*
- * Switch into virtual mode:
- */
- movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
- |IA64_PSR_DI)
- ;;
- mov cr.ipsr=r16
- movl r17=1f
- ;;
- mov cr.iip=r17
- mov cr.ifs=r0
- ;;
- rfi
- ;;
-1: // now we are in virtual mode
-
- SET_AREA_FOR_BOOTING_CPU(r2, r16);
-
- STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15);
- SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25)
- ;;
-
- // set IVT entry point---can't access I/O ports without it
- movl r3=ia64_ivt
- ;;
- mov cr.iva=r3
- movl r2=FPSR_DEFAULT
- ;;
- srlz.i
- movl gp=__gp
-
- mov ar.fpsr=r2
- ;;
-
-#define isAP p2 // are we an Application Processor?
-#define isBP p3 // are we the Bootstrap Processor?
-
-#ifdef CONFIG_SMP
- /*
- * Find the init_task for the currently booting CPU. At poweron, and in
- * UP mode, task_for_booting_cpu is NULL.
- */
- movl r3=task_for_booting_cpu
- ;;
- ld8 r3=[r3]
- movl r2=init_task
- ;;
- cmp.eq isBP,isAP=r3,r0
- ;;
-(isAP) mov r2=r3
-#else
- movl r2=init_task
- cmp.eq isBP,isAP=r0,r0
-#endif
- ;;
- tpa r3=r2 // r3 == phys addr of task struct
- mov r16=-1
-(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
-
- // load mapping for stack (virtaddr in r2, physaddr in r3)
- rsm psr.ic
- movl r17=PAGE_KERNEL
- ;;
- srlz.d
- dep r18=0,r3,0,12
- ;;
- or r18=r17,r18
- dep r2=-1,r3,61,3 // IMVA of task
- ;;
- mov r17=rr[r2]
- shr.u r16=r3,IA64_GRANULE_SHIFT
- ;;
- dep r17=0,r17,8,24
- ;;
- mov cr.itir=r17
- mov cr.ifa=r2
-
- mov r19=IA64_TR_CURRENT_STACK
- ;;
- itr.d dtr[r19]=r18
- ;;
- ssm psr.ic
- srlz.d
- ;;
-
-.load_current:
- // load the "current" pointer (r13) and ar.k6 with the current task
- mov IA64_KR(CURRENT)=r2 // virtual address
- mov IA64_KR(CURRENT_STACK)=r16
- mov r13=r2
- /*
- * Reserve space at the top of the stack for "struct pt_regs". Kernel
- * threads don't store interesting values in that structure, but the space
- * still needs to be there because time-critical stuff such as the context
- * switching can be implemented more efficiently (for example, __switch_to()
- * always sets the psr.dfh bit of the task it is switching to).
- */
-
- addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
- addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE
- mov ar.rsc=0 // place RSE in enforced lazy mode
- ;;
- loadrs // clear the dirty partition
- mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base
- ;;
- mov ar.bspstore=r2 // establish the new RSE stack
- ;;
- mov ar.rsc=0x3 // place RSE in eager mode
-
-(isBP) dep r28=-1,r28,61,3 // make address virtual
-(isBP) movl r2=ia64_boot_param
- ;;
-(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
-
-#ifdef CONFIG_XEN
- // Note: isBP is used by the subprogram.
- br.call.sptk.many rp=early_xen_setup
- ;;
-#endif
-
-#ifdef CONFIG_SMP
-(isAP) br.call.sptk.many rp=start_secondary
-.ret0:
-(isAP) br.cond.sptk self
-#endif
-
- // This is executed by the bootstrap processor (bsp) only:
-
-#ifdef CONFIG_IA64_FW_EMU
- // initialize PAL & SAL emulator:
- br.call.sptk.many rp=sys_fw_init
-.ret1:
-#endif
- br.call.sptk.many rp=start_kernel
-.ret2: addl r3=@ltoff(halt_msg),gp
- ;;
- alloc r2=ar.pfs,8,0,2,0
- ;;
- ld8 out0=[r3]
- br.call.sptk.many b0=console_print
-
-self: hint @pause
- br.sptk.many self // endless loop
-END(_start)
-
-GLOBAL_ENTRY(ia64_save_debug_regs)
- alloc r16=ar.pfs,1,0,0,0
- mov r20=ar.lc // preserve ar.lc
- mov ar.lc=IA64_NUM_DBG_REGS-1
- mov r18=0
- add r19=IA64_NUM_DBG_REGS*8,in0
- ;;
-1: mov r16=dbr[r18]
-#ifdef CONFIG_ITANIUM
- ;;
- srlz.d
-#endif
- mov r17=ibr[r18]
- add r18=1,r18
- ;;
- st8.nta [in0]=r16,8
- st8.nta [r19]=r17,8
- br.cloop.sptk.many 1b
- ;;
- mov ar.lc=r20 // restore ar.lc
- br.ret.sptk.many rp
-END(ia64_save_debug_regs)
-
-GLOBAL_ENTRY(ia64_load_debug_regs)
- alloc r16=ar.pfs,1,0,0,0
- lfetch.nta [in0]
- mov r20=ar.lc // preserve ar.lc
- add r19=IA64_NUM_DBG_REGS*8,in0
- mov ar.lc=IA64_NUM_DBG_REGS-1
- mov r18=-1
- ;;
-1: ld8.nta r16=[in0],8
- ld8.nta r17=[r19],8
- add r18=1,r18
- ;;
- mov dbr[r18]=r16
-#ifdef CONFIG_ITANIUM
- ;;
- srlz.d // Errata 132 (NoFix status)
-#endif
- mov ibr[r18]=r17
- br.cloop.sptk.many 1b
- ;;
- mov ar.lc=r20 // restore ar.lc
- br.ret.sptk.many rp
-END(ia64_load_debug_regs)
-
-GLOBAL_ENTRY(__ia64_save_fpu)
- alloc r2=ar.pfs,1,4,0,0
- adds loc0=96*16-16,in0
- adds loc1=96*16-16-128,in0
- ;;
- stf.spill.nta [loc0]=f127,-256
- stf.spill.nta [loc1]=f119,-256
- ;;
- stf.spill.nta [loc0]=f111,-256
- stf.spill.nta [loc1]=f103,-256
- ;;
- stf.spill.nta [loc0]=f95,-256
- stf.spill.nta [loc1]=f87,-256
- ;;
- stf.spill.nta [loc0]=f79,-256
- stf.spill.nta [loc1]=f71,-256
- ;;
- stf.spill.nta [loc0]=f63,-256
- stf.spill.nta [loc1]=f55,-256
- adds loc2=96*16-32,in0
- ;;
- stf.spill.nta [loc0]=f47,-256
- stf.spill.nta [loc1]=f39,-256
- adds loc3=96*16-32-128,in0
- ;;
- stf.spill.nta [loc2]=f126,-256
- stf.spill.nta [loc3]=f118,-256
- ;;
- stf.spill.nta [loc2]=f110,-256
- stf.spill.nta [loc3]=f102,-256
- ;;
- stf.spill.nta [loc2]=f94,-256
- stf.spill.nta [loc3]=f86,-256
- ;;
- stf.spill.nta [loc2]=f78,-256
- stf.spill.nta [loc3]=f70,-256
- ;;
- stf.spill.nta [loc2]=f62,-256
- stf.spill.nta [loc3]=f54,-256
- adds loc0=96*16-48,in0
- ;;
- stf.spill.nta [loc2]=f46,-256
- stf.spill.nta [loc3]=f38,-256
- adds loc1=96*16-48-128,in0
- ;;
- stf.spill.nta [loc0]=f125,-256
- stf.spill.nta [loc1]=f117,-256
- ;;
- stf.spill.nta [loc0]=f109,-256
- stf.spill.nta [loc1]=f101,-256
- ;;
- stf.spill.nta [loc0]=f93,-256
- stf.spill.nta [loc1]=f85,-256
- ;;
- stf.spill.nta [loc0]=f77,-256
- stf.spill.nta [loc1]=f69,-256
- ;;
- stf.spill.nta [loc0]=f61,-256
- stf.spill.nta [loc1]=f53,-256
- adds loc2=96*16-64,in0
- ;;
- stf.spill.nta [loc0]=f45,-256
- stf.spill.nta [loc1]=f37,-256
- adds loc3=96*16-64-128,in0
- ;;
- stf.spill.nta [loc2]=f124,-256
- stf.spill.nta [loc3]=f116,-256
- ;;
- stf.spill.nta [loc2]=f108,-256
- stf.spill.nta [loc3]=f100,-256
- ;;
- stf.spill.nta [loc2]=f92,-256
- stf.spill.nta [loc3]=f84,-256
- ;;
- stf.spill.nta [loc2]=f76,-256
- stf.spill.nta [loc3]=f68,-256
- ;;
- stf.spill.nta [loc2]=f60,-256
- stf.spill.nta [loc3]=f52,-256
- adds loc0=96*16-80,in0
- ;;
- stf.spill.nta [loc2]=f44,-256
- stf.spill.nta [loc3]=f36,-256
- adds loc1=96*16-80-128,in0
- ;;
- stf.spill.nta [loc0]=f123,-256
- stf.spill.nta [loc1]=f115,-256
- ;;
- stf.spill.nta [loc0]=f107,-256
- stf.spill.nta [loc1]=f99,-256
- ;;
- stf.spill.nta [loc0]=f91,-256
- stf.spill.nta [loc1]=f83,-256
- ;;
- stf.spill.nta [loc0]=f75,-256
- stf.spill.nta [loc1]=f67,-256
- ;;
- stf.spill.nta [loc0]=f59,-256
- stf.spill.nta [loc1]=f51,-256
- adds loc2=96*16-96,in0
- ;;
- stf.spill.nta [loc0]=f43,-256
- stf.spill.nta [loc1]=f35,-256
- adds loc3=96*16-96-128,in0
- ;;
- stf.spill.nta [loc2]=f122,-256
- stf.spill.nta [loc3]=f114,-256
- ;;
- stf.spill.nta [loc2]=f106,-256
- stf.spill.nta [loc3]=f98,-256
- ;;
- stf.spill.nta [loc2]=f90,-256
- stf.spill.nta [loc3]=f82,-256
- ;;
- stf.spill.nta [loc2]=f74,-256
- stf.spill.nta [loc3]=f66,-256
- ;;
- stf.spill.nta [loc2]=f58,-256
- stf.spill.nta [loc3]=f50,-256
- adds loc0=96*16-112,in0
- ;;
- stf.spill.nta [loc2]=f42,-256
- stf.spill.nta [loc3]=f34,-256
- adds loc1=96*16-112-128,in0
- ;;
- stf.spill.nta [loc0]=f121,-256
- stf.spill.nta [loc1]=f113,-256
- ;;
- stf.spill.nta [loc0]=f105,-256
- stf.spill.nta [loc1]=f97,-256
- ;;
- stf.spill.nta [loc0]=f89,-256
- stf.spill.nta [loc1]=f81,-256
- ;;
- stf.spill.nta [loc0]=f73,-256
- stf.spill.nta [loc1]=f65,-256
- ;;
- stf.spill.nta [loc0]=f57,-256
- stf.spill.nta [loc1]=f49,-256
- adds loc2=96*16-128,in0
- ;;
- stf.spill.nta [loc0]=f41,-256
- stf.spill.nta [loc1]=f33,-256
- adds loc3=96*16-128-128,in0
- ;;
- stf.spill.nta [loc2]=f120,-256
- stf.spill.nta [loc3]=f112,-256
- ;;
- stf.spill.nta [loc2]=f104,-256
- stf.spill.nta [loc3]=f96,-256
- ;;
- stf.spill.nta [loc2]=f88,-256
- stf.spill.nta [loc3]=f80,-256
- ;;
- stf.spill.nta [loc2]=f72,-256
- stf.spill.nta [loc3]=f64,-256
- ;;
- stf.spill.nta [loc2]=f56,-256
- stf.spill.nta [loc3]=f48,-256
- ;;
- stf.spill.nta [loc2]=f40
- stf.spill.nta [loc3]=f32
- br.ret.sptk.many rp
-END(__ia64_save_fpu)
-
-GLOBAL_ENTRY(__ia64_load_fpu)
- alloc r2=ar.pfs,1,2,0,0
- adds r3=128,in0
- adds r14=256,in0
- adds r15=384,in0
- mov loc0=512
- mov loc1=-1024+16
- ;;
- ldf.fill.nta f32=[in0],loc0
- ldf.fill.nta f40=[ r3],loc0
- ldf.fill.nta f48=[r14],loc0
- ldf.fill.nta f56=[r15],loc0
- ;;
- ldf.fill.nta f64=[in0],loc0
- ldf.fill.nta f72=[ r3],loc0
- ldf.fill.nta f80=[r14],loc0
- ldf.fill.nta f88=[r15],loc0
- ;;
- ldf.fill.nta f96=[in0],loc1
- ldf.fill.nta f104=[ r3],loc1
- ldf.fill.nta f112=[r14],loc1
- ldf.fill.nta f120=[r15],loc1
- ;;
- ldf.fill.nta f33=[in0],loc0
- ldf.fill.nta f41=[ r3],loc0
- ldf.fill.nta f49=[r14],loc0
- ldf.fill.nta f57=[r15],loc0
- ;;
- ldf.fill.nta f65=[in0],loc0
- ldf.fill.nta f73=[ r3],loc0
- ldf.fill.nta f81=[r14],loc0
- ldf.fill.nta f89=[r15],loc0
- ;;
- ldf.fill.nta f97=[in0],loc1
- ldf.fill.nta f105=[ r3],loc1
- ldf.fill.nta f113=[r14],loc1
- ldf.fill.nta f121=[r15],loc1
- ;;
- ldf.fill.nta f34=[in0],loc0
- ldf.fill.nta f42=[ r3],loc0
- ldf.fill.nta f50=[r14],loc0
- ldf.fill.nta f58=[r15],loc0
- ;;
- ldf.fill.nta f66=[in0],loc0
- ldf.fill.nta f74=[ r3],loc0
- ldf.fill.nta f82=[r14],loc0
- ldf.fill.nta f90=[r15],loc0
- ;;
- ldf.fill.nta f98=[in0],loc1
- ldf.fill.nta f106=[ r3],loc1
- ldf.fill.nta f114=[r14],loc1
- ldf.fill.nta f122=[r15],loc1
- ;;
- ldf.fill.nta f35=[in0],loc0
- ldf.fill.nta f43=[ r3],loc0
- ldf.fill.nta f51=[r14],loc0
- ldf.fill.nta f59=[r15],loc0
- ;;
- ldf.fill.nta f67=[in0],loc0
- ldf.fill.nta f75=[ r3],loc0
- ldf.fill.nta f83=[r14],loc0
- ldf.fill.nta f91=[r15],loc0
- ;;
- ldf.fill.nta f99=[in0],loc1
- ldf.fill.nta f107=[ r3],loc1
- ldf.fill.nta f115=[r14],loc1
- ldf.fill.nta f123=[r15],loc1
- ;;
- ldf.fill.nta f36=[in0],loc0
- ldf.fill.nta f44=[ r3],loc0
- ldf.fill.nta f52=[r14],loc0
- ldf.fill.nta f60=[r15],loc0
- ;;
- ldf.fill.nta f68=[in0],loc0
- ldf.fill.nta f76=[ r3],loc0
- ldf.fill.nta f84=[r14],loc0
- ldf.fill.nta f92=[r15],loc0
- ;;
- ldf.fill.nta f100=[in0],loc1
- ldf.fill.nta f108=[ r3],loc1
- ldf.fill.nta f116=[r14],loc1
- ldf.fill.nta f124=[r15],loc1
- ;;
- ldf.fill.nta f37=[in0],loc0
- ldf.fill.nta f45=[ r3],loc0
- ldf.fill.nta f53=[r14],loc0
- ldf.fill.nta f61=[r15],loc0
- ;;
- ldf.fill.nta f69=[in0],loc0
- ldf.fill.nta f77=[ r3],loc0
- ldf.fill.nta f85=[r14],loc0
- ldf.fill.nta f93=[r15],loc0
- ;;
- ldf.fill.nta f101=[in0],loc1
- ldf.fill.nta f109=[ r3],loc1
- ldf.fill.nta f117=[r14],loc1
- ldf.fill.nta f125=[r15],loc1
- ;;
- ldf.fill.nta f38 =[in0],loc0
- ldf.fill.nta f46 =[ r3],loc0
- ldf.fill.nta f54 =[r14],loc0
- ldf.fill.nta f62 =[r15],loc0
- ;;
- ldf.fill.nta f70 =[in0],loc0
- ldf.fill.nta f78 =[ r3],loc0
- ldf.fill.nta f86 =[r14],loc0
- ldf.fill.nta f94 =[r15],loc0
- ;;
- ldf.fill.nta f102=[in0],loc1
- ldf.fill.nta f110=[ r3],loc1
- ldf.fill.nta f118=[r14],loc1
- ldf.fill.nta f126=[r15],loc1
- ;;
- ldf.fill.nta f39 =[in0],loc0
- ldf.fill.nta f47 =[ r3],loc0
- ldf.fill.nta f55 =[r14],loc0
- ldf.fill.nta f63 =[r15],loc0
- ;;
- ldf.fill.nta f71 =[in0],loc0
- ldf.fill.nta f79 =[ r3],loc0
- ldf.fill.nta f87 =[r14],loc0
- ldf.fill.nta f95 =[r15],loc0
- ;;
- ldf.fill.nta f103=[in0]
- ldf.fill.nta f111=[ r3]
- ldf.fill.nta f119=[r14]
- ldf.fill.nta f127=[r15]
- br.ret.sptk.many rp
-END(__ia64_load_fpu)
-
-GLOBAL_ENTRY(__ia64_init_fpu)
- stf.spill [sp]=f0 // M3
- mov f32=f0 // F
- nop.b 0
-
- ldfps f33,f34=[sp] // M0
- ldfps f35,f36=[sp] // M1
- mov f37=f0 // F
- ;;
-
- setf.s f38=r0 // M2
- setf.s f39=r0 // M3
- mov f40=f0 // F
-
- ldfps f41,f42=[sp] // M0
- ldfps f43,f44=[sp] // M1
- mov f45=f0 // F
-
- setf.s f46=r0 // M2
- setf.s f47=r0 // M3
- mov f48=f0 // F
-
- ldfps f49,f50=[sp] // M0
- ldfps f51,f52=[sp] // M1
- mov f53=f0 // F
-
- setf.s f54=r0 // M2
- setf.s f55=r0 // M3
- mov f56=f0 // F
-
- ldfps f57,f58=[sp] // M0
- ldfps f59,f60=[sp] // M1
- mov f61=f0 // F
-
- setf.s f62=r0 // M2
- setf.s f63=r0 // M3
- mov f64=f0 // F
-
- ldfps f65,f66=[sp] // M0
- ldfps f67,f68=[sp] // M1
- mov f69=f0 // F
-
- setf.s f70=r0 // M2
- setf.s f71=r0 // M3
- mov f72=f0 // F
-
- ldfps f73,f74=[sp] // M0
- ldfps f75,f76=[sp] // M1
- mov f77=f0 // F
-
- setf.s f78=r0 // M2
- setf.s f79=r0 // M3
- mov f80=f0 // F
-
- ldfps f81,f82=[sp] // M0
- ldfps f83,f84=[sp] // M1
- mov f85=f0 // F
-
- setf.s f86=r0 // M2
- setf.s f87=r0 // M3
- mov f88=f0 // F
-
- /*
- * When the instructions are cached, it would be faster to initialize
- * the remaining registers with simply mov instructions (F-unit).
- * This gets the time down to ~29 cycles. However, this would use up
- * 33 bundles, whereas continuing with the above pattern yields
- * 10 bundles and ~30 cycles.
- */
-
- ldfps f89,f90=[sp] // M0
- ldfps f91,f92=[sp] // M1
- mov f93=f0 // F
-
- setf.s f94=r0 // M2
- setf.s f95=r0 // M3
- mov f96=f0 // F
-
- ldfps f97,f98=[sp] // M0
- ldfps f99,f100=[sp] // M1
- mov f101=f0 // F
-
- setf.s f102=r0 // M2
- setf.s f103=r0 // M3
- mov f104=f0 // F
-
- ldfps f105,f106=[sp] // M0
- ldfps f107,f108=[sp] // M1
- mov f109=f0 // F
-
- setf.s f110=r0 // M2
- setf.s f111=r0 // M3
- mov f112=f0 // F
-
- ldfps f113,f114=[sp] // M0
- ldfps f115,f116=[sp] // M1
- mov f117=f0 // F
-
- setf.s f118=r0 // M2
- setf.s f119=r0 // M3
- mov f120=f0 // F
-
- ldfps f121,f122=[sp] // M0
- ldfps f123,f124=[sp] // M1
- mov f125=f0 // F
-
- setf.s f126=r0 // M2
- setf.s f127=r0 // M3
- br.ret.sptk.many rp // F
-END(__ia64_init_fpu)
-
-/*
- * Switch execution mode from virtual to physical
- *
- * Inputs:
- * r16 = new psr to establish
- * Output:
- * r19 = old virtual address of ar.bsp
- * r20 = old virtual address of sp
- *
- * Note: RSE must already be in enforced lazy mode
- */
-GLOBAL_ENTRY(ia64_switch_mode_phys)
- {
- rsm psr.i | psr.ic // disable interrupts and interrupt collection
- mov r15=ip
- }
- ;;
- {
- flushrs // must be first insn in group
- srlz.i
- }
- ;;
- mov cr.ipsr=r16 // set new PSR
- add r3=1f-ia64_switch_mode_phys,r15
-
- mov r19=ar.bsp
- mov r20=sp
- mov r14=rp // get return address into a general register
- ;;
-
- // going to physical mode, use tpa to translate virt->phys
- tpa r17=r19
- tpa r3=r3
- tpa sp=sp
- tpa r14=r14
- ;;
-
- mov r18=ar.rnat // save ar.rnat
- mov ar.bspstore=r17 // this steps on ar.rnat
- mov cr.iip=r3
- mov cr.ifs=r0
- ;;
- mov ar.rnat=r18 // restore ar.rnat
- rfi // must be last insn in group
- ;;
-1: mov rp=r14
- br.ret.sptk.many rp
-END(ia64_switch_mode_phys)
-
-/*
- * Switch execution mode from physical to virtual
- *
- * Inputs:
- * r16 = new psr to establish
- * r19 = new bspstore to establish
- * r20 = new sp to establish
- *
- * Note: RSE must already be in enforced lazy mode
- */
-GLOBAL_ENTRY(ia64_switch_mode_virt)
- {
- rsm psr.i | psr.ic // disable interrupts and interrupt collection
- mov r15=ip
- }
- ;;
- {
- flushrs // must be first insn in group
- srlz.i
- }
- ;;
- mov cr.ipsr=r16 // set new PSR
- add r3=1f-ia64_switch_mode_virt,r15
-
- mov r14=rp // get return address into a general register
- ;;
-
- // going to virtual
- // - for code addresses, set upper bits of addr to KERNEL_START
- // - for stack addresses, copy from input argument
- movl r18=KERNEL_START
- dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
- dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
- mov sp=r20
- ;;
- or r3=r3,r18
- or r14=r14,r18
- ;;
-
- mov r18=ar.rnat // save ar.rnat
- mov ar.bspstore=r19 // this steps on ar.rnat
- mov cr.iip=r3
- mov cr.ifs=r0
- ;;
- mov ar.rnat=r18 // restore ar.rnat
- rfi // must be last insn in group
- ;;
-1: mov rp=r14
- br.ret.sptk.many rp
-END(ia64_switch_mode_virt)
-
-GLOBAL_ENTRY(ia64_delay_loop)
- .prologue
-{ nop 0 // work around GAS unwind info generation bug...
- .save ar.lc,r2
- mov r2=ar.lc
- .body
- ;;
- mov ar.lc=r32
-}
- ;;
- // force loop to be 32-byte aligned (GAS bug means we cannot use .align
- // inside function body without corrupting unwind info).
-{ nop 0 }
-1: br.cloop.sptk.few 1b
- ;;
- mov ar.lc=r2
- br.ret.sptk.many rp
-END(ia64_delay_loop)
-
-/*
- * Return a CPU-local timestamp in nano-seconds. This timestamp is
- * NOT synchronized across CPUs its return value must never be
- * compared against the values returned on another CPU. The usage in
- * kernel/sched.c ensures that.
- *
- * The return-value of sched_clock() is NOT supposed to wrap-around.
- * If it did, it would cause some scheduling hiccups (at the worst).
- * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
- * that would happen only once every 5+ years.
- *
- * The code below basically calculates:
- *
- * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
- *
- * except that the multiplication and the shift are done with 128-bit
- * intermediate precision so that we can produce a full 64-bit result.
- */
-GLOBAL_ENTRY(sched_clock)
- addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
- mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
- ;;
- ldf8 f8=[r8]
- ;;
- setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8...
- ;;
- xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
- xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
- ;;
- getf.sig r8=f10 // (5 cyc)
- getf.sig r9=f11
- ;;
- shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
- br.ret.sptk.many rp
-END(sched_clock)
-
-GLOBAL_ENTRY(start_kernel_thread)
- .prologue
- .save rp, r0 // this is the end of the call-chain
- .body
- alloc r2 = ar.pfs, 0, 0, 2, 0
- mov out0 = r9
- mov out1 = r11;;
- br.call.sptk.many rp = kernel_thread_helper;;
- mov out0 = r8
- br.call.sptk.many rp = sys_exit;;
-1: br.sptk.few 1b // not reached
-END(start_kernel_thread)
-
-#ifdef CONFIG_IA64_BRL_EMU
-
-/*
- * Assembly routines used by brl_emu.c to set preserved register state.
- */
-
-#define SET_REG(reg) \
- GLOBAL_ENTRY(ia64_set_##reg); \
- alloc r16=ar.pfs,1,0,0,0; \
- mov reg=r32; \
- ;; \
- br.ret.sptk.many rp; \
- END(ia64_set_##reg)
-
-SET_REG(b1);
-SET_REG(b2);
-SET_REG(b3);
-SET_REG(b4);
-SET_REG(b5);
-
-#endif /* CONFIG_IA64_BRL_EMU */
-
-#ifdef CONFIG_SMP
- /*
- * This routine handles spinlock contention. It uses a non-standard calling
- * convention to avoid converting leaf routines into interior routines. Because
- * of this special convention, there are several restrictions:
- *
- * - do not use gp relative variables, this code is called from the kernel
- * and from modules, r1 is undefined.
- * - do not use stacked registers, the caller owns them.
- * - do not use the scratch stack space, the caller owns it.
- * - do not use any registers other than the ones listed below
- *
- * Inputs:
- * ar.pfs - saved CFM of caller
- * ar.ccv - 0 (and available for use)
- * r27 - flags from spin_lock_irqsave or 0. Must be preserved.
- * r28 - available for use.
- * r29 - available for use.
- * r30 - available for use.
- * r31 - address of lock, available for use.
- * b6 - return address
- * p14 - available for use.
- * p15 - used to track flag status.
- *
- * If you patch this code to use more registers, do not forget to update
- * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
- */
-
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-
-GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
- .prologue
- .save ar.pfs, r0 // this code effectively has a zero frame size
- .save rp, r28
- .body
- nop 0
- tbit.nz p15,p0=r27,IA64_PSR_I_BIT
- .restore sp // pop existing prologue after next insn
- mov b6 = r28
- .prologue
- .save ar.pfs, r0
- .altrp b6
- .body
- ;;
-(p15) ssm psr.i // reenable interrupts if they were on
- // DavidM says that srlz.d is slow and is not required in this case
-.wait:
- // exponential backoff, kdb, lockmeter etc. go in here
- hint @pause
- ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
- nop 0
- ;;
- cmp4.ne p14,p0=r30,r0
-(p14) br.cond.sptk.few .wait
-(p15) rsm psr.i // disable interrupts if we reenabled them
- br.cond.sptk.few b6 // lock is now free, try to acquire
- .global ia64_spinlock_contention_pre3_4_end // for kernprof
-ia64_spinlock_contention_pre3_4_end:
-END(ia64_spinlock_contention_pre3_4)
-
-#else
-
-GLOBAL_ENTRY(ia64_spinlock_contention)
- .prologue
- .altrp b6
- .body
- tbit.nz p15,p0=r27,IA64_PSR_I_BIT
- ;;
-.wait:
-(p15) ssm psr.i // reenable interrupts if they were on
- // DavidM says that srlz.d is slow and is not required in this case
-.wait2:
- // exponential backoff, kdb, lockmeter etc. go in here
- hint @pause
- ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
- ;;
- cmp4.ne p14,p0=r30,r0
- mov r30 = 1
-(p14) br.cond.sptk.few .wait2
-(p15) rsm psr.i // disable interrupts if we reenabled them
- ;;
- cmpxchg4.acq r30=[r31], r30, ar.ccv
- ;;
- cmp4.ne p14,p0=r0,r30
-(p14) br.cond.sptk.few .wait
-
- br.ret.sptk.many b6 // lock is now taken
-END(ia64_spinlock_contention)
-
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-GLOBAL_ENTRY(ia64_jump_to_sal)
- alloc r16=ar.pfs,1,0,0,0;;
- rsm psr.i | psr.ic
-{
- flushrs
- srlz.i
-}
- tpa r25=in0
- movl r18=tlb_purge_done;;
- DATA_VA_TO_PA(r18);;
- mov b1=r18 // Return location
- movl r18=ia64_do_tlb_purge;;
- DATA_VA_TO_PA(r18);;
- mov b2=r18 // doing tlb_flush work
- mov ar.rsc=0 // Put RSE in enforced lazy, LE mode
- movl r17=1f;;
- DATA_VA_TO_PA(r17);;
- mov cr.iip=r17
- movl r16=SAL_PSR_BITS_TO_SET;;
- mov cr.ipsr=r16
- mov cr.ifs=r0;;
- rfi;;
-1:
- /*
- * Invalidate all TLB data/inst
- */
- br.sptk.many b2;; // jump to tlb purge code
-
-tlb_purge_done:
- RESTORE_REGION_REGS(r25, r17,r18,r19);;
- RESTORE_REG(b0, r25, r17);;
- RESTORE_REG(b1, r25, r17);;
- RESTORE_REG(b2, r25, r17);;
- RESTORE_REG(b3, r25, r17);;
- RESTORE_REG(b4, r25, r17);;
- RESTORE_REG(b5, r25, r17);;
- ld8 r1=[r25],0x08;;
- ld8 r12=[r25],0x08;;
- ld8 r13=[r25],0x08;;
- RESTORE_REG(ar.fpsr, r25, r17);;
- RESTORE_REG(ar.pfs, r25, r17);;
- RESTORE_REG(ar.rnat, r25, r17);;
- RESTORE_REG(ar.unat, r25, r17);;
- RESTORE_REG(ar.bspstore, r25, r17);;
- RESTORE_REG(cr.dcr, r25, r17);;
- RESTORE_REG(cr.iva, r25, r17);;
- RESTORE_REG(cr.pta, r25, r17);;
- RESTORE_REG(cr.itv, r25, r17);;
- RESTORE_REG(cr.pmv, r25, r17);;
- RESTORE_REG(cr.cmcv, r25, r17);;
- RESTORE_REG(cr.lrr0, r25, r17);;
- RESTORE_REG(cr.lrr1, r25, r17);;
- ld8 r4=[r25],0x08;;
- ld8 r5=[r25],0x08;;
- ld8 r6=[r25],0x08;;
- ld8 r7=[r25],0x08;;
- ld8 r17=[r25],0x08;;
- mov pr=r17,-1;;
- RESTORE_REG(ar.lc, r25, r17);;
- /*
- * Now Restore floating point regs
- */
- ldf.fill.nta f2=[r25],16;;
- ldf.fill.nta f3=[r25],16;;
- ldf.fill.nta f4=[r25],16;;
- ldf.fill.nta f5=[r25],16;;
- ldf.fill.nta f16=[r25],16;;
- ldf.fill.nta f17=[r25],16;;
- ldf.fill.nta f18=[r25],16;;
- ldf.fill.nta f19=[r25],16;;
- ldf.fill.nta f20=[r25],16;;
- ldf.fill.nta f21=[r25],16;;
- ldf.fill.nta f22=[r25],16;;
- ldf.fill.nta f23=[r25],16;;
- ldf.fill.nta f24=[r25],16;;
- ldf.fill.nta f25=[r25],16;;
- ldf.fill.nta f26=[r25],16;;
- ldf.fill.nta f27=[r25],16;;
- ldf.fill.nta f28=[r25],16;;
- ldf.fill.nta f29=[r25],16;;
- ldf.fill.nta f30=[r25],16;;
- ldf.fill.nta f31=[r25],16;;
-
- /*
- * Now that we have done all the register restores
- * we are now ready for the big DIVE to SAL Land
- */
- ssm psr.ic;;
- srlz.d;;
- br.ret.sptk.many b0;;
-END(ia64_jump_to_sal)
-#endif /* CONFIG_HOTPLUG_CPU */
-
-#endif /* CONFIG_SMP */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c b/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
deleted file mode 100644
index 1541b57a5c..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
+++ /dev/null
@@ -1,1253 +0,0 @@
-/*
- * I/O SAPIC support.
- *
- * Copyright (C) 1999 Intel Corp.
- * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
- * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
- * David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
- *
- * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O
- * APIC code. In particular, we now have separate
- * handlers for edge and level triggered
- * interrupts.
- * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
- * allocation PCI to vector mapping, shared PCI
- * interrupts.
- * 00/10/27 D. Mosberger Document things a bit more to make them more
- * understandable. Clean up much of the old
- * IOSAPIC cruft.
- * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts
- * and fixes for ACPI S5(SoftOff) support.
- * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
- * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt
- * vectors in iosapic_set_affinity(),
- * initializations for /proc/irq/#/smp_affinity
- * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
- * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
- * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to
- * IOSAPIC mapping error
- * 02/07/29 T. Kochi Allocate interrupt vectors dynamically
- * 02/08/04 T. Kochi Cleaned up terminology (irq, global system
- * interrupt, vector, etc.)
- * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's
- * pci_irq code.
- * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
- * Remove iosapic_address & gsi_base from
- * external interfaces. Rationalize
- * __init/__devinit attributes.
- * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
- * Updated to work with irq migration necessary
- * for CPU Hotplug
- */
-/*
- * Here is what the interrupt logic between a PCI device and the kernel looks
- * like:
- *
- * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
- * INTD). The device is uniquely identified by its bus-, and slot-number
- * (the function number does not matter here because all functions share
- * the same interrupt lines).
- *
- * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC
- * controller. Multiple interrupt lines may have to share the same
- * IOSAPIC pin (if they're level triggered and use the same polarity).
- * Each interrupt line has a unique Global System Interrupt (GSI) number
- * which can be calculated as the sum of the controller's base GSI number
- * and the IOSAPIC pin number to which the line connects.
- *
- * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the
- * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then
- * sent to the CPU.
- *
- * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is
- * used as architecture-independent interrupt handling mechanism in Linux.
- * As an IRQ is a number, we have to have
- * IA-64 interrupt vector number <-> IRQ number mapping. On smaller
- * systems, we use one-to-one mapping between IA-64 vector and IRQ. A
- * platform can implement platform_irq_to_vector(irq) and
- * platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
- * Please see also include/asm-ia64/hw_irq.h for those APIs.
- *
- * To sum up, there are three levels of mappings involved:
- *
- * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
- *
- * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
- * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ
- * (isa_irq) is the only exception in this source code.
- */
-
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/string.h>
-#include <linux/bootmem.h>
-
-#include <asm/delay.h>
-#include <asm/hw_irq.h>
-#include <asm/io.h>
-#include <asm/iosapic.h>
-#include <asm/machvec.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-
-#undef DEBUG_INTERRUPT_ROUTING
-
-#ifdef DEBUG_INTERRUPT_ROUTING
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-#define NR_PREALLOCATE_RTE_ENTRIES \
- (PAGE_SIZE / sizeof(struct iosapic_rte_info))
-#define RTE_PREALLOCATED (1)
-
-static DEFINE_SPINLOCK(iosapic_lock);
-
-/*
- * These tables map IA-64 vectors to the IOSAPIC pin that generates this
- * vector.
- */
-
-struct iosapic_rte_info {
- struct list_head rte_list; /* node in list of RTEs sharing the
- * same vector */
- char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this
- * IOSAPIC */
- char rte_index; /* IOSAPIC RTE index */
- int refcnt; /* reference counter */
- unsigned int flags; /* flags */
-} ____cacheline_aligned;
-
-static struct iosapic_intr_info {
- struct list_head rtes; /* RTEs using this vector (empty =>
- * not an IOSAPIC interrupt) */
- int count; /* # of RTEs that shares this vector */
- u32 low32; /* current value of low word of
- * Redirection table entry */
- unsigned int dest; /* destination CPU physical ID */
- unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
- unsigned char polarity: 1; /* interrupt polarity
- * (see iosapic.h) */
- unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
-} iosapic_intr_info[IA64_NUM_VECTORS];
-
-static struct iosapic {
- char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this
- * IOSAPIC */
- unsigned short num_rte; /* # of RTEs on this IOSAPIC */
- int rtes_inuse; /* # of RTEs in use on this IOSAPIC */
-#ifdef CONFIG_NUMA
- unsigned short node; /* numa node association via pxm */
-#endif
-} iosapic_lists[NR_IOSAPICS];
-
-static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */
-
-static int iosapic_kmalloc_ok;
-static LIST_HEAD(free_rte_list);
-
-#ifdef CONFIG_XEN
-#include <xen/interface/xen.h>
-#include <xen/interface/physdev.h>
-#include <asm/hypervisor.h>
-static inline unsigned int xen_iosapic_read(char __iomem *iosapic, unsigned int reg)
-{
- struct physdev_apic apic_op;
- int ret;
-
- apic_op.apic_physbase = (unsigned long)iosapic -
- __IA64_UNCACHED_OFFSET;
- apic_op.reg = reg;
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
- if (ret)
- return ret;
- return apic_op.value;
-}
-
-static inline void xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
-{
- struct physdev_apic apic_op;
-
- apic_op.apic_physbase = (unsigned long)iosapic -
- __IA64_UNCACHED_OFFSET;
- apic_op.reg = reg;
- apic_op.value = val;
- HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
-}
-
-static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg)
-{
- if (!is_running_on_xen()) {
- writel(reg, iosapic + IOSAPIC_REG_SELECT);
- return readl(iosapic + IOSAPIC_WINDOW);
- } else
- return xen_iosapic_read(iosapic, reg);
-}
-
-static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
-{
- if (!is_running_on_xen()) {
- writel(reg, iosapic + IOSAPIC_REG_SELECT);
- writel(val, iosapic + IOSAPIC_WINDOW);
- } else
- xen_iosapic_write(iosapic, reg, val);
-}
-
-int xen_assign_irq_vector(int irq)
-{
- struct physdev_irq irq_op;
-
- irq_op.irq = irq;
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
- return -ENOSPC;
-
- return irq_op.vector;
-}
-
-void xen_free_irq_vector(int vector)
-{
- struct physdev_irq irq_op;
-
- irq_op.vector = vector;
- if (HYPERVISOR_physdev_op(PHYSDEVOP_free_irq_vector, &irq_op))
- printk(KERN_WARNING "%s: xen_free_irq_vecotr fail vector=%d\n",
- __FUNCTION__, vector);
-}
-#endif /* XEN */
-
-/*
- * Find an IOSAPIC associated with a GSI
- */
-static inline int
-find_iosapic (unsigned int gsi)
-{
- int i;
-
- for (i = 0; i < NR_IOSAPICS; i++) {
- if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
- iosapic_lists[i].num_rte)
- return i;
- }
-
- return -1;
-}
-
-static inline int
-_gsi_to_vector (unsigned int gsi)
-{
- struct iosapic_intr_info *info;
- struct iosapic_rte_info *rte;
-
- for (info = iosapic_intr_info; info <
- iosapic_intr_info + IA64_NUM_VECTORS; ++info)
- list_for_each_entry(rte, &info->rtes, rte_list)
- if (rte->gsi_base + rte->rte_index == gsi)
- return info - iosapic_intr_info;
- return -1;
-}
-
-/*
- * Translate GSI number to the corresponding IA-64 interrupt vector. If no
- * entry exists, return -1.
- */
-inline int
-gsi_to_vector (unsigned int gsi)
-{
- return _gsi_to_vector(gsi);
-}
-
-int
-gsi_to_irq (unsigned int gsi)
-{
- unsigned long flags;
- int irq;
- /*
- * XXX fix me: this assumes an identity mapping between IA-64 vector
- * and Linux irq numbers...
- */
- spin_lock_irqsave(&iosapic_lock, flags);
- {
- irq = _gsi_to_vector(gsi);
- }
- spin_unlock_irqrestore(&iosapic_lock, flags);
-
- return irq;
-}
-
-static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
- unsigned int vec)
-{
- struct iosapic_rte_info *rte;
-
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
- if (rte->gsi_base + rte->rte_index == gsi)
- return rte;
- return NULL;
-}
-
-static void
-set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
-{
- unsigned long pol, trigger, dmode;
- u32 low32, high32;
- char __iomem *addr;
- int rte_index;
- char redir;
- struct iosapic_rte_info *rte;
-
- DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
-
- rte = gsi_vector_to_rte(gsi, vector);
- if (!rte)
- return; /* not an IOSAPIC interrupt */
-
- rte_index = rte->rte_index;
- addr = rte->addr;
- pol = iosapic_intr_info[vector].polarity;
- trigger = iosapic_intr_info[vector].trigger;
- dmode = iosapic_intr_info[vector].dmode;
-
- redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
-
-#ifdef CONFIG_SMP
- {
- unsigned int irq;
-
- for (irq = 0; irq < NR_IRQS; ++irq)
- if (irq_to_vector(irq) == vector) {
- set_irq_affinity_info(irq,
- (int)(dest & 0xffff),
- redir);
- break;
- }
- }
-#endif
-
- low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
- (trigger << IOSAPIC_TRIGGER_SHIFT) |
- (dmode << IOSAPIC_DELIVERY_SHIFT) |
- ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
- vector);
-
- /* dest contains both id and eid */
- high32 = (dest << IOSAPIC_DEST_SHIFT);
-
- iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
- iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
- iosapic_intr_info[vector].low32 = low32;
- iosapic_intr_info[vector].dest = dest;
-}
-
-static void
-nop (unsigned int irq)
-{
- /* do nothing... */
-}
-
-static void
-mask_irq (unsigned int irq)
-{
- unsigned long flags;
- char __iomem *addr;
- u32 low32;
- int rte_index;
- ia64_vector vec = irq_to_vector(irq);
- struct iosapic_rte_info *rte;
-
- if (list_empty(&iosapic_intr_info[vec].rtes))
- return; /* not an IOSAPIC interrupt! */
-
- spin_lock_irqsave(&iosapic_lock, flags);
- {
- /* set only the mask bit */
- low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
- rte_list) {
- addr = rte->addr;
- rte_index = rte->rte_index;
- iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
- }
- }
- spin_unlock_irqrestore(&iosapic_lock, flags);
-}
-
-static void
-unmask_irq (unsigned int irq)
-{
- unsigned long flags;
- char __iomem *addr;
- u32 low32;
- int rte_index;
- ia64_vector vec = irq_to_vector(irq);
- struct iosapic_rte_info *rte;
-
- if (list_empty(&iosapic_intr_info[vec].rtes))
- return; /* not an IOSAPIC interrupt! */
-
- spin_lock_irqsave(&iosapic_lock, flags);
- {
- low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
- rte_list) {
- addr = rte->addr;
- rte_index = rte->rte_index;
- iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
- }
- }
- spin_unlock_irqrestore(&iosapic_lock, flags);
-}
-
-
-static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
-{
-#ifdef CONFIG_SMP
- unsigned long flags;
- u32 high32, low32;
- int dest, rte_index;
- char __iomem *addr;
- int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
- ia64_vector vec;
- struct iosapic_rte_info *rte;
-
- irq &= (~IA64_IRQ_REDIRECTED);
- vec = irq_to_vector(irq);
-
- if (cpus_empty(mask))
- return;
-
- dest = cpu_physical_id(first_cpu(mask));
-
- if (list_empty(&iosapic_intr_info[vec].rtes))
- return; /* not an IOSAPIC interrupt */
-
- set_irq_affinity_info(irq, dest, redir);
-
- /* dest contains both id and eid */
- high32 = dest << IOSAPIC_DEST_SHIFT;
-
- spin_lock_irqsave(&iosapic_lock, flags);
- {
- low32 = iosapic_intr_info[vec].low32 &
- ~(7 << IOSAPIC_DELIVERY_SHIFT);
-
- if (redir)
- /* change delivery mode to lowest priority */
- low32 |= (IOSAPIC_LOWEST_PRIORITY <<
- IOSAPIC_DELIVERY_SHIFT);
- else
- /* change delivery mode to fixed */
- low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
-
- iosapic_intr_info[vec].low32 = low32;
- iosapic_intr_info[vec].dest = dest;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
- rte_list) {
- addr = rte->addr;
- rte_index = rte->rte_index;
- iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
- high32);
- iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
- }
- }
- spin_unlock_irqrestore(&iosapic_lock, flags);
-#endif
-}
-
-/*
- * Handlers for level-triggered interrupts.
- */
-
-static unsigned int
-iosapic_startup_level_irq (unsigned int irq)
-{
- unmask_irq(irq);
- return 0;
-}
-
-static void
-iosapic_end_level_irq (unsigned int irq)
-{
- ia64_vector vec = irq_to_vector(irq);
- struct iosapic_rte_info *rte;
-
- move_native_irq(irq);
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
- iosapic_eoi(rte->addr, vec);
-}
-
-#define iosapic_shutdown_level_irq mask_irq
-#define iosapic_enable_level_irq unmask_irq
-#define iosapic_disable_level_irq mask_irq
-#define iosapic_ack_level_irq nop
-
-struct hw_interrupt_type irq_type_iosapic_level = {
- .typename = "IO-SAPIC-level",
- .startup = iosapic_startup_level_irq,
- .shutdown = iosapic_shutdown_level_irq,
- .enable = iosapic_enable_level_irq,
- .disable = iosapic_disable_level_irq,
- .ack = iosapic_ack_level_irq,
- .end = iosapic_end_level_irq,
- .set_affinity = iosapic_set_affinity
-};
-
-/*
- * Handlers for edge-triggered interrupts.
- */
-
-static unsigned int
-iosapic_startup_edge_irq (unsigned int irq)
-{
- unmask_irq(irq);
- /*
- * IOSAPIC simply drops interrupts pended while the
- * corresponding pin was masked, so we can't know if an
- * interrupt is pending already. Let's hope not...
- */
- return 0;
-}
-
-static void
-iosapic_ack_edge_irq (unsigned int irq)
-{
- irq_desc_t *idesc = irq_desc + irq;
-
- move_native_irq(irq);
- /*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
- if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
- (IRQ_PENDING|IRQ_DISABLED))
- mask_irq(irq);
-}
-
-#define iosapic_enable_edge_irq unmask_irq
-#define iosapic_disable_edge_irq nop
-#define iosapic_end_edge_irq nop
-
-struct hw_interrupt_type irq_type_iosapic_edge = {
- .typename = "IO-SAPIC-edge",
- .startup = iosapic_startup_edge_irq,
- .shutdown = iosapic_disable_edge_irq,
- .enable = iosapic_enable_edge_irq,
- .disable = iosapic_disable_edge_irq,
- .ack = iosapic_ack_edge_irq,
- .end = iosapic_end_edge_irq,
- .set_affinity = iosapic_set_affinity
-};
-
-unsigned int
-iosapic_version (char __iomem *addr)
-{
- /*
- * IOSAPIC Version Register return 32 bit structure like:
- * {
- * unsigned int version : 8;
- * unsigned int reserved1 : 8;
- * unsigned int max_redir : 8;
- * unsigned int reserved2 : 8;
- * }
- */
- return iosapic_read(addr, IOSAPIC_VERSION);
-}
-
-static int iosapic_find_sharable_vector (unsigned long trigger,
- unsigned long pol)
-{
- int i, vector = -1, min_count = -1;
- struct iosapic_intr_info *info;
-
- /*
- * shared vectors for edge-triggered interrupts are not
- * supported yet
- */
- if (trigger == IOSAPIC_EDGE)
- return -1;
-
- for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
- info = &iosapic_intr_info[i];
- if (info->trigger == trigger && info->polarity == pol &&
- (info->dmode == IOSAPIC_FIXED || info->dmode ==
- IOSAPIC_LOWEST_PRIORITY)) {
- if (min_count == -1 || info->count < min_count) {
- vector = i;
- min_count = info->count;
- }
- }
- }
-
- return vector;
-}
-
-/*
- * if the given vector is already owned by other,
- * assign a new vector for the other and make the vector available
- */
-static void __init
-iosapic_reassign_vector (int vector)
-{
- int new_vector;
-
- if (!list_empty(&iosapic_intr_info[vector].rtes)) {
- new_vector = assign_irq_vector(AUTO_ASSIGN);
- if (new_vector < 0)
- panic("%s: out of interrupt vectors!\n", __FUNCTION__);
- printk(KERN_INFO "Reassigning vector %d to %d\n",
- vector, new_vector);
- memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
- sizeof(struct iosapic_intr_info));
- INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
- list_move(iosapic_intr_info[vector].rtes.next,
- &iosapic_intr_info[new_vector].rtes);
- memset(&iosapic_intr_info[vector], 0,
- sizeof(struct iosapic_intr_info));
- iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
- INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
- }
-}
-
-static struct iosapic_rte_info *iosapic_alloc_rte (void)
-{
- int i;
- struct iosapic_rte_info *rte;
- int preallocated = 0;
-
- if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
- rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
- NR_PREALLOCATE_RTE_ENTRIES);
- if (!rte)
- return NULL;
- for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
- list_add(&rte->rte_list, &free_rte_list);
- }
-
- if (!list_empty(&free_rte_list)) {
- rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
- rte_list);
- list_del(&rte->rte_list);
- preallocated++;
- } else {
- rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
- if (!rte)
- return NULL;
- }
-
- memset(rte, 0, sizeof(struct iosapic_rte_info));
- if (preallocated)
- rte->flags |= RTE_PREALLOCATED;
-
- return rte;
-}
-
-static void iosapic_free_rte (struct iosapic_rte_info *rte)
-{
- if (rte->flags & RTE_PREALLOCATED)
- list_add_tail(&rte->rte_list, &free_rte_list);
- else
- kfree(rte);
-}
-
-static inline int vector_is_shared (int vector)
-{
- return (iosapic_intr_info[vector].count > 1);
-}
-
-static int
-register_intr (unsigned int gsi, int vector, unsigned char delivery,
- unsigned long polarity, unsigned long trigger)
-{
- irq_desc_t *idesc;
- struct hw_interrupt_type *irq_type;
- int rte_index;
- int index;
- unsigned long gsi_base;
- void __iomem *iosapic_address;
- struct iosapic_rte_info *rte;
-
- index = find_iosapic(gsi);
- if (index < 0) {
- printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
- __FUNCTION__, gsi);
- return -ENODEV;
- }
-
- iosapic_address = iosapic_lists[index].addr;
- gsi_base = iosapic_lists[index].gsi_base;
-
- rte = gsi_vector_to_rte(gsi, vector);
- if (!rte) {
- rte = iosapic_alloc_rte();
- if (!rte) {
- printk(KERN_WARNING "%s: cannot allocate memory\n",
- __FUNCTION__);
- return -ENOMEM;
- }
-
- rte_index = gsi - gsi_base;
- rte->rte_index = rte_index;
- rte->addr = iosapic_address;
- rte->gsi_base = gsi_base;
- rte->refcnt++;
- list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
- iosapic_intr_info[vector].count++;
- iosapic_lists[index].rtes_inuse++;
- }
- else if (vector_is_shared(vector)) {
- struct iosapic_intr_info *info = &iosapic_intr_info[vector];
- if (info->trigger != trigger || info->polarity != polarity) {
- printk (KERN_WARNING
- "%s: cannot override the interrupt\n",
- __FUNCTION__);
- return -EINVAL;
- }
- }
-
- iosapic_intr_info[vector].polarity = polarity;
- iosapic_intr_info[vector].dmode = delivery;
- iosapic_intr_info[vector].trigger = trigger;
-
- if (is_running_on_xen())
- return 0;
-
- if (trigger == IOSAPIC_EDGE)
- irq_type = &irq_type_iosapic_edge;
- else
- irq_type = &irq_type_iosapic_level;
-
- idesc = irq_desc + vector;
- if (idesc->chip != irq_type) {
- if (idesc->chip != &no_irq_type)
- printk(KERN_WARNING
- "%s: changing vector %d from %s to %s\n",
- __FUNCTION__, vector,
- idesc->chip->typename, irq_type->typename);
- idesc->chip = irq_type;
- }
- return 0;
-}
-
-static unsigned int
-get_target_cpu (unsigned int gsi, int vector)
-{
-#ifdef CONFIG_SMP
- static int cpu = -1;
- extern int cpe_vector;
-
- /*
- * In case of vector shared by multiple RTEs, all RTEs that
- * share the vector need to use the same destination CPU.
- */
- if (!list_empty(&iosapic_intr_info[vector].rtes))
- return iosapic_intr_info[vector].dest;
-
- /*
- * If the platform supports redirection via XTP, let it
- * distribute interrupts.
- */
- if (smp_int_redirect & SMP_IRQ_REDIRECTION)
- return cpu_physical_id(smp_processor_id());
-
- /*
- * Some interrupts (ACPI SCI, for instance) are registered
- * before the BSP is marked as online.
- */
- if (!cpu_online(smp_processor_id()))
- return cpu_physical_id(smp_processor_id());
-
-#ifdef CONFIG_ACPI
- if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
- return get_cpei_target_cpu();
-#endif
-
-#ifdef CONFIG_NUMA
- {
- int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
- cpumask_t cpu_mask;
-
- iosapic_index = find_iosapic(gsi);
- if (iosapic_index < 0 ||
- iosapic_lists[iosapic_index].node == MAX_NUMNODES)
- goto skip_numa_setup;
-
- cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
-
- for_each_cpu_mask(numa_cpu, cpu_mask) {
- if (!cpu_online(numa_cpu))
- cpu_clear(numa_cpu, cpu_mask);
- }
-
- num_cpus = cpus_weight(cpu_mask);
-
- if (!num_cpus)
- goto skip_numa_setup;
-
- /* Use vector assignment to distribute across cpus in node */
- cpu_index = vector % num_cpus;
-
- for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
- numa_cpu = next_cpu(numa_cpu, cpu_mask);
-
- if (numa_cpu != NR_CPUS)
- return cpu_physical_id(numa_cpu);
- }
-skip_numa_setup:
-#endif
- /*
- * Otherwise, round-robin interrupt vectors across all the
- * processors. (It'd be nice if we could be smarter in the
- * case of NUMA.)
- */
- do {
- if (++cpu >= NR_CPUS)
- cpu = 0;
- } while (!cpu_online(cpu));
-
- return cpu_physical_id(cpu);
-#else /* CONFIG_SMP */
- return cpu_physical_id(smp_processor_id());
-#endif
-}
-
-/*
- * ACPI can describe IOSAPIC interrupts via static tables and namespace
- * methods. This provides an interface to register those interrupts and
- * program the IOSAPIC RTE.
- */
-int
-iosapic_register_intr (unsigned int gsi,
- unsigned long polarity, unsigned long trigger)
-{
- int vector, mask = 1, err;
- unsigned int dest;
- unsigned long flags;
- struct iosapic_rte_info *rte;
- u32 low32;
-again:
- /*
- * If this GSI has already been registered (i.e., it's a
- * shared interrupt, or we lost a race to register it),
- * don't touch the RTE.
- */
- spin_lock_irqsave(&iosapic_lock, flags);
- {
- vector = gsi_to_vector(gsi);
- if (vector > 0) {
- rte = gsi_vector_to_rte(gsi, vector);
- rte->refcnt++;
- spin_unlock_irqrestore(&iosapic_lock, flags);
- return vector;
- }
- }
- spin_unlock_irqrestore(&iosapic_lock, flags);
-
- /* If vector is running out, we try to find a sharable vector */
- vector = assign_irq_vector(AUTO_ASSIGN);
- if (vector < 0) {
- vector = iosapic_find_sharable_vector(trigger, polarity);
- if (vector < 0)
- return -ENOSPC;
- }
-
- spin_lock_irqsave(&irq_desc[vector].lock, flags);
- spin_lock(&iosapic_lock);
- {
- if (gsi_to_vector(gsi) > 0) {
- if (list_empty(&iosapic_intr_info[vector].rtes))
- free_irq_vector(vector);
- spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_desc[vector].lock,
- flags);
- goto again;
- }
-
- dest = get_target_cpu(gsi, vector);
- err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
- polarity, trigger);
- if (err < 0) {
- spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_desc[vector].lock,
- flags);
- return err;
- }
-
- /*
- * If the vector is shared and already unmasked for
- * other interrupt sources, don't mask it.
- */
- low32 = iosapic_intr_info[vector].low32;
- if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
- mask = 0;
- set_rte(gsi, vector, dest, mask);
- }
- spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_desc[vector].lock, flags);
-
- printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
- gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
- (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
- cpu_logical_id(dest), dest, vector);
-
- return vector;
-}
-
-void
-iosapic_unregister_intr (unsigned int gsi)
-{
- unsigned long flags;
- int irq, vector, index;
- irq_desc_t *idesc;
- u32 low32;
- unsigned long trigger, polarity;
- unsigned int dest;
- struct iosapic_rte_info *rte;
-
- /*
- * If the irq associated with the gsi is not found,
- * iosapic_unregister_intr() is unbalanced. We need to check
- * this again after getting locks.
- */
- irq = gsi_to_irq(gsi);
- if (irq < 0) {
- printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
- gsi);
- WARN_ON(1);
- return;
- }
- vector = irq_to_vector(irq);
-
- idesc = irq_desc + irq;
- spin_lock_irqsave(&idesc->lock, flags);
- spin_lock(&iosapic_lock);
- {
- if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
- printk(KERN_ERR
- "iosapic_unregister_intr(%u) unbalanced\n",
- gsi);
- WARN_ON(1);
- goto out;
- }
-
- if (--rte->refcnt > 0)
- goto out;
-
- /* Mask the interrupt */
- low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
- iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
- low32);
-
- /* Remove the rte entry from the list */
- list_del(&rte->rte_list);
- iosapic_intr_info[vector].count--;
- iosapic_free_rte(rte);
- index = find_iosapic(gsi);
- iosapic_lists[index].rtes_inuse--;
- WARN_ON(iosapic_lists[index].rtes_inuse < 0);
-
- trigger = iosapic_intr_info[vector].trigger;
- polarity = iosapic_intr_info[vector].polarity;
- dest = iosapic_intr_info[vector].dest;
- printk(KERN_INFO
- "GSI %u (%s, %s) -> CPU %d (0x%04x)"
- " vector %d unregistered\n",
- gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
- (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
- cpu_logical_id(dest), dest, vector);
-
- if (list_empty(&iosapic_intr_info[vector].rtes)) {
- /* Sanity check */
- BUG_ON(iosapic_intr_info[vector].count);
-
- /* Clear the interrupt controller descriptor */
- idesc->chip = &no_irq_type;
-
- /* Clear the interrupt information */
- memset(&iosapic_intr_info[vector], 0,
- sizeof(struct iosapic_intr_info));
- iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
- INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
-
- if (idesc->action) {
- printk(KERN_ERR
- "interrupt handlers still exist on"
- "IRQ %u\n", irq);
- WARN_ON(1);
- }
-
- /* Free the interrupt vector */
- free_irq_vector(vector);
- }
- }
- out:
- spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&idesc->lock, flags);
-}
-
-/*
- * ACPI calls this when it finds an entry for a platform interrupt.
- */
-int __init
-iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
- int iosapic_vector, u16 eid, u16 id,
- unsigned long polarity, unsigned long trigger)
-{
- static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
- unsigned char delivery;
- int vector, mask = 0;
- unsigned int dest = ((id << 8) | eid) & 0xffff;
-
- switch (int_type) {
- case ACPI_INTERRUPT_PMI:
- vector = iosapic_vector;
- /*
- * since PMI vector is alloc'd by FW(ACPI) not by kernel,
- * we need to make sure the vector is available
- */
- iosapic_reassign_vector(vector);
- delivery = IOSAPIC_PMI;
- break;
- case ACPI_INTERRUPT_INIT:
- vector = assign_irq_vector(AUTO_ASSIGN);
- if (vector < 0)
- panic("%s: out of interrupt vectors!\n", __FUNCTION__);
- delivery = IOSAPIC_INIT;
- break;
- case ACPI_INTERRUPT_CPEI:
- vector = IA64_CPE_VECTOR;
- delivery = IOSAPIC_LOWEST_PRIORITY;
- mask = 1;
- break;
- default:
- printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
- int_type);
- return -1;
- }
-
- register_intr(gsi, vector, delivery, polarity, trigger);
-
- printk(KERN_INFO
- "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
- " vector %d\n",
- int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
- int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
- (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
- cpu_logical_id(dest), dest, vector);
-
- set_rte(gsi, vector, dest, mask);
- return vector;
-}
-
-/*
- * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
- */
-void __init
-iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
- unsigned long polarity,
- unsigned long trigger)
-{
- int vector;
- unsigned int dest = cpu_physical_id(smp_processor_id());
-
- vector = isa_irq_to_vector(isa_irq);
-
- register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
-
- DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
- isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
- polarity == IOSAPIC_POL_HIGH ? "high" : "low",
- cpu_logical_id(dest), dest, vector);
-
- set_rte(gsi, vector, dest, 1);
-}
-
-void __init
-iosapic_system_init (int system_pcat_compat)
-{
- int vector;
-
- for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
- iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
- /* mark as unused */
- INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
- }
-
- pcat_compat = system_pcat_compat;
- if (is_running_on_xen())
- return;
-
- if (pcat_compat) {
- /*
- * Disable the compatibility mode interrupts (8259 style),
- * needs IN/OUT support enabled.
- */
- printk(KERN_INFO
- "%s: Disabling PC-AT compatible 8259 interrupts\n",
- __FUNCTION__);
- outb(0xff, 0xA1);
- outb(0xff, 0x21);
- }
-}
-
-static inline int
-iosapic_alloc (void)
-{
- int index;
-
- for (index = 0; index < NR_IOSAPICS; index++)
- if (!iosapic_lists[index].addr)
- return index;
-
- printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
- return -1;
-}
-
-static inline void
-iosapic_free (int index)
-{
- memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
-}
-
-static inline int
-iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
-{
- int index;
- unsigned int gsi_end, base, end;
-
- /* check gsi range */
- gsi_end = gsi_base + ((ver >> 16) & 0xff);
- for (index = 0; index < NR_IOSAPICS; index++) {
- if (!iosapic_lists[index].addr)
- continue;
-
- base = iosapic_lists[index].gsi_base;
- end = base + iosapic_lists[index].num_rte - 1;
-
- if (gsi_end < base || end < gsi_base)
- continue; /* OK */
-
- return -EBUSY;
- }
- return 0;
-}
-
-int __devinit
-iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
-{
- int num_rte, err, index;
- unsigned int isa_irq, ver;
- char __iomem *addr;
- unsigned long flags;
-
- spin_lock_irqsave(&iosapic_lock, flags);
- {
- addr = ioremap(phys_addr, 0);
- ver = iosapic_version(addr);
-
- if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
- iounmap(addr);
- spin_unlock_irqrestore(&iosapic_lock, flags);
- return err;
- }
-
- /*
- * The MAX_REDIR register holds the highest input pin
- * number (starting from 0).
- * We add 1 so that we can use it for number of pins (= RTEs)
- */
- num_rte = ((ver >> 16) & 0xff) + 1;
-
- index = iosapic_alloc();
- iosapic_lists[index].addr = addr;
- iosapic_lists[index].gsi_base = gsi_base;
- iosapic_lists[index].num_rte = num_rte;
-#ifdef CONFIG_NUMA
- iosapic_lists[index].node = MAX_NUMNODES;
-#endif
- }
- spin_unlock_irqrestore(&iosapic_lock, flags);
-
- if ((gsi_base == 0) && pcat_compat) {
- /*
- * Map the legacy ISA devices into the IOSAPIC data. Some of
- * these may get reprogrammed later on with data from the ACPI
- * Interrupt Source Override table.
- */
- for (isa_irq = 0; isa_irq < 16; ++isa_irq)
- iosapic_override_isa_irq(isa_irq, isa_irq,
- IOSAPIC_POL_HIGH,
- IOSAPIC_EDGE);
- }
- return 0;
-}
-
-#ifdef CONFIG_HOTPLUG
-int
-iosapic_remove (unsigned int gsi_base)
-{
- int index, err = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&iosapic_lock, flags);
- {
- index = find_iosapic(gsi_base);
- if (index < 0) {
- printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
- __FUNCTION__, gsi_base);
- goto out;
- }
-
- if (iosapic_lists[index].rtes_inuse) {
- err = -EBUSY;
- printk(KERN_WARNING
- "%s: IOSAPIC for GSI base %u is busy\n",
- __FUNCTION__, gsi_base);
- goto out;
- }
-
- iounmap(iosapic_lists[index].addr);
- iosapic_free(index);
- }
- out:
- spin_unlock_irqrestore(&iosapic_lock, flags);
- return err;
-}
-#endif /* CONFIG_HOTPLUG */
-
-#ifdef CONFIG_NUMA
-void __devinit
-map_iosapic_to_node(unsigned int gsi_base, int node)
-{
- int index;
-
- index = find_iosapic(gsi_base);
- if (index < 0) {
- printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
- __FUNCTION__, gsi_base);
- return;
- }
- iosapic_lists[index].node = node;
- return;
-}
-#endif
-
-static int __init iosapic_enable_kmalloc (void)
-{
- iosapic_kmalloc_ok = 1;
- return 0;
-}
-core_initcall (iosapic_enable_kmalloc);
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c b/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c
deleted file mode 100644
index 5a9db93417..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c
+++ /dev/null
@@ -1,649 +0,0 @@
-/*
- * linux/arch/ia64/kernel/irq.c
- *
- * Copyright (C) 1998-2001 Hewlett-Packard Co
- * Stephane Eranian <eranian@hpl.hp.com>
- * David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 6/10/99: Updated to bring in sync with x86 version to facilitate
- * support for SMP and different interrupt controllers.
- *
- * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
- * PCI to vector allocation routine.
- * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
- * Added CPU Hotplug handling for IPF.
- */
-
-#include <linux/module.h>
-
-#include <linux/jiffies.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/kernel_stat.h>
-#include <linux/slab.h>
-#include <linux/ptrace.h>
-#include <linux/random.h> /* for rand_initialize_irq() */
-#include <linux/signal.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/threads.h>
-#include <linux/bitops.h>
-#ifdef CONFIG_XEN
-#include <linux/cpu.h>
-#endif
-
-#include <asm/delay.h>
-#include <asm/intrinsics.h>
-#include <asm/io.h>
-#include <asm/hw_irq.h>
-#include <asm/machvec.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_PERFMON
-# include <asm/perfmon.h>
-#endif
-
-#define IRQ_DEBUG 0
-
-/* These can be overridden in platform_irq_init */
-int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR;
-int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR;
-
-/* default base addr of IPI table */
-void __iomem *ipi_base_addr = ((void __iomem *)
- (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
-
-/*
- * Legacy IRQ to IA-64 vector translation table.
- */
-__u8 isa_irq_to_vector_map[16] = {
- /* 8259 IRQ translation, first 16 entries */
- 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
- 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
-};
-EXPORT_SYMBOL(isa_irq_to_vector_map);
-
-static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)];
-
-int
-assign_irq_vector (int irq)
-{
- int pos, vector;
-
-#ifdef CONFIG_XEN
- if (is_running_on_xen()) {
- extern int xen_assign_irq_vector(int);
- return xen_assign_irq_vector(irq);
- }
-#endif
- again:
- pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
- vector = IA64_FIRST_DEVICE_VECTOR + pos;
- if (vector > IA64_LAST_DEVICE_VECTOR)
- return -ENOSPC;
- if (test_and_set_bit(pos, ia64_vector_mask))
- goto again;
- return vector;
-}
-
-void
-free_irq_vector (int vector)
-{
- int pos;
-
- if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR)
- return;
-
-#ifdef CONFIG_XEN
- if (is_running_on_xen()) {
- extern void xen_free_irq_vector(int);
- xen_free_irq_vector(vector);
- return;
- }
-#endif
- pos = vector - IA64_FIRST_DEVICE_VECTOR;
- if (!test_and_clear_bit(pos, ia64_vector_mask))
- printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
-}
-
-int
-reserve_irq_vector (int vector)
-{
- int pos;
-
- if (vector < IA64_FIRST_DEVICE_VECTOR ||
- vector > IA64_LAST_DEVICE_VECTOR)
- return -EINVAL;
-
- pos = vector - IA64_FIRST_DEVICE_VECTOR;
- return test_and_set_bit(pos, ia64_vector_mask);
-}
-
-#ifdef CONFIG_SMP
-# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE)
-#else
-# define IS_RESCHEDULE(vec) (0)
-#endif
-/*
- * That's where the IVT branches when we get an external
- * interrupt. This branches to the correct hardware IRQ handler via
- * function ptr.
- */
-void
-ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
-{
- unsigned long saved_tpr;
-
-#if IRQ_DEBUG
- {
- unsigned long bsp, sp;
-
- /*
- * Note: if the interrupt happened while executing in
- * the context switch routine (ia64_switch_to), we may
- * get a spurious stack overflow here. This is
- * because the register and the memory stack are not
- * switched atomically.
- */
- bsp = ia64_getreg(_IA64_REG_AR_BSP);
- sp = ia64_getreg(_IA64_REG_SP);
-
- if ((sp - bsp) < 1024) {
- static unsigned char count;
- static long last_time;
-
- if (jiffies - last_time > 5*HZ)
- count = 0;
- if (++count < 5) {
- last_time = jiffies;
- printk("ia64_handle_irq: DANGER: less than "
- "1KB of free stack space!!\n"
- "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
- }
- }
- }
-#endif /* IRQ_DEBUG */
-
- /*
- * Always set TPR to limit maximum interrupt nesting depth to
- * 16 (without this, it would be ~240, which could easily lead
- * to kernel stack overflows).
- */
- irq_enter();
- saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
- ia64_srlz_d();
- while (vector != IA64_SPURIOUS_INT_VECTOR) {
- if (!IS_RESCHEDULE(vector)) {
- ia64_setreg(_IA64_REG_CR_TPR, vector);
- ia64_srlz_d();
-
- __do_IRQ(local_vector_to_irq(vector), regs);
-
- /*
- * Disable interrupts and send EOI:
- */
- local_irq_disable();
- ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
- }
- ia64_eoi();
- vector = ia64_get_ivr();
- }
- /*
- * This must be done *after* the ia64_eoi(). For example, the keyboard softirq
- * handler needs to be able to wait for further keyboard interrupts, which can't
- * come through until ia64_eoi() has been done.
- */
- irq_exit();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * This function emulates a interrupt processing when a cpu is about to be
- * brought down.
- */
-void ia64_process_pending_intr(void)
-{
- ia64_vector vector;
- unsigned long saved_tpr;
- extern unsigned int vectors_in_migration[NR_IRQS];
-
- vector = ia64_get_ivr();
-
- irq_enter();
- saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
- ia64_srlz_d();
-
- /*
- * Perform normal interrupt style processing
- */
- while (vector != IA64_SPURIOUS_INT_VECTOR) {
- if (!IS_RESCHEDULE(vector)) {
- ia64_setreg(_IA64_REG_CR_TPR, vector);
- ia64_srlz_d();
-
- /*
- * Now try calling normal ia64_handle_irq as it would have got called
- * from a real intr handler. Try passing null for pt_regs, hopefully
- * it will work. I hope it works!.
- * Probably could shared code.
- */
- vectors_in_migration[local_vector_to_irq(vector)]=0;
- __do_IRQ(local_vector_to_irq(vector), NULL);
-
- /*
- * Disable interrupts and send EOI
- */
- local_irq_disable();
- ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
- }
- ia64_eoi();
- vector = ia64_get_ivr();
- }
- irq_exit();
-}
-#endif
-
-
-#ifdef CONFIG_SMP
-extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
-
-static struct irqaction ipi_irqaction = {
- .handler = handle_IPI,
- .flags = IRQF_DISABLED,
- .name = "IPI"
-};
-#endif
-
-#ifdef CONFIG_XEN
-#include <xen/evtchn.h>
-#include <xen/interface/callback.h>
-
-static DEFINE_PER_CPU(int, timer_irq) = -1;
-static DEFINE_PER_CPU(int, ipi_irq) = -1;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, cmc_irq) = -1;
-static DEFINE_PER_CPU(int, cmcp_irq) = -1;
-static DEFINE_PER_CPU(int, cpep_irq) = -1;
-static char timer_name[NR_CPUS][15];
-static char ipi_name[NR_CPUS][15];
-static char resched_name[NR_CPUS][15];
-static char cmc_name[NR_CPUS][15];
-static char cmcp_name[NR_CPUS][15];
-static char cpep_name[NR_CPUS][15];
-
-struct saved_irq {
- unsigned int irq;
- struct irqaction *action;
-};
-/* 16 should be far optimistic value, since only several percpu irqs
- * are registered early.
- */
-#define MAX_LATE_IRQ 16
-static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ];
-static unsigned short late_irq_cnt = 0;
-static unsigned short saved_irq_cnt = 0;
-static int xen_slab_ready = 0;
-
-#ifdef CONFIG_SMP
-/* Dummy stub. Though we may check RESCHEDULE_VECTOR before __do_IRQ,
- * it ends up to issue several memory accesses upon percpu data and
- * thus adds unnecessary traffic to other paths.
- */
-static irqreturn_t
-handle_reschedule(int irq, void *dev_id, struct pt_regs *regs)
-{
-
- return IRQ_HANDLED;
-}
-
-static struct irqaction resched_irqaction = {
- .handler = handle_reschedule,
- .flags = SA_INTERRUPT,
- .name = "RESCHED"
-};
-#endif
-
-/*
- * This is xen version percpu irq registration, which needs bind
- * to xen specific evtchn sub-system. One trick here is that xen
- * evtchn binding interface depends on kmalloc because related
- * port needs to be freed at device/cpu down. So we cache the
- * registration on BSP before slab is ready and then deal them
- * at later point. For rest instances happening after slab ready,
- * we hook them to xen evtchn immediately.
- *
- * FIXME: MCA is not supported by far, and thus "nomca" boot param is
- * required.
- */
-static void
-xen_register_percpu_irq(unsigned int cpu, unsigned int vec,
- struct irqaction *action, int save)
-{
- irq_desc_t *desc;
- int irq = 0;
-
- if (xen_slab_ready) {
- switch (vec) {
- case IA64_TIMER_VECTOR:
- sprintf(timer_name[cpu], "%s%d", action->name, cpu);
- irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
- action->handler, action->flags,
- timer_name[cpu], action->dev_id);
- per_cpu(timer_irq,cpu) = irq;
- break;
- case IA64_IPI_RESCHEDULE:
- sprintf(resched_name[cpu], "%s%d", action->name, cpu);
- irq = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, cpu,
- action->handler, action->flags,
- resched_name[cpu], action->dev_id);
- per_cpu(resched_irq,cpu) = irq;
- break;
- case IA64_IPI_VECTOR:
- sprintf(ipi_name[cpu], "%s%d", action->name, cpu);
- irq = bind_ipi_to_irqhandler(IPI_VECTOR, cpu,
- action->handler, action->flags,
- ipi_name[cpu], action->dev_id);
- per_cpu(ipi_irq,cpu) = irq;
- break;
- case IA64_CMC_VECTOR:
- sprintf(cmc_name[cpu], "%s%d", action->name, cpu);
- irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu,
- action->handler,
- action->flags,
- cmc_name[cpu],
- action->dev_id);
- per_cpu(cmc_irq,cpu) = irq;
- break;
- case IA64_CMCP_VECTOR:
- sprintf(cmcp_name[cpu], "%s%d", action->name, cpu);
- irq = bind_ipi_to_irqhandler(CMCP_VECTOR, cpu,
- action->handler,
- action->flags,
- cmcp_name[cpu],
- action->dev_id);
- per_cpu(cmcp_irq,cpu) = irq;
- break;
- case IA64_CPEP_VECTOR:
- sprintf(cpep_name[cpu], "%s%d", action->name, cpu);
- irq = bind_ipi_to_irqhandler(CPEP_VECTOR, cpu,
- action->handler,
- action->flags,
- cpep_name[cpu],
- action->dev_id);
- per_cpu(cpep_irq,cpu) = irq;
- break;
- case IA64_CPE_VECTOR:
- case IA64_MCA_RENDEZ_VECTOR:
- case IA64_PERFMON_VECTOR:
- case IA64_MCA_WAKEUP_VECTOR:
- case IA64_SPURIOUS_INT_VECTOR:
- /* No need to complain, these aren't supported. */
- break;
- default:
- printk(KERN_WARNING "Percpu irq %d is unsupported "
- "by xen!\n", vec);
- break;
- }
- BUG_ON(irq < 0);
-
- if (irq > 0) {
- /*
- * Mark percpu. Without this, migrate_irqs() will
- * mark the interrupt for migrations and trigger it
- * on cpu hotplug.
- */
- desc = irq_desc + irq;
- desc->status |= IRQ_PER_CPU;
- }
- }
-
- /* For BSP, we cache registered percpu irqs, and then re-walk
- * them when initializing APs
- */
- if (!cpu && save) {
- BUG_ON(saved_irq_cnt == MAX_LATE_IRQ);
- saved_percpu_irqs[saved_irq_cnt].irq = vec;
- saved_percpu_irqs[saved_irq_cnt].action = action;
- saved_irq_cnt++;
- if (!xen_slab_ready)
- late_irq_cnt++;
- }
-}
-
-static void
-xen_bind_early_percpu_irq (void)
-{
- int i;
-
- xen_slab_ready = 1;
- /* There's no race when accessing this cached array, since only
- * BSP will face with such step shortly
- */
- for (i = 0; i < late_irq_cnt; i++)
- xen_register_percpu_irq(smp_processor_id(),
- saved_percpu_irqs[i].irq,
- saved_percpu_irqs[i].action, 0);
-}
-
-/* FIXME: There's no obvious point to check whether slab is ready. So
- * a hack is used here by utilizing a late time hook.
- */
-extern void (*late_time_init)(void);
-extern char xen_event_callback;
-extern void xen_init_IRQ(void);
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int __devinit
-unbind_evtchn_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- if (action == CPU_DEAD) {
- /* Unregister evtchn. */
- if (per_cpu(cpep_irq,cpu) >= 0) {
- unbind_from_irqhandler(per_cpu(cpep_irq, cpu), NULL);
- per_cpu(cpep_irq, cpu) = -1;
- }
- if (per_cpu(cmcp_irq,cpu) >= 0) {
- unbind_from_irqhandler(per_cpu(cmcp_irq, cpu), NULL);
- per_cpu(cmcp_irq, cpu) = -1;
- }
- if (per_cpu(cmc_irq,cpu) >= 0) {
- unbind_from_irqhandler(per_cpu(cmc_irq, cpu), NULL);
- per_cpu(cmc_irq, cpu) = -1;
- }
- if (per_cpu(ipi_irq,cpu) >= 0) {
- unbind_from_irqhandler (per_cpu(ipi_irq, cpu), NULL);
- per_cpu(ipi_irq, cpu) = -1;
- }
- if (per_cpu(resched_irq,cpu) >= 0) {
- unbind_from_irqhandler (per_cpu(resched_irq, cpu),
- NULL);
- per_cpu(resched_irq, cpu) = -1;
- }
- if (per_cpu(timer_irq,cpu) >= 0) {
- unbind_from_irqhandler (per_cpu(timer_irq, cpu), NULL);
- per_cpu(timer_irq, cpu) = -1;
- }
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block unbind_evtchn_notifier = {
- .notifier_call = unbind_evtchn_callback,
- .priority = 0
-};
-#endif
-
-DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
-void xen_smp_intr_init_early(unsigned int cpu)
-{
-#ifdef CONFIG_SMP
- unsigned int i;
-
- for (i = 0; i < saved_irq_cnt; i++)
- xen_register_percpu_irq(cpu, saved_percpu_irqs[i].irq,
- saved_percpu_irqs[i].action, 0);
-#endif
-}
-
-void xen_smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
- unsigned int cpu = smp_processor_id();
- struct callback_register event = {
- .type = CALLBACKTYPE_event,
- .address = (unsigned long)&xen_event_callback,
- };
-
- if (cpu == 0) {
- /* Initialization was already done for boot cpu. */
-#ifdef CONFIG_HOTPLUG_CPU
- /* Register the notifier only once. */
- register_cpu_notifier(&unbind_evtchn_notifier);
-#endif
- return;
- }
-
- /* This should be piggyback when setup vcpu guest context */
- BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
-#endif /* CONFIG_SMP */
-}
-
-void
-xen_irq_init(void)
-{
- struct callback_register event = {
- .type = CALLBACKTYPE_event,
- .address = (unsigned long)&xen_event_callback,
- };
-
- xen_init_IRQ();
- BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
- late_time_init = xen_bind_early_percpu_irq;
-#ifdef CONFIG_SMP
- register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
-#endif
-}
-
-void
-xen_platform_send_ipi(int cpu, int vector, int delivery_mode, int redirect)
-{
- int irq = -1;
-
-#ifdef CONFIG_SMP
- /* TODO: we need to call vcpu_up here */
- if (unlikely(vector == ap_wakeup_vector)) {
- extern void xen_send_ipi (int cpu, int vec);
-
- /* XXX
- * This should be in __cpu_up(cpu) in ia64 smpboot.c
- * like x86. But don't want to modify it,
- * keep it untouched.
- */
- xen_smp_intr_init_early(cpu);
-
- xen_send_ipi (cpu, vector);
- //vcpu_prepare_and_up(cpu);
- return;
- }
-#endif
-
- switch (vector) {
- case IA64_IPI_VECTOR:
- irq = per_cpu(ipi_to_irq, cpu)[IPI_VECTOR];
- break;
- case IA64_IPI_RESCHEDULE:
- irq = per_cpu(ipi_to_irq, cpu)[RESCHEDULE_VECTOR];
- break;
- case IA64_CMCP_VECTOR:
- irq = per_cpu(ipi_to_irq, cpu)[CMCP_VECTOR];
- break;
- case IA64_CPEP_VECTOR:
- irq = per_cpu(ipi_to_irq, cpu)[CPEP_VECTOR];
- break;
- default:
- printk(KERN_WARNING "Unsupported IPI type 0x%x\n",
- vector);
- irq = 0;
- break;
- }
-
- BUG_ON(irq < 0);
- notify_remote_via_irq(irq);
- return;
-}
-#endif /* CONFIG_XEN */
-
-void
-register_percpu_irq (ia64_vector vec, struct irqaction *action)
-{
- irq_desc_t *desc;
- unsigned int irq;
-
-#ifdef CONFIG_XEN
- if (is_running_on_xen())
- return xen_register_percpu_irq(smp_processor_id(),
- vec, action, 1);
-#endif
-
- for (irq = 0; irq < NR_IRQS; ++irq)
- if (irq_to_vector(irq) == vec) {
- desc = irq_desc + irq;
- desc->status |= IRQ_PER_CPU;
- desc->chip = &irq_type_ia64_lsapic;
- if (action)
- setup_irq(irq, action);
- }
-}
-
-void __init
-init_IRQ (void)
-{
- register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
-#ifdef CONFIG_SMP
- register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
-#endif
-#ifdef CONFIG_PERFMON
- pfm_init_percpu();
-#endif
- platform_irq_init();
-#ifdef CONFIG_XEN
- if (is_running_on_xen() && !ia64_platform_is("xen"))
- xen_irq_init();
-#endif
-}
-
-void
-ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
-{
- void __iomem *ipi_addr;
- unsigned long ipi_data;
- unsigned long phys_cpu_id;
-
-#ifdef CONFIG_XEN
- if (is_running_on_xen()) {
- xen_platform_send_ipi(cpu, vector, delivery_mode, redirect);
- return;
- }
-#endif
-
-#ifdef CONFIG_SMP
- phys_cpu_id = cpu_physical_id(cpu);
-#else
- phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;
-#endif
-
- /*
- * cpu number is in 8bit ID and 8bit EID
- */
-
- ipi_data = (delivery_mode << 8) | (vector & 0xff);
- ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
-
- writeq(ipi_data, ipi_addr);
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S b/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S
deleted file mode 100644
index af5cc0bc41..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * PAL Firmware support
- * IA-64 Processor Programmers Reference Vol 2
- *
- * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
- * David Mosberger <davidm@hpl.hp.com>
- * Stephane Eranian <eranian@hpl.hp.com>
- *
- * 05/22/2000 eranian Added support for stacked register calls
- * 05/24/2000 eranian Added support for physical mode static calls
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-
- .data
- .globl pal_entry_point
-pal_entry_point:
- data8 ia64_pal_default_handler
- .text
-
-/*
- * Set the PAL entry point address. This could be written in C code, but we do it here
- * to keep it all in one module (besides, it's so trivial that it's
- * not a big deal).
- *
- * in0 Address of the PAL entry point (text address, NOT a function descriptor).
- */
-GLOBAL_ENTRY(ia64_pal_handler_init)
- alloc r3=ar.pfs,1,0,0,0
- movl r2=pal_entry_point
- ;;
- st8 [r2]=in0
- br.ret.sptk.many rp
-END(ia64_pal_handler_init)
-
-/*
- * Default PAL call handler. This needs to be coded in assembly because it uses
- * the static calling convention, i.e., the RSE may not be used and calls are
- * done via "br.cond" (not "br.call").
- */
-GLOBAL_ENTRY(ia64_pal_default_handler)
- mov r8=-1
- br.cond.sptk.many rp
-END(ia64_pal_default_handler)
-
-/*
- * Make a PAL call using the static calling convention.
- *
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic
- *
- */
-GLOBAL_ENTRY(__ia64_pal_call_static)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
- alloc loc1 = ar.pfs,5,5,0,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0
- mov r29 = in1
- mov r8 = ip
- }
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- tbit.nz p6,p7 = in4, 0
- adds r8 = 1f-1b,r8
- mov loc4=ar.rsc // save RSE configuration
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov loc3 = psr
- mov loc0 = rp
- .body
- mov r30 = in2
-
-(p6) rsm psr.i | psr.ic
- mov r31 = in3
- mov b7 = loc2
-
-(p7) rsm psr.i
- ;;
-(p6) srlz.i
- mov rp = r8
- br.cond.sptk.many b7
-1: mov psr.l = loc3
- mov ar.rsc = loc4 // restore RSE configuration
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- srlz.d // seralize restoration of psr.l
- br.ret.sptk.many b0
-END(__ia64_pal_call_static)
-
-/*
- * Make a PAL call using the stacked registers calling convention.
- *
- * Inputs:
- * in0 Index of PAL service
- * in2 - in3 Remaning PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_stacked)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,4,4,0
- movl loc2 = pal_entry_point
-
- mov r28 = in0 // Index MUST be copied to r28
- mov out0 = in0 // AND in0 of PAL function
- mov loc0 = rp
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov out1 = in1
- mov out2 = in2
- mov out3 = in3
- mov loc3 = psr
- ;;
- rsm psr.i
- mov b7 = loc2
- ;;
- br.call.sptk.many rp=b7 // now make the call
-.ret0: mov psr.l = loc3
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_stacked)
-
-/*
- * Make a physical mode PAL call using the static registers calling convention.
- *
- * Inputs:
- * in0 Index of PAL service
- * in2 - in3 Remaning PAL arguments
- *
- * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
- * So we don't need to clear them.
- */
-#define PAL_PSR_BITS_TO_CLEAR \
- (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \
- IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
- IA64_PSR_DFL | IA64_PSR_DFH)
-
-#define PAL_PSR_BITS_TO_SET \
- (IA64_PSR_BN)
-
-
-GLOBAL_ENTRY(ia64_pal_call_phys_static)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,7,0,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0 // copy procedure index
- mov r8 = ip // save ip to compute branch
- mov loc0 = rp // save rp
- }
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov r29 = in1 // first argument
- mov r30 = in2 // copy arg2
- mov r31 = in3 // copy arg3
- ;;
- mov loc3 = psr // save psr
- adds r8 = 1f-1b,r8 // calculate return address for call
- ;;
- mov loc4=ar.rsc // save RSE configuration
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
- tpa r8=r8 // convert rp to physical
- ;;
- mov b7 = loc2 // install target to branch reg
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
- movl r17=PAL_PSR_BITS_TO_SET
- ;;
- or loc3=loc3,r17 // add in psr the bits to set
- ;;
- andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode_phys
-.ret1: mov rp = r8 // install return address (physical)
- mov loc5 = r19
- mov loc6 = r20
- br.cond.sptk.many b7
-1:
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov r16=loc3 // r16= original psr
- mov r19=loc5
- mov r20=loc6
- br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
-.ret2:
- mov psr.l = loc3 // restore init PSR
-
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- mov ar.rsc=loc4 // restore RSE configuration
- srlz.d // seralize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_phys_static)
-
-/*
- * Make a PAL call using the stacked registers in physical mode.
- *
- * Inputs:
- * in0 Index of PAL service
- * in2 - in3 Remaning PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
- alloc loc1 = ar.pfs,5,7,4,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0 // copy procedure index
- mov loc0 = rp // save rp
- }
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov loc3 = psr // save psr
- ;;
- mov loc4=ar.rsc // save RSE configuration
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
- movl r17=PAL_PSR_BITS_TO_SET
- ;;
- or loc3=loc3,r17 // add in psr the bits to set
- mov b7 = loc2 // install target to branch reg
- ;;
- andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode_phys
-
- mov out0 = in0 // first argument
- mov out1 = in1 // copy arg2
- mov out2 = in2 // copy arg3
- mov out3 = in3 // copy arg3
- mov loc5 = r19
- mov loc6 = r20
-
- br.call.sptk.many rp=b7 // now make the call
-
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov r16=loc3 // r16= original psr
- mov r19=loc5
- mov r20=loc6
- br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
-
- mov psr.l = loc3 // restore init PSR
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- mov ar.rsc=loc4 // restore RSE configuration
- srlz.d // seralize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_phys_stacked)
-
-/*
- * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15).
- *
- * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
- * regs fp-low partition.
- *
- * Inputs:
- * in0 Address of stack storage for fp regs
- */
-GLOBAL_ENTRY(ia64_save_scratch_fpregs)
- alloc r3=ar.pfs,1,0,0,0
- add r2=16,in0
- ;;
- stf.spill [in0] = f10,32
- stf.spill [r2] = f11,32
- ;;
- stf.spill [in0] = f12,32
- stf.spill [r2] = f13,32
- ;;
- stf.spill [in0] = f14,32
- stf.spill [r2] = f15,32
- br.ret.sptk.many rp
-END(ia64_save_scratch_fpregs)
-
-/*
- * Load scratch fp scratch regs (fp10-fp15)
- *
- * Inputs:
- * in0 Address of stack storage for fp regs
- */
-GLOBAL_ENTRY(ia64_load_scratch_fpregs)
- alloc r3=ar.pfs,1,0,0,0
- add r2=16,in0
- ;;
- ldf.fill f10 = [in0],32
- ldf.fill f11 = [r2],32
- ;;
- ldf.fill f12 = [in0],32
- ldf.fill f13 = [r2],32
- ;;
- ldf.fill f14 = [in0],32
- ldf.fill f15 = [r2],32
- br.ret.sptk.many rp
-END(ia64_load_scratch_fpregs)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
deleted file mode 100644
index 73597d2866..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Instruction-patching support.
- *
- * Copyright (C) 2003 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- */
-#include <linux/init.h>
-#include <linux/string.h>
-
-#include <asm/patch.h>
-#include <asm/processor.h>
-#include <asm/sections.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-
-/*
- * This was adapted from code written by Tony Luck:
- *
- * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
- * like this:
- *
- * 6 6 5 4 3 2 1
- * 3210987654321098765432109876543210987654321098765432109876543210
- * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
- *
- * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
- */
-static u64
-get_imm64 (u64 insn_addr)
-{
- u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */
-
- return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/
- ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
- ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
- ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
- ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
- ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
- ((p[1] & 0x000007f000000000UL) >> 36); /*G*/
-}
-
-/* Patch instruction with "val" where "mask" has 1 bits. */
-void
-ia64_patch (u64 insn_addr, u64 mask, u64 val)
-{
- u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
-# define insn_mask ((1UL << 41) - 1)
- unsigned long shift;
-
- b0 = b[0]; b1 = b[1];
- shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
- if (shift >= 64) {
- m1 = mask << (shift - 64);
- v1 = val << (shift - 64);
- } else {
- m0 = mask << shift; m1 = mask >> (64 - shift);
- v0 = val << shift; v1 = val >> (64 - shift);
- b[0] = (b0 & ~m0) | (v0 & m0);
- }
- b[1] = (b1 & ~m1) | (v1 & m1);
-}
-
-void
-ia64_patch_imm64 (u64 insn_addr, u64 val)
-{
- /* The assembler may generate offset pointing to either slot 1
- or slot 2 for a long (2-slot) instruction, occupying slots 1
- and 2. */
- insn_addr &= -16UL;
- ia64_patch(insn_addr + 2,
- 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
- | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */
- | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */
- | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */
- | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */));
- ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
-}
-
-void
-ia64_patch_imm60 (u64 insn_addr, u64 val)
-{
- /* The assembler may generate offset pointing to either slot 1
- or slot 2 for a long (2-slot) instruction, occupying slots 1
- and 2. */
- insn_addr &= -16UL;
- ia64_patch(insn_addr + 2,
- 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
- | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
- ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18);
-}
-
-/*
- * We need sometimes to load the physical address of a kernel
- * object. Often we can convert the virtual address to physical
- * at execution time, but sometimes (either for performance reasons
- * or during error recovery) we cannot to this. Patch the marked
- * bundles to load the physical address.
- */
-void __init
-ia64_patch_vtop (unsigned long start, unsigned long end)
-{
- s32 *offp = (s32 *) start;
- u64 ip;
-
- while (offp < (s32 *) end) {
- ip = (u64) offp + *offp;
-
- /* replace virtual address with corresponding physical address: */
- ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));
- ia64_fc((void *) ip);
- ++offp;
- }
- ia64_sync_i();
- ia64_srlz_i();
-}
-
-void __init
-ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
-{
- static int first_time = 1;
- int need_workaround;
- s32 *offp = (s32 *) start;
- u64 *wp;
-
- need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);
-
- if (first_time) {
- first_time = 0;
- if (need_workaround)
- printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
- else
- printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
- "disabling it\n");
- }
- if (need_workaround)
- return;
-
- while (offp < (s32 *) end) {
- wp = (u64 *) ia64_imva((char *) offp + *offp);
- wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
- wp[1] = 0x0004000000000200UL;
- wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
- wp[3] = 0x0084006880000200UL;
- ia64_fc(wp); ia64_fc(wp + 2);
- ++offp;
- }
- ia64_sync_i();
- ia64_srlz_i();
-}
-
-static void __init
-patch_fsyscall_table (unsigned long start, unsigned long end)
-{
- extern unsigned long fsyscall_table[NR_syscalls];
- s32 *offp = (s32 *) start;
- u64 ip;
-
- while (offp < (s32 *) end) {
- ip = (u64) ia64_imva((char *) offp + *offp);
- ia64_patch_imm64(ip, (u64) fsyscall_table);
- ia64_fc((void *) ip);
- ++offp;
- }
- ia64_sync_i();
- ia64_srlz_i();
-}
-
-static void __init
-patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
-{
- extern char fsys_bubble_down[];
- s32 *offp = (s32 *) start;
- u64 ip;
-
- while (offp < (s32 *) end) {
- ip = (u64) offp + *offp;
- ia64_patch_imm60((u64) ia64_imva((void *) ip),
- (u64) (fsys_bubble_down - (ip & -16)) / 16);
- ia64_fc((void *) ip);
- ++offp;
- }
- ia64_sync_i();
- ia64_srlz_i();
-}
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-extern char __start_gate_running_on_xen_patchlist[];
-extern char __end_gate_running_on_xen_patchlist[];
-
-void
-patch_running_on_xen(unsigned long start, unsigned long end)
-{
- extern int running_on_xen;
- s32 *offp = (s32 *)start;
- u64 ip;
-
- while (offp < (s32 *)end) {
- ip = (u64)ia64_imva((char *)offp + *offp);
- ia64_patch_imm64(ip, (u64)&running_on_xen);
- ia64_fc((void *)ip);
- ++offp;
- }
- ia64_sync_i();
- ia64_srlz_i();
-}
-
-static void
-patch_brl_symaddr(unsigned long start, unsigned long end,
- unsigned long symaddr)
-{
- s32 *offp = (s32 *)start;
- u64 ip;
-
- while (offp < (s32 *)end) {
- ip = (u64)offp + *offp;
- ia64_patch_imm60((u64)ia64_imva((void *)ip),
- (u64)(symaddr - (ip & -16)) / 16);
- ia64_fc((void *)ip);
- ++offp;
- }
- ia64_sync_i();
- ia64_srlz_i();
-}
-
-#define EXTERN_PATCHLIST(name) \
- extern char __start_gate_brl_##name##_patchlist[]; \
- extern char __end_gate_brl_##name##_patchlist[]; \
- extern char name[]
-
-#define PATCH_BRL_SYMADDR(name) \
- patch_brl_symaddr((unsigned long)__start_gate_brl_##name##_patchlist, \
- (unsigned long)__end_gate_brl_##name##_patchlist, \
- (unsigned long)name)
-
-static void
-patch_brl_in_vdso(void)
-{
- EXTERN_PATCHLIST(xen_ssm_i_0);
- EXTERN_PATCHLIST(xen_ssm_i_1);
-
- PATCH_BRL_SYMADDR(xen_ssm_i_0);
- PATCH_BRL_SYMADDR(xen_ssm_i_1);
-}
-#else
-#define patch_running_on_xen(start, end) do { } while (0)
-#define patch_brl_in_vdso() do { } while (0)
-#endif
-
-void __init
-ia64_patch_gate (void)
-{
-# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
-# define END(name) ((unsigned long)__end_gate_##name##_patchlist)
-
- patch_fsyscall_table(START(fsyscall), END(fsyscall));
- patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
-#ifdef CONFIG_XEN
- patch_running_on_xen(START(running_on_xen), END(running_on_xen));
- patch_brl_in_vdso();
-#endif
- ia64_patch_vtop(START(vtop), END(vtop));
- ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c b/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c
deleted file mode 100644
index 59d277fb8b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c
+++ /dev/null
@@ -1,6943 +0,0 @@
-/*
- * This file implements the perfmon-2 subsystem which is used
- * to program the IA-64 Performance Monitoring Unit (PMU).
- *
- * The initial version of perfmon.c was written by
- * Ganesh Venkitachalam, IBM Corp.
- *
- * Then it was modified for perfmon-1.x by Stephane Eranian and
- * David Mosberger, Hewlett Packard Co.
- *
- * Version Perfmon-2.x is a rewrite of perfmon-1.x
- * by Stephane Eranian, Hewlett Packard Co.
- *
- * Copyright (C) 1999-2005 Hewlett Packard Co
- * Stephane Eranian <eranian@hpl.hp.com>
- * David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * More information about perfmon available at:
- * http://www.hpl.hp.com/research/linux/perfmon
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/smp_lock.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/list.h>
-#include <linux/file.h>
-#include <linux/poll.h>
-#include <linux/vfs.h>
-#include <linux/pagemap.h>
-#include <linux/mount.h>
-#include <linux/bitops.h>
-#include <linux/capability.h>
-#include <linux/rcupdate.h>
-#include <linux/completion.h>
-
-#include <asm/errno.h>
-#include <asm/intrinsics.h>
-#include <asm/page.h>
-#include <asm/perfmon.h>
-#include <asm/processor.h>
-#include <asm/signal.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/delay.h>
-
-#ifdef CONFIG_PERFMON
-#ifdef CONFIG_XEN
-//#include <xen/xenoprof.h>
-#include <xen/interface/xenoprof.h>
-
-/* holds the last value handed to init_xenoprof_primary() */
-static int xenoprof_is_primary = 0;
-#define init_xenoprof_primary(is_primary) (xenoprof_is_primary = (is_primary))
-#define is_xenoprof_primary() (xenoprof_is_primary)
-/*
- * When running on Xen, log the name of the enclosing function and make it
- * return -ENOSYS.  Because the macro contains a "return -ENOSYS", it can
- * only be used in functions whose return type accepts that value.
- */
-#define XEN_NOT_SUPPORTED_YET \
- do { \
- if (is_running_on_xen()) { \
- printk("%s is not supported yet under xen.\n", \
- __func__); \
- return -ENOSYS; \
- } \
- } while (0)
-#else
-/*
- * Without CONFIG_XEN the helpers collapse to no-ops.  The
- * HYPERVISOR_perfmon_op() stub is an empty statement, so it cannot be used
- * where a value is expected.
- */
-#define init_xenoprof_primary(is_primary) do { } while (0)
-#define is_xenoprof_primary() (0)
-#define XEN_NOT_SUPPORTED_YET do { } while (0)
-#define HYPERVISOR_perfmon_op(cmd, arg, count) do { } while (0)
-#endif
-
-/*
- * perfmon context state
- */
-#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
-#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
-#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */
-#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */
-
-#define PFM_INVALID_ACTIVATION (~0UL)
-
-/*
- * depth of message queue
- */
-#define PFM_MAX_MSGS 32
-#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
-
-/*
- * type of a PMU register (bitmask).
- * bitmask structure:
- * bit0 : register implemented
- * bit1 : end marker
- * bit2-3 : reserved
- * bit4 : pmc has pmc.pm
- * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter
- * bit6-7 : register type
- * bit8-31: reserved
- */
-#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */
-#define PFM_REG_IMPL 0x1 /* register implemented */
-#define PFM_REG_END 0x2 /* end marker */
-#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
-#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
-#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */
-#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */
-#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
-
-#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END)
-#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END)
-
-#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
-
-/* i assumed unsigned */
-#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
-#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
-
-/* XXX: these assume that register i is implemented */
-#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
-#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
-#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
-#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)
-
-#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value
-#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask
-#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0]
-#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0]
-
-#define PFM_NUM_IBRS IA64_NUM_DBG_REGS
-#define PFM_NUM_DBRS IA64_NUM_DBG_REGS
-
-#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
-#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling)
-#define PFM_CTX_TASK(h) (h)->ctx_task
-
-#define PMU_PMC_OI 5 /* position of pmc.oi bit */
-
-/* XXX: does not support more than 64 PMDs */
-#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
-#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
-
-#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
-
-#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
-#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
-#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
-#define PFM_CODE_RR 0 /* requesting code range restriction */
-#define PFM_DATA_RR 1 /* requesting data range restriction */
-
-#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v)
-#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v)
-#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info)
-
-#define RDEP(x) (1UL<<(x))
-
-/*
- * context protection macros
- * in SMP:
- * - we need to protect against CPU concurrency (spin_lock)
- * - we need to protect against PMU overflow interrupts (local_irq_disable)
- * in UP:
- * - we need to protect against PMU overflow interrupts (local_irq_disable)
- *
- * spin_lock_irqsave()/spin_unlock_irqrestore():
- * in SMP: local_irq_disable + spin_lock
- * in UP : local_irq_disable
- *
- * spin_lock()/spin_unlock():
- * in UP : removed automatically
- * in SMP: protect against context accesses from other CPU. interrupts
- * are not masked. This is useful for the PMU interrupt handler
- * because we know we will not get PMU concurrency in that code.
- */
-#define PROTECT_CTX(c, f) \
- do { \
- DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
- spin_lock_irqsave(&(c)->ctx_lock, f); \
- DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \
- } while(0)
-
-#define UNPROTECT_CTX(c, f) \
- do { \
- DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
- spin_unlock_irqrestore(&(c)->ctx_lock, f); \
- } while(0)
-
-#define PROTECT_CTX_NOPRINT(c, f) \
- do { \
- spin_lock_irqsave(&(c)->ctx_lock, f); \
- } while(0)
-
-
-#define UNPROTECT_CTX_NOPRINT(c, f) \
- do { \
- spin_unlock_irqrestore(&(c)->ctx_lock, f); \
- } while(0)
-
-
-#define PROTECT_CTX_NOIRQ(c) \
- do { \
- spin_lock(&(c)->ctx_lock); \
- } while(0)
-
-#define UNPROTECT_CTX_NOIRQ(c) \
- do { \
- spin_unlock(&(c)->ctx_lock); \
- } while(0)
-
-
-#ifdef CONFIG_SMP
-
-#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)
-#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++
-#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION()
-
-#else /* !CONFIG_SMP */
-#define SET_ACTIVATION(t) do {} while(0)
-#define GET_ACTIVATION(t) do {} while(0)
-#define INC_ACTIVATION(t) do {} while(0)
-#endif /* CONFIG_SMP */
-
-#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
-#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner)
-#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx)
-
-#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
-#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
-
-#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
-
-/*
- * cmp0 must be the value of pmc0
- */
-#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL)
-
-#define PFMFS_MAGIC 0xa0b4d889
-
-/*
- * debugging
- */
-#define PFM_DEBUGGING 1
-#ifdef PFM_DEBUGGING
-#define DPRINT(a) \
- do { \
- if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
- } while (0)
-
-#define DPRINT_ovfl(a) \
- do { \
- if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
- } while (0)
-#endif
-
-/*
- * 64-bit software counter structure
- *
- * the next_reset_type is applied to the next call to pfm_reset_regs()
- */
-typedef struct {
- unsigned long val; /* virtual 64bit counter value */
- unsigned long lval; /* last reset value */
- unsigned long long_reset; /* reset value on sampling overflow */
- unsigned long short_reset; /* reset value on overflow */
- unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */
- unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */
- unsigned long seed; /* seed for random-number generator */
- unsigned long mask; /* mask for random-number generator */
- unsigned int flags; /* notify/do not notify */
- unsigned long eventid; /* overflow event identifier */
-} pfm_counter_t;
-
-/*
- * context flags
- */
-typedef struct {
- unsigned int block:1; /* when 1, task will be blocked on user notifications */
- unsigned int system:1; /* do system wide monitoring */
- unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
- unsigned int is_sampling:1; /* true if using a custom format */
- unsigned int excl_idle:1; /* exclude idle task in system wide session */
- unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */
- unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */
- unsigned int no_msg:1; /* no message sent on overflow */
- unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */
- unsigned int reserved:22;
-} pfm_context_flags_t;
-
-#define PFM_TRAP_REASON_NONE 0x0 /* default value */
-#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */
-#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */
-
-
-/*
- * perfmon context: encapsulates all the state of a monitoring session
- */
-
-typedef struct pfm_context {
- spinlock_t ctx_lock; /* context protection */
-
- pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */
- unsigned int ctx_state; /* one of the PFM_CTX_* states (no bitfield) */
-
- struct task_struct *ctx_task; /* task to which context is attached */
-
- unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
-
- struct completion ctx_restart_done; /* use for blocking notification mode */
-
- unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
- unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */
- unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */
-
- unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */
- unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */
- unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */
-
- unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */
-
- /*
-  * NOTE(review): CTX_USED_IBR()/CTX_USED_DBR() address the two bitmasks
-  * below as 64-bit words ((n)>>6 and 1UL << ((n) % 64)) although each
-  * element is an unsigned int.  That only works while n stays below the
-  * width of unsigned int -- confirm against IA64_NUM_DBG_REGS.
-  */
- unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */
- unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */
- unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */
- unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */
-
- pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
-
- u64 ctx_saved_psr_up; /* only contains psr.up value */
-
- unsigned long ctx_last_activation; /* context last activation number for last_cpu */
- unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
- unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
-
- int ctx_fd; /* file descriptor used by this context */
- pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */
-
- pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */
- void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */
- unsigned long ctx_smpl_size; /* size of sampling buffer */
- void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
-
- wait_queue_head_t ctx_msgq_wait;
- pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; /* circular message queue, see ctx_msgq_head/tail */
- int ctx_msgq_head; /* index of the oldest queued message */
- int ctx_msgq_tail; /* index of the next free slot */
- struct fasync_struct *ctx_async_queue;
-
- wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */
-} pfm_context_t;
-
-/*
- * a file is considered a perfmon file when its f_op points at
- * pfm_file_ops (no magic number is stored in the context itself)
- */
-#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops)
-
-#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context)
-
-#ifdef CONFIG_SMP
-#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v)
-#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu
-#else
-#define SET_LAST_CPU(ctx, v) do {} while(0)
-#define GET_LAST_CPU(ctx) do {} while(0)
-#endif
-
-
-#define ctx_fl_block ctx_flags.block
-#define ctx_fl_system ctx_flags.system
-#define ctx_fl_using_dbreg ctx_flags.using_dbreg
-#define ctx_fl_is_sampling ctx_flags.is_sampling
-#define ctx_fl_excl_idle ctx_flags.excl_idle
-#define ctx_fl_going_zombie ctx_flags.going_zombie
-#define ctx_fl_trap_reason ctx_flags.trap_reason
-#define ctx_fl_no_msg ctx_flags.no_msg
-#define ctx_fl_can_restart ctx_flags.can_restart
-
-/*
- * Accessors for the per-thread thread.pfm_needs_checking flag.
- *
- * PFM_SET_WORK_PENDING() deliberately has no trailing semicolon after
- * while(0): the caller supplies it, which keeps the macro usable as the
- * body of an unbraced if/else.  The value argument is parenthesized so that
- * any expression can be passed.
- */
-#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = (v); } while(0)
-#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking
-
-/*
- * global information about all sessions
- * mostly used to synchronize between system wide and per-process
- */
-typedef struct {
- spinlock_t pfs_lock; /* lock the structure */
-
- unsigned int pfs_task_sessions; /* number of per task sessions */
- unsigned int pfs_sys_sessions; /* number of per system wide sessions */
- unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
- unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
- struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
-} pfm_session_t;
-
-/*
- * information about a PMC or PMD.
- * dep_pmd[]: a bitmask of dependent PMD registers
- * dep_pmc[]: a bitmask of dependent PMC registers
- */
-typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
-typedef struct {
- unsigned int type;
- int pm_pos;
- unsigned long default_value; /* power-on default value */
- unsigned long reserved_mask; /* bitmask of reserved bits */
- pfm_reg_check_t read_check;
- pfm_reg_check_t write_check;
- unsigned long dep_pmd[4];
- unsigned long dep_pmc[4];
-} pfm_reg_desc_t;
-
-/* assume cnum is a valid monitor */
-#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
-
-/*
- * This structure is initialized at boot time and contains
- * a description of the PMU main characteristics.
- *
- * If the probe function is defined, detection is based
- * on its return value:
- * - 0 means recognized PMU
- * - anything else means not supported
- * When the probe function is not defined, then the pmu_family field
- * is used and it must match the host CPU family such that:
- * - cpu->family & config->pmu_family != 0
- */
-typedef struct {
- unsigned long ovfl_val; /* overflow value for counters */
-
- pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */
- pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */
-
- unsigned int num_pmcs; /* number of PMCS: computed at init time */
- unsigned int num_pmds; /* number of PMDS: computed at init time */
- unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */
- unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */
-
- char *pmu_name; /* PMU family name */
- unsigned int pmu_family; /* cpuid family pattern used to identify pmu */
- unsigned int flags; /* pmu specific flags */
- unsigned int num_ibrs; /* number of IBRS: computed at init time */
- unsigned int num_dbrs; /* number of DBRS: computed at init time */
- unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */
- int (*probe)(void); /* customized probe routine */
- unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */
-} pmu_config_t;
-/*
- * PMU specific flags
- */
-#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */
-
-/*
- * debug register related type definitions
- */
-typedef struct {
- unsigned long ibr_mask:56;
- unsigned long ibr_plm:4;
- unsigned long ibr_ig:3;
- unsigned long ibr_x:1;
-} ibr_mask_reg_t;
-
-typedef struct {
- unsigned long dbr_mask:56;
- unsigned long dbr_plm:4;
- unsigned long dbr_ig:2;
- unsigned long dbr_w:1;
- unsigned long dbr_r:1;
-} dbr_mask_reg_t;
-
-typedef union {
- unsigned long val;
- ibr_mask_reg_t ibr;
- dbr_mask_reg_t dbr;
-} dbreg_t;
-
-
-/*
- * perfmon command descriptions
- */
-typedef struct {
- int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
- char *cmd_name;
- int cmd_flags;
- unsigned int cmd_narg;
- size_t cmd_argsize;
- int (*cmd_getsize)(void *arg, size_t *sz);
-} pfm_cmd_desc_t;
-
-#define PFM_CMD_FD 0x01 /* command requires a file descriptor */
-#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */
-#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */
-#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */
-
-
-#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name
-#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
-#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
-#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
-#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
-
-#define PFM_CMD_ARG_MANY -1 /* cannot be zero */
-
-typedef struct {
- unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
- unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */
- unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
- unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */
- unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */
- unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */
- unsigned long pfm_smpl_handler_calls;
- unsigned long pfm_smpl_handler_cycles;
- char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
-} pfm_stats_t;
-
-/*
- * perfmon internal variables
- */
-static pfm_stats_t pfm_stats[NR_CPUS];
-static pfm_session_t pfm_sessions; /* global sessions information */
-
-static DEFINE_SPINLOCK(pfm_alt_install_check);
-static pfm_intr_handler_desc_t *pfm_alt_intr_handler;
-
-static struct proc_dir_entry *perfmon_dir;
-static pfm_uuid_t pfm_null_uuid = {0,};
-
-static spinlock_t pfm_buffer_fmt_lock;
-static LIST_HEAD(pfm_buffer_fmt_list);
-
-static pmu_config_t *pmu_conf;
-
-/* sysctl() controls */
-pfm_sysctl_t pfm_sysctl;
-EXPORT_SYMBOL(pfm_sysctl);
-
-static ctl_table pfm_ctl_table[]={
- {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
- {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
- {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
- {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
- { 0, },
-};
-static ctl_table pfm_sysctl_dir[] = {
- {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
- {0,},
-};
-static ctl_table pfm_sysctl_root[] = {
- {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
- {0,},
-};
-static struct ctl_table_header *pfm_sysctl_header;
-
-static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-
-#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v)
-#define pfm_get_cpu_data(a,b) per_cpu(a, b)
-
-/*
- * Release a task reference with put_task_struct(), unless the task is the
- * caller itself (current), in which case nothing is done.
- */
-static inline void
-pfm_put_task(struct task_struct *task)
-{
-	if (task == current)
-		return;
-
-	put_task_struct(task);
-}
-
-/*
- * Set TIF_NOTIFY_RESUME in the thread_info of an arbitrary task.  The
- * thread_info is addressed as the bytes that start IA64_TASK_SIZE past the
- * task_struct pointer.
- */
-static inline void
-pfm_set_task_notify(struct task_struct *task)
-{
-	struct thread_info *ti = (struct thread_info *)((char *)task + IA64_TASK_SIZE);
-
-	set_bit(TIF_NOTIFY_RESUME, &ti->flags);
-}
-
-/*
- * Clear TIF_NOTIFY_RESUME through clear_thread_flag(); unlike
- * pfm_set_task_notify() this takes no task argument.
- */
-static inline void
-pfm_clear_task_notify(void)
-{
- clear_thread_flag(TIF_NOTIFY_RESUME);
-}
-
-/*
- * Mark the page backing vmalloc address "a" as reserved.
- */
-static inline void
-pfm_reserve_page(unsigned long a)
-{
-	struct page *pg = vmalloc_to_page((void *)a);
-
-	SetPageReserved(pg);
-}
-
-/*
- * Undo pfm_reserve_page(): clear the reserved flag of the page backing
- * vmalloc address "a".
- */
-static inline void
-pfm_unreserve_page(unsigned long a)
-{
-	struct page *pg = vmalloc_to_page((void *)a);
-
-	ClearPageReserved(pg);
-}
-
-/*
- * Context locking as used on the context-switch path: ctx_lock is taken
- * with a plain spin_lock(), so the interrupt state is left alone.  The
- * return value is a placeholder "flags" word and is always 0.
- */
-static inline unsigned long
-pfm_protect_ctx_ctxsw(pfm_context_t *x)
-{
-	spinlock_t *lock = &x->ctx_lock;
-
-	spin_lock(lock);
-	return 0UL;
-}
-
-/*
- * Counterpart of pfm_protect_ctx_ctxsw(): drops ctx_lock.  The flags
- * argument "f" is accepted but not used.
- */
-static inline void
-pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
-{
-	spinlock_t *lock = &x->ctx_lock;
-
-	spin_unlock(lock);
-}
-
-/*
- * Thin wrapper around do_munmap(); the "acct" argument is ignored.
- */
-static inline unsigned int
-pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
-{
- return do_munmap(mm, addr, len);
-}
-
-/*
- * Thin wrapper around get_unmapped_area(); the "exec" argument is ignored.
- */
-static inline unsigned long
-pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
-{
- return get_unmapped_area(file, addr, len, pgoff, flags);
-}
-
-
-/*
- * get_sb callback of the "pfmfs" filesystem type: delegates to
- * get_sb_pseudo() with the "pfm:" name prefix and the PFMFS_MAGIC magic
- * number.  The flags, dev_name and data arguments are not used.
- */
-static int
-pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
-{
- return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
-}
-
-/* filesystem type descriptor for "pfmfs" */
-static struct file_system_type pfm_fs_type = {
- .name = "pfmfs",
- .get_sb = pfmfs_get_sb,
- .kill_sb = kill_anon_super,
-};
-
-DEFINE_PER_CPU(unsigned long, pfm_syst_info);
-DEFINE_PER_CPU(struct task_struct *, pmu_owner);
-DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
-DEFINE_PER_CPU(unsigned long, pmu_activation_number);
-EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
-
-
-/* forward declaration */
-static struct file_operations pfm_file_ops;
-
-/*
- * forward declarations
- */
-#ifndef CONFIG_SMP
-static void pfm_lazy_save_regs (struct task_struct *ta);
-#endif
-
-void dump_pmu_state(const char *);
-static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-
-#include "perfmon_itanium.h"
-#include "perfmon_mckinley.h"
-#include "perfmon_montecito.h"
-#include "perfmon_generic.h"
-
-static pmu_config_t *pmu_confs[]={
- &pmu_conf_mont,
- &pmu_conf_mck,
- &pmu_conf_ita,
- &pmu_conf_gen, /* must be last */
- NULL
-};
-
-
-static int pfm_end_notify_user(pfm_context_t *ctx);
-
-/*
- * The four helpers below each flip one PSR bit on the executing CPU
- * (rsm = clear, ssm = set) and follow it with an instruction
- * serialization (ia64_srlz_i()).
- */
-
-/* clear psr.pp */
-static inline void
-pfm_clear_psr_pp(void)
-{
- ia64_rsm(IA64_PSR_PP);
- ia64_srlz_i();
-}
-
-/* set psr.pp */
-static inline void
-pfm_set_psr_pp(void)
-{
- ia64_ssm(IA64_PSR_PP);
- ia64_srlz_i();
-}
-
-/* clear psr.up */
-static inline void
-pfm_clear_psr_up(void)
-{
- ia64_rsm(IA64_PSR_UP);
- ia64_srlz_i();
-}
-
-/* set psr.up */
-static inline void
-pfm_set_psr_up(void)
-{
- ia64_ssm(IA64_PSR_UP);
- ia64_srlz_i();
-}
-
-/*
- * Read the PSR of the executing CPU, issue an instruction serialization
- * and return the value that was read.
- */
-static inline unsigned long
-pfm_get_psr(void)
-{
-	unsigned long psr = ia64_getreg(_IA64_REG_PSR);
-
-	ia64_srlz_i();
-	return psr;
-}
-
-/*
- * Write "val" to psr.l (_IA64_REG_PSR_L) and issue an instruction
- * serialization.
- */
-static inline void
-pfm_set_psr_l(unsigned long val)
-{
- ia64_setreg(_IA64_REG_PSR_L, val);
- ia64_srlz_i();
-}
-
-/*
- * Write 1 to PMC0 and issue a data serialization.  Going by the function
- * name, bit 0 of PMC0 is the PMU freeze bit -- confirm against the
- * architecture manual.
- */
-static inline void
-pfm_freeze_pmu(void)
-{
- ia64_set_pmc(0,1UL);
- ia64_srlz_d();
-}
-
-/*
- * Write 0 to PMC0 and issue a data serialization; the inverse of
- * pfm_freeze_pmu().  Note that the whole register is written as zero, not
- * just bit 0.
- */
-static inline void
-pfm_unfreeze_pmu(void)
-{
- ia64_set_pmc(0,0UL);
- ia64_srlz_d();
-}
-
-/*
- * Load ibrs[0..nibrs-1] into the instruction breakpoint registers.
- *
- * Each register write is followed by ia64_dv_serialize_instruction(), and
- * the whole sequence is closed by an instruction serialization.
- *
- * @ibrs:  values to load, indexed by register number
- * @nibrs: number of registers to load
- */
-static inline void
-pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
-{
-	unsigned int i;	/* same type as nibrs: no signed/unsigned comparison */
-
-	for (i = 0; i < nibrs; i++) {
-		ia64_set_ibr(i, ibrs[i]);
-		ia64_dv_serialize_instruction();
-	}
-	ia64_srlz_i();
-}
-
-/*
- * Load dbrs[0..ndbrs-1] into the data breakpoint registers.
- *
- * Each register write is followed by ia64_dv_serialize_data(), and the
- * whole sequence is closed by a data serialization.
- *
- * @dbrs:  values to load, indexed by register number
- * @ndbrs: number of registers to load
- */
-static inline void
-pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
-{
-	unsigned int i;	/* same type as ndbrs: no signed/unsigned comparison */
-
-	for (i = 0; i < ndbrs; i++) {
-		ia64_set_dbr(i, dbrs[i]);
-		ia64_dv_serialize_data();
-	}
-	ia64_srlz_d();
-}
-
-/*
- * Return the full software counter for PMD[i]: the part kept in the
- * context plus the live hardware bits selected by pmu_conf->ovfl_val.
- * The caller must make sure PMD[i] is a counter; nothing is checked here.
- */
-static inline unsigned long
-pfm_read_soft_counter(pfm_context_t *ctx, int i)
-{
-	unsigned long hw_part = ia64_get_pmd(i) & pmu_conf->ovfl_val;
-
-	return ctx->ctx_pmds[i].val + hw_part;
-}
-
-/*
- * Store "val" as the software counter for PMD[i].  The bits selected by
- * pmu_conf->ovfl_val are written to the hardware register, the remaining
- * bits are kept in the context.
- * The caller must make sure PMD[i] is a counter; nothing is checked here.
- */
-static inline void
-pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
-{
-	unsigned long hw_bits = pmu_conf->ovfl_val;
-	unsigned long soft_part = val & ~hw_bits;
-	unsigned long hw_part = val & hw_bits;
-
-	ctx->ctx_pmds[i].val = soft_part;
-	ia64_set_pmd(i, hw_part);
-}
-
-/*
- * Reserve the next free slot of the context's circular message queue.
- *
- * Returns a pointer to the slot at the current tail and advances the tail,
- * or NULL when advancing the tail would make it collide with the head
- * (queue full; one slot is always left unused).
- */
-static pfm_msg_t *
-pfm_get_new_msg(pfm_context_t *ctx)
-{
-	int idx, next;
-
-	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
-
-	/* the value printed is the context pointer, so label it "ctx" */
-	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
-	if (next == ctx->ctx_msgq_head) return NULL;
-
-	idx = ctx->ctx_msgq_tail;
-	ctx->ctx_msgq_tail = next;
-
-	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
-
-	return ctx->ctx_msgq+idx;
-}
-
-/*
- * Dequeue the oldest message of the context's circular message queue.
- * Returns NULL when the queue is empty; otherwise returns the slot at the
- * head and moves the head forward by one (modulo PFM_MAX_MSGS).
- */
-static pfm_msg_t *
-pfm_get_next_msg(pfm_context_t *ctx)
-{
-	pfm_msg_t *oldest;
-
-	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
-
-	if (PFM_CTXQ_EMPTY(ctx))
-		return NULL;
-
-	oldest = &ctx->ctx_msgq[ctx->ctx_msgq_head];
-	ctx->ctx_msgq_head = (ctx->ctx_msgq_head + 1) % PFM_MAX_MSGS;
-
-	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, oldest->pfm_gen_msg.msg_type));
-
-	return oldest;
-}
-
-/*
- * Empty the context's message queue by resetting head and tail to 0.
- */
-static void
-pfm_reset_msgq(pfm_context_t *ctx)
-{
-	ctx->ctx_msgq_tail = 0;
-	ctx->ctx_msgq_head = 0;
-	DPRINT(("ctx=%p msgq reset\n", ctx));
-}
-
-/*
- * Allocate a zero-filled vmalloc buffer of "size" bytes, rounded up to a
- * whole number of pages, and mark every page of it reserved.
- * Returns the buffer, or NULL when vmalloc() fails.
- */
-static void *
-pfm_rvmalloc(unsigned long size)
-{
-	unsigned long va, remaining;
-	void *buf;
-
-	size = PAGE_ALIGN(size);
-	buf = vmalloc(size);
-	if (!buf)
-		return NULL;
-
-	memset(buf, 0, size);
-
-	/* size is page aligned, so the countdown ends exactly at zero */
-	for (va = (unsigned long)buf, remaining = size;
-	     remaining > 0;
-	     va += PAGE_SIZE, remaining -= PAGE_SIZE)
-		pfm_reserve_page(va);
-
-	return buf;
-}
-
-/*
- * Release a buffer obtained from pfm_rvmalloc(): clear the reserved flag
- * of each of its pages, then vfree() it.  A NULL "mem" is ignored.
- *
- * The remaining size is compared as a signed value, so the walk also
- * terminates when "size" is not a multiple of PAGE_SIZE.
- */
-static void
-pfm_rvfree(void *mem, unsigned long size)
-{
-	unsigned long va;
-
-	if (!mem)
-		return;
-
-	DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
-
-	for (va = (unsigned long) mem; (long) size > 0; va += PAGE_SIZE, size -= PAGE_SIZE)
-		pfm_unreserve_page(va);
-
-	vfree(mem);
-}
-
-/*
- * Allocate a zero-initialized perfmon context descriptor.
- *
- * Returns the new context, or NULL when the allocation fails.  The
- * allocation may sleep (GFP_KERNEL).
- */
-static pfm_context_t *
-pfm_context_alloc(void)
-{
-	pfm_context_t *ctx;
-
-	/*
-	 * allocate context descriptor
-	 * must be able to free with interrupts disabled
-	 *
-	 * kzalloc() hands back zeroed memory, replacing the former
-	 * kmalloc() + memset() pair.
-	 */
-	ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
-	if (ctx) {
-		DPRINT(("alloc ctx @%p\n", ctx));
-	}
-	return ctx;
-}
-
-/*
- * Free a context descriptor obtained from pfm_context_alloc().
- * A NULL pointer is accepted and ignored.
- */
-static void
-pfm_context_free(pfm_context_t *ctx)
-{
-	if (!ctx)
-		return;
-
-	DPRINT(("free ctx @%p\n", ctx));
-	kfree(ctx);
-}
-
-/*
- * Mask monitoring for the context attached to "task".
- *
- * Step 1: for every PMD set in ctx_used_pmds[0], read the live register and
- * fold it into the software state: counting PMDs accumulate the bits
- * selected by pmu_conf->ovfl_val, all other PMDs are copied verbatim.
- *
- * Step 2: for every monitor set in ctx_used_monitors[0], starting at
- * PMU_FIRST_COUNTER, clear the low four bits of the PMC both in the live
- * register and in the thread's saved copy (th->pmcs[]).
- *
- * Only PMU registers are touched; the PSR is left alone.
- */
-static void
-pfm_mask_monitoring(struct task_struct *task)
-{
- pfm_context_t *ctx = PFM_GET_CTX(task);
- struct thread_struct *th = &task->thread;
- unsigned long mask, val, ovfl_mask;
- int i;
-
- DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
-
- ovfl_mask = pmu_conf->ovfl_val;
- /*
- * monitoring can only be masked as a result of a valid
- * counter overflow. In UP, it means that the PMU still
- * has an owner. Note that the owner can be different
- * from the current task. However the PMU state belongs
- * to the owner.
- * In SMP, a valid overflow only happens when task is
- * current. Therefore if we come here, we know that
- * the PMU state belongs to the current task, therefore
- * we can access the live registers.
- *
- * So in both cases, the live register contains the owner's
- * state. We can ONLY touch the PMU registers and NOT the PSR.
- *
- * As a consequence to this call, the thread->pmds[] array
- * contains stale information which must be ignored
- * when context is reloaded AND monitoring is active (see
- * pfm_restart).
- */
- mask = ctx->ctx_used_pmds[0];
- for (i = 0; mask; i++, mask>>=1) {
- /* skip non used pmds */
- if ((mask & 0x1) == 0) continue;
- val = ia64_get_pmd(i);
-
- if (PMD_IS_COUNTING(i)) {
- /*
- * we rebuild the full 64 bit value of the counter
- */
- ctx->ctx_pmds[i].val += (val & ovfl_mask);
- } else {
- ctx->ctx_pmds[i].val = val;
- }
- DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
- i,
- ctx->ctx_pmds[i].val,
- val & ovfl_mask));
- }
- /*
- * mask monitoring by setting the privilege level to 0
- * we cannot use psr.pp/psr.up for this, it is controlled by
- * the user
- *
- * NOTE(review): the loop below updates the live PMC and the saved
- * copy in th->pmcs[] unconditionally; it does not test whether
- * task is current.
- */
- mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
- for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
- if ((mask & 0x1) == 0UL) continue;
- ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
- th->pmcs[i] &= ~0xfUL;
- DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
- }
- /*
- * make all of this visible
- */
- ia64_srlz_d();
-}
-
-/*
- * Undo pfm_mask_monitoring() for the context attached to "task".
- *
- * must always be done with task == current
- *
- * context must be in MASKED state when calling
- *
- * Both preconditions are checked; on violation an error is logged and the
- * function returns without touching any register.
- *
- * Sequence: save the PSR, stop monitoring through the PSR (and dcr.pp for
- * system-wide sessions that have PFM_CPUINFO_DCR_PP set), reload the used
- * PMDs, reload the monitor PMCs from ctx->ctx_pmcs[], reload the debug
- * registers when the context uses them, then re-enable dcr.pp if it was
- * cleared and write the saved PSR back.
- */
-static void
-pfm_restore_monitoring(struct task_struct *task)
-{
- pfm_context_t *ctx = PFM_GET_CTX(task);
- struct thread_struct *th = &task->thread;
- unsigned long mask, ovfl_mask;
- unsigned long psr, val;
- int i, is_system;
-
- is_system = ctx->ctx_fl_system;
- ovfl_mask = pmu_conf->ovfl_val;
-
- if (task != current) {
- printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
- return;
- }
- if (ctx->ctx_state != PFM_CTX_MASKED) {
- printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
- task->pid, current->pid, ctx->ctx_state);
- return;
- }
- /* remember the PSR so it can be re-established at the end */
- psr = pfm_get_psr();
- /*
- * monitoring is masked via the PMC.
- * As we restore their value, we do not want each counter to
- * restart right away. We stop monitoring using the PSR,
- * restore the PMC (and PMD) and then re-establish the psr
- * as it was. Note that there can be no pending overflow at
- * this point, because monitoring was MASKED.
- *
- * system-wide session are pinned and self-monitoring
- */
- if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
- /* disable dcr pp */
- ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
- pfm_clear_psr_pp();
- } else {
- pfm_clear_psr_up();
- }
- /*
- * first, we restore the PMD
- */
- mask = ctx->ctx_used_pmds[0];
- for (i = 0; mask; i++, mask>>=1) {
- /* skip non used pmds */
- if ((mask & 0x1) == 0) continue;
-
- if (PMD_IS_COUNTING(i)) {
- /*
- * we split the 64bit value according to
- * counter width
- */
- val = ctx->ctx_pmds[i].val & ovfl_mask;
- ctx->ctx_pmds[i].val &= ~ovfl_mask;
- } else {
- val = ctx->ctx_pmds[i].val;
- }
- ia64_set_pmd(i, val);
-
- DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
- i,
- ctx->ctx_pmds[i].val,
- val));
- }
- /*
- * restore the PMCs
- */
- mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
- for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
- if ((mask & 0x1) == 0UL) continue;
- /* the context copy still holds the unmasked PMC value */
- th->pmcs[i] = ctx->ctx_pmcs[i];
- ia64_set_pmc(i, th->pmcs[i]);
- DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
- }
- ia64_srlz_d();
-
- /*
- * must restore DBR/IBR because could be modified while masked
- * XXX: need to optimize
- */
- if (ctx->ctx_fl_using_dbreg) {
- pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
- pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
- }
-
- /*
- * now restore PSR
- */
- if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
- /* enable dcr pp */
- ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
- ia64_srlz_i();
- }
- pfm_set_psr_l(psr);
-}
-
-/*
- * Copy the live PMD registers selected by "mask" (bit n selects PMD[n])
- * into pmds[], after a data serialization.  Entries whose bit is clear
- * are left untouched.
- */
-static inline void
-pfm_save_pmds(unsigned long *pmds, unsigned long mask)
-{
-	int reg = 0;
-
-	ia64_srlz_d();
-
-	while (mask) {
-		if (mask & 0x1)
-			pmds[reg] = ia64_get_pmd(reg);
-		mask >>= 1;
-		reg++;
-	}
-}
-
-/*
- * reload the PMD registers selected by mask from the saved thread state
- * (used for ctxsw only).
- *
- * Counting PMDs are loaded with only the bits covered by
- * pmu_conf->ovfl_val; all other PMDs are loaded unmodified.
- */
-static inline void
-pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
-{
-	unsigned long hw_bits = pmu_conf->ovfl_val;
-	unsigned long v;
-	int n;
-
-	for (n = 0; mask != 0UL; n++, mask >>= 1) {
-		if (mask & 0x1) {
-			v = pmds[n];
-			if (PMD_IS_COUNTING(n))
-				v &= hw_bits;
-			ia64_set_pmd(n, v);
-		}
-	}
-	ia64_srlz_d();
-}
-
-/*
- * propagate the PMD values held in the context to the thread state.
- *
- * Every index up to the highest bit set in ctx_all_pmds[0] is copied;
- * the individual mask bits are not tested.
- */
-static inline void
-pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
-{
-	struct thread_struct *th = &task->thread;
-	unsigned long hw_bits = pmu_conf->ovfl_val;
-	unsigned long pending = ctx->ctx_all_pmds[0];
-	unsigned long cur;
-	int n = 0;
-
-	DPRINT(("mask=0x%lx\n", pending));
-
-	while (pending) {
-		cur = ctx->ctx_pmds[n].val;
-
-		/*
-		 * A counting PMD is split in two: the bits covered by
-		 * ovfl_val go to the thread machine state, the remaining
-		 * upper bits stay in the context soft-counter.
-		 */
-		if (PMD_IS_COUNTING(n)) {
-			ctx->ctx_pmds[n].val = cur & ~hw_bits;
-			cur &= hw_bits;
-		}
-		th->pmds[n] = cur;
-
-		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
-			n,
-			th->pmds[n],
-			ctx->ctx_pmds[n].val));
-
-		pending >>= 1;
-		n++;
-	}
-}
-
-/*
- * propagate the PMC values held in the context to the thread state.
- *
- * Every index up to the highest bit set in ctx_all_pmcs[0] is copied;
- * the individual mask bits are not tested.
- */
-static inline void
-pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
-{
-	struct thread_struct *th = &task->thread;
-	unsigned long pending = ctx->ctx_all_pmcs[0];
-	int n = 0;
-
-	DPRINT(("mask=0x%lx\n", pending));
-
-	while (pending) {
-		/* PMC values are copied verbatim, no splitting is done */
-		th->pmcs[n] = ctx->ctx_pmcs[n];
-		DPRINT(("pmc[%d]=0x%lx\n", n, th->pmcs[n]));
-
-		pending >>= 1;
-		n++;
-	}
-}
-
-
-
-/*
- * load the PMC registers selected by mask from pmcs[], then issue a
- * data serialization.
- */
-static inline void
-pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
-{
-	int n = 0;
-
-	while (mask) {
-		if (mask & 0x1)
-			ia64_set_pmc(n, pmcs[n]);
-		mask >>= 1;
-		n++;
-	}
-	ia64_srlz_d();
-}
-
-/*
- * byte-wise comparison of two uuids; the result is 0 when they are equal
- * (memcmp() semantics).
- */
-static inline int
-pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
-{
-	int diff;
-
-	diff = memcmp(a, b, sizeof(pfm_uuid_t));
-
-	return diff;
-}
-
-/*
- * invoke the optional fmt_exit callback of a buffer format.
- * Returns 0 when the format provides no such callback.
- */
-static inline int
-pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
-{
-	if (fmt->fmt_exit == NULL)
-		return 0;
-
-	return (*fmt->fmt_exit)(task, buf, regs);
-}
-
-/*
- * invoke the optional fmt_getsize callback of a buffer format.
- * Returns 0 when the format provides no such callback.
- */
-static inline int
-pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
-{
-	if (fmt->fmt_getsize == NULL)
-		return 0;
-
-	return (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
-}
-
-
-/*
- * invoke the optional fmt_validate callback of a buffer format.
- * Returns 0 when the format provides no such callback.
- */
-static inline int
-pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
-		     int cpu, void *arg)
-{
-	if (fmt->fmt_validate == NULL)
-		return 0;
-
-	return (*fmt->fmt_validate)(task, flags, cpu, arg);
-}
-
-/*
- * invoke the optional fmt_init callback of a buffer format.
- * Returns 0 when the format provides no such callback.
- */
-static inline int
-pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
-		 int cpu, void *arg)
-{
-	if (fmt->fmt_init == NULL)
-		return 0;
-
-	return (*fmt->fmt_init)(task, buf, flags, cpu, arg);
-}
-
-/*
- * invoke the optional fmt_restart callback of a buffer format.
- * Returns 0 when the format provides no such callback.
- */
-static inline int
-pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
-{
-	if (fmt->fmt_restart == NULL)
-		return 0;
-
-	return (*fmt->fmt_restart)(task, ctrl, buf, regs);
-}
-
-/*
- * invoke the optional fmt_restart_active callback of a buffer format.
- * Returns 0 when the format provides no such callback.
- */
-static inline int
-pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
-{
-	if (fmt->fmt_restart_active == NULL)
-		return 0;
-
-	return (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
-}
-
-/*
- * walk pfm_buffer_fmt_list looking for a format with the given uuid.
- * Returns the matching entry, or NULL when there is none.
- * This helper does not take pfm_buffer_fmt_lock itself.
- */
-static pfm_buffer_fmt_t *
-__pfm_find_buffer_fmt(pfm_uuid_t uuid)
-{
-	struct list_head *cursor;
-	pfm_buffer_fmt_t *candidate;
-	pfm_buffer_fmt_t *found = NULL;
-
-	list_for_each(cursor, &pfm_buffer_fmt_list) {
-		candidate = list_entry(cursor, pfm_buffer_fmt_t, fmt_list);
-		if (pfm_uuid_cmp(uuid, candidate->fmt_uuid) != 0)
-			continue;
-		found = candidate;
-		break;
-	}
-	return found;
-}
-
-/*
- * find a buffer format based on its uuid; the list walk is done with
- * pfm_buffer_fmt_lock held. Returns NULL when no format matches.
- */
-static pfm_buffer_fmt_t *
-pfm_find_buffer_fmt(pfm_uuid_t uuid)
-{
-	pfm_buffer_fmt_t *match;
-
-	spin_lock(&pfm_buffer_fmt_lock);
-	match = __pfm_find_buffer_fmt(uuid);
-	spin_unlock(&pfm_buffer_fmt_lock);
-
-	return match;
-}
-
-/*
- * add a sampling buffer format to pfm_buffer_fmt_list.
- *
- * Returns 0 on success, -EINVAL when fmt, its name or its handler is
- * NULL, and -EBUSY when a format with the same uuid is already listed.
- */
-int
-pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
-{
-	int err;
-
-	/* some sanity checks */
-	if (fmt == NULL || fmt->fmt_name == NULL)
-		return -EINVAL;
-
-	/* we need at least a handler */
-	if (fmt->fmt_handler == NULL)
-		return -EINVAL;
-
-	/*
-	 * XXX: need check validity of fmt_arg_size
-	 */
-
-	spin_lock(&pfm_buffer_fmt_lock);
-
-	if (__pfm_find_buffer_fmt(fmt->fmt_uuid) == NULL) {
-		list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
-		printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
-		err = 0;
-	} else {
-		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
-		err = -EBUSY;
-	}
-
-	spin_unlock(&pfm_buffer_fmt_lock);
-
-	return err;
-}
-EXPORT_SYMBOL(pfm_register_buffer_fmt);
-
-/*
- * remove the sampling buffer format identified by uuid from
- * pfm_buffer_fmt_list.
- *
- * Returns 0 on success, -EINVAL when no such format is listed.
- */
-int
-pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
-{
-	pfm_buffer_fmt_t *victim;
-	int err;
-
-	spin_lock(&pfm_buffer_fmt_lock);
-
-	victim = __pfm_find_buffer_fmt(uuid);
-	if (victim) {
-		list_del_init(&victim->fmt_list);
-		printk(KERN_INFO "perfmon: removed sampling format: %s\n", victim->fmt_name);
-		err = 0;
-	} else {
-		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
-		err = -EINVAL;
-	}
-
-	spin_unlock(&pfm_buffer_fmt_lock);
-
-	return err;
-}
-EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
-
-extern void update_pal_halt_status(int);
-
-/*
- * account for a new monitoring session in pfm_sessions.
- *
- * System-wide and per-task sessions are mutually exclusive, and a CPU
- * can host at most one system-wide session. Returns 0 on success and
- * -EBUSY when the request conflicts with existing sessions.
- */
-static int
-pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
-{
-	unsigned long flags;
-	int err = -EBUSY;
-
-	/*
-	 * validity checks on cpu_mask have been done upstream
-	 */
-	LOCK_PFS(flags);
-
-	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-	if (!is_syswide) {
-		/* per-task session: refused while any system-wide session exists */
-		if (pfm_sessions.pfs_sys_sessions)
-			goto unlock;
-		pfm_sessions.pfs_task_sessions++;
-	} else {
-		/*
-		 * cannot mix system wide and per-task sessions
-		 */
-		if (pfm_sessions.pfs_task_sessions > 0UL) {
-			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
-				pfm_sessions.pfs_task_sessions));
-			goto unlock;
-		}
-
-		/* only one system-wide session per CPU */
-		if (pfm_sessions.pfs_sys_session[cpu]) {
-			DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
-				pfm_sessions.pfs_sys_session[cpu]->pid,
-				cpu));
-			goto unlock;
-		}
-
-		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
-
-		pfm_sessions.pfs_sys_session[cpu] = task;
-		pfm_sessions.pfs_sys_sessions++;
-	}
-
-	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-	/*
-	 * disable default_idle() to go to PAL_HALT
-	 */
-	update_pal_halt_status(0);
-
-	err = 0;
-unlock:
-	UNLOCK_PFS(flags);
-
-	return err;
-}
-
-/*
- * undo the accounting done by pfm_reserve_session(). ctx may be NULL.
- * Always returns 0.
- */
-static int
-pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
-{
-	unsigned long flags;
-
-	/*
-	 * validity checks on cpu_mask have been done upstream
-	 */
-	LOCK_PFS(flags);
-
-	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-	if (!is_syswide) {
-		pfm_sessions.pfs_task_sessions--;
-	} else {
-		pfm_sessions.pfs_sys_session[cpu] = NULL;
-		/*
-		 * would not work with perfmon+more than one bit in cpu_mask
-		 */
-		if (ctx && ctx->ctx_fl_using_dbreg) {
-			if (pfm_sessions.pfs_sys_use_dbregs != 0) {
-				pfm_sessions.pfs_sys_use_dbregs--;
-			} else {
-				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
-			}
-		}
-		pfm_sessions.pfs_sys_sessions--;
-	}
-
-	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-	/*
-	 * if possible, enable default_idle() to go into PAL_HALT
-	 */
-	if (pfm_sessions.pfs_sys_sessions == 0 && pfm_sessions.pfs_task_sessions == 0)
-		update_pal_halt_status(1);
-
-	UNLOCK_PFS(flags);
-
-	return 0;
-}
-
-/*
- * removes virtual mapping of the sampling buffer.
- * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
- * a PROTECT_CTX() section.
- *
- * Returns -EINVAL when the sanity checks on the arguments fail and 0
- * otherwise.
- * NOTE(review): a failing pfm_do_munmap() is only logged; 0 is still
- * returned to the caller.
- */
-static int
-pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
-{
-	int err;
-
-	/* sanity checks */
-	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
-		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
-		return -EINVAL;
-	}
-
-	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
-
-	/*
-	 * the unmapping itself is done with mmap_sem held for writing
-	 */
-	down_write(&task->mm->mmap_sem);
-
-	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
-
-	err = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
-
-	up_write(&task->mm->mmap_sem);
-
-	if (err != 0)
-		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
-
-	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, err));
-
-	return 0;
-}
-
-/*
- * free actual physical storage used by sampling buffer
- *
- * NOTE: this function is compiled out by the surrounding "#if 0".
- * It calls the buffer format exit callback, releases the buffer with
- * pfm_rvfree() and clears ctx_smpl_hdr/ctx_smpl_size.
- * Returns 0 on success, -EINVAL when the context has no buffer.
- */
-#if 0
-static int
-pfm_free_smpl_buffer(pfm_context_t *ctx)
-{
- pfm_buffer_fmt_t *fmt;
-
- if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
-
- /*
- * we won't use the buffer format anymore
- */
- fmt = ctx->ctx_buf_fmt;
-
- DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
- ctx->ctx_smpl_hdr,
- ctx->ctx_smpl_size,
- ctx->ctx_smpl_vaddr));
-
- pfm_buf_fmt_exit(fmt, current, NULL, NULL);
-
- /*
- * free the buffer
- */
- pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
-
- ctx->ctx_smpl_hdr = NULL;
- ctx->ctx_smpl_size = 0UL;
-
- return 0;
-
-invalid_free:
- printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
- return -EINVAL;
-}
-#endif
-
-/*
- * run the exit callback of a sampling buffer format on behalf of the
- * current task. A NULL fmt is silently ignored.
- */
-static inline void
-pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
-{
-	if (fmt != NULL)
-		pfm_buf_fmt_exit(fmt, current, NULL, NULL);
-}
-
-/*
- * pfmfs should _never_ be mounted by userland - too much of security hassle,
- * no real gain from having the whole whorehouse mounted. So we don't need
- * any operations on the root directory. However, we need a non-trivial
- * d_name - pfm: will go nicely and kill the special-casing in procfs.
- */
-static struct vfsmount *pfmfs_mnt;
-
-/*
- * register the pfm filesystem type and create its kernel-internal
- * mount (pfmfs_mnt). When the mount fails the filesystem type is
- * unregistered again. Returns 0 on success or a negative error code.
- */
-static int __init
-init_pfm_fs(void)
-{
-	int err;
-
-	err = register_filesystem(&pfm_fs_type);
-	if (err)
-		return err;
-
-	pfmfs_mnt = kern_mount(&pfm_fs_type);
-	if (!IS_ERR(pfmfs_mnt))
-		return 0;
-
-	/* mount failed: report its error and undo the registration */
-	err = PTR_ERR(pfmfs_mnt);
-	unregister_filesystem(&pfm_fs_type);
-
-	return err;
-}
-
-/*
- * counterpart of init_pfm_fs(): unregister the filesystem type, then
- * drop the reference on the internal mount.
- */
-static void __exit
-exit_pfm_fs(void)
-{
-	struct vfsmount *mnt = pfmfs_mnt;
-
-	unregister_filesystem(&pfm_fs_type);
-	mntput(mnt);
-}
-
-/*
- * read() on a perfmon file descriptor: hands one pfm_msg_t from the
- * context message queue to user space.
- *
- * The caller sleeps (interruptibly) until a message is available unless
- * the file is O_NONBLOCK.
- *
- * Returns sizeof(pfm_msg_t) on success, -EINVAL for a bad file, a NULL
- * context, a too small user buffer or a missing message, -EAGAIN for a
- * non-blocking read on an empty queue, -EINTR when a signal is pending
- * and -EFAULT when the copy to user space fails.
- *
- * NOTE(review): XEN_NOT_SUPPORTED_YET is defined outside this block;
- * presumably it bails out early when running on Xen - confirm.
- */
-static ssize_t
-pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
-{
-	pfm_context_t *ctx;
-	pfm_msg_t *msg;
-	ssize_t ret;
-	unsigned long flags;
-	DECLARE_WAITQUEUE(wait, current);
-	XEN_NOT_SUPPORTED_YET;
-	if (PFM_IS_FILE(filp) == 0) {
-		/* message used to name pfm_poll (copy/paste leftover) */
-		printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", current->pid);
-		return -EINVAL;
-	}
-
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
-		return -EINVAL;
-	}
-
-	/*
-	 * check even when there is no message
-	 */
-	if (size < sizeof(pfm_msg_t)) {
-		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
-		return -EINVAL;
-	}
-
-	PROTECT_CTX(ctx, flags);
-
-	/*
-	 * put ourselves on the wait queue
-	 */
-	add_wait_queue(&ctx->ctx_msgq_wait, &wait);
-
-
-	for(;;) {
-		/*
-		 * check wait queue
-		 */
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
-
-		/* a message is queued: leave the loop with the context still protected */
-		ret = 0;
-		if(PFM_CTXQ_EMPTY(ctx) == 0) break;
-
-		UNPROTECT_CTX(ctx, flags);
-
-		/*
-		 * check non-blocking read
-		 */
-		ret = -EAGAIN;
-		if(filp->f_flags & O_NONBLOCK) break;
-
-		/*
-		 * check pending signals
-		 */
-		if(signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-		/*
-		 * no message, so wait
-		 */
-		schedule();
-
-		PROTECT_CTX(ctx, flags);
-	}
-	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
-
-	/* on the error exits above the context is already unprotected */
-	if (ret < 0) goto abort;
-
-	ret = -EINVAL;
-	msg = pfm_get_next_msg(ctx);
-	if (msg == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
-		goto abort_locked;
-	}
-
-	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
-
-	ret = -EFAULT;
-	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
-
-abort_locked:
-	UNPROTECT_CTX(ctx, flags);
-abort:
-	return ret;
-}
-
-/*
- * write() is not supported on a perfmon file descriptor: the request is
- * traced and rejected with -EINVAL.
- */
-static ssize_t
-pfm_write(struct file *file, const char __user *ubuf,
-	  size_t size, loff_t *ppos)
-{
-	ssize_t rc = -EINVAL;
-
-	DPRINT(("pfm_write called\n"));
-
-	return rc;
-}
-
-/*
- * poll() on a perfmon file descriptor: the descriptor is readable
- * (POLLIN | POLLRDNORM) when the context message queue is not empty.
- * Returns 0 for a bad file or a NULL context.
- */
-static unsigned int
-pfm_poll(struct file *filp, poll_table * wait)
-{
-	pfm_context_t *ctx;
-	unsigned long flags;
-	unsigned int events;
-
-	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
-		return 0;
-	}
-
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
-		return 0;
-	}
-
-	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
-
-	poll_wait(filp, &ctx->ctx_msgq_wait, wait);
-
-	/* the queue state is sampled with the context protected */
-	PROTECT_CTX(ctx, flags);
-
-	events = (PFM_CTXQ_EMPTY(ctx) == 0) ? (POLLIN | POLLRDNORM) : 0;
-
-	UNPROTECT_CTX(ctx, flags);
-
-	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, events));
-
-	return events;
-}
-
-/*
- * ioctl() is not supported on a perfmon file descriptor: the request is
- * traced and rejected with -EINVAL.
- */
-static int
-pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int rc = -EINVAL;
-
-	DPRINT(("pfm_ioctl called\n"));
-
-	return rc;
-}
-
-/*
- * thin wrapper around fasync_helper() operating on the context async
- * queue; returns whatever fasync_helper() returns.
- *
- * interrupt cannot be masked when coming here
- */
-static inline int
-pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
-{
-	int rc = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
-
-	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-		current->pid,
-		fd,
-		on,
-		ctx->ctx_async_queue, rc));
-
-	return rc;
-}
-
-/*
- * fasync() file operation: validates the file and its context, then
- * delegates to pfm_do_fasync(). Returns -EBADF for a bad file or a
- * NULL context, otherwise the result of pfm_do_fasync().
- */
-static int
-pfm_fasync(int fd, struct file *filp, int on)
-{
-	pfm_context_t *ctx;
-	int rc;
-
-	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
-		return -EBADF;
-	}
-
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
-		return -EBADF;
-	}
-
-	/*
-	 * we cannot mask interrupts during this call because it may go
-	 * to sleep if memory is not readily available.
-	 *
-	 * We are protected from the context disappearing by the
-	 * get_fd()/put_fd() done in caller. Serialization of this
-	 * function is ensured by caller.
-	 */
-	rc = pfm_do_fasync(fd, filp, ctx, on);
-
-	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-		fd,
-		on,
-		ctx->ctx_async_queue, rc));
-
-	return rc;
-}
-
-#ifdef CONFIG_SMP
-/*
- * this function is exclusively called from pfm_close().
- * The context is not protected at that time, nor are interrupts
- * on the remote CPU. That's necessary to avoid deadlocks.
- *
- * info is the pfm_context_t to unload. Nothing is done (beyond logging
- * an error) when the function runs on a CPU other than ctx->ctx_cpu, or
- * when the PMU owner or the PMU context of this CPU is not the expected
- * one.
- */
-static void
-pfm_syswide_force_stop(void *info)
-{
-	pfm_context_t *ctx = (pfm_context_t *)info;
-	struct pt_regs *regs = task_pt_regs(current);
-	struct task_struct *owner;
-	unsigned long flags;
-	int ret;
-
-	if (ctx->ctx_cpu != smp_processor_id()) {
-		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n",
-			ctx->ctx_cpu,
-			smp_processor_id());
-		return;
-	}
-	owner = GET_PMU_OWNER();
-	if (owner != ctx->ctx_task) {
-		/*
-		 * the two pointers differ, so one of them may be NULL:
-		 * do not dereference it just to print a pid (-1 is
-		 * printed instead).
-		 */
-		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
-			smp_processor_id(),
-			owner ? owner->pid : -1,
-			ctx->ctx_task ? ctx->ctx_task->pid : -1);
-		return;
-	}
-	if (GET_PMU_CTX() != ctx) {
-		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
-			smp_processor_id(),
-			GET_PMU_CTX(), ctx);
-		return;
-	}
-
-	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));
-	/*
-	 * the context is already protected in pfm_close(), we simply
-	 * need to mask interrupts to avoid a PMU interrupt race on
-	 * this CPU
-	 */
-	local_irq_save(flags);
-
-	ret = pfm_context_unload(ctx, NULL, 0, regs);
-	if (ret) {
-		DPRINT(("context_unload returned %d\n", ret));
-	}
-
-	/*
-	 * unmask interrupts, PMU interrupts are now spurious here
-	 */
-	local_irq_restore(flags);
-}
-
-/*
- * run pfm_syswide_force_stop() for ctx on the CPU recorded in
- * ctx->ctx_cpu, through smp_call_function_single().
- */
-static void
-pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
-{
-	int rc;
-
-	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
-
-	rc = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
-
-	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, rc));
-}
-#endif /* CONFIG_SMP */
-
-/*
- * called for each close(). Partially free resources.
- * When caller is self-monitoring, the context is unloaded.
- *
- * Summary of the steps below:
- *  - the file and its context are validated (-EBADF on failure);
- *  - if FASYNC is set on the file, it is removed from the async queue;
- *  - with the context protected: when the context task is current, the
- *    context is unloaded (on SMP, through an IPI to ctx->ctx_cpu when a
- *    system-wide context is loaded on another CPU);
- *  - after the context is unprotected, the user mapping of the sampling
- *    buffer is removed when both ctx_smpl_vaddr and current->mm are set.
- * Returns 0 once the file and context have been validated.
- */
-static int
-pfm_flush(struct file *filp, fl_owner_t id)
-{
- pfm_context_t *ctx;
- struct task_struct *task;
- struct pt_regs *regs;
- unsigned long flags;
- unsigned long smpl_buf_size = 0UL;
- void *smpl_buf_vaddr = NULL;
- int state, is_system;
-
- if (PFM_IS_FILE(filp) == 0) {
- DPRINT(("bad magic for\n"));
- return -EBADF;
- }
-
- ctx = (pfm_context_t *)filp->private_data;
- if (ctx == NULL) {
- printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
- return -EBADF;
- }
-
- /*
- * remove our file from the async queue, if we use this mode.
- * This can be done without the context being protected. We come
- * here when the context has become unreacheable by other tasks.
- *
- * We may still have active monitoring at this point and we may
- * end up in pfm_overflow_handler(). However, fasync_helper()
- * operates with interrupts disabled and it cleans up the
- * queue. If the PMU handler is called prior to entering
- * fasync_helper() then it will send a signal. If it is
- * invoked after, it will find an empty queue and no
- * signal will be sent. In both case, we are safe
- */
- if (filp->f_flags & FASYNC) {
- DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
- pfm_do_fasync (-1, filp, ctx, 0);
- }
-
- PROTECT_CTX(ctx, flags);
-
- state = ctx->ctx_state;
- is_system = ctx->ctx_fl_system;
-
- task = PFM_CTX_TASK(ctx);
- regs = task_pt_regs(task);
-
- DPRINT(("ctx_state=%d is_current=%d\n",
- state,
- task == current ? 1 : 0));
-
- /*
- * if state == UNLOADED, then task is NULL
- */
-
- /*
- * we must stop and unload because we are losing access to the context.
- */
- if (task == current) {
-#ifdef CONFIG_SMP
- /*
- * the task IS the owner but it migrated to another CPU: that's bad
- * but we must handle this cleanly. Unfortunately, the kernel does
- * not provide a mechanism to block migration (while the context is loaded).
- *
- * We need to release the resource on the ORIGINAL cpu.
- */
- if (is_system && ctx->ctx_cpu != smp_processor_id()) {
-
- DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
- /*
- * keep context protected but unmask interrupt for IPI
- */
- local_irq_restore(flags);
-
- pfm_syswide_cleanup_other_cpu(ctx);
-
- /*
- * restore interrupt masking
- */
- local_irq_save(flags);
-
- /*
- * context is unloaded at this point
- */
- } else
-#endif /* CONFIG_SMP */
- {
-
- DPRINT(("forcing unload\n"));
- /*
- * stop and unload, returning with state UNLOADED
- * and session unreserved.
- */
- pfm_context_unload(ctx, NULL, 0, regs);
-
- DPRINT(("ctx_state=%d\n", ctx->ctx_state));
- }
- }
-
- /*
- * remove virtual mapping, if any, for the calling task.
- * cannot reset ctx field until last user is calling close().
- *
- * ctx_smpl_vaddr must never be cleared because it is needed
- * by every task with access to the context
- *
- * When called from do_exit(), the mm context is gone already, therefore
- * mm is NULL, i.e., the VMA is already gone and we do not have to
- * do anything here
- */
- if (ctx->ctx_smpl_vaddr && current->mm) {
- smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
- smpl_buf_size = ctx->ctx_smpl_size;
- }
-
- UNPROTECT_CTX(ctx, flags);
-
- /*
- * if there was a mapping, then we systematically remove it
- * at this point. Cannot be done inside critical section
- * because some VM function reenables interrupts.
- *
- */
- if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
-
- return 0;
-}
-/*
- * called either on explicit close() or from exit_files().
- * Only the LAST user of the file gets to this point, i.e., it is
- * called only ONCE.
- *
- * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
- * (fput()),i.e, last task to access the file. Nobody else can access the
- * file at this point.
- *
- * When called from exit_files(), the VMA has been freed because exit_mm()
- * is executed before exit_files().
- *
- * When called from exit_files(), the current task is not yet ZOMBIE but we
- * flush the PMU state to the context.
- *
- * Summary of the cases handled below (context protected):
- *  - state UNLOADED: go straight to the release path (doit);
- *  - state MASKED in blocking mode: flag the context as going zombie,
- *    wake the monitored task and sleep on ctx_zombieq until woken;
- *  - otherwise, if the context task is not current: on SMP the context
- *    is switched to ZOMBIE and its freeing is deferred, on UP it is
- *    unloaded on the spot.
- * The release path detaches the sampling buffer from the context,
- * unreserves the session for a ZOMBIE context, clears
- * filp->private_data and, once the context is unprotected, frees the
- * sampling buffer and (unless deferred) the context itself. When
- * running on Xen as the primary xenoprof domain, a PFM_DESTROY_CONTEXT
- * hypercall is issued last.
- *
- * Returns -EBADF for a bad file or a NULL context, 0 otherwise.
- */
-static int
-pfm_close(struct inode *inode, struct file *filp)
-{
- pfm_context_t *ctx;
- struct task_struct *task;
- struct pt_regs *regs;
- DECLARE_WAITQUEUE(wait, current);
- unsigned long flags;
- unsigned long smpl_buf_size = 0UL;
- void *smpl_buf_addr = NULL;
- int free_possible = 1;
- int state, is_system;
-
- DPRINT(("pfm_close called private=%p\n", filp->private_data));
-
- if (PFM_IS_FILE(filp) == 0) {
- DPRINT(("bad magic\n"));
- return -EBADF;
- }
-
- ctx = (pfm_context_t *)filp->private_data;
- if (ctx == NULL) {
- printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
- return -EBADF;
- }
-
- PROTECT_CTX(ctx, flags);
-
- state = ctx->ctx_state;
- is_system = ctx->ctx_fl_system;
-
- task = PFM_CTX_TASK(ctx);
- regs = task_pt_regs(task);
-
- DPRINT(("ctx_state=%d is_current=%d\n",
- state,
- task == current ? 1 : 0));
-
- /*
- * if task == current, then pfm_flush() unloaded the context
- */
- if (state == PFM_CTX_UNLOADED) goto doit;
-
- /*
- * context is loaded/masked and task != current, we need to
- * either force an unload or go zombie
- */
-
- /*
- * The task is currently blocked or will block after an overflow.
- * we must force it to wakeup to get out of the
- * MASKED state and transition to the unloaded state by itself.
- *
- * This situation is only possible for per-task mode
- */
- if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {
-
- /*
- * set a "partial" zombie state to be checked
- * upon return from down() in pfm_handle_work().
- *
- * We cannot use the ZOMBIE state, because it is checked
- * by pfm_load_regs() which is called upon wakeup from down().
- * In such case, it would free the context and then we would
- * return to pfm_handle_work() which would access the
- * stale context. Instead, we set a flag invisible to pfm_load_regs()
- * but visible to pfm_handle_work().
- *
- * For some window of time, we have a zombie context with
- * ctx_state = MASKED and not ZOMBIE
- */
- ctx->ctx_fl_going_zombie = 1;
-
- /*
- * force task to wake up from MASKED state
- */
- complete(&ctx->ctx_restart_done);
-
- DPRINT(("waking up ctx_state=%d\n", state));
-
- /*
- * put ourself to sleep waiting for the other
- * task to report completion
- *
- * the context is protected by mutex, therefore there
- * is no risk of being notified of completion before
- * begin actually on the waitq.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&ctx->ctx_zombieq, &wait);
-
- UNPROTECT_CTX(ctx, flags);
-
- /*
- * XXX: check for signals :
- * - ok for explicit close
- * - not ok when coming from exit_files()
- */
- schedule();
-
-
- PROTECT_CTX(ctx, flags);
-
-
- remove_wait_queue(&ctx->ctx_zombieq, &wait);
- set_current_state(TASK_RUNNING);
-
- /*
- * context is unloaded at this point
- */
- DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
- }
- else if (task != current) {
-#ifdef CONFIG_SMP
- /*
- * switch context to zombie state
- */
- ctx->ctx_state = PFM_CTX_ZOMBIE;
-
- DPRINT(("zombie ctx for [%d]\n", task->pid));
- /*
- * cannot free the context on the spot. deferred until
- * the task notices the ZOMBIE state
- */
- free_possible = 0;
-#else
- pfm_context_unload(ctx, NULL, 0, regs);
-#endif
- }
-
-doit:
- /* reload state, may have changed during opening of critical section */
- state = ctx->ctx_state;
-
- /*
- * the context is still attached to a task (possibly current)
- * we cannot destroy it right now
- */
-
- /*
- * we must free the sampling buffer right here because
- * we cannot rely on it being cleaned up later by the
- * monitored task. It is not possible to free vmalloc'ed
- * memory in pfm_load_regs(). Instead, we remove the buffer
- * now. should there be subsequent PMU overflow originally
- * meant for sampling, the will be converted to spurious
- * and that's fine because the monitoring tools is gone anyway.
- */
- if (ctx->ctx_smpl_hdr) {
- smpl_buf_addr = ctx->ctx_smpl_hdr;
- smpl_buf_size = ctx->ctx_smpl_size;
- /* no more sampling */
- ctx->ctx_smpl_hdr = NULL;
- ctx->ctx_fl_is_sampling = 0;
- }
-
- DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
- state,
- free_possible,
- smpl_buf_addr,
- smpl_buf_size));
-
- if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);
-
- /*
- * UNLOADED means that the session has already been unreserved.
- */
- if (state == PFM_CTX_ZOMBIE) {
- pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
- }
-
- /*
- * disconnect file descriptor from context must be done
- * before we unlock.
- */
- filp->private_data = NULL;
-
- /*
- * if we free on the spot, the context is now completely unreacheable
- * from the callers side. The monitored task side is also cut, so we
- * can freely cut.
- *
- * If we have a deferred free, only the caller side is disconnected.
- */
- UNPROTECT_CTX(ctx, flags);
-
- /*
- * All memory free operations (especially for vmalloc'ed memory)
- * MUST be done with interrupts ENABLED.
- */
- if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size);
-
- /*
- * return the memory used by the context
- */
- if (free_possible) pfm_context_free(ctx);
-
- if (is_running_on_xen()) {
- if (is_xenoprof_primary()) {
- int ret = HYPERVISOR_perfmon_op(PFM_DESTROY_CONTEXT,
- NULL, 0);
- if (ret)
- printk("%s:%d PFM_DESTROY_CONTEXT hypercall "
- "failed\n", __func__, __LINE__);
- }
- }
- return 0;
-}
-
-/*
- * open() is never allowed on a perfmon file: the attempt is traced and
- * rejected with -ENXIO.
- */
-static int
-pfm_no_open(struct inode *irrelevant, struct file *dontcare)
-{
-	int rc = -ENXIO;
-
-	DPRINT(("pfm_no_open called\n"));
-
-	return rc;
-}
-
-
-
-static struct file_operations pfm_file_ops = {
- .llseek = no_llseek,
- .read = pfm_read, /* delivers one pfm_msg_t per successful call */
- .write = pfm_write, /* always fails with -EINVAL */
- .poll = pfm_poll, /* readable when the message queue is not empty */
- .ioctl = pfm_ioctl, /* always fails with -EINVAL */
- .open = pfm_no_open, /* special open code to disallow open via /proc */
- .fasync = pfm_fasync,
- .release = pfm_close, /* called once, for the last user of the file */
- .flush = pfm_flush /* called for each close() */
-};
-
-/*
- * d_delete callback for pfmfs dentries: unconditionally returns 1.
- * NOTE(review): presumably this asks the VFS to drop the dentry as soon
- * as it is no longer referenced - confirm against the VFS documentation.
- */
-static int
-pfmfs_delete_dentry(struct dentry *dentry)
-{
-	const int drop_it = 1;
-
-	return drop_it;
-}
-
-static struct dentry_operations pfmfs_dentry_operations = {
- .d_delete = pfmfs_delete_dentry, /* always returns 1 */
-};
-
-
-/*
- * allocate a file descriptor for a new perfmon context, backed by a
- * fresh file, inode and dentry on the internal pfmfs mount.
- *
- * On success the descriptor is installed in the current task, the file
- * is returned through cfile and the (non-negative) fd is returned.
- * On failure a negative error code is returned (-ENFILE when no fd,
- * file or inode could be obtained, -ENOMEM when the dentry allocation
- * fails) and everything allocated so far is released.
- */
-static int
-pfm_alloc_fd(struct file **cfile)
-{
-	int fd, ret = 0;
-	struct file *file = NULL;
-	struct inode * inode;
-	char name[32];
-	struct qstr this;
-
-	fd = get_unused_fd();
-	if (fd < 0) return -ENFILE;
-
-	ret = -ENFILE;
-
-	file = get_empty_filp();
-	if (!file) goto out;
-
-	/*
-	 * allocate a new inode
-	 */
-	inode = new_inode(pfmfs_mnt->mnt_sb);
-	if (!inode) goto out;
-
-	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
-
-	inode->i_mode = S_IFCHR|S_IRUGO;
-	inode->i_uid = current->fsuid;
-	inode->i_gid = current->fsgid;
-
-	/* the dentry name is the inode number between brackets */
-	sprintf(name, "[%lu]", inode->i_ino);
-	this.name = name;
-	this.len = strlen(name);
-	this.hash = inode->i_ino;
-
-	ret = -ENOMEM;
-
-	/*
-	 * allocate a new dcache entry
-	 */
-	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
-	if (!file->f_dentry) {
-		/*
-		 * the inode is not attached to any dentry yet, so nobody
-		 * else will release it: drop our reference here to avoid
-		 * leaking it.
-		 */
-		iput(inode);
-		goto out;
-	}
-
-	file->f_dentry->d_op = &pfmfs_dentry_operations;
-
-	d_add(file->f_dentry, inode);
-	file->f_vfsmnt = mntget(pfmfs_mnt);
-	file->f_mapping = inode->i_mapping;
-
-	file->f_op = &pfm_file_ops;
-	file->f_mode = FMODE_READ;
-	file->f_flags = O_RDONLY;
-	file->f_pos = 0;
-
-	/*
-	 * may have to delay until context is attached?
-	 */
-	fd_install(fd, file);
-
-	/*
-	 * the file structure we will use
-	 */
-	*cfile = file;
-
-	return fd;
-out:
-	if (file) put_filp(file);
-	put_unused_fd(fd);
-	return ret;
-}
-
-/*
- * undo pfm_alloc_fd(): clear the slot of fd in the file table of the
- * current task, release the file (when not NULL) and give the
- * descriptor number back.
- */
-static void
-pfm_free_fd(int fd, struct file *file)
-{
-	struct files_struct *cur_files = current->files;
-	struct fdtable *table;
-
-	/*
-	 * there is no fd_uninstall(), so we do it here
-	 */
-	spin_lock(&cur_files->file_lock);
-	table = files_fdtable(cur_files);
-	rcu_assign_pointer(table->fd[fd], NULL);
-	spin_unlock(&cur_files->file_lock);
-
-	if (file != NULL)
-		put_filp(file);
-
-	put_unused_fd(fd);
-}
-
-/*
- * map the kernel buffer starting at buf into vma at user address addr,
- * one page at a time and read-only.
- * Returns 0 on success, -ENOMEM as soon as one page cannot be mapped.
- */
-static int
-pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
-{
-	unsigned long pfn;
-
-	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
-
-	for (; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE, buf += PAGE_SIZE) {
-		/* page frame number backing the current kernel page */
-		pfn = ia64_tpa(buf) >> PAGE_SHIFT;
-
-		if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY) != 0)
-			return -ENOMEM;
-	}
-	return 0;
-}
-
-/*
- * allocate a sampling buffer and remaps it into the user address space of the task
- *
- * rsize is rounded up to a page boundary. On success the kernel address
- * and size of the buffer are recorded in ctx (ctx_smpl_hdr,
- * ctx_smpl_size), the user address in ctx_smpl_vaddr and *user_vaddr,
- * and 0 is returned.
- *
- * Returns -ENOMEM when the size exceeds RLIMIT_MEMLOCK or when any
- * allocation or mapping step fails; in that case the buffer is freed
- * and ctx no longer refers to it.
- *
- * NOTE(review): the return value of insert_vm_struct() is not checked.
- */
-static int
-pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
-{
-	struct mm_struct *mm = task->mm;
-	struct vm_area_struct *vma = NULL;
-	unsigned long size;
-	void *smpl_buf;
-
-
-	/*
-	 * the fixed header + requested size and align to page boundary
-	 */
-	size = PAGE_ALIGN(rsize);
-
-	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
-
-	/*
-	 * check requested size to avoid Denial-of-service attacks
-	 * XXX: may have to refine this test
-	 * Check against address space limit.
-	 *
-	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
-	 * 	return -ENOMEM;
-	 */
-	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
-		return -ENOMEM;
-
-	/*
-	 * We do the easy to undo allocations first.
-	 *
-	 * pfm_rvmalloc(), clears the buffer, so there is no leak
-	 */
-	smpl_buf = pfm_rvmalloc(size);
-	if (smpl_buf == NULL) {
-		DPRINT(("Can't allocate sampling buffer\n"));
-		return -ENOMEM;
-	}
-
-	DPRINT(("smpl_buf @%p\n", smpl_buf));
-
-	/* allocate vma */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!vma) {
-		DPRINT(("Cannot allocate vma\n"));
-		goto error_kmem;
-	}
-	memset(vma, 0, sizeof(*vma));
-
-	/*
-	 * partially initialize the vma for the sampling buffer
-	 */
-	vma->vm_mm = mm;
-	vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
-	vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
-
-	/*
-	 * Now we have everything we need and we can initialize
-	 * and connect all the data structures
-	 */
-
-	ctx->ctx_smpl_hdr = smpl_buf;
-	ctx->ctx_smpl_size = size; /* aligned size */
-
-	/*
-	 * Let's do the difficult operations next.
-	 *
-	 * now we atomically find some area in the address space and
-	 * remap the buffer in it.
-	 */
-	down_write(&task->mm->mmap_sem);
-
-	/* find some free area in address space, must have mmap sem held */
-	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
-	if (vma->vm_start == 0UL) {
-		DPRINT(("Cannot find unmapped area for size %ld\n", size));
-		up_write(&task->mm->mmap_sem);
-		goto error;
-	}
-	vma->vm_end = vma->vm_start + size;
-	vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
-
-	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
-
-	/* can only be applied to current task, need to have the mm semaphore held when called */
-	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
-		DPRINT(("Can't remap buffer\n"));
-		up_write(&task->mm->mmap_sem);
-		goto error;
-	}
-
-	/*
-	 * now insert the vma in the vm list for the process, must be
-	 * done with mmap lock held
-	 */
-	insert_vm_struct(mm, vma);
-
-	mm->total_vm += size >> PAGE_SHIFT;
-	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
-			vma_pages(vma));
-	up_write(&task->mm->mmap_sem);
-
-	/*
-	 * keep track of user level virtual address
-	 */
-	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
-	*(unsigned long *)user_vaddr = vma->vm_start;
-
-	return 0;
-
-error:
-	/*
-	 * the buffer is about to be freed: make sure the context does
-	 * not keep a dangling reference to it, otherwise a later cleanup
-	 * keyed on ctx_smpl_hdr would touch or free it a second time.
-	 */
-	ctx->ctx_smpl_hdr = NULL;
-	ctx->ctx_smpl_size = 0UL;
-
-	kmem_cache_free(vm_area_cachep, vma);
-error_kmem:
-	pfm_rvfree(smpl_buf, size);
-
-	return -ENOMEM;
-}
-
-/*
- * XXX: do something better here
- */
-/*
- * Return non-zero when the caller may NOT attach to 'task': the caller's
- * uid/gid must match all of the target's real, effective and saved ids,
- * unless the caller holds CAP_SYS_PTRACE.
- */
-static int
-pfm_bad_permissions(struct task_struct *task)
-{
-	int uid_ok, gid_ok;
-
-	/* same ownership test as ptrace_attach() */
-	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
-		current->uid,
-		current->gid,
-		task->euid,
-		task->suid,
-		task->uid,
-		task->egid,
-		task->sgid));
-
-	uid_ok = current->uid == task->euid
-	      && current->uid == task->suid
-	      && current->uid == task->uid;
-
-	gid_ok = current->gid == task->egid
-	      && current->gid == task->sgid
-	      && current->gid == task->gid;
-
-	/* ids all match: no need to consult the capability at all */
-	if (uid_ok && gid_ok) return 0;
-
-	return !capable(CAP_SYS_PTRACE);
-}
-
-/*
- * Validate the flags of a context-creation request.
- * Returns 0 when acceptable, -EINVAL otherwise. 'task' is currently unused.
- */
-static int
-pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
-{
-	int requested = pfx->ctx_flags;
-
-	/* a system-wide session is not allowed to use blocking notification */
-	if ((requested & PFM_FL_SYSTEM_WIDE) && (requested & PFM_FL_NOTIFY_BLOCK)) {
-		DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
-		return -EINVAL;
-	}
-
-	/* probably more to add here */
-	return 0;
-}
-
-/*
- * Bind a sampling buffer format to 'ctx'.
- *
- * Looks the format up by arg->ctx_smpl_buf_id, lets it validate the request,
- * asks it for a buffer size and, when that size is non-zero, allocates the
- * buffer and maps it into the caller (current); the user address is reported
- * back in arg->ctx_smpl_vaddr. Finally the format's init hook is run.
- *
- * Returns 0 on success, -EINVAL for an unknown format, otherwise the error
- * returned by the failing step.
- */
-static int
-pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
-		     unsigned int cpu, pfarg_context_t *arg)
-{
-	pfm_buffer_fmt_t *smpl_fmt;
-	unsigned long buf_size = 0UL;
-	void *user_addr = NULL;
-	void *fmt_arg = NULL;
-	int err;
-#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1)
-
-	smpl_fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
-	if (smpl_fmt == NULL) {
-		DPRINT(("[%d] cannot find buffer format\n", task->pid));
-		return -EINVAL;
-	}
-
-	/*
-	 * the format-specific argument, when there is one, sits directly
-	 * behind the pfarg_context_t in the same buffer
-	 */
-	if (smpl_fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);
-
-	err = pfm_buf_fmt_validate(smpl_fmt, task, ctx_flags, cpu, fmt_arg);
-
-	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, err));
-
-	if (err) return err;
-
-	/* from here on the context refers to the format */
-	ctx->ctx_buf_fmt = smpl_fmt;
-
-	/* a non-zero size means the format wants perfmon to allocate and map the buffer */
-	err = pfm_buf_fmt_getsize(smpl_fmt, task, ctx_flags, cpu, fmt_arg, &buf_size);
-	if (err) return err;
-
-	if (buf_size) {
-		/* the buffer is always mapped into the caller's address space */
-		err = pfm_smpl_buffer_alloc(current, ctx, buf_size, &user_addr);
-		if (err) return err;
-
-		/* report the user-level address back to the caller */
-		arg->ctx_smpl_vaddr = user_addr;
-	}
-
-	return pfm_buf_fmt_init(smpl_fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);
-}
-
-/*
- * Put the software (per-context) PMU state back to its defaults: every
- * implemented PMC from index 1 up gets its default value, the masks of
- * registers accessible to the context are rebuilt from pmu_conf, and the
- * record of used instruction/data debug registers is cleared.
- */
-static void
-pfm_reset_pmu_state(pfm_context_t *ctx)
-{
- int i;
-
- /*
- * install reset values for PMC.
- */
- for (i=1; PMC_IS_LAST(i) == 0; i++) {
- if (PMC_IS_IMPL(i) == 0) continue;
- ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
- DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
- }
- /*
- * PMD registers are not touched here; they are expected to be 0UL
- * already because the context is cleared with memset() when allocated
- * (NOTE(review): the allocation is not visible in this function - confirm)
- */
-
- /*
- * On context switched restore, we must restore ALL pmc and ALL pmd even
- * when they are not actively used by the task. In UP, the incoming process
- * may otherwise pick up left over PMC, PMD state from the previous process.
- * As opposed to PMD, stale PMC can cause harm to the incoming
- * process because they may change what is being measured.
- * Therefore, we must systematically reinstall the entire
- * PMC state. In SMP, the same thing is possible on the
- * same CPU but also between 2 CPUs.
- *
- * The problem with PMD is information leaking especially
- * to user level when psr.sp=0
- *
- * There is unfortunately no easy way to avoid this problem
- * on either UP or SMP. This definitely slows down the
- * pfm_load_regs() function.
- */
-
- /*
- * bitmask of all PMCs accessible to this context
- *
- * PMC0 is treated differently: its bit (bit 0) is masked out.
- */
- ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
-
- /*
- * bitmask of all PMDs that are accessible to this context
- */
- ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
-
- DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
-
- /*
- * useful in case of re-enable after disable
- */
- ctx->ctx_used_ibrs[0] = 0UL;
- ctx->ctx_used_dbrs[0] = 0UL;
-}
-
-/*
- * Report in *sz how many extra bytes of format-specific argument follow the
- * pfarg_context_t pointed to by 'arg'. *sz is 0 when no sampling format is
- * requested (null uuid). Returns 0 on success, -EINVAL for an unknown format.
- */
-static int
-pfm_ctx_getsize(void *arg, size_t *sz)
-{
-	pfarg_context_t *ctx_req = (pfarg_context_t *)arg;
-	pfm_buffer_fmt_t *smpl_fmt;
-
-	*sz = 0;
-
-	/* only look up a format when the request names one */
-	if (pfm_uuid_cmp(ctx_req->ctx_smpl_buf_id, pfm_null_uuid)) {
-
-		smpl_fmt = pfm_find_buffer_fmt(ctx_req->ctx_smpl_buf_id);
-		if (smpl_fmt == NULL) {
-			DPRINT(("cannot find buffer format\n"));
-			return -EINVAL;
-		}
-
-		/* just enough to copy in the user parameters of the format */
-		*sz = smpl_fmt->fmt_arg_size;
-		DPRINT(("arg_size=%lu\n", *sz));
-	}
-
-	return 0;
-}
-
-
-
-/*
- * Decide whether 'ctx' may be attached to 'task'.
- *
- * cannot attach if :
- * - kernel task (no mm)                          -> -EPERM
- * - task not owned by caller                     -> -EPERM
- * - blocking context loaded on the caller itself -> -EINVAL
- * - task is a zombie                             -> -EBUSY
- * - other task that is neither stopped nor traced -> -EBUSY
- *
- * Returns 0 when the attach is allowed. For a task other than current the
- * function also waits (wait_task_inactive) until the task is off any CPU.
- */
-static int
-pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
-{
- /*
- * no kernel task or task not owned by caller
- */
- if (task->mm == NULL) {
- DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
- return -EPERM;
- }
- if (pfm_bad_permissions(task)) {
- DPRINT(("no permission to attach to [%d]\n", task->pid));
- return -EPERM;
- }
- /*
- * cannot block in self-monitoring mode
- */
- if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
- DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
- return -EINVAL;
- }
-
- if (task->exit_state == EXIT_ZOMBIE) {
- DPRINT(("cannot attach to zombie task [%d]\n", task->pid));
- return -EBUSY;
- }
-
- /*
- * always ok for self
- */
- if (task == current) return 0;
-
- /* another task must be stopped (or traced) before we touch its state */
- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
- DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
- return -EBUSY;
- }
- /*
- * make sure the task is off any CPU
- */
- wait_task_inactive(task);
-
- /* more to come... */
-
- return 0;
-}
-
-/*
- * Resolve 'pid' to a task the context can be attached to.
- *
- * On success returns 0 and stores the task in *task; when the task is not
- * the caller, a reference taken with get_task_struct() is still held.
- * Returns -EPERM for pid < 2, -ESRCH when no such task exists, or the error
- * from pfm_task_incompatible() (the reference is dropped in that case).
- */
-static int
-pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
-{
-	struct task_struct *target = current;
-	int err;
-
-	/* XXX: need to add more checks here */
-	if (pid < 2) return -EPERM;
-
-	if (pid != current->pid) {
-
-		read_lock(&tasklist_lock);
-
-		target = find_task_by_pid(pid);
-
-		/* pin the task so it cannot go away while we operate on it */
-		if (target) get_task_struct(target);
-
-		read_unlock(&tasklist_lock);
-
-		if (target == NULL) return -ESRCH;
-	}
-
-	err = pfm_task_incompatible(ctx, target);
-	if (err) {
-		/* only a foreign task carries a reference that must be dropped */
-		if (target != current) pfm_put_task(target);
-		return err;
-	}
-
-	*task = target;
-	return 0;
-}
-
-
-
-/*
- * Create a new perfmon context on behalf of the caller.
- *
- * ctx:   ignored on entry; overwritten with the freshly allocated context
- * arg:   pfarg_context_t request; ctx_fd (and ctx_smpl_vaddr when a sampling
- *        buffer is set up) are filled in for the caller
- * count: unused here
- * regs:  only handed to the buffer format exit hook on the error path
- *
- * Returns 0 on success. On failure returns a negative error code after
- * releasing the file descriptor and the context allocated so far.
- *
- * When running on Xen, XENOPROF_init is issued to learn whether this domain
- * is the xenoprof primary; if it is, the creation request is also forwarded
- * to the hypervisor.
- */
-static int
-pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
- pfarg_context_t *req = (pfarg_context_t *)arg;
- struct file *filp;
- int ctx_flags;
- int ret;
-
- /* let's check the arguments first */
- ret = pfarg_is_sane(current, req);
- if (ret < 0) return ret;
-
- ctx_flags = req->ctx_flags;
-
- /* default error for the allocation below */
- ret = -ENOMEM;
-
- ctx = pfm_context_alloc();
- if (!ctx) goto error;
-
- /* on success ret is the new file descriptor */
- ret = pfm_alloc_fd(&filp);
- if (ret < 0) goto error_file;
-
- req->ctx_fd = ctx->ctx_fd = ret;
-
- /*
- * attach context to file
- */
- filp->private_data = ctx;
-
- /*
- * does the user want to sample?
- * (a non-null buffer format uuid requests a sampling buffer)
- */
- if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
- ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
- if (ret) goto buffer_error;
- }
-
- /*
- * init context protection lock
- */
- spin_lock_init(&ctx->ctx_lock);
-
- /*
- * context is unloaded
- */
- ctx->ctx_state = PFM_CTX_UNLOADED;
-
- /*
- * initialization of context's flags
- */
- ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
- ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
- ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
- ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
- /*
- * will move to set properties
- * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
- */
-
- /*
- * init the restart completion (not yet completed)
- */
- init_completion(&ctx->ctx_restart_done);
-
- /*
- * activation is used in SMP only
- */
- ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
- SET_LAST_CPU(ctx, -1);
-
- /*
- * initialize notification message queue
- */
- ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
- init_waitqueue_head(&ctx->ctx_msgq_wait);
- init_waitqueue_head(&ctx->ctx_zombieq);
-
- DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
- ctx,
- ctx_flags,
- ctx->ctx_fl_system,
- ctx->ctx_fl_block,
- ctx->ctx_fl_excl_idle,
- ctx->ctx_fl_no_msg,
- ctx->ctx_fd));
-
- /*
- * initialize soft PMU state
- */
- pfm_reset_pmu_state(ctx);
-
- if (is_running_on_xen()) {
- /*
- * kludge to get xenoprof.is_primary.
- * XENOPROF_init/ia64 is nop. so it is safe to call it here.
- */
- struct xenoprof_init init;
- ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
- if (ret)
- goto buffer_error;
- init_xenoprof_primary(init.is_primary);
-
- /* only the primary domain forwards the request to the hypervisor */
- if (is_xenoprof_primary()) {
- ret = HYPERVISOR_perfmon_op(PFM_CREATE_CONTEXT, arg, 0);
- if (ret)
- goto buffer_error;
- }
- }
- return 0;
-
- /* unwind in reverse order of setup: fd, buffer format, then context */
-buffer_error:
- pfm_free_fd(ctx->ctx_fd, filp);
-
- if (ctx->ctx_buf_fmt) {
- pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
- }
-error_file:
- pfm_context_free(ctx);
-
-error:
- return ret;
-}
-
-/*
- * Compute the value a PMD is reloaded with after an overflow.
- *
- * Starts from the long or short reset value of 'reg'. With PFM_REGFL_RANDOM
- * set, the masked current seed is subtracted and a new seed is generated
- * for next time. The result is recorded in reg->lval and returned.
- */
-static inline unsigned long
-pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
-{
-	extern unsigned long carta_random32 (unsigned long seed);
-	unsigned long cur_seed = reg->seed;
-	unsigned long rnd_mask = reg->mask;
-	unsigned long next_seed;
-	unsigned long reset;
-
-	if (is_long_reset)
-		reset = reg->long_reset;
-	else
-		reset = reg->short_reset;
-
-	if (reg->flags & PFM_REGFL_RANDOM) {
-		next_seed = carta_random32(cur_seed);
-
-		/* counter values are negative numbers, so subtract the random part */
-		reset -= (cur_seed & rnd_mask);
-
-		/* a mask wider than 32 bits needs a full 64-bit random value */
-		if ((rnd_mask >> 32) != 0)
-			next_seed |= carta_random32(cur_seed >> 32) << 32;
-
-		reg->seed = next_seed;
-	}
-
-	reg->lval = reset;
-	return reset;
-}
-
-/*
- * Reset overflowed counters for a context whose monitoring is masked.
- *
- * Only the software copy (ctx->ctx_pmds[].val) is updated. Each overflowed
- * counter named in ovfl_regs[0] gets a new reset value, then every PMD
- * listed in the reset_pmds of those counters is reset as well.
- */
-static void
-pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
-{
-	unsigned long pending = ovfl_regs[0] >> PMU_FIRST_COUNTER;
-	unsigned long dependents = 0UL;
-	unsigned long reset;
-	int idx;
-
-	/* first pass: the counters that actually overflowed */
-	for (idx = PMU_FIRST_COUNTER; pending; idx++, pending >>= 1) {
-
-		if (pending & 0x1UL) {
-			reset = pfm_new_counter_value(ctx->ctx_pmds + idx, is_long_reset);
-			ctx->ctx_pmds[idx].val = reset;
-			dependents |= ctx->ctx_pmds[idx].reset_pmds[0];
-
-			DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", idx, reset));
-		}
-	}
-
-	/* second pass: the registers those counters asked to have reset */
-	for (idx = 0; dependents; idx++, dependents >>= 1) {
-
-		if (dependents & 0x1) {
-			reset = pfm_new_counter_value(ctx->ctx_pmds + idx, is_long_reset);
-			ctx->ctx_pmds[idx].val = reset;
-
-			DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
-				is_long_reset ? "long" : "short", idx, reset));
-		}
-	}
-}
-
-/*
- * Reset the counters named in ovfl_regs[0], plus the PMDs they list in
- * their reset_pmds mask, to a new long or short reset value.
- *
- * When the context is in PFM_CTX_MASKED state the work is delegated to
- * pfm_reset_regs_masked(). Otherwise counters are written with
- * pfm_write_soft_counter(), non-counting PMDs with ia64_set_pmd(), and the
- * writes are followed by a data serialize.
- */
-static void
-pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
-{
- unsigned long mask = ovfl_regs[0];
- unsigned long reset_others = 0UL;
- unsigned long val;
- int i;
-
- DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
-
- if (ctx->ctx_state == PFM_CTX_MASKED) {
- pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
- return;
- }
-
- /*
- * now restore reset value on sampling overflowed counters
- * (the mask is shifted so that bit 0 corresponds to the first counter)
- */
- mask >>= PMU_FIRST_COUNTER;
- for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
-
- if ((mask & 0x1UL) == 0UL) continue;
-
- val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
- /* remember which other PMDs this counter wants reset with it */
- reset_others |= ctx->ctx_pmds[i].reset_pmds[0];
-
- DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
-
- pfm_write_soft_counter(ctx, i, val);
- }
-
- /*
- * Now take care of resetting the other registers
- */
- for(i = 0; reset_others; i++, reset_others >>= 1) {
-
- if ((reset_others & 0x1) == 0) continue;
-
- val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
-
- if (PMD_IS_COUNTING(i)) {
- pfm_write_soft_counter(ctx, i, val);
- } else {
- ia64_set_pmd(i, val);
- }
- DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
- is_long_reset ? "long" : "short", i, val));
- }
- /* serialize after the PMD writes above */
- ia64_srlz_d();
-}
-
-/*
- * Program a batch of PMC (performance monitor configuration) registers.
- *
- * ctx:   perfmon context being configured
- * arg:   array of 'count' pfarg_reg_t requests, processed in order
- * count: number of requests in 'arg'
- * regs:  handed unchanged to the per-register write checker
- *
- * Returns 0 when every request was applied. On the first invalid request the
- * function stops, flags that request with PFM_REG_RETFL_EINVAL and returns
- * -EINVAL (or the error reported by the write checker); requests handled
- * before the failing one stay committed. Returns -EINVAL for a zombie
- * context and -EBUSY when a loaded system-wide context is accessed from a
- * CPU other than ctx->ctx_cpu.
- *
- * When running on Xen the request is forwarded to the hypervisor if this
- * domain is the xenoprof primary and is accepted without action otherwise.
- */
-static int
-pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct thread_struct *thread = NULL;
-	struct task_struct *task;
-	pfarg_reg_t *req = (pfarg_reg_t *)arg;
-	unsigned long value, pmc_pm;
-	unsigned long smpl_pmds, reset_pmds, impl_pmds;
-	unsigned int cnum, reg_flags, flags, pmc_type;
-	int i, can_access_pmu = 0, is_loaded, is_system, expert_mode;
-	int is_monitor, is_counting, state;
-	int ret = -EINVAL;
-	pfm_reg_check_t wr_func;
-#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
-
-	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_WRITE_PMCS,
-						     arg, count);
-		return 0;
-	}
-	state     = ctx->ctx_state;
-	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
-	is_system = ctx->ctx_fl_system;
-	task      = ctx->ctx_task;
-	impl_pmds = pmu_conf->impl_pmds[0];
-
-	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
-
-	if (is_loaded) {
-		thread = &task->thread;
-		/*
-		 * In system wide and when the context is loaded, access can only happen
-		 * when the caller is running on the CPU being monitored by the session.
-		 * It does not have to be the owner (ctx_task) of the context per se.
-		 */
-		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			return -EBUSY;
-		}
-		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-	}
-	expert_mode = pfm_sysctl.expert_mode;
-
-	for (i = 0; i < count; i++, req++) {
-
-		cnum       = req->reg_num;
-		reg_flags  = req->reg_flags;
-		value      = req->reg_value;
-		smpl_pmds  = req->reg_smpl_pmds[0];
-		reset_pmds = req->reg_reset_pmds[0];
-		flags      = 0;
-
-		/* bound check first: cnum indexes pmc_desc[] below */
-		if (cnum >= PMU_MAX_PMCS) {
-			DPRINT(("pmc%u is invalid\n", cnum));
-			goto error;
-		}
-
-		pmc_type    = pmu_conf->pmc_desc[cnum].type;
-		pmc_pm      = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
-		is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
-		is_monitor  = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
-
-		/*
-		 * we reject all non implemented PMC as well
-		 * as attempts to modify PMC[0-3] which are used
-		 * as status registers by the PMU
-		 */
-		if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
-			DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
-			goto error;
-		}
-		wr_func = pmu_conf->pmc_desc[cnum].write_check;
-		/*
-		 * If the PMC is a monitor, then if the value is not the default:
-		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
-		 * 	- per-task           : PMCx.pm=0 (user monitor)
-		 */
-		if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
-			DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
-				cnum,
-				pmc_pm,
-				is_system));
-			goto error;
-		}
-
-		if (is_counting) {
-			/*
-			 * enforce generation of overflow interrupt. Necessary on all
-			 * CPUs.
-			 */
-			value |= 1 << PMU_PMC_OI;
-
-			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
-				flags |= PFM_REGFL_OVFL_NOTIFY;
-			}
-
-			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
-
-			/* verify validity of smpl_pmds */
-			if ((smpl_pmds & impl_pmds) != smpl_pmds) {
-				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
-				goto error;
-			}
-
-			/* verify validity of reset_pmds */
-			if ((reset_pmds & impl_pmds) != reset_pmds) {
-				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
-				goto error;
-			}
-		} else {
-			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
-				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
-				goto error;
-			}
-			/* eventid on non-counting monitors are ignored */
-		}
-
-		/*
-		 * execute write checker, if any
-		 */
-		if (likely(expert_mode == 0 && wr_func)) {
-			ret = (*wr_func)(task, ctx, cnum, &value, regs);
-			if (ret) goto error;
-			/* restore the default error code for the checks of the next request */
-			ret = -EINVAL;
-		}
-
-		/*
-		 * no error on this register
-		 */
-		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
-
-		/*
-		 * Now we commit the changes to the software state
-		 */
-
-		/*
-		 * update overflow information
-		 */
-		if (is_counting) {
-			/*
-			 * full flag update each time a register is programmed
-			 */
-			ctx->ctx_pmds[cnum].flags = flags;
-
-			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
-			ctx->ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
-			ctx->ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;
-
-			/*
-			 * Mark all PMDS to be accessed as used.
-			 *
-			 * We do not keep track of PMC because we have to
-			 * systematically restore ALL of them.
-			 *
-			 * We do not update the used_monitors mask, because
-			 * if we have not programmed them, then will be in
-			 * a quiescent state, therefore we will not need to
-			 * mask/restore then when context is MASKED.
-			 */
-			CTX_USED_PMD(ctx, reset_pmds);
-			CTX_USED_PMD(ctx, smpl_pmds);
-			/*
-			 * make sure we do not try to reset on
-			 * restart because we have established new values.
-			 *
-			 * Only the bit of this register must be cleared: the
-			 * mask has to be built as ~(1UL << cnum). The former
-			 * "~1UL << cnum" shifted the complement and therefore
-			 * also cleared every bit below cnum, dropping pending
-			 * overflow state of lower-numbered counters.
-			 */
-			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
-		}
-		/*
-		 * Needed in case the user does not initialize the equivalent
-		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
-		 * possible leak here.
-		 */
-		CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
-
-		/*
-		 * keep track of the monitor PMC that we are using.
-		 * we save the value of the pmc in ctx_pmcs[] and if
-		 * the monitoring is not stopped for the context we also
-		 * place it in the saved state area so that it will be
-		 * picked up later by the context switch code.
-		 *
-		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
-		 *
-		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
-		 * monitoring needs to be stopped.
-		 */
-		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
-
-		/*
-		 * update context state
-		 */
-		ctx->ctx_pmcs[cnum] = value;
-
-		if (is_loaded) {
-			/*
-			 * write thread state
-			 */
-			if (is_system == 0) thread->pmcs[cnum] = value;
-
-			/*
-			 * write hardware register if we can
-			 */
-			if (can_access_pmu) {
-				ia64_set_pmc(cnum, value);
-			}
-#ifdef CONFIG_SMP
-			else {
-				/*
-				 * per-task SMP only here
-				 *
-			 	 * we are guaranteed that the task is not running on the other CPU,
-			 	 * we indicate that this PMC will need to be reloaded if the task
-			 	 * is rescheduled on the CPU it ran last on.
-			 	 */
-				ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
-			}
-#endif
-		}
-
-		DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
-			  cnum,
-			  value,
-			  is_loaded,
-			  can_access_pmu,
-			  flags,
-			  ctx->ctx_all_pmcs[0],
-			  ctx->ctx_used_pmds[0],
-			  ctx->ctx_pmds[cnum].eventid,
-			  smpl_pmds,
-			  reset_pmds,
-			  ctx->ctx_reload_pmcs[0],
-			  ctx->ctx_used_monitors[0],
-			  ctx->ctx_ovfl_regs[0]));
-	}
-
-	/*
-	 * make sure the changes are visible
-	 */
-	if (can_access_pmu) ia64_srlz_d();
-
-	return 0;
-error:
-	/* req still points at the request that failed */
-	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
-	return ret;
-}
-
-/*
- * Program a batch of PMD (performance monitor data) registers.
- *
- * ctx:   perfmon context being configured
- * arg:   array of 'count' pfarg_reg_t requests, processed in order
- * count: number of requests in 'arg'
- * regs:  handed unchanged to the per-register write checker
- *
- * Returns 0 when every request was applied. On the first invalid request the
- * function stops, flags that request with PFM_REG_RETFL_EINVAL and returns
- * -EINVAL (or the error reported by the write checker); requests handled
- * before the failing one stay committed. Returns -EINVAL for a zombie
- * context and -EBUSY when a loaded system-wide context is accessed from a
- * CPU other than ctx->ctx_cpu.
- *
- * When running on Xen the request is forwarded to the hypervisor if this
- * domain is the xenoprof primary and is accepted without action otherwise.
- */
-static int
-pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct thread_struct *thread = NULL;
-	struct task_struct *task;
-	pfarg_reg_t *req = (pfarg_reg_t *)arg;
-	unsigned long value, hw_value, ovfl_mask;
-	unsigned int cnum;
-	int i, can_access_pmu = 0, state;
-	int is_counting, is_loaded, is_system, expert_mode;
-	int ret = -EINVAL;
-	pfm_reg_check_t wr_func;
-
-	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_WRITE_PMDS,
-						     arg, count);
-		return 0;
-	}
-
-	state     = ctx->ctx_state;
-	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
-	is_system = ctx->ctx_fl_system;
-	ovfl_mask = pmu_conf->ovfl_val;
-	task      = ctx->ctx_task;
-
-	if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL;
-
-	/*
-	 * on both UP and SMP, we can only write to the PMD when the task is
-	 * the owner of the local PMU.
-	 */
-	if (likely(is_loaded)) {
-		thread = &task->thread;
-		/*
-		 * In system wide and when the context is loaded, access can only happen
-		 * when the caller is running on the CPU being monitored by the session.
-		 * It does not have to be the owner (ctx_task) of the context per se.
-		 */
-		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			return -EBUSY;
-		}
-		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-	}
-	expert_mode = pfm_sysctl.expert_mode;
-
-	for (i = 0; i < count; i++, req++) {
-
-		cnum  = req->reg_num;
-		value = req->reg_value;
-
-		if (!PMD_IS_IMPL(cnum)) {
-			DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
-			goto abort_mission;
-		}
-		is_counting = PMD_IS_COUNTING(cnum);
-		wr_func     = pmu_conf->pmd_desc[cnum].write_check;
-
-		/*
-		 * execute write checker, if any
-		 */
-		if (unlikely(expert_mode == 0 && wr_func)) {
-			unsigned long v = value;
-
-			ret = (*wr_func)(task, ctx, cnum, &v, regs);
-			if (ret) goto abort_mission;
-
-			/* the checker may have adjusted the value */
-			value = v;
-			/* restore the default error code for the checks of the next request */
-			ret   = -EINVAL;
-		}
-
-		/*
-		 * no error on this register
-		 */
-		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
-
-		/*
-		 * now commit changes to software state
-		 */
-		hw_value = value;
-
-		/*
-		 * update virtualized (64bits) counter
-		 */
-		if (is_counting) {
-			/*
-			 * write context state
-			 */
-			ctx->ctx_pmds[cnum].lval = value;
-
-			/*
-			 * when the context is loaded we use the split value:
-			 * the bits covered by ovfl_mask go to the hardware
-			 * register, the remaining bits stay in the software counter
-			 */
-			if (is_loaded) {
-				hw_value = value &  ovfl_mask;
-				value    = value & ~ovfl_mask;
-			}
-		}
-		/*
-		 * update reset values (not just for counters)
-		 */
-		ctx->ctx_pmds[cnum].long_reset  = req->reg_long_reset;
-		ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
-
-		/*
-		 * update randomization parameters (not just for counters)
-		 */
-		ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
-		ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
-
-		/*
-		 * update context value
-		 */
-		ctx->ctx_pmds[cnum].val  = value;
-
-		/*
-		 * Keep track of what we use
-		 *
-		 * We do not keep track of PMC because we have to
-		 * systematically restore ALL of them.
-		 */
-		CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
-
-		/*
-		 * mark this PMD register used as well
-		 */
-		CTX_USED_PMD(ctx, RDEP(cnum));
-
-		/*
-		 * make sure we do not try to reset on
-		 * restart because we have established new values.
-		 *
-		 * Only the bit of this register must be cleared: the mask
-		 * has to be built as ~(1UL << cnum). The former
-		 * "~1UL << cnum" shifted the complement and therefore also
-		 * cleared every bit below cnum, dropping pending overflow
-		 * state of lower-numbered counters.
-		 */
-		if (is_counting && state == PFM_CTX_MASKED) {
-			ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
-		}
-
-		if (is_loaded) {
-			/*
-		 	 * write thread state
-		 	 */
-			if (is_system == 0) thread->pmds[cnum] = hw_value;
-
-			/*
-			 * write hardware register if we can
-			 */
-			if (can_access_pmu) {
-				ia64_set_pmd(cnum, hw_value);
-			} else {
-#ifdef CONFIG_SMP
-				/*
-			 	 * we are guaranteed that the task is not running on the other CPU,
-			 	 * we indicate that this PMD will need to be reloaded if the task
-			 	 * is rescheduled on the CPU it ran last on.
-			 	 */
-				ctx->ctx_reload_pmds[0] |= 1UL << cnum;
-#endif
-			}
-		}
-
-		DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx  short_reset=0x%lx "
-			  "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
-			cnum,
-			value,
-			is_loaded,
-			can_access_pmu,
-			hw_value,
-			ctx->ctx_pmds[cnum].val,
-			ctx->ctx_pmds[cnum].short_reset,
-			ctx->ctx_pmds[cnum].long_reset,
-			PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
-			ctx->ctx_pmds[cnum].seed,
-			ctx->ctx_pmds[cnum].mask,
-			ctx->ctx_used_pmds[0],
-			ctx->ctx_pmds[cnum].reset_pmds[0],
-			ctx->ctx_reload_pmds[0],
-			ctx->ctx_all_pmds[0],
-			ctx->ctx_ovfl_regs[0]));
-	}
-
-	/*
-	 * make changes visible
-	 */
-	if (can_access_pmu) ia64_srlz_d();
-
-	return 0;
-
-abort_mission:
-	/*
-	 * for now, we have only one possibility for error;
-	 * req still points at the request that failed
-	 */
-	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
-	return ret;
-}
-
-/*
- * Read a batch of PMD registers into the 'count' pfarg_reg_t requests at 'arg'.
- *
- * For each request reg_value receives the current value (for counters: the
- * hardware or saved bits under ovfl_mask plus the software part), and
- * reg_last_reset_val receives the last reset value. Only PMDs that are
- * implemented and marked used by the context can be read.
- *
- * Returns 0 on success. On the first failing request that request is
- * flagged with PFM_REG_RETFL_EINVAL and -EINVAL (or the read checker's
- * error) is returned. Returns -EINVAL for a zombie context and -EBUSY when
- * a loaded system-wide context is accessed from the wrong CPU.
- *
- * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function.
- * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an
- * interrupt is delivered during the call, it will be kept pending until we leave, making
- * it appear as if it had been generated at the UNPROTECT_CONTEXT(). At least we are
- * guaranteed to return consistent data to the user, it may simply be old. It is not
- * trivial to treat the overflow while inside the call because you may end up in
- * some module sampling buffer code causing deadlocks.
- */
-static int
-pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
- struct thread_struct *thread = NULL;
- struct task_struct *task;
- unsigned long val = 0UL, lval, ovfl_mask, sval;
- pfarg_reg_t *req = (pfarg_reg_t *)arg;
- unsigned int cnum, reg_flags = 0;
- int i, can_access_pmu = 0, state;
- int is_loaded, is_system, is_counting, expert_mode;
- int ret = -EINVAL;
- pfm_reg_check_t rd_func;
- /* NOTE(review): macro defined elsewhere; presumably bails out when running on Xen - confirm */
- XEN_NOT_SUPPORTED_YET;
-
- /*
- * access is possible when loaded only for
- * self-monitoring tasks or in UP mode
- */
-
- state = ctx->ctx_state;
- is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
- is_system = ctx->ctx_fl_system;
- ovfl_mask = pmu_conf->ovfl_val;
- task = ctx->ctx_task;
-
- if (state == PFM_CTX_ZOMBIE) return -EINVAL;
-
- if (likely(is_loaded)) {
- thread = &task->thread;
- /*
- * In system wide and when the context is loaded, access can only happen
- * when the caller is running on the CPU being monitored by the session.
- * It does not have to be the owner (ctx_task) of the context per se.
- */
- if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
- DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
- return -EBUSY;
- }
- /*
- * this can be true when not self-monitoring only in UP
- */
- can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-
- /* serialize before reading the live PMD registers below */
- if (can_access_pmu) ia64_srlz_d();
- }
- expert_mode = pfm_sysctl.expert_mode;
-
- DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
- is_loaded,
- can_access_pmu,
- state));
-
- /*
- * on both UP and SMP, we can only read the PMD from the hardware register when
- * the task is the owner of the local PMU.
- */
-
- for (i = 0; i < count; i++, req++) {
-
- cnum = req->reg_num;
- reg_flags = req->reg_flags;
-
- if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
- /*
- * we can only read the register that we use. That includes
- * the one we explicitly initialize AND the one we want included
- * in the sampling buffer (smpl_regs).
- *
- * Having this restriction allows optimization in the ctxsw routine
- * without compromising security (leaks)
- */
- if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
-
- /* sval: software part of the counter, lval: last reset value */
- sval = ctx->ctx_pmds[cnum].val;
- lval = ctx->ctx_pmds[cnum].lval;
- is_counting = PMD_IS_COUNTING(cnum);
-
- /*
- * If the task is not the current one, then we check if the
- * PMU state is still in the local live register due to lazy ctxsw.
- * If true, then we read directly from the registers.
- */
- if (can_access_pmu){
- val = ia64_get_pmd(cnum);
- } else {
- /*
- * context has been saved
- * if context is zombie, then task does not exist anymore.
- * In this case, we use the full value saved in the context (pfm_flush_regs()).
- */
- val = is_loaded ? thread->pmds[cnum] : 0UL;
- }
- rd_func = pmu_conf->pmd_desc[cnum].read_check;
-
- if (is_counting) {
- /*
- * XXX: need to check for overflow when loaded
- */
- val &= ovfl_mask;
- val += sval;
- }
-
- /*
- * execute read checker, if any
- */
- if (unlikely(expert_mode == 0 && rd_func)) {
- unsigned long v = val;
- ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
- if (ret) goto error;
- val = v;
- /* restore the default error code for the checks of the next request */
- ret = -EINVAL;
- }
-
- PFM_REG_RETFLAG_SET(reg_flags, 0);
-
- DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
-
- /*
- * update register return value, abort all if problem during copy.
- * we only modify the reg_flags field. no check mode is fine because
- * access has been verified upfront in sys_perfmonctl().
- */
- req->reg_value = val;
- req->reg_flags = reg_flags;
- req->reg_last_reset_val = lval;
- }
-
- return 0;
-
-error:
- /* req still points at the request that failed */
- PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
- return ret;
-}
-
-/*
- * Exported entry point for writing PMCs through the context that GET_PMU_CTX()
- * reports for the local PMU. Returns -EINVAL when 'req' is NULL or no context
- * is active, -EBUSY when 'task' is not the caller and the context is not
- * system-wide; otherwise the result of pfm_write_pmcs().
- */
-int
-pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *pmu_ctx;
-
-	if (!req) return -EINVAL;
-
-	pmu_ctx = GET_PMU_CTX();
-	if (!pmu_ctx) return -EINVAL;
-
-	/*
-	 * per-task contexts are limited to the current task for now, which
-	 * is sufficient when called from an overflow handler
-	 */
-	if (pmu_ctx->ctx_fl_system == 0 && task != current) return -EBUSY;
-
-	return pfm_write_pmcs(pmu_ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_write_pmcs);
-
-/*
- * Exported entry point for reading PMDs through the context that GET_PMU_CTX()
- * reports for the local PMU. Returns -EINVAL when 'req' is NULL or no context
- * is active, -EBUSY when 'task' is not the caller and the context is not
- * system-wide; otherwise the result of pfm_read_pmds().
- */
-int
-pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *pmu_ctx;
-
-	if (!req) return -EINVAL;
-
-	pmu_ctx = GET_PMU_CTX();
-	if (!pmu_ctx) return -EINVAL;
-
-	/*
-	 * per-task contexts are limited to the current task for now, which
-	 * is sufficient when called from an overflow handler
-	 */
-	if (pmu_ctx->ctx_fl_system == 0 && task != current) return -EBUSY;
-
-	return pfm_read_pmds(pmu_ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_read_pmds);
-
-/*
- * Only call this function when a process is trying to
- * write the debug registers (reading is always allowed)
- *
- * Returns 0 when the task may use the debug registers, which is also the
- * answer when the PMU does not use them (use_rr_dbregs == 0) or the task
- * already has IA64_THREAD_DBG_VALID set. Returns -1 when the task's own
- * perfmon context, or a system-wide session, is using the debug registers.
- * On a fresh grant the count of ptrace users is incremented.
- */
-int
-pfm_use_debug_registers(struct task_struct *task)
-{
- pfm_context_t *ctx = task->thread.pfm_context;
- unsigned long flags;
- int ret = 0;
-
- if (pmu_conf->use_rr_dbregs == 0) return 0;
-
- DPRINT(("called for [%d]\n", task->pid));
-
- /*
- * do it only once
- */
- if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
-
- /*
- * Even on SMP, we do not need to use an atomic here because
- * the only way in is via ptrace() and this is possible only when the
- * process is stopped. Even in the case where the ctxsw out is not totally
- * completed by the time we come here, there is no way the 'stopped' process
- * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
- * So this is always safe.
- */
- if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
-
- LOCK_PFS(flags);
-
- /*
- * We cannot allow setting breakpoints when system wide monitoring
- * sessions are using the debug registers.
- */
- if (pfm_sessions.pfs_sys_use_dbregs> 0)
- ret = -1;
- else
- pfm_sessions.pfs_ptrace_use_dbregs++;
-
- DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n",
- pfm_sessions.pfs_ptrace_use_dbregs,
- pfm_sessions.pfs_sys_use_dbregs,
- task->pid, ret));
-
- UNLOCK_PFS(flags);
-
- return ret;
-}
-
-/*
- * This function is called for every task that exits with
- * IA64_THREAD_DBG_VALID set, i.e. a task that was able to use the debug
- * registers for debugging purposes via ptrace(). Such a task was not using
- * them for performance monitoring, so all that is needed is to decrement
- * the number of "ptraced" debug register users to keep the count accurate.
- *
- * Returns 0 on success (or when the PMU does not use the debug registers)
- * and -1 when the count is already zero.
- */
-int
-pfm_release_debug_registers(struct task_struct *task)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	if (pmu_conf->use_rr_dbregs == 0) return 0;
-
-	LOCK_PFS(flags);
-	if (pfm_sessions.pfs_ptrace_use_dbregs != 0) {
-		pfm_sessions.pfs_ptrace_use_dbregs--;
-	} else {
-		/* unbalanced release: report it and leave the count alone */
-		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
-		ret = -1;
-	}
-	UNLOCK_PFS(flags);
-
-	return ret;
-}
-
-static int /*
- * pfm_restart: restart handler (entry 10 of pfm_cmd_tab).
- *
- * arg and count are not used by this function.
- *
- * Accepted context states:
- *   - PFM_CTX_MASKED: always accepted
- *   - PFM_CTX_LOADED: only when the context has a sampling buffer and its
- *     format provides fmt_restart_active
- *   - PFM_CTX_LOADED otherwise, PFM_CTX_UNLOADED, PFM_CTX_ZOMBIE: -EBUSY
- *   - any other value: -EINVAL
- *
- * Further failures: -EBUSY for a system-wide context when the caller is
- * not running on ctx->ctx_cpu, -EINVAL when the context has no task, and
- * -EINVAL when restarting another MASKED task whose ctx_fl_can_restart
- * flag is clear.
- *
- * When the monitored task is current (or the context is system-wide) the
- * restart is carried out directly; otherwise the target task is either
- * unblocked or armed to do the work itself. Both paths return 0; in
- * particular the value returned by the buffer format restart callback is
- * only used to decide whether to reset/resume, it is not propagated.
- *
- * NOTE(review): XEN_NOT_SUPPORTED_YET is defined outside this chunk; it
- * presumably bails out when running on Xen -- confirm.
- */
-pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
- struct task_struct *task;
- pfm_buffer_fmt_t *fmt;
- pfm_ovfl_ctrl_t rst_ctrl;
- int state, is_system;
- int ret = 0;
- XEN_NOT_SUPPORTED_YET;
-
- state = ctx->ctx_state;
- fmt = ctx->ctx_buf_fmt;
- is_system = ctx->ctx_fl_system;
- task = PFM_CTX_TASK(ctx);
-
- switch(state) {
- case PFM_CTX_MASKED:
- break;
- case PFM_CTX_LOADED:
- if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
- /* fall through: LOADED without an active-restart handler is rejected */
- case PFM_CTX_UNLOADED:
- case PFM_CTX_ZOMBIE:
- DPRINT(("invalid state=%d\n", state));
- return -EBUSY;
- default:
- DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
- return -EINVAL;
- }
-
- /*
- * In system wide and when the context is loaded, access can only happen
- * when the caller is running on the CPU being monitored by the session.
- * It does not have to be the owner (ctx_task) of the context per se.
- */
- if (is_system && ctx->ctx_cpu != smp_processor_id()) {
- DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
- return -EBUSY;
- }
-
- /* sanity check */
- if (unlikely(task == NULL)) {
- printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
- return -EINVAL;
- }
-
- if (task == current || is_system) {
-
- fmt = ctx->ctx_buf_fmt;
-
- DPRINT(("restarting self %d ovfl=0x%lx\n",
- task->pid,
- ctx->ctx_ovfl_regs[0]));
-
- if (CTX_HAS_SMPL(ctx)) {
-
- prefetch(ctx->ctx_smpl_hdr);
-
- rst_ctrl.bits.mask_monitoring = 0;
- rst_ctrl.bits.reset_ovfl_pmds = 0;
-
- if (state == PFM_CTX_LOADED)
- ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
- else
- ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
- } else {
- rst_ctrl.bits.mask_monitoring = 0;
- rst_ctrl.bits.reset_ovfl_pmds = 1;
- }
-
- if (ret == 0) {
- if (rst_ctrl.bits.reset_ovfl_pmds)
- pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
-
- if (rst_ctrl.bits.mask_monitoring == 0) {
- DPRINT(("resuming monitoring for [%d]\n", task->pid));
-
- if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
- } else {
- DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
-
- // cannot use pfm_stop_monitoring(task, regs);
- }
- }
- /*
- * clear overflowed PMD mask to remove any stale information
- */
- ctx->ctx_ovfl_regs[0] = 0UL;
-
- /*
- * back to LOADED state
- */
- ctx->ctx_state = PFM_CTX_LOADED;
-
- /*
- * XXX: not really useful for self monitoring
- */
- ctx->ctx_fl_can_restart = 0;
-
- return 0;
- }
-
- /*
- * restart another task
- */
-
- /*
- * When PFM_CTX_MASKED, we cannot issue a restart before the previous
- * one is seen by the task.
- */
- if (state == PFM_CTX_MASKED) {
- if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
- /*
- * will prevent subsequent restart before this one is
- * seen by other task
- */
- ctx->ctx_fl_can_restart = 0;
- }
-
- /*
- * if blocking, then signal the ctx_restart_done completion if
- * PFM_CTX_MASKED, i.e. the task is blocked or on its way to block.
- * That's the normal restart path. If the monitoring is not masked,
- * then the task can be actively monitoring and we cannot directly
- * intervene. Therefore we use the trap mechanism to catch the task and
- * force it to reset the buffer/reset PMDs.
- *
- * if non-blocking, then we ensure that the task will go into
- * pfm_handle_work() before returning to user mode.
- *
- * We cannot explicitly reset another task, it MUST always
- * be done by the task itself. This works for system wide because
- * the tool that is controlling the session is logically doing
- * "self-monitoring".
- */
- if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
- DPRINT(("unblocking [%d] \n", task->pid));
- complete(&ctx->ctx_restart_done);
- } else {
- DPRINT(("[%d] armed exit trap\n", task->pid));
-
- ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
-
- PFM_SET_WORK_PENDING(task, 1);
-
- pfm_set_task_notify(task);
-
- /*
- * XXX: send reschedule if task runs on another CPU
- */
- }
- return 0;
-}
-
-/*
- * pfm_debug - switch perfmon debugging on or off (entry 13 of pfm_cmd_tab).
- *
- * arg points to an unsigned int: zero turns debugging off, any other
- * value turns it on. When debugging is turned off the per-CPU pfm_stats
- * array is cleared and every pfm_ovfl_intr_cycles_min is reset to ~0UL.
- * ctx, count and regs are not used. Always returns 0 once past
- * XEN_NOT_SUPPORTED_YET.
- *
- * NOTE(review): XEN_NOT_SUPPORTED_YET is defined outside this chunk; it
- * presumably returns early when running on Xen -- confirm.
- */
-static int
-pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	unsigned int enable = *(unsigned int *)arg;
-	unsigned int cpu;
-	XEN_NOT_SUPPORTED_YET;
-
-	pfm_sysctl.debug = (enable != 0);
-
-	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
-
-	if (enable != 0)
-		return 0;
-
-	/* debugging disabled: start the statistics over */
-	memset(pfm_stats, 0, sizeof(pfm_stats));
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		pfm_stats[cpu].pfm_ovfl_intr_cycles_min = ~0UL;
-
-	return 0;
-}
-
-/*
- * pfm_write_ibr_dbr - common back end of pfm_write_ibrs()/pfm_write_dbrs().
- *
- * mode:  PFM_CODE_RR to write instruction breakpoint registers (IBR),
- *        PFM_DATA_RR to write data breakpoint registers (DBR)
- * ctx:   context the registers are recorded in
- * arg:   array of count pfarg_dbreg_t requests
- * count: number of requests
- * regs:  not used by this function
- *
- * arg can be NULL and count can be zero for this function
- *
- * Every value is saved in ctx->ctx_ibrs[]/ctx->ctx_dbrs[]; the hardware
- * register is written as well when the caller may access the PMU (the
- * context is loaded and either the task owns the PMU or the context is
- * system-wide).
- *
- * Returns 0 on success, -EINVAL when pmu_conf->use_rr_dbregs is zero, the
- * context is a zombie or a register number is out of range (the failing
- * request is flagged with PFM_REG_RETFL_EINVAL), and -EBUSY when a loaded
- * system-wide context is accessed from the wrong CPU, the task is being
- * debugged, or ptrace users hold the debug registers.
- * Requests processed before a failing one are not rolled back.
- */
-static int
-pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct thread_struct *thread = NULL;
-	struct task_struct *task;
-	pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
-	unsigned long flags;
-	dbreg_t dbreg;
-	unsigned int rnum;
-	int first_time;
-	int ret = 0, state;
-	int i, can_access_pmu = 0;
-	int is_system, is_loaded;
-	int sys_dbregs_reserved = 0;	/* set when this call bumped pfs_sys_use_dbregs */
-
-	if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
-
-	state = ctx->ctx_state;
-	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
-	is_system = ctx->ctx_fl_system;
-	task = ctx->ctx_task;
-
-	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
-
-	/*
-	 * on both UP and SMP, we can only write to the PMC when the task is
-	 * the owner of the local PMU.
-	 */
-	if (is_loaded) {
-		thread = &task->thread;
-		/*
-		 * In system wide and when the context is loaded, access can only happen
-		 * when the caller is running on the CPU being monitored by the session.
-		 * It does not have to be the owner (ctx_task) of the context per se.
-		 */
-		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			return -EBUSY;
-		}
-		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-	}
-
-	/*
-	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
-	 * ensuring that no real breakpoint can be installed via this call.
-	 *
-	 * IMPORTANT: regs can be NULL in this function
-	 */
-
-	first_time = ctx->ctx_fl_using_dbreg == 0;
-
-	/*
-	 * don't bother if we are loaded and task is being debugged
-	 */
-	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
-		DPRINT(("debug registers already in use for [%d]\n", task->pid));
-		return -EBUSY;
-	}
-
-	/*
-	 * check for debug registers in system wide mode
-	 *
-	 * Even though a check is done in pfm_context_load(),
-	 * we must repeat it here, in case the registers are
-	 * written after the context is loaded
-	 */
-	if (is_loaded) {
-		LOCK_PFS(flags);
-
-		if (first_time && is_system) {
-			if (pfm_sessions.pfs_ptrace_use_dbregs) {
-				ret = -EBUSY;
-			} else {
-				pfm_sessions.pfs_sys_use_dbregs++;
-				sys_dbregs_reserved = 1;
-			}
-		}
-		UNLOCK_PFS(flags);
-	}
-
-	if (ret != 0) return ret;
-
-	/*
-	 * mark ourself as user of the debug registers for
-	 * perfmon purposes.
-	 */
-	ctx->ctx_fl_using_dbreg = 1;
-
-	/*
-	 * clear hardware registers to make sure we don't
-	 * pick up stale state.
-	 *
-	 * for a system wide session, we do not use
-	 * thread.dbr, thread.ibr because this process
-	 * never leaves the current CPU and the state
-	 * is shared by all processes running on it
-	 */
-	if (first_time && can_access_pmu) {
-		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
-		for (i=0; i < pmu_conf->num_ibrs; i++) {
-			ia64_set_ibr(i, 0UL);
-			ia64_dv_serialize_instruction();
-		}
-		ia64_srlz_i();
-		for (i=0; i < pmu_conf->num_dbrs; i++) {
-			ia64_set_dbr(i, 0UL);
-			ia64_dv_serialize_data();
-		}
-		ia64_srlz_d();
-	}
-
-	/*
-	 * Now install the values into the registers
-	 */
-	for (i = 0; i < count; i++, req++) {
-
-		rnum = req->dbreg_num;
-		dbreg.val = req->dbreg_value;
-
-		ret = -EINVAL;
-
-		if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
-			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
-				rnum, dbreg.val, mode, i, count));
-
-			goto abort_mission;
-		}
-
-		/*
-		 * make sure we do not install enabled breakpoint
-		 * (only odd-numbered registers are sanitized here)
-		 */
-		if (rnum & 0x1) {
-			if (mode == PFM_CODE_RR)
-				dbreg.ibr.ibr_x = 0;
-			else
-				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
-		}
-
-		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);
-
-		/*
-		 * Debug registers, just like PMC, can only be modified
-		 * by a kernel call. Moreover, perfmon() access to those
-		 * registers are centralized in this routine. The hardware
-		 * does not modify the value of these registers, therefore,
-		 * if we save them as they are written, we can avoid having
-		 * to save them on context switch out. This is made possible
-		 * by the fact that when perfmon uses debug registers, ptrace()
-		 * won't be able to modify them concurrently.
-		 */
-		if (mode == PFM_CODE_RR) {
-			CTX_USED_IBR(ctx, rnum);
-
-			if (can_access_pmu) {
-				ia64_set_ibr(rnum, dbreg.val);
-				ia64_dv_serialize_instruction();
-			}
-
-			ctx->ctx_ibrs[rnum] = dbreg.val;
-
-			DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n",
-				rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
-		} else {
-			CTX_USED_DBR(ctx, rnum);
-
-			if (can_access_pmu) {
-				ia64_set_dbr(rnum, dbreg.val);
-				ia64_dv_serialize_data();
-			}
-			ctx->ctx_dbrs[rnum] = dbreg.val;
-
-			DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n",
-				rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
-		}
-	}
-
-	return 0;
-
-abort_mission:
-	/*
-	 * in case it was our first attempt, we undo the global modifications
-	 */
-	if (first_time) {
-		/*
-		 * Only give back the system-wide reservation if this very call
-		 * took it. For a context that is not loaded nothing was
-		 * incremented above, and decrementing here would underflow
-		 * pfs_sys_use_dbregs.
-		 */
-		if (sys_dbregs_reserved) {
-			LOCK_PFS(flags);
-			pfm_sessions.pfs_sys_use_dbregs--;
-			UNLOCK_PFS(flags);
-		}
-		ctx->ctx_fl_using_dbreg = 0;
-	}
-	/*
-	 * install error return flag
-	 */
-	PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);
-
-	return ret;
-}
-
-/*
- * pfm_write_ibrs - write instruction breakpoint registers; forwards to
- * pfm_write_ibr_dbr() with mode PFM_CODE_RR and returns its result.
- */
-static int
-pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	int status;
-
-	status = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
-
-	return status;
-}
-
-/*
- * pfm_write_dbrs - write data breakpoint registers; forwards to
- * pfm_write_ibr_dbr() with mode PFM_DATA_RR and returns its result.
- */
-static int
-pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	int status;
-
-	status = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
-
-	return status;
-}
-
-/*
- * pfm_mod_write_ibrs - exported entry point for writing instruction
- * breakpoint registers on the context returned by GET_PMU_CTX().
- *
- * Returns -EINVAL when req is NULL or there is no such context, -EBUSY
- * when task is not current and the context is not system-wide, otherwise
- * the result of pfm_write_ibrs().
- */
-int
-pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *ctx;
-
-	if (!req)
-		return -EINVAL;
-
-	ctx = GET_PMU_CTX();
-	if (!ctx)
-		return -EINVAL;
-
-	/*
-	 * for now limit to current task, which is enough when calling
-	 * from overflow handler
-	 */
-	if (ctx->ctx_fl_system == 0 && task != current)
-		return -EBUSY;
-
-	return pfm_write_ibrs(ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_write_ibrs);
-
-/*
- * pfm_mod_write_dbrs - exported entry point for writing data breakpoint
- * registers on the context returned by GET_PMU_CTX().
- *
- * Returns -EINVAL when req is NULL or there is no such context, -EBUSY
- * when task is not current and the context is not system-wide, otherwise
- * the result of pfm_write_dbrs().
- */
-int
-pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *ctx;
-
-	if (!req)
-		return -EINVAL;
-
-	ctx = GET_PMU_CTX();
-	if (!ctx)
-		return -EINVAL;
-
-	/*
-	 * for now limit to current task, which is enough when calling
-	 * from overflow handler
-	 */
-	if (ctx->ctx_fl_system == 0 && task != current)
-		return -EBUSY;
-
-	return pfm_write_dbrs(ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_write_dbrs);
-
-
-/*
- * pfm_get_features - report the perfmon interface version (entry 12 of
- * pfm_cmd_tab).
- *
- * arg points to a pfarg_features_t that receives the answer. ctx, count
- * and regs are not used.
- *
- * When running on Xen the request is forwarded to the hypervisor and its
- * return value is returned; otherwise ft_version is set to PFM_VERSION
- * and 0 is returned.
- */
-static int
-pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	pfarg_features_t *req = (pfarg_features_t *)arg;
-
-	/*
-	 * Hand the argument buffer itself to the hypervisor, not the address
-	 * of the local pointer variable: the result must end up in the
-	 * buffer that is copied back to the caller, and writing it over the
-	 * pointer would clobber the kernel stack.
-	 */
-	if (is_running_on_xen())
-		return HYPERVISOR_perfmon_op(PFM_GET_FEATURES, arg, 0);
-
-	req->ft_version = PFM_VERSION;
-	return 0;
-}
-
-static int /*
- * pfm_stop: stop monitoring for a context (entry 4 of pfm_cmd_tab).
- *
- * arg and count are not used; regs is the pt_regs of the caller.
- *
- * When running on Xen, the request is forwarded with
- * HYPERVISOR_perfmon_op(PFM_STOP) if is_xenoprof_primary() is true and is
- * otherwise a successful no-op; the context is not touched in either case.
- *
- * Natively:
- *   - returns -EINVAL if the context is PFM_CTX_UNLOADED
- *   - returns -EBUSY if the context is system-wide and the caller is not
- *     running on ctx->ctx_cpu
- *   - system-wide: clears dcr.pp, the PFM_CPUINFO_DCR_PP cpuinfo bit,
- *     psr.pp and the caller's user-level psr.pp
- *   - per-task, task == current: clears psr.up now and in regs
- *   - per-task, other task: clears psr.up in that task's pt_regs and
- *     ctx_saved_psr_up
- * Returns 0 in the last three cases.
- */
-pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
- struct pt_regs *tregs;
- struct task_struct *task = PFM_CTX_TASK(ctx);
- int state, is_system;
-
- if (is_running_on_xen()) {
- if (is_xenoprof_primary())
- return HYPERVISOR_perfmon_op(PFM_STOP, NULL, 0);
- return 0;
- }
-
- state = ctx->ctx_state;
- is_system = ctx->ctx_fl_system;
-
- /*
- * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
- */
- if (state == PFM_CTX_UNLOADED) return -EINVAL;
-
- /*
- * In system wide and when the context is loaded, access can only happen
- * when the caller is running on the CPU being monitored by the session.
- * It does not have to be the owner (ctx_task) of the context per se.
- */
- if (is_system && ctx->ctx_cpu != smp_processor_id()) {
- DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
- return -EBUSY;
- }
- DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
- PFM_CTX_TASK(ctx)->pid,
- state,
- is_system));
- /*
- * in system mode, we need to update the PMU directly
- * and the user level state of the caller, which may not
- * necessarily be the creator of the context.
- */
- if (is_system) {
- /*
- * Update local PMU first
- *
- * disable dcr pp
- */
- ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
- ia64_srlz_i();
-
- /*
- * update local cpuinfo
- */
- PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
-
- /*
- * stop monitoring, does srlz.i
- */
- pfm_clear_psr_pp();
-
- /*
- * stop monitoring in the caller
- */
- ia64_psr(regs)->pp = 0;
-
- return 0;
- }
- /*
- * per-task mode
- */
-
- if (task == current) {
- /* stop monitoring at kernel level */
- pfm_clear_psr_up();
-
- /*
- * stop monitoring at the user level
- */
- ia64_psr(regs)->up = 0;
- } else {
- tregs = task_pt_regs(task);
-
- /*
- * stop monitoring at the user level
- */
- ia64_psr(tregs)->up = 0;
-
- /*
- * monitoring disabled in kernel at next reschedule
- */
- ctx->ctx_saved_psr_up = 0;
- DPRINT(("task=[%d]\n", task->pid));
- }
- return 0;
-}
-
-
-static int /*
- * pfm_start: start monitoring for a context (entry 5 of pfm_cmd_tab).
- *
- * arg and count are not used; regs is the pt_regs of the caller.
- *
- * When running on Xen, the request is forwarded with
- * HYPERVISOR_perfmon_op(PFM_START) if is_xenoprof_primary() is true and is
- * otherwise a successful no-op; the context is not touched in either case.
- *
- * Natively:
- *   - returns -EINVAL unless the context is PFM_CTX_LOADED
- *   - returns -EBUSY if the context is system-wide and the caller is not
- *     running on ctx->ctx_cpu
- *   - system-wide: sets the caller's user-level psr.pp, the
- *     PFM_CPUINFO_DCR_PP cpuinfo bit, psr.pp and finally dcr.pp
- *   - per-task, ctx_task == current: sets psr.up now and in regs
- *   - per-task, other task: sets ctx_saved_psr_up to IA64_PSR_UP and
- *     psr.up in that task's pt_regs
- * Returns 0 in the last three cases.
- */
-pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
- struct pt_regs *tregs;
- int state, is_system;
-
- if (is_running_on_xen()) {
- if (is_xenoprof_primary())
- return HYPERVISOR_perfmon_op(PFM_START, NULL, 0);
- return 0;
- }
- state = ctx->ctx_state;
- is_system = ctx->ctx_fl_system;
-
- if (state != PFM_CTX_LOADED) return -EINVAL;
-
- /*
- * In system wide and when the context is loaded, access can only happen
- * when the caller is running on the CPU being monitored by the session.
- * It does not have to be the owner (ctx_task) of the context per se.
- */
- if (is_system && ctx->ctx_cpu != smp_processor_id()) {
- DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
- return -EBUSY;
- }
-
- /*
- * in system mode, we need to update the PMU directly
- * and the user level state of the caller, which may not
- * necessarily be the creator of the context.
- */
- if (is_system) {
-
- /*
- * set user level psr.pp for the caller
- */
- ia64_psr(regs)->pp = 1;
-
- /*
- * now update the local PMU and cpuinfo
- */
- PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
-
- /*
- * start monitoring at kernel level
- */
- pfm_set_psr_pp();
-
- /* enable dcr pp */
- ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
- ia64_srlz_i();
-
- return 0;
- }
-
- /*
- * per-process mode
- */
-
- if (ctx->ctx_task == current) {
-
- /* start monitoring at kernel level */
- pfm_set_psr_up();
-
- /*
- * activate monitoring at user level
- */
- ia64_psr(regs)->up = 1;
-
- } else {
- tregs = task_pt_regs(ctx->ctx_task);
-
- /*
- * start monitoring at the kernel level the next
- * time the task is scheduled
- */
- ctx->ctx_saved_psr_up = IA64_PSR_UP;
-
- /*
- * activate monitoring at user level
- */
- ia64_psr(tregs)->up = 1;
- }
- return 0;
-}
-
-/*
- * pfm_get_pmc_reset - return the default value of PMC registers (entry 15
- * of pfm_cmd_tab).
- *
- * arg is an array of count pfarg_reg_t requests. For each one, reg_value
- * is filled with PMC_DFL_VAL(reg_num) and the return flag is cleared.
- * Processing stops at the first register for which PMC_IS_IMPL() is
- * false: that request is flagged PFM_REG_RETFL_EINVAL and -EINVAL is
- * returned. Returns 0 when every request was served. ctx and regs are
- * not used.
- *
- * NOTE(review): XEN_NOT_SUPPORTED_YET is defined outside this chunk; it
- * presumably returns early when running on Xen -- confirm.
- */
-static int
-pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	pfarg_reg_t *reqs = (pfarg_reg_t *)arg;
-	unsigned int cnum;
-	int idx;
-	XEN_NOT_SUPPORTED_YET;
-
-	for (idx = 0; idx < count; idx++) {
-		pfarg_reg_t *cur = &reqs[idx];
-
-		cnum = cur->reg_num;
-
-		if (!PMC_IS_IMPL(cnum)) {
-			/* unimplemented register: flag this request and give up */
-			PFM_REG_RETFLAG_SET(cur->reg_flags, PFM_REG_RETFL_EINVAL);
-			return -EINVAL;
-		}
-
-		cur->reg_value = PMC_DFL_VAL(cnum);
-
-		PFM_REG_RETFLAG_SET(cur->reg_flags, 0);
-
-		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, cur->reg_value));
-	}
-
-	return 0;
-}
-
-/*
- * pfm_check_task_exist - check that some thread still points at ctx.
- *
- * Walks the threads with do_each_thread()/while_each_thread() under
- * tasklist_lock (read side) looking for one whose thread.pfm_context is
- * ctx. Returns 0 if one was found, -ESRCH otherwise.
- */
-static int
-pfm_check_task_exist(pfm_context_t *ctx)
-{
-	struct task_struct *leader, *thr;
-	int status = -ESRCH;
-
-	read_lock(&tasklist_lock);
-
-	do_each_thread (leader, thr) {
-		if (thr->thread.pfm_context != ctx)
-			continue;
-
-		status = 0;
-		break;
-	} while_each_thread (leader, thr);
-
-	read_unlock(&tasklist_lock);
-
-	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", status, ctx));
-
-	return status;
-}
-
-static int /*
- * pfm_context_load: attach a context to a task and, for the current task,
- * load it on the PMU (entry 16 of pfm_cmd_tab).
- *
- * arg points to a pfarg_load_t whose load_pid names the target task;
- * count is not used; regs is the pt_regs of the caller.
- *
- * When running on Xen, the request is forwarded with
- * HYPERVISOR_perfmon_op(PFM_LOAD_CONTEXT, arg, 0) if is_xenoprof_primary()
- * is true and is otherwise a successful no-op.
- *
- * Natively, failures are:
- *   -EBUSY   context not PFM_CTX_UNLOADED, target task being debugged
- *            while the context uses debug registers, debug registers held
- *            by ptrace users (system-wide), or the target task already
- *            has a context
- *   -EINVAL  blocking mode requested on self, or a system-wide context
- *            loaded on a task other than current
- *   other    whatever pfm_get_task(), pfm_reserve_session() or
- *            pfm_check_task_exist() returned
- * On success the context is in PFM_CTX_LOADED state, linked to the task,
- * and monitoring is stopped.
- *
- * Error unwinding: error_unres releases the reserved session, error
- * undoes the system-wide debug register count taken here, and the task
- * reference is dropped for per-task contexts on another task.
- */
-pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
- struct task_struct *task;
- struct thread_struct *thread;
- struct pfm_context_t *old;
- unsigned long flags;
-#ifndef CONFIG_SMP
- struct task_struct *owner_task = NULL;
-#endif
- pfarg_load_t *req = (pfarg_load_t *)arg;
- unsigned long *pmcs_source, *pmds_source;
- int the_cpu;
- int ret = 0;
- int state, is_system, set_dbregs = 0;
-
- if (is_running_on_xen()) {
- if (is_xenoprof_primary())
- return HYPERVISOR_perfmon_op(PFM_LOAD_CONTEXT, arg, 0);
- return 0;
- }
- state = ctx->ctx_state;
- is_system = ctx->ctx_fl_system;
- /*
- * can only load from unloaded or terminated state
- */
- if (state != PFM_CTX_UNLOADED) {
- DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
- req->load_pid,
- ctx->ctx_state));
- return -EBUSY;
- }
-
- DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
-
- if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
- DPRINT(("cannot use blocking mode on self\n"));
- return -EINVAL;
- }
-
- ret = pfm_get_task(ctx, req->load_pid, &task);
- if (ret) {
- DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
- return ret;
- }
-
- ret = -EINVAL;
-
- /*
- * system wide is self monitoring only
- */
- if (is_system && task != current) {
- DPRINT(("system wide is self monitoring only load_pid=%d\n",
- req->load_pid));
- goto error;
- }
-
- thread = &task->thread;
-
- ret = 0;
- /*
- * cannot load a context which is using range restrictions,
- * into a task that is being debugged.
- */
- if (ctx->ctx_fl_using_dbreg) {
- if (thread->flags & IA64_THREAD_DBG_VALID) {
- ret = -EBUSY;
- DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
- goto error;
- }
- LOCK_PFS(flags);
-
- if (is_system) {
- if (pfm_sessions.pfs_ptrace_use_dbregs) {
- DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
- ret = -EBUSY;
- } else {
- pfm_sessions.pfs_sys_use_dbregs++;
- DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
- set_dbregs = 1;
- }
- }
-
- UNLOCK_PFS(flags);
-
- if (ret) goto error;
- }
-
- /*
- * SMP system-wide monitoring implies self-monitoring.
- *
- * The programming model expects the task to
- * be pinned on a CPU throughout the session.
- * Here we take note of the current CPU at the
- * time the context is loaded. No call from
- * another CPU will be allowed.
- *
- * The pinning via sched_setaffinity()
- * must be done by the calling task prior
- * to this call.
- *
- * systemwide: keep track of CPU this session is supposed to run on
- */
- the_cpu = ctx->ctx_cpu = smp_processor_id();
-
- ret = -EBUSY;
- /*
- * now reserve the session
- * (the -EBUSY stored just above is overwritten by the call's result)
- */
- ret = pfm_reserve_session(current, is_system, the_cpu);
- if (ret) goto error;
-
- /*
- * task is necessarily stopped at this point.
- *
- * If the previous context was zombie, then it got removed in
- * pfm_save_regs(). Therefore we should not see it here.
- * If we see a context, then this is an active context
- *
- * XXX: needs to be atomic
- */
- DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
- thread->pfm_context, ctx));
-
- ret = -EBUSY;
- old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
- if (old != NULL) {
- DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
- goto error_unres;
- }
-
- pfm_reset_msgq(ctx);
-
- ctx->ctx_state = PFM_CTX_LOADED;
-
- /*
- * link context to task
- */
- ctx->ctx_task = task;
-
- if (is_system) {
- /*
- * we load as stopped
- */
- PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
- PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
-
- if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
- } else {
- thread->flags |= IA64_THREAD_PM_VALID;
- }
-
- /*
- * propagate into thread-state
- */
- pfm_copy_pmds(task, ctx);
- pfm_copy_pmcs(task, ctx);
-
- pmcs_source = thread->pmcs;
- pmds_source = thread->pmds;
-
- /*
- * always the case for system-wide
- */
- if (task == current) {
-
- if (is_system == 0) {
-
- /* allow user level control */
- ia64_psr(regs)->sp = 0;
- DPRINT(("clearing psr.sp for [%d]\n", task->pid));
-
- SET_LAST_CPU(ctx, smp_processor_id());
- INC_ACTIVATION();
- SET_ACTIVATION(ctx);
-#ifndef CONFIG_SMP
- /*
- * push the other task out, if any
- */
- owner_task = GET_PMU_OWNER();
- if (owner_task) pfm_lazy_save_regs(owner_task);
-#endif
- }
- /*
- * load all PMD from ctx to PMU (as opposed to thread state)
- * restore all PMC from ctx to PMU
- */
- pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
- pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);
-
- ctx->ctx_reload_pmcs[0] = 0UL;
- ctx->ctx_reload_pmds[0] = 0UL;
-
- /*
- * guaranteed safe by earlier check against DBG_VALID
- */
- if (ctx->ctx_fl_using_dbreg) {
- pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
- pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
- }
- /*
- * set new ownership
- */
- SET_PMU_OWNER(task, ctx);
-
- DPRINT(("context loaded on PMU for [%d]\n", task->pid));
- } else {
- /*
- * when not current, task MUST be stopped, so this is safe
- */
- regs = task_pt_regs(task);
-
- /* force a full reload */
- ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
- SET_LAST_CPU(ctx, -1);
-
- /* initial saved psr (stopped) */
- ctx->ctx_saved_psr_up = 0UL;
- ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
- }
-
- ret = 0;
-
-error_unres:
- if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
-error:
- /*
- * we must undo the dbregs setting (for system-wide)
- */
- if (ret && set_dbregs) {
- LOCK_PFS(flags);
- pfm_sessions.pfs_sys_use_dbregs--;
- UNLOCK_PFS(flags);
- }
- /*
- * release task, there is now a link with the context
- */
- if (is_system == 0 && task != current) {
- pfm_put_task(task);
-
- if (ret == 0) {
- /*
- * the task may have gone away in the meantime: if no thread
- * references the context anymore, revert to the unloaded state
- */
- ret = pfm_check_task_exist(ctx);
- if (ret) {
- ctx->ctx_state = PFM_CTX_UNLOADED;
- ctx->ctx_task = NULL;
- }
- }
- }
- return ret;
-}
-
-/*
- * pfm_context_unload: detach a context from its task (entry 17 of
- * pfm_cmd_tab, also called from pfm_exit_thread()).
- *
- * arg and count are not used; regs is the pt_regs of the caller.
- *
- * When running on Xen, the request is forwarded with
- * HYPERVISOR_perfmon_op(PFM_UNLOAD_CONTEXT) if is_xenoprof_primary() is
- * true and is otherwise a successful no-op.
- *
- * Natively, returns 0 immediately if the context is already
- * PFM_CTX_UNLOADED, and returns the error from pfm_stop() if stopping
- * fails. Otherwise the context becomes PFM_CTX_UNLOADED, the PMDs are
- * flushed to the context, the session is unreserved (unless the previous
- * state was PFM_CTX_ZOMBIE), the task/context links are broken and 0 is
- * returned.
- *
- * in this function, we do not need to increase the use count
- * for the task via get_task_struct(), because we hold the
- * context lock. If the task were to disappear while having
- * a context attached, it would go through pfm_exit_thread()
- * which also grabs the context lock and would therefore be blocked
- * until we are here.
- */
-static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);
-
-static int
-pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
- struct task_struct *task = PFM_CTX_TASK(ctx);
- struct pt_regs *tregs;
- int prev_state, is_system;
- int ret;
-
- if (is_running_on_xen()) {
- if (is_xenoprof_primary())
- return HYPERVISOR_perfmon_op(PFM_UNLOAD_CONTEXT,
- NULL, 0);
- return 0;
- }
- DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
-
- prev_state = ctx->ctx_state;
- is_system = ctx->ctx_fl_system;
-
- /*
- * unload only when necessary
- */
- if (prev_state == PFM_CTX_UNLOADED) {
- DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
- return 0;
- }
-
- /*
- * clear psr and dcr bits
- */
- ret = pfm_stop(ctx, NULL, 0, regs);
- if (ret) return ret;
-
- ctx->ctx_state = PFM_CTX_UNLOADED;
-
- /*
- * in system mode, we need to update the PMU directly
- * and the user level state of the caller, which may not
- * necessarily be the creator of the context.
- */
- if (is_system) {
-
- /*
- * Update cpuinfo
- *
- * local PMU is taken care of in pfm_stop()
- */
- PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
- PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
-
- /*
- * save PMDs in context
- * release ownership
- */
- pfm_flush_pmds(current, ctx);
-
- /*
- * at this point we are done with the PMU
- * so we can unreserve the resource.
- */
- if (prev_state != PFM_CTX_ZOMBIE)
- pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);
-
- /*
- * disconnect context from task
- */
- task->thread.pfm_context = NULL;
- /*
- * disconnect task from context
- */
- ctx->ctx_task = NULL;
-
- /*
- * There is nothing more to cleanup here.
- */
- return 0;
- }
-
- /*
- * per-task mode
- * (tregs is computed here but not used afterwards in this function)
- */
- tregs = task == current ? regs : task_pt_regs(task);
-
- if (task == current) {
- /*
- * cancel user level control
- */
- ia64_psr(regs)->sp = 1;
-
- DPRINT(("setting psr.sp for [%d]\n", task->pid));
- }
- /*
- * save PMDs to context
- * release ownership
- */
- pfm_flush_pmds(task, ctx);
-
- /*
- * at this point we are done with the PMU
- * so we can unreserve the resource.
- *
- * when state was ZOMBIE, we have already unreserved.
- */
- if (prev_state != PFM_CTX_ZOMBIE)
- pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);
-
- /*
- * reset activation counter and psr
- */
- ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
- SET_LAST_CPU(ctx, -1);
-
- /*
- * PMU state will not be restored
- */
- task->thread.flags &= ~IA64_THREAD_PM_VALID;
-
- /*
- * break links between context and task
- */
- task->thread.pfm_context = NULL;
- ctx->ctx_task = NULL;
-
- PFM_SET_WORK_PENDING(task, 0);
-
- ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
- ctx->ctx_fl_can_restart = 0;
- ctx->ctx_fl_going_zombie = 0;
-
- DPRINT(("disconnected [%d] from context\n", task->pid));
-
- return 0;
-}
-
-
-/*
- * called only from exit_thread(): task == current
- * we come here only if current has a context attached (loaded or masked)
- *
- * Unloads the context under the context lock. For LOADED/MASKED contexts
- * the user is then notified via pfm_end_notify_user(); for a ZOMBIE
- * context the context itself is freed once the lock has been dropped.
- * Unload failures and unexpected states are only logged.
- * After unloading, the BUG_ON() checks assert that psr.up/psr.pp are
- * clear (live and in the saved pt_regs) and that the PMU has no owner.
- */
-void
-pfm_exit_thread(struct task_struct *task)
-{
- pfm_context_t *ctx;
- unsigned long flags;
- struct pt_regs *regs = task_pt_regs(task);
- int ret, state;
- int free_ok = 0;
-
- ctx = PFM_GET_CTX(task);
-
- PROTECT_CTX(ctx, flags);
-
- DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
-
- state = ctx->ctx_state;
- switch(state) {
- case PFM_CTX_UNLOADED:
- /*
- * only comes to this function if pfm_context is not NULL, i.e., cannot
- * be in unloaded state
- */
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
- break;
- case PFM_CTX_LOADED:
- case PFM_CTX_MASKED:
- ret = pfm_context_unload(ctx, NULL, 0, regs);
- if (ret) {
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
- }
- DPRINT(("ctx unloaded for current state was %d\n", state));
-
- pfm_end_notify_user(ctx);
- break;
- case PFM_CTX_ZOMBIE:
- ret = pfm_context_unload(ctx, NULL, 0, regs);
- if (ret) {
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
- }
- free_ok = 1;
- break;
- default:
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
- break;
- }
- UNPROTECT_CTX(ctx, flags);
-
- { u64 psr = pfm_get_psr();
- BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
- BUG_ON(GET_PMU_OWNER());
- BUG_ON(ia64_psr(regs)->up);
- BUG_ON(ia64_psr(regs)->pp);
- }
-
- /*
- * All memory free operations (especially for vmalloc'ed memory)
- * MUST be done with interrupts ENABLED.
- */
- if (free_ok) pfm_context_free(ctx);
-}
-
-/*
- * Command dispatch table used by sys_perfmonctl(): the command number is
- * the index into pfm_cmd_tab[].
- *
- * functions MUST be listed in the increasing order of their index (see perfmon.h)
- *
- * PFM_CMD(name, flags, arg_count, arg_type, getsz) builds an entry from
- *   the handler, its stringified name, the command flags, the expected
- *   number of arguments (PFM_CMD_ARG_MANY for a variable count), the size
- *   of one argument, and an optional callback returning the size of any
- *   extra data following the argument.
- * PFM_CMD_S(name, flags) builds an entry for a command without argument.
- * PFM_CMD_NONE fills unused slots; its NULL handler makes
- *   sys_perfmonctl() reject the command with -EINVAL.
- * PFM_CMD_PCLRW / PFM_CMD_PCLRWS are the usual flag combinations: the
- *   command takes a context file descriptor and a read-write argument,
- *   the S variant additionally sets PFM_CMD_STOP.
- */
-#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
-#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
-#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
-#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW)
-#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL}
-
-static pfm_cmd_desc_t pfm_cmd_tab[]={
-/* 0 */PFM_CMD_NONE,
-/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
-/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
-/* 6 */PFM_CMD_NONE,
-/* 7 */PFM_CMD_NONE,
-/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
-/* 9 */PFM_CMD_NONE,
-/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
-/* 11 */PFM_CMD_NONE,
-/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
-/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
-/* 14 */PFM_CMD_NONE,
-/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
-/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
-/* 18 */PFM_CMD_NONE,
-/* 19 */PFM_CMD_NONE,
-/* 20 */PFM_CMD_NONE,
-/* 21 */PFM_CMD_NONE,
-/* 22 */PFM_CMD_NONE,
-/* 23 */PFM_CMD_NONE,
-/* 24 */PFM_CMD_NONE,
-/* 25 */PFM_CMD_NONE,
-/* 26 */PFM_CMD_NONE,
-/* 27 */PFM_CMD_NONE,
-/* 28 */PFM_CMD_NONE,
-/* 29 */PFM_CMD_NONE,
-/* 30 */PFM_CMD_NONE,
-/* 31 */PFM_CMD_NONE,
-/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
-/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
-};
-#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
-
-static int /*
- * pfm_check_task_state: decide whether command cmd may operate on ctx
- * given the state of the task the context is attached to.
- *
- * Called by sys_perfmonctl() with the context lock held; flags is the
- * value saved by the caller's PROTECT_CTX() and is passed by value.
- *
- * Returns 0 when the command may proceed: no task attached,
- * self-monitoring, system-wide context, UNLOADED context, MASKED context
- * for any command but PFM_UNLOAD_CONTEXT, or a command for which
- * PFM_CMD_STOPPED(cmd) is false. Returns -EINVAL for a ZOMBIE context and
- * -EBUSY when the command needs the task stopped and it is neither
- * TASK_STOPPED nor TASK_TRACED.
- *
- * When the task must be stopped, the context lock is dropped around
- * wait_task_inactive(); if the context state changed in the meantime all
- * the checks are redone from the recheck label.
- */
-pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
-{
- struct task_struct *task;
- int state, old_state;
-
-recheck:
- state = ctx->ctx_state;
- task = ctx->ctx_task;
-
- if (task == NULL) {
- DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
- return 0;
- }
-
- DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
- ctx->ctx_fd,
- state,
- task->pid,
- task->state, PFM_CMD_STOPPED(cmd)));
-
- /*
- * self-monitoring always ok.
- *
- * for system-wide the caller can either be the creator of the
- * context (the one to which the context is attached) OR
- * a task running on the same CPU as the session.
- */
- if (task == current || ctx->ctx_fl_system) return 0;
-
- /*
- * we are monitoring another thread
- */
- switch(state) {
- case PFM_CTX_UNLOADED:
- /*
- * if context is UNLOADED we are safe to go
- */
- return 0;
- case PFM_CTX_ZOMBIE:
- /*
- * no command can operate on a zombie context
- */
- DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
- return -EINVAL;
- case PFM_CTX_MASKED:
- /*
- * PMU state has been saved to software even though
- * the thread may still be running.
- */
- if (cmd != PFM_UNLOAD_CONTEXT) return 0;
- }
-
- /*
- * context is LOADED or MASKED. Some commands may need to have
- * the task stopped.
- *
- * We could lift this restriction for UP but it would mean that
- * the user has no guarantee the task would not run between
- * two successive calls to perfmonctl(). That's probably OK.
- * If this user wants to ensure the task does not run, then
- * the task must be stopped.
- */
- if (PFM_CMD_STOPPED(cmd)) {
- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
- DPRINT(("[%d] task not in stopped state\n", task->pid));
- return -EBUSY;
- }
- /*
- * task is now stopped, wait for ctxsw out
- *
- * This is an interesting point in the code.
- * We need to unprotect the context because
- * the pfm_save_regs() routines needs to grab
- * the same lock. There is danger in doing
- * this because it leaves a window open for
- * another task to get access to the context
- * and possibly change its state. The one thing
- * that is not possible is for the context to disappear
- * because we are protected by the VFS layer, i.e.,
- * get_fd()/put_fd().
- */
- old_state = state;
-
- UNPROTECT_CTX(ctx, flags);
-
- wait_task_inactive(task);
-
- PROTECT_CTX(ctx, flags);
-
- /*
- * we must recheck to verify if state has changed
- */
- if (ctx->ctx_state != old_state) {
- DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
- goto recheck;
- }
- }
- return 0;
-}
-
-/*
- * system-call entry point (must return long)
- *
- * Looks up cmd in pfm_cmd_tab[], copies the user arguments (count elements
- * of the command's base size, plus any extra size reported by the command's
- * getsize hook) into a kernel buffer, and, for commands flagged PFM_CMD_FD,
- * resolves fd to a perfmon context which is locked for the duration of the
- * call. The command handler is then invoked and, for commands selected by
- * PFM_CMD_RW_ARG(), the base arguments are copied back to user space.
- *
- * Returns the handler's result, or a negative errno when validation or
- * setup fails (-ENOSYS, -EINVAL, -E2BIG, -ENOMEM, -EFAULT, -EBADF, or the
- * value returned by the getsize hook / pfm_check_task_state()).
- */
-asmlinkage long
-sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
-{
-	struct file *file = NULL;
-	pfm_context_t *ctx = NULL;
-	unsigned long flags = 0UL;
-	void *args_k = NULL;
-	long ret; /* will expand int return types */
-	size_t base_sz, sz, xtra_sz = 0;
-	int narg, completed_args = 0, call_made = 0, cmd_flags;
-	int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-	int (*getsize)(void *arg, size_t *sz);
-#define PFM_MAX_ARGSIZE 4096
-
-	/*
-	 * reject any call if perfmon was disabled at initialization
-	 */
-	if (unlikely(pmu_conf == NULL)) return -ENOSYS;
-
-	if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
-		DPRINT(("invalid cmd=%d\n", cmd));
-		return -EINVAL;
-	}
-
-	func = pfm_cmd_tab[cmd].cmd_func;
-	narg = pfm_cmd_tab[cmd].cmd_narg;
-	base_sz = pfm_cmd_tab[cmd].cmd_argsize;
-	getsize = pfm_cmd_tab[cmd].cmd_getsize;
-	cmd_flags = pfm_cmd_tab[cmd].cmd_flags;
-
-	if (unlikely(func == NULL)) {
-		DPRINT(("invalid cmd=%d\n", cmd));
-		return -EINVAL;
-	}
-
-	DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
-		PFM_CMD_NAME(cmd),
-		cmd,
-		narg,
-		base_sz,
-		count));
-
-	/*
-	 * check if number of arguments matches what the command expects
-	 */
-	if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
-		return -EINVAL;
-
-restart_args:
-	sz = xtra_sz + base_sz*count;
-	/*
-	 * limit abuse to min page size
-	 *
-	 * This check is reached a second time (via restart_args) once the
-	 * extra argument size is known. By then args_k has already been
-	 * allocated, so we must leave through error_args to free it instead
-	 * of returning directly. On the first pass args_k and file are both
-	 * NULL, which error_args handles as well.
-	 */
-	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
-		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
-		ret = -E2BIG;
-		goto error_args;
-	}
-
-	/*
-	 * allocate default-sized argument buffer
-	 */
-	if (likely(count && args_k == NULL)) {
-		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
-		if (args_k == NULL) return -ENOMEM;
-	}
-
-	ret = -EFAULT;
-
-	/*
-	 * copy arguments
-	 *
-	 * assume sz = 0 for command without parameters
-	 */
-	if (sz && copy_from_user(args_k, arg, sz)) {
-		DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
-		goto error_args;
-	}
-
-	/*
-	 * check if command supports extra parameters
-	 */
-	if (completed_args == 0 && getsize) {
-		/*
-		 * get extra parameters size (based on main argument)
-		 */
-		ret = (*getsize)(args_k, &xtra_sz);
-		if (ret) goto error_args;
-
-		completed_args = 1;
-
-		DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));
-
-		/* retry if necessary */
-		if (likely(xtra_sz)) goto restart_args;
-	}
-
-	if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;
-
-	ret = -EBADF;
-
-	file = fget(fd);
-	if (unlikely(file == NULL)) {
-		DPRINT(("invalid fd %d\n", fd));
-		goto error_args;
-	}
-	if (unlikely(PFM_IS_FILE(file) == 0)) {
-		DPRINT(("fd %d not related to perfmon\n", fd));
-		goto error_args;
-	}
-
-	ctx = (pfm_context_t *)file->private_data;
-	if (unlikely(ctx == NULL)) {
-		DPRINT(("no context for fd %d\n", fd));
-		goto error_args;
-	}
-	prefetch(&ctx->ctx_state);
-
-	PROTECT_CTX(ctx, flags);
-
-	/*
-	 * check task is stopped
-	 */
-	ret = pfm_check_task_state(ctx, cmd, flags);
-	if (unlikely(ret)) goto abort_locked;
-
-skip_fd:
-	/* ctx is NULL here for commands that do not take a file descriptor */
-	ret = (*func)(ctx, args_k, count, task_pt_regs(current));
-
-	call_made = 1;
-
-abort_locked:
-	if (likely(ctx)) {
-		DPRINT(("context unlocked\n"));
-		UNPROTECT_CTX(ctx, flags);
-	}
-
-	/* copy argument back to user, if needed */
-	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
-
-error_args:
-	/* common exit: drop the file reference (if taken) and the argument buffer */
-	if (file)
-		fput(file);
-
-	kfree(args_k);
-
-	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
-
-	return ret;
-}
-
-/*
- * Resume the current task's context after an overflow has been handled.
- *
- * When a sampling format is attached, its restart hook decides (through
- * rst_ctrl) whether the overflowed PMDs are reset and whether monitoring
- * stays masked. Without a sampling format the overflowed PMDs are always
- * reset with their long reset values and monitoring is resumed. If the
- * restart hook reports an error nothing is changed.
- */
-static void
-pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
-{
-	pfm_buffer_fmt_t *buf_fmt = ctx->ctx_buf_fmt;
-	pfm_ovfl_ctrl_t rst_ctrl;
-	int entry_state = ctx->ctx_state;
-	int err = 0;
-
-	/* defaults, used as-is when no sampling format is attached */
-	rst_ctrl.bits.mask_monitoring = 0;
-	rst_ctrl.bits.reset_ovfl_pmds = 1;
-
-	/*
-	 * Unlock sampling buffer and reset index atomically
-	 * XXX: not really needed when blocking
-	 */
-	if (CTX_HAS_SMPL(ctx)) {
-		/* the format's restart hook decides about the reset */
-		rst_ctrl.bits.reset_ovfl_pmds = 0;
-
-		if (entry_state == PFM_CTX_LOADED)
-			err = pfm_buf_fmt_restart_active(buf_fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
-		else
-			err = pfm_buf_fmt_restart(buf_fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
-	}
-
-	/* restart hook failed: leave the context untouched */
-	if (err != 0)
-		return;
-
-	if (rst_ctrl.bits.reset_ovfl_pmds)
-		pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
-
-	if (rst_ctrl.bits.mask_monitoring) {
-		DPRINT(("stopping monitoring\n"));
-		/* pfm_stop_monitoring(current, regs) intentionally not called */
-	} else {
-		DPRINT(("resuming monitoring\n"));
-		if (ctx->ctx_state == PFM_CTX_MASKED)
-			pfm_restore_monitoring(current);
-	}
-
-	ctx->ctx_state = PFM_CTX_LOADED;
-}
-
-/*
- * Unload the context from the current task and wake up whoever sleeps
- * on the context's zombie wait queue.
- *
- * context MUST BE LOCKED when calling
- * can only be called for current
- */
-static void
-pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
-{
-	int err;
-
-	DPRINT(("entering for [%d]\n", current->pid));
-
-	err = pfm_context_unload(ctx, NULL, 0, regs);
-	if (err != 0)
-		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, err);
-
-	/*
-	 * Tell the controlling task that we are now disconnected. The
-	 * context is still locked at this point, so that task only gets
-	 * access once we return from pfm_handle_work().
-	 */
-	wake_up_interruptible(&ctx->ctx_zombieq);
-}
-
-static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
- /*
- * pfm_handle_work() can be called with interrupts enabled
- * (TIF_NEED_RESCHED) or disabled. The down_interruptible
- * call may sleep, therefore we must re-enable interrupts
- * to avoid deadlocks. It is safe to do so because this function
- * is called ONLY when returning to user level (PUStk=1), in which case
- * there is no risk of kernel stack overflow due to deep
- * interrupt nesting.
- *
- * Summary of the flow below: the pending-work flag and the task
- * notification are cleared, the trap reason is extracted and reset, and
- * then one of three things happens:
- *  - the context is (going) zombie: it is force-terminated;
- *  - the reason is PFM_TRAP_REASON_RESET: monitoring is resumed at once;
- *  - otherwise: the context is unlocked, interrupts are enabled and the
- *    task waits (interruptibly) on ctx_restart_done before resuming.
- * Note: the sleeping primitive actually used below is
- * wait_for_completion_interruptible(), not down_interruptible().
- */
-void
-pfm_handle_work(void)
-{
- pfm_context_t *ctx;
- struct pt_regs *regs;
- unsigned long flags, dummy_flags;
- unsigned long ovfl_regs;
- unsigned int reason;
- int ret;
-
- ctx = PFM_GET_CTX(current);
- if (ctx == NULL) {
- printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
- return;
- }
-
- PROTECT_CTX(ctx, flags);
-
- PFM_SET_WORK_PENDING(current, 0);
-
- pfm_clear_task_notify();
-
- regs = task_pt_regs(current);
-
- /*
- * extract reason for being here and clear
- */
- reason = ctx->ctx_fl_trap_reason;
- ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
- ovfl_regs = ctx->ctx_ovfl_regs[0];
-
- DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));
-
- /*
- * must be done before we check for simple-reset mode
- */
- if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
-
-
- //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
- if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
-
- /*
- * restore interrupt mask to what it was on entry.
- * Could be enabled/disabled.
- */
- UNPROTECT_CTX(ctx, flags);
-
- /*
- * force interrupt enable because of down_interruptible()
- */
- local_irq_enable();
-
- DPRINT(("before block sleeping\n"));
-
- /*
- * may go through without blocking on SMP systems
- * if restart has been received already by the time we call down()
- */
- ret = wait_for_completion_interruptible(&ctx->ctx_restart_done);
-
- DPRINT(("after block sleeping ret=%d\n", ret));
-
- /*
- * lock context and mask interrupts again
- * We save flags into a dummy because we may have
- * altered interrupts mask compared to entry in this
- * function.
- */
- PROTECT_CTX(ctx, dummy_flags);
-
- /*
- * we need to read the ovfl_regs only after wake-up
- * because we may have had pfm_write_pmds() in between
- * and that can changed PMD values and therefore
- * ovfl_regs is reset for these new PMD values.
- */
- ovfl_regs = ctx->ctx_ovfl_regs[0];
-
- if (ctx->ctx_fl_going_zombie) {
-do_zombie: /* also entered by goto from the zombie check done before blocking */
- DPRINT(("context is zombie, bailing out\n"));
- pfm_context_force_terminate(ctx, regs);
- goto nothing_to_do;
- }
- /*
- * in case of interruption of down() we don't restart anything
- */
- if (ret < 0) goto nothing_to_do;
-
-skip_blocking:
- pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
- ctx->ctx_ovfl_regs[0] = 0UL;
-
-nothing_to_do:
- /*
- * restore flags as they were upon entry
- */
- UNPROTECT_CTX(ctx, flags);
-}
-
-/*
- * Wake up user-level listeners of a context: sleepers on the message
- * wait queue (only when a message was queued) and SIGIO subscribers.
- * Nothing is delivered when the context is a zombie. Always returns 0.
- */
-static int
-pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
-{
-	if (ctx->ctx_state != PFM_CTX_ZOMBIE) {
-		DPRINT(("waking up somebody\n"));
-
-		if (msg != NULL)
-			wake_up_interruptible(&ctx->ctx_msgq_wait);
-
-		/*
-		 * safe, we are not in intr handler, nor in ctxsw when
-		 * we come here
-		 */
-		kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);
-	} else {
-		DPRINT(("ignoring overflow notification, owner is zombie\n"));
-	}
-
-	return 0;
-}
-
-/*
- * Queue an overflow message (unless the context was created with
- * messages disabled) and notify user level.
- *
- * Returns -1 when no message slot is available, otherwise the result
- * of pfm_notify_user().
- */
-static int
-pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
-{
-	pfm_msg_t *msg = NULL;
-	int idx;
-
-	if (!ctx->ctx_fl_no_msg) {
-		msg = pfm_get_new_msg(ctx);
-		if (!msg) {
-			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
-			return -1;
-		}
-
-		msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL;
-		msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd;
-		msg->pfm_ovfl_msg.msg_active_set = 0;
-
-		/* only the first bitmask word carries overflow information */
-		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
-		for (idx = 1; idx <= 3; idx++)
-			msg->pfm_ovfl_msg.msg_ovfl_pmds[idx] = 0UL;
-
-		msg->pfm_ovfl_msg.msg_tstamp = 0UL;
-	}
-
-	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
-		msg,
-		ctx->ctx_fl_no_msg,
-		ctx->ctx_fd,
-		ovfl_pmds));
-
-	return pfm_notify_user(ctx, msg);
-}
-
-/*
- * Queue an end-of-session message (PFM_MSG_END) and notify user level.
- *
- * Returns -1 when no message slot is available, otherwise the result
- * of pfm_notify_user().
- */
-static int
-pfm_end_notify_user(pfm_context_t *ctx)
-{
-	pfm_msg_t *msg = pfm_get_new_msg(ctx);
-
-	if (!msg) {
-		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
-		return -1;
-	}
-
-	/* clear the whole message so no stale kernel data leaks to user level */
-	memset(msg, 0, sizeof(*msg));
-
-	msg->pfm_end_msg.msg_type = PFM_MSG_END;
-	msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd;
-	msg->pfm_ovfl_msg.msg_tstamp = 0UL;
-
-	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
-		msg,
-		ctx->ctx_fl_no_msg,
-		ctx->ctx_fd));
-
-	return pfm_notify_user(ctx, msg);
-}
-
-/*
- * main overflow processing routine.
- * it can be called from the interrupt path or explicitly during the context switch code
- *
- * Steps performed below:
- *  1. for every counter flagged in pmc0, add (1 + ovfl_val) to the software
- *     counter and record the ones whose 64-bit software value wrapped;
- *  2. if nothing wrapped, return;
- *  3. with a sampling format, call its handler once per wrapped counter and
- *     accumulate the control bits it returns; without one, derive the
- *     control bits from the per-counter notify settings;
- *  4. reset the requested PMDs with their short reset values;
- *  5. record what must be reset on restart, flag the task for blocking
- *     if required, mask monitoring if required and finally notify the user.
- *
- * A zombie context only gets its monitoring stopped (see stop_monitoring).
- */
-static void
-pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
-{
- pfm_ovfl_arg_t *ovfl_arg;
- unsigned long mask;
- unsigned long old_val, ovfl_val, new_val;
- unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
- unsigned long tstamp;
- pfm_ovfl_ctrl_t ovfl_ctrl;
- unsigned int i, has_smpl;
- int must_notify = 0;
-
- if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;
-
- /*
- * sanity test. Should never happen
- */
- if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
-
- tstamp = ia64_get_itc();
- mask = pmc0 >> PMU_FIRST_COUNTER;
- ovfl_val = pmu_conf->ovfl_val;
- has_smpl = CTX_HAS_SMPL(ctx);
-
- DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
- "used_pmds=0x%lx\n",
- pmc0,
- task ? task->pid: -1,
- (regs ? regs->cr_iip : 0),
- CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
- ctx->ctx_used_pmds[0]));
-
-
- /*
- * first we update the virtual counters
- * assume there was a prior ia64_srlz_d() issued
- */
- for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
-
- /* skip pmd which did not overflow */
- if ((mask & 0x1) == 0) continue;
-
- /*
- * Note that the pmd is not necessarily 0 at this point as qualified events
- * may have happened before the PMU was frozen. The residual count is not
- * taken into consideration here but will be with any read of the pmd via
- * pfm_read_pmds().
- */
- old_val = new_val = ctx->ctx_pmds[i].val;
- new_val += 1 + ovfl_val;
- ctx->ctx_pmds[i].val = new_val;
-
- /*
- * check for overflow condition
- * (the unsigned addition wrapped around, so the new value is smaller)
- */
- if (likely(old_val > new_val)) {
- ovfl_pmds |= 1UL << i;
- if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
- }
-
- DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
- i,
- new_val,
- old_val,
- ia64_get_pmd(i) & ovfl_val,
- ovfl_pmds,
- ovfl_notify));
- }
-
- /*
- * there was no 64-bit overflow, nothing else to do
- */
- if (ovfl_pmds == 0UL) return;
-
- /*
- * reset all control bits
- */
- ovfl_ctrl.val = 0;
- reset_pmds = 0UL;
-
- /*
- * if a sampling format module exists, then we "cache" the overflow by
- * calling the module's handler() routine.
- */
- if (has_smpl) {
- unsigned long start_cycles, end_cycles;
- unsigned long pmd_mask;
- int j, k, ret = 0;
- int this_cpu = smp_processor_id();
-
- pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
- ovfl_arg = &ctx->ctx_ovfl_arg;
-
- prefetch(ctx->ctx_smpl_hdr);
-
- /* one handler call per overflowed pmd; stops at the first handler error */
- for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {
-
- mask = 1UL << i;
-
- if ((pmd_mask & 0x1) == 0) continue;
-
- ovfl_arg->ovfl_pmd = (unsigned char )i;
- ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0;
- ovfl_arg->active_set = 0;
- ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
- ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
-
- ovfl_arg->pmd_value = ctx->ctx_pmds[i].val;
- ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
- ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid;
-
- /*
- * copy values of pmds of interest. Sampling format may copy them
- * into sampling buffer.
- */
- if (smpl_pmds) {
- for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
- if ((smpl_pmds & 0x1) == 0) continue;
- ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
- DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
- }
- }
-
- pfm_stats[this_cpu].pfm_smpl_handler_calls++;
-
- start_cycles = ia64_get_itc();
-
- /*
- * call custom buffer format record (handler) routine
- */
- ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
-
- end_cycles = ia64_get_itc();
-
- /*
- * For those controls, we take the union because they have
- * an all or nothing behavior.
- */
- ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user;
- ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task;
- ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
- /*
- * build the bitmask of pmds to reset now
- */
- if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
-
- pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
- }
- /*
- * when the module cannot handle the rest of the overflows, we abort right here
- */
- if (ret && pmd_mask) {
- DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
- pmd_mask<<PMU_FIRST_COUNTER));
- }
- /*
- * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
- */
- ovfl_pmds &= ~reset_pmds;
- } else {
- /*
- * when no sampling module is used, then the default
- * is to notify on overflow if requested by user
- */
- ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0;
- ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0;
- ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
- ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
- /*
- * if needed, we reset all overflowed pmds
- */
- if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
- }
-
- DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds));
-
- /*
- * reset the requested PMD registers using the short reset values
- */
- if (reset_pmds) {
- unsigned long bm = reset_pmds;
- pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
- }
-
- if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
- /*
- * keep track of what to reset when unblocking
- */
- ctx->ctx_ovfl_regs[0] = ovfl_pmds;
-
- /*
- * check for blocking context
- */
- if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {
-
- ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;
-
- /*
- * set the perfmon specific checking pending work for the task
- */
- PFM_SET_WORK_PENDING(task, 1);
-
- /*
- * when coming from ctxsw, current still points to the
- * previous task, therefore we must work with task and not current.
- */
- pfm_set_task_notify(task);
- }
- /*
- * defer until state is changed (shorten spin window). the context is locked
- * anyway, so the signal receiver would come spin for nothing.
- */
- must_notify = 1;
- }
-
- DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
- GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
- PFM_GET_WORK_PENDING(task),
- ctx->ctx_fl_trap_reason,
- ovfl_pmds,
- ovfl_notify,
- ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
- /*
- * in case monitoring must be stopped, we toggle the psr bits
- */
- if (ovfl_ctrl.bits.mask_monitoring) {
- pfm_mask_monitoring(task);
- ctx->ctx_state = PFM_CTX_MASKED;
- ctx->ctx_fl_can_restart = 1;
- }
-
- /*
- * send notification now
- */
- if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
-
- return;
-
-sanity_check:
- /* pmc0 bit 0 was clear although we were called for an overflow */
- printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
- smp_processor_id(),
- task ? task->pid : -1,
- pmc0);
- return;
-
-stop_monitoring:
- /*
- * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
- * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
- * come here as zombie only if the task is the current task. In which case, we
- * can access the PMU hardware directly.
- *
- * Note that zombies do have PM_VALID set. So here we do the minimal.
- *
- * In case the context was zombified it could not be reclaimed at the time
- * the monitoring program exited. At this point, the PMU reservation has been
- * returned, the sampling buffer has been freed. We must convert this call
- * into a spurious interrupt. However, we must also avoid infinite overflows
- * by stopping monitoring for this task. We can only come here for a per-task
- * context. All we need to do is to stop monitoring using the psr bits which
- * are always task private. By re-enabling secure monitoring, we ensure that
- * the monitored task will not be able to re-activate monitoring.
- * The task will eventually be context switched out, at which point the context
- * will be reclaimed (that includes releasing ownership of the PMU).
- *
- * So there might be a window of time where the number of per-task session is zero
- * yet one PMU might have a owner and get at most one overflow interrupt for a zombie
- * context. This is safe because if a per-task session comes in, it will push this one
- * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide
- * session is force on that CPU, given that we use task pinning, pfm_save_regs() will
- * also push our zombie context out.
- *
- * Overall pretty hairy stuff....
- */
- DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
- pfm_clear_psr_up();
- ia64_psr(regs)->up = 0;
- ia64_psr(regs)->sp = 1;
- return;
-}
-
-/*
- * Core of the PMU overflow interrupt.
- *
- * Reads pmc0 and the current PMU owner/context, runs the overflow handler
- * under the context lock when the interrupt is genuine, and always
- * unfreezes the PMU before returning.
- *
- * Returns 0 when the overflow was processed, -1 when the interrupt was
- * spurious (no overflow bits or no owner, owner without context, or a
- * per-task owner without IA64_THREAD_PM_VALID).
- */
-static int
-pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
-{
-	struct task_struct *owner;
-	pfm_context_t *ctx;
-	unsigned long flags;
-	u64 pmc0;
-	int this_cpu = smp_processor_id();
-
-	pfm_stats[this_cpu].pfm_ovfl_intr_count++;
-
-	/*
-	 * srlz.d done before arriving here
-	 */
-	pmc0 = ia64_get_pmc(0);
-
-	owner = GET_PMU_OWNER();
-	ctx = GET_PMU_CTX();
-
-	/*
-	 * no pending overflow bit or nobody owns the PMU: plain spurious
-	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
-	 */
-	if (!(PMC0_HAS_OVFL(pmc0) && owner)) {
-		pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
-		/* keep it unfrozen at all times */
-		pfm_unfreeze_pmu();
-		return -1;
-	}
-
-	/*
-	 * from here on we assume that pmc0.fr is always set
-	 */
-
-	/* sanity check: an owner must come with a context */
-	if (!ctx) {
-		printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
-			this_cpu, owner->pid);
-		pfm_unfreeze_pmu();
-		return -1;
-	}
-
-	if (ctx->ctx_fl_system == 0 && (owner->thread.flags & IA64_THREAD_PM_VALID) == 0) {
-		printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n",
-			this_cpu,
-			owner->pid);
-		pfm_unfreeze_pmu();
-		return -1;
-	}
-
-	PROTECT_CTX_NOPRINT(ctx, flags);
-
-	pfm_overflow_handler(owner, ctx, pmc0, regs);
-
-	UNPROTECT_CTX_NOPRINT(ctx, flags);
-
-	/*
-	 * keep it unfrozen at all times
-	 */
-	pfm_unfreeze_pmu();
-
-	return 0;
-}
-
-/*
- * PMU interrupt entry point.
- *
- * Hands the interrupt to the alternate handler when one is installed;
- * otherwise runs pfm_do_interrupt_handler() and, for non-spurious
- * interrupts, updates the per-CPU cycle statistics (min, max, total).
- * Always returns IRQ_HANDLED.
- */
-static irqreturn_t
-pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
-{
-	unsigned long t_start, elapsed;
-	unsigned long prev_min, prev_max;
-	int cpu_id;
-	int rc;
-
-	cpu_id = get_cpu();
-
-	if (unlikely(pfm_alt_intr_handler)) {
-		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
-	} else {
-		prev_min = pfm_stats[cpu_id].pfm_ovfl_intr_cycles_min;
-		prev_max = pfm_stats[cpu_id].pfm_ovfl_intr_cycles_max;
-
-		t_start = ia64_get_itc();
-
-		rc = pfm_do_interrupt_handler(irq, arg, regs);
-
-		elapsed = ia64_get_itc() - t_start;
-
-		/*
-		 * don't measure spurious interrupts
-		 */
-		if (likely(rc == 0)) {
-			if (elapsed < prev_min)
-				pfm_stats[cpu_id].pfm_ovfl_intr_cycles_min = elapsed;
-			if (elapsed > prev_max)
-				pfm_stats[cpu_id].pfm_ovfl_intr_cycles_max = elapsed;
-
-			pfm_stats[cpu_id].pfm_ovfl_intr_cycles += elapsed;
-		}
-	}
-
-	put_cpu_no_resched();
-	return IRQ_HANDLED;
-}
-
-/*
- * /proc/perfmon interface, for debug only
- *
- * The seq_file iterator token is either PFM_PROC_SHOW_HEADER (print the
- * global header) or the value (cpu number + 1) for a per-CPU record.
- * Note that the cast binds tighter than the addition, so the sentinel is
- * computed as ((void *)NR_CPUS) + 1, i.e. arithmetic on a void pointer
- * (a GNU C extension); the result is the address value NR_CPUS+1, one past
- * the largest per-CPU token.
- */
-
-#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1)
-
-/*
- * seq_file start callback.
- *
- * Position 0 yields the header token. Any other position p stands for
- * CPU (p - 1): *pos is advanced to the first online CPU at or after it and
- * that position is returned as the token. Returns NULL once *pos goes
- * past NR_CPUS.
- */
-static void *
-pfm_proc_start(struct seq_file *m, loff_t *pos)
-{
-	if (*pos == 0)
-		return PFM_PROC_SHOW_HEADER;
-
-	for (; *pos <= NR_CPUS; ++*pos) {
-		if (cpu_online(*pos - 1))
-			return (void *)*pos;
-	}
-
-	return NULL;
-}
-
-/*
- * seq_file next callback: advance the position by one and let
- * pfm_proc_start() find the next valid token (or NULL at the end).
- */
-static void *
-pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return pfm_proc_start(m, pos);
-}
-
-/*
- * seq_file stop callback: nothing to release, the start/next callbacks
- * do not acquire anything.
- */
-static void
-pfm_proc_stop(struct seq_file *m, void *v)
-{
-}
-
-/*
- * Print the global part of /proc/perfmon: perfmon version, PMU model and
- * sysctl settings, then the session counters (read under LOCK_PFS), then
- * one line per registered sampling buffer format (UUID and name, walked
- * under pfm_buffer_fmt_lock).
- */
-static void
-pfm_proc_show_header(struct seq_file *m)
-{
- struct list_head * pos;
- pfm_buffer_fmt_t * entry;
- unsigned long flags;
-
- seq_printf(m,
- "perfmon version : %u.%u\n"
- "model : %s\n"
- "fastctxsw : %s\n"
- "expert mode : %s\n"
- "ovfl_mask : 0x%lx\n"
- "PMU flags : 0x%x\n",
- PFM_VERSION_MAJ, PFM_VERSION_MIN,
- pmu_conf->pmu_name,
- pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
- pfm_sysctl.expert_mode > 0 ? "Yes": "No",
- pmu_conf->ovfl_val,
- pmu_conf->flags);
-
- /* session counters are printed under the session lock */
- LOCK_PFS(flags);
-
- seq_printf(m,
- "proc_sessions : %u\n"
- "sys_sessions : %u\n"
- "sys_use_dbregs : %u\n"
- "ptrace_use_dbregs : %u\n",
- pfm_sessions.pfs_task_sessions,
- pfm_sessions.pfs_sys_sessions,
- pfm_sessions.pfs_sys_use_dbregs,
- pfm_sessions.pfs_ptrace_use_dbregs);
-
- UNLOCK_PFS(flags);
-
- /* the format list is walked under its own lock */
- spin_lock(&pfm_buffer_fmt_lock);
-
- list_for_each(pos, &pfm_buffer_fmt_list) {
- entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
- seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
- entry->fmt_uuid[0],
- entry->fmt_uuid[1],
- entry->fmt_uuid[2],
- entry->fmt_uuid[3],
- entry->fmt_uuid[4],
- entry->fmt_uuid[5],
- entry->fmt_uuid[6],
- entry->fmt_uuid[7],
- entry->fmt_uuid[8],
- entry->fmt_uuid[9],
- entry->fmt_uuid[10],
- entry->fmt_uuid[11],
- entry->fmt_uuid[12],
- entry->fmt_uuid[13],
- entry->fmt_uuid[14],
- entry->fmt_uuid[15],
- entry->fmt_name);
- }
- spin_unlock(&pfm_buffer_fmt_lock);
-
-}
-
-/*
- * seq_file show callback.
- *
- * v is either PFM_PROC_SHOW_HEADER, in which case the global header is
- * printed, or (cpu number + 1), in which case the per-CPU statistics and
- * PMU ownership information of that CPU are printed. When a single CPU is
- * online and pfm_sysctl.debug is set, the psr, pmc0 and the counting
- * pmc/pmd registers are dumped as well. Always returns 0.
- */
-static int
-pfm_proc_show(struct seq_file *m, void *v)
-{
- unsigned long psr;
- unsigned int i;
- int cpu;
-
- if (v == PFM_PROC_SHOW_HEADER) {
- pfm_proc_show_header(m);
- return 0;
- }
-
- /* show info for CPU (v - 1) */
-
- cpu = (long)v - 1;
- seq_printf(m,
- "CPU%-2d overflow intrs : %lu\n"
- "CPU%-2d overflow cycles : %lu\n"
- "CPU%-2d overflow min : %lu\n"
- "CPU%-2d overflow max : %lu\n"
- "CPU%-2d smpl handler calls : %lu\n"
- "CPU%-2d smpl handler cycles : %lu\n"
- "CPU%-2d spurious intrs : %lu\n"
- "CPU%-2d replay intrs : %lu\n"
- "CPU%-2d syst_wide : %d\n"
- "CPU%-2d dcr_pp : %d\n"
- "CPU%-2d exclude idle : %d\n"
- "CPU%-2d owner : %d\n"
- "CPU%-2d context : %p\n"
- "CPU%-2d activations : %lu\n",
- cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
- cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
- cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
- cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
- cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
- cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
- cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
- cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
- cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
- cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
- cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
- cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
- cpu, pfm_get_cpu_data(pmu_ctx, cpu),
- cpu, pfm_get_cpu_data(pmu_activation_number, cpu));
-
- /*
- * the registers below are read on the CPU executing this code, hence
- * the restriction to the single-online-CPU case
- */
- if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) {
-
- psr = pfm_get_psr();
-
- ia64_srlz_d();
-
- seq_printf(m,
- "CPU%-2d psr : 0x%lx\n"
- "CPU%-2d pmc0 : 0x%lx\n",
- cpu, psr,
- cpu, ia64_get_pmc(0));
-
- for (i=0; PMC_IS_LAST(i) == 0; i++) {
- if (PMC_IS_COUNTING(i) == 0) continue;
- seq_printf(m,
- "CPU%-2d pmc%u : 0x%lx\n"
- "CPU%-2d pmd%u : 0x%lx\n",
- cpu, i, ia64_get_pmc(i),
- cpu, i, ia64_get_pmd(i));
- }
- }
- return 0;
-}
-
-/* seq_file iterator callbacks backing /proc/perfmon */
-struct seq_operations pfm_seq_ops = {
- .start = pfm_proc_start,
- .next = pfm_proc_next,
- .stop = pfm_proc_stop,
- .show = pfm_proc_show
-};
-
-/* open handler for /proc/perfmon: attach the seq_file iterator to the file */
-static int
-pfm_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &pfm_seq_ops);
-}
-
-
-/*
- * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
- * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
- * is active or inactive based on mode. We must rely on the value in
- * local_cpu_data->pfm_syst_info
- *
- * task:       task being switched in or out
- * info:       PFM_CPUINFO_* flags for this CPU
- * is_ctxswin: non-zero when task is being switched in, zero when switched out
- *
- * For every task except an excluded idle task, only the saved psr.pp of the
- * task is updated. For the idle task with PFM_CPUINFO_EXCL_IDLE set,
- * dcr.pp and psr.pp are cleared on switch-in and set again on switch-out,
- * provided monitoring has started (PFM_CPUINFO_DCR_PP).
- */
-void
-pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
-{
- struct pt_regs *regs;
- unsigned long dcr;
- unsigned long dcr_pp;
-
- dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
-
- /*
- * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
- * on every CPU, so we can rely on the pid to identify the idle task.
- */
- if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
- regs = task_pt_regs(task);
- ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
- return;
- }
- /*
- * if monitoring has started
- */
- if (dcr_pp) {
- dcr = ia64_getreg(_IA64_REG_CR_DCR);
- /*
- * context switching in?
- */
- if (is_ctxswin) {
- /* mask monitoring for the idle task */
- ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
- pfm_clear_psr_pp();
- ia64_srlz_i();
- return;
- }
- /*
- * context switching out
- * restore monitoring for next task
- *
- * Due to inlining this odd if-then-else construction generates
- * better code.
- */
- ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
- pfm_set_psr_pp();
- ia64_srlz_i();
- }
-}
-
-#ifdef CONFIG_SMP
-
-/*
- * Forcibly detach a context from its task (ctx->ctx_task): clear psr.up and
- * set psr.sp in the given saved registers, drop PMU ownership if the task
- * holds it on this CPU, clear the task's pending perfmon work and break
- * the task <-> context link. The context itself is not freed here.
- */
-static void
-pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
-{
- struct task_struct *task = ctx->ctx_task;
-
- ia64_psr(regs)->up = 0;
- ia64_psr(regs)->sp = 1;
-
- if (GET_PMU_OWNER() == task) {
- DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
- SET_PMU_OWNER(NULL, NULL);
- }
-
- /*
- * disconnect the task from the context and vice-versa
- */
- PFM_SET_WORK_PENDING(task, 0);
-
- task->thread.pfm_context = NULL;
- task->thread.flags &= ~IA64_THREAD_PM_VALID;
-
- DPRINT(("force cleanup for [%d]\n", task->pid));
-}
-
-
-/*
- * in 2.6, interrupts are masked when we come here and the runqueue lock is held
- *
- * SMP version of the context-switch-out hook. Does nothing for a task
- * without a perfmon context. A zombie context is cleaned up and freed on
- * the spot. Otherwise monitoring is stopped (psr.up cleared), the previous
- * psr.up is remembered in the context, PMU ownership is released and the
- * used PMDs plus pmc0 are saved into the thread structure.
- */
-void
-pfm_save_regs(struct task_struct *task)
-{
- pfm_context_t *ctx;
- struct thread_struct *t;
- unsigned long flags;
- u64 psr;
-
-
- ctx = PFM_GET_CTX(task);
- if (ctx == NULL) return;
- t = &task->thread;
-
- /*
- * we always come here with interrupts ALREADY disabled by
- * the scheduler. So we simply need to protect against concurrent
- * access, not CPU concurrency.
- */
- flags = pfm_protect_ctx_ctxsw(ctx);
-
- if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
- struct pt_regs *regs = task_pt_regs(task);
-
- pfm_clear_psr_up();
-
- pfm_force_cleanup(ctx, regs);
-
- BUG_ON(ctx->ctx_smpl_hdr);
-
- /* unlock before freeing: the lock is taken on the context itself */
- pfm_unprotect_ctx_ctxsw(ctx, flags);
-
- pfm_context_free(ctx);
- return;
- }
-
- /*
- * save current PSR: needed because we modify it
- */
- ia64_srlz_d();
- psr = pfm_get_psr();
-
- BUG_ON(psr & (IA64_PSR_I));
-
- /*
- * stop monitoring:
- * This is the last instruction which may generate an overflow
- *
- * We do not need to set psr.sp because, it is irrelevant in kernel.
- * It will be restored from ipsr when going back to user level
- */
- pfm_clear_psr_up();
-
- /*
- * keep a copy of psr.up (for reload)
- */
- ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
-
- /*
- * release ownership of this PMU.
- * PM interrupts are masked, so nothing
- * can happen.
- */
- SET_PMU_OWNER(NULL, NULL);
-
- /*
- * we systematically save the PMD as we have no
- * guarantee we will be schedule at that same
- * CPU again.
- */
- pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
-
- /*
- * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
- * we will need it on the restore path to check
- * for pending overflow.
- */
- t->pmcs[0] = ia64_get_pmc(0);
-
- /*
- * unfreeze PMU if had pending overflows
- * (any pmc0 bit other than bit 0 set)
- */
- if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
-
- /*
- * finally, allow context access.
- * interrupts will still be masked after this call.
- */
- pfm_unprotect_ctx_ctxsw(ctx, flags);
-}
-
-#else /* !CONFIG_SMP */
-/*
- * UP (non-SMP) version of the context-switch-out hook. Only stops
- * monitoring (clears psr.up) and remembers the previous psr.up in the
- * context; no PMU register is saved here (see pfm_lazy_save_regs()).
- * Does nothing for a task without a perfmon context.
- */
-void
-pfm_save_regs(struct task_struct *task)
-{
- pfm_context_t *ctx;
- u64 psr;
-
- ctx = PFM_GET_CTX(task);
- if (ctx == NULL) return;
-
- /*
- * save current PSR: needed because we modify it
- */
- psr = pfm_get_psr();
-
- BUG_ON(psr & (IA64_PSR_I));
-
- /*
- * stop monitoring:
- * This is the last instruction which may generate an overflow
- *
- * We do not need to set psr.sp because, it is irrelevant in kernel.
- * It will be restored from ipsr when going back to user level
- */
- pfm_clear_psr_up();
-
- /*
- * keep a copy of psr.up (for reload)
- */
- ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
-}
-
-/*
- * UP only: save the PMU state of task into its thread structure.
- * Expects psr.up to be already cleared (checked with BUG_ON). Under the
- * context lock, PMU ownership is released, the used PMDs and pmc0 are
- * saved and the PMU is unfrozen if an overflow was pending.
- * NOTE(review): ctx is dereferenced without a NULL check; callers are
- * presumably expected to pass a task that owns a context -- confirm.
- */
-static void
-pfm_lazy_save_regs (struct task_struct *task)
-{
- pfm_context_t *ctx;
- struct thread_struct *t;
- unsigned long flags;
-
- { u64 psr = pfm_get_psr();
- BUG_ON(psr & IA64_PSR_UP);
- }
-
- ctx = PFM_GET_CTX(task);
- t = &task->thread;
-
- /*
- * we need to mask PMU overflow here to
- * make sure that we maintain pmc0 until
- * we save it. overflow interrupts are
- * treated as spurious if there is no
- * owner.
- *
- * XXX: I don't think this is necessary
- */
- PROTECT_CTX(ctx,flags);
-
- /*
- * release ownership of this PMU.
- * must be done before we save the registers.
- *
- * after this call any PMU interrupt is treated
- * as spurious.
- */
- SET_PMU_OWNER(NULL, NULL);
-
- /*
- * save all the pmds we use
- */
- pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
-
- /*
- * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
- * it is needed to check for pended overflow
- * on the restore path
- */
- t->pmcs[0] = ia64_get_pmc(0);
-
- /*
- * unfreeze PMU if had pending overflows
- */
- if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
-
- /*
- * now get can unmask PMU interrupts, they will
- * be treated as purely spurious and we will not
- * lose any information
- */
- UNPROTECT_CTX(ctx,flags);
-}
-#endif /* CONFIG_SMP */
-
-#ifdef CONFIG_SMP
-/*
- * in 2.6, interrupts are masked when we come here and the runqueue lock is held
- *
- * SMP version of the context-switch-in hook. Does nothing for a task
- * without a context or without IA64_THREAD_PM_VALID. A zombie context is
- * cleaned up and freed. Otherwise the debug registers (if used), the PMDs
- * and the PMCs are restored -- only the modified ones when this context was
- * the last one active on this CPU, all of them otherwise -- a pending
- * overflow saved in pmc0 is replayed, ownership of the PMU is taken and
- * psr.up is restored.
- */
-void
-pfm_load_regs (struct task_struct *task)
-{
- pfm_context_t *ctx;
- struct thread_struct *t;
- unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
- unsigned long flags;
- u64 psr, psr_up;
- int need_irq_resend;
-
- ctx = PFM_GET_CTX(task);
- if (unlikely(ctx == NULL)) return;
-
- BUG_ON(GET_PMU_OWNER());
-
- t = &task->thread;
- /*
- * possible on unload
- */
- if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
-
- /*
- * we always come here with interrupts ALREADY disabled by
- * the scheduler. So we simply need to protect against concurrent
- * access, not CPU concurrency.
- */
- flags = pfm_protect_ctx_ctxsw(ctx);
- psr = pfm_get_psr();
-
- need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
-
- BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
- BUG_ON(psr & IA64_PSR_I);
-
- if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
- struct pt_regs *regs = task_pt_regs(task);
-
- BUG_ON(ctx->ctx_smpl_hdr);
-
- pfm_force_cleanup(ctx, regs);
-
- pfm_unprotect_ctx_ctxsw(ctx, flags);
-
- /*
- * this one (kmalloc'ed) is fine with interrupts disabled
- */
- pfm_context_free(ctx);
-
- return;
- }
-
- /*
- * we restore ALL the debug registers to avoid picking up
- * stale state.
- */
- if (ctx->ctx_fl_using_dbreg) {
- pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
- pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
- }
- /*
- * retrieve saved psr.up
- */
- psr_up = ctx->ctx_saved_psr_up;
-
- /*
- * if we were the last user of the PMU on that CPU,
- * then nothing to do except restore psr
- */
- if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
-
- /*
- * retrieve partial reload masks (due to user modifications)
- */
- pmc_mask = ctx->ctx_reload_pmcs[0];
- pmd_mask = ctx->ctx_reload_pmds[0];
-
- } else {
- /*
- * To avoid leaking information to the user level when psr.sp=0,
- * we must reload ALL implemented pmds (even the ones we don't use).
- * In the kernel we only allow PFM_READ_PMDS on registers which
- * we initialized or requested (sampling) so there is no risk there.
- */
- pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
-
- /*
- * ALL accessible PMCs are systematically reloaded, unused registers
- * get their default (from pfm_reset_pmu_state()) values to avoid picking
- * up stale configuration.
- *
- * PMC0 is never in the mask. It is always restored separately.
- */
- pmc_mask = ctx->ctx_all_pmcs[0];
- }
- /*
- * when context is MASKED, we will restore PMC with plm=0
- * and PMD with stale information, but that's ok, nothing
- * will be captured.
- *
- * XXX: optimize here
- */
- if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
- if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
-
- /*
- * check for pending overflow at the time the state
- * was saved.
- */
- if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
- /*
- * reload pmc0 with the overflow information
- * On McKinley PMU, this will trigger a PMU interrupt
- */
- ia64_set_pmc(0, t->pmcs[0]);
- ia64_srlz_d();
- t->pmcs[0] = 0UL;
-
- /*
- * will replay the PMU interrupt
- */
- if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
-
- pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
- }
-
- /*
- * we just did a reload, so we reset the partial reload fields
- */
- ctx->ctx_reload_pmcs[0] = 0UL;
- ctx->ctx_reload_pmds[0] = 0UL;
-
- SET_LAST_CPU(ctx, smp_processor_id());
-
- /*
- * bump activation value for this PMU
- */
- INC_ACTIVATION();
- /*
- * record current activation for this context
- */
- SET_ACTIVATION(ctx);
-
- /*
- * establish new ownership.
- */
- SET_PMU_OWNER(task, ctx);
-
- /*
- * restore the psr.up bit. measurement
- * is active again.
- * no PMU interrupt can happen at this point
- * because we still have interrupts disabled.
- */
- if (likely(psr_up)) pfm_set_psr_up();
-
- /*
- * allow concurrent access to context
- */
- pfm_unprotect_ctx_ctxsw(ctx, flags);
-}
-#else /* !CONFIG_SMP */
-/*
- * reload PMU state for UP kernels
- * in 2.5 we come here with interrupts disabled
- *
- * Restores the debug registers (when the context uses them), the PMDs, the
- * PMCs and psr.up of @task, then makes @task the owner of the PMU. When
- * @task is already the owner only psr.up is restored. Any other owner is
- * first pushed out with pfm_lazy_save_regs().
- *
- * NOTE(review): unlike the SMP variant of this function, ctx is dereferenced
- * here without a NULL check -- presumably the caller guarantees that a
- * context is attached to @task; confirm.
- */
-void
-pfm_load_regs (struct task_struct *task)
-{
- struct thread_struct *t;
- pfm_context_t *ctx;
- struct task_struct *owner;
- unsigned long pmd_mask, pmc_mask;
- u64 psr, psr_up;
- int need_irq_resend;
-
- owner = GET_PMU_OWNER();
- ctx = PFM_GET_CTX(task);
- t = &task->thread;
- psr = pfm_get_psr();
-
- BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
- BUG_ON(psr & IA64_PSR_I);
-
- /*
- * we restore ALL the debug registers to avoid picking up
- * stale state.
- *
- * This must be done even when the task is still the owner
- * as the registers may have been modified via ptrace()
- * (not perfmon) by the previous task.
- */
- if (ctx->ctx_fl_using_dbreg) {
- pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
- pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
- }
-
- /*
- * retrieve saved psr.up
- */
- psr_up = ctx->ctx_saved_psr_up;
- need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
-
- /*
- * short path, our state is still there, just
- * need to restore psr and we go
- *
- * we touch neither PMC nor PMD. the psr is not touched
- * by the overflow_handler. So we are safe w.r.t. interrupt
- * concurrency even without interrupt masking.
- */
- if (likely(owner == task)) {
- if (likely(psr_up)) pfm_set_psr_up();
- return;
- }
-
- /*
- * someone else is still using the PMU, first push it out and
- * then we'll be able to install our stuff !
- *
- * Upon return, there will be no owner for the current PMU
- */
- if (owner) pfm_lazy_save_regs(owner);
-
- /*
- * To avoid leaking information to the user level when psr.sp=0,
- * we must reload ALL implemented pmds (even the ones we don't use).
- * In the kernel we only allow PFM_READ_PMDS on registers which
- * we initialized or requested (sampling) so there is no risk there.
- */
- pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
-
- /*
- * ALL accessible PMCs are systematically reloaded, unused registers
- * get their default (from pfm_reset_pmu_state()) values to avoid picking
- * up stale configuration.
- *
- * PMC0 is never in the mask. It is always restored separately
- */
- pmc_mask = ctx->ctx_all_pmcs[0];
-
- pfm_restore_pmds(t->pmds, pmd_mask);
- pfm_restore_pmcs(t->pmcs, pmc_mask);
-
- /*
- * check for pending overflow at the time the state
- * was saved.
- */
- if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
- /*
- * reload pmc0 with the overflow information
- * On McKinley PMU, this will trigger a PMU interrupt
- */
- ia64_set_pmc(0, t->pmcs[0]);
- ia64_srlz_d();
-
- t->pmcs[0] = 0UL;
-
- /*
- * will replay the PMU interrupt
- */
- if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
-
- pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
- }
-
- /*
- * establish new ownership.
- */
- SET_PMU_OWNER(task, ctx);
-
- /*
- * restore the psr.up bit. measurement
- * is active again.
- * no PMU interrupt can happen at this point
- * because we still have interrupts disabled.
- */
- if (likely(psr_up)) pfm_set_psr_up();
-}
-#endif /* CONFIG_SMP */
-
-/*
- * this function assumes monitoring is stopped
- *
- * Folds the current PMD values into ctx->ctx_pmds[].val for every PMD set in
- * ctx->ctx_used_pmds[0]. Values are read from the PMU when it is accessible
- * from this CPU, otherwise from task->thread.pmds[]. Counting PMDs are
- * rebuilt into full 64-bit values, with one extra wrap (1 + ovfl_val) added
- * when the matching bit is set in pmc0.
- */
-static void
-pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
-{
- u64 pmc0;
- unsigned long mask2, val, pmd_val, ovfl_val;
- int i, can_access_pmu = 0;
- int is_self;
-
- /*
- * is the caller the task being monitored (or which initiated the
- * session for system wide measurements)
- */
- is_self = ctx->ctx_task == task ? 1 : 0;
-
- /*
- * can access PMU if task is the owner of the PMU state on the current CPU
- * or if we are running on the CPU bound to the context in system-wide mode
- * (that is not necessarily the task the context is attached to in this mode).
- * In system-wide we always have can_access_pmu true because a task running on an
- * invalid processor is flagged earlier in the call stack (see pfm_stop).
- */
- can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
- if (can_access_pmu) {
- /*
- * Mark the PMU as not owned
- * This will cause the interrupt handler to do nothing in case an overflow
- * interrupt was in-flight
- * This also guarantees that pmc0 will contain the final state
- * It virtually gives us full control on overflow processing from that point
- * on.
- */
- SET_PMU_OWNER(NULL, NULL);
- DPRINT(("releasing ownership\n"));
-
- /*
- * read current overflow status:
- *
- * we are guaranteed to read the final stable state
- */
- ia64_srlz_d();
- pmc0 = ia64_get_pmc(0); /* slow */
-
- /*
- * reset freeze bit, overflow status information destroyed
- */
- pfm_unfreeze_pmu();
- } else {
- pmc0 = task->thread.pmcs[0];
- /*
- * clear whatever overflow status bits there were
- */
- task->thread.pmcs[0] = 0;
- }
- ovfl_val = pmu_conf->ovfl_val;
- /*
- * we save all the used pmds
- * we take care of overflows for counting PMDs
- *
- * XXX: sampling situation is not taken into account here
- */
- mask2 = ctx->ctx_used_pmds[0];
-
- DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2));
-
- for (i = 0; mask2; i++, mask2>>=1) {
-
- /* skip non used pmds */
- if ((mask2 & 0x1) == 0) continue;
-
- /*
- * can access PMU always true in system wide mode
- */
- val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
-
- if (PMD_IS_COUNTING(i)) {
- DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
- task->pid,
- i,
- ctx->ctx_pmds[i].val,
- val & ovfl_val));
-
- /*
- * we rebuild the full 64 bit value of the counter
- */
- val = ctx->ctx_pmds[i].val + (val & ovfl_val);
-
- /*
- * now everything is in ctx_pmds[] and we need
- * to clear the saved context from save_regs() such that
- * pfm_read_pmds() gets the correct value
- */
- pmd_val = 0UL;
-
- /*
- * take care of overflow inline
- */
- if (pmc0 & (1UL << i)) {
- val += 1 + ovfl_val;
- DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
- }
- }
-
- DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
-
- /* the thread-level copy is only rewritten when called by the monitored task itself */
- if (is_self) task->thread.pmds[i] = pmd_val;
-
- ctx->ctx_pmds[i].val = val;
- }
-}
-
-/* irqaction for the perfmon vector; registered in pfm_init_percpu() */
-static struct irqaction perfmon_irqaction = {
- .handler = pfm_interrupt_handler,
- .flags = IRQF_DISABLED,
- .name = "perfmon"
-};
-
-/*
- * Quiesce the PMU on the CPU this runs on: clear psr.up and psr.pp, clear
- * the pp bit in current's saved psr, then freeze the PMU.
- * Run on every CPU by pfm_install_alt_pmu_interrupt(); @data is unused.
- */
-static void
-pfm_alt_save_pmu_state(void *data)
-{
- struct pt_regs *regs;
-
- regs = task_pt_regs(current);
-
- DPRINT(("called\n"));
-
- /*
- * should not be necessary but
- * let's take no risk
- */
- pfm_clear_psr_up();
- pfm_clear_psr_pp();
- ia64_psr(regs)->pp = 0;
-
- /*
- * This call is required
- * May cause a spurious interrupt on some processors
- */
- pfm_freeze_pmu();
-
- ia64_srlz_d();
-}
-
-/*
- * Counterpart of pfm_alt_save_pmu_state(): clear psr.up and psr.pp, clear the
- * pp bit in current's saved psr, then unfreeze the PMU.
- * Run on every CPU by pfm_remove_alt_pmu_interrupt(); @data is unused.
- */
-void
-pfm_alt_restore_pmu_state(void *data)
-{
- struct pt_regs *regs;
-
- regs = task_pt_regs(current);
-
- DPRINT(("called\n"));
-
- /*
- * put PMU back in state expected
- * by perfmon
- */
- pfm_clear_psr_up();
- pfm_clear_psr_pp();
- ia64_psr(regs)->pp = 0;
-
- /*
- * perfmon runs with PMU unfrozen at all times
- */
- pfm_unfreeze_pmu();
-
- ia64_srlz_d();
-}
-
-/*
- * Install @hdl as the alternate PMU interrupt handler.
- *
- * Reserves a session on every online CPU, runs pfm_alt_save_pmu_state() on
- * each CPU, then publishes @hdl in pfm_alt_intr_handler.
- *
- * Returns 0 on success, -EINVAL when @hdl or its handler is NULL, -EBUSY when
- * a handler is already installed or another install/remove holds
- * pfm_alt_install_check, otherwise the error from pfm_reserve_session() or
- * on_each_cpu() after the reservations made so far have been released.
- */
-int
-pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
-{
- int ret, i;
- int reserve_cpu;
-
- /* some sanity checks */
- if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
-
- /* do the easy test first */
- if (pfm_alt_intr_handler) return -EBUSY;
-
- /* one at a time in the install or remove, just fail the others */
- if (!spin_trylock(&pfm_alt_install_check)) {
- return -EBUSY;
- }
-
- /* reserve our session */
- for_each_online_cpu(reserve_cpu) {
- ret = pfm_reserve_session(NULL, 1, reserve_cpu);
- if (ret) goto cleanup_reserve;
- }
-
- /* save the current system wide pmu states */
- ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- goto cleanup_reserve;
- }
-
- /* officially change to the alternate interrupt handler */
- pfm_alt_intr_handler = hdl;
-
- spin_unlock(&pfm_alt_install_check);
-
- return 0;
-
-cleanup_reserve:
- /* reserve_cpu is the first CPU that was NOT reserved (or past the last one) */
- for_each_online_cpu(i) {
- /* don't unreserve more than we reserved */
- if (i >= reserve_cpu) break;
-
- pfm_unreserve_session(NULL, 1, i);
- }
-
- spin_unlock(&pfm_alt_install_check);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
-
-/*
- * Remove the alternate PMU interrupt handler @hdl.
- *
- * Clears pfm_alt_intr_handler, runs pfm_alt_restore_pmu_state() on every CPU
- * and releases the session held on each online CPU.
- *
- * Returns 0 on success (a failing on_each_cpu() is only logged), -EINVAL when
- * @hdl is NULL or is not the installed handler, -EBUSY when another
- * install/remove holds pfm_alt_install_check.
- */
-int
-pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
-{
- int i;
- int ret;
-
- if (hdl == NULL) return -EINVAL;
-
- /* cannot remove someone else's handler! */
- if (pfm_alt_intr_handler != hdl) return -EINVAL;
-
- /* one at a time in the install or remove, just fail the others */
- if (!spin_trylock(&pfm_alt_install_check)) {
- return -EBUSY;
- }
-
- pfm_alt_intr_handler = NULL;
-
- ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- }
-
- for_each_online_cpu(i) {
- pfm_unreserve_session(NULL, 1, i);
- }
-
- spin_unlock(&pfm_alt_install_check);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
-
-/*
- * perfmon initialization routine, called from the initcall() table
- */
-static int init_pfm_fs(void);
-
-/*
- * Pick the PMU description matching the boot CPU.
- *
- * Walks the NULL-terminated pmu_confs[] table. An entry with a probe()
- * callback matches when the callback returns 0; an entry without one matches
- * when its pmu_family equals the CPU family or is the wildcard 0xff.
- * On a match pmu_conf is set and 0 is returned, otherwise -1.
- */
-static int __init
-pfm_probe_pmu(void)
-{
-	pmu_config_t **cand;
-	int cpu_family = local_cpu_data->family;
-
-	for (cand = pmu_confs; *cand != NULL; cand++) {
-		int matched;
-
-		if ((*cand)->probe)
-			matched = ((*cand)->probe() == 0);
-		else
-			matched = ((*cand)->pmu_family == cpu_family ||
-				   (*cand)->pmu_family == 0xff);
-
-		if (matched) {
-			pmu_conf = *cand;
-			return 0;
-		}
-	}
-	return -1;
-}
-
-/* seq_file based file operations installed on the /proc/perfmon entry by pfm_init() */
-static struct file_operations pfm_proc_fops = {
- .open = pfm_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * perfmon one-time initialization.
- *
- * Probes the PMU description, derives the implemented PMC/PMD bitmaps and
- * counts from the description tables, sanity-checks the register counts,
- * creates /proc/perfmon and the perfmon sysctl table, and initializes the
- * global locks and per-CPU statistics.
- *
- * Returns 0 on success, -ENODEV when no PMU description matches the CPU, and
- * -1 (with pmu_conf reset to NULL) when a sanity check or the /proc entry
- * creation fails.
- */
-int __init
-pfm_init(void)
-{
-	unsigned int n, n_counters, i;
-
-	printk("perfmon: version %u.%u IRQ %u\n",
-		PFM_VERSION_MAJ,
-		PFM_VERSION_MIN,
-		IA64_PERFMON_VECTOR);
-
-	if (pfm_probe_pmu()) {
-		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n",
-			local_cpu_data->family);
-		return -ENODEV;
-	}
-
-	/*
-	 * compute the number of implemented PMD/PMC from the
-	 * description tables
-	 */
-	n = 0;
-	for (i=0; PMC_IS_LAST(i) == 0; i++) {
-		if (PMC_IS_IMPL(i) == 0) continue;
-		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
-		n++;
-	}
-	pmu_conf->num_pmcs = n;
-
-	n = 0; n_counters = 0;
-	for (i=0; PMD_IS_LAST(i) == 0; i++) {
-		if (PMD_IS_IMPL(i) == 0) continue;
-		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
-		n++;
-		if (PMD_IS_COUNTING(i)) n_counters++;
-	}
-	pmu_conf->num_pmds = n;
-	pmu_conf->num_counters = n_counters;
-
-	/*
-	 * sanity checks on the number of debug registers
-	 */
-	if (pmu_conf->use_rr_dbregs) {
-		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
-			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
-			pmu_conf = NULL;
-			return -1;
-		}
-		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
-			/* report the data register count that actually failed the check */
-			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs);
-			pmu_conf = NULL;
-			return -1;
-		}
-	}
-
-	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
-		pmu_conf->pmu_name,
-		pmu_conf->num_pmcs,
-		pmu_conf->num_pmds,
-		pmu_conf->num_counters,
-		ffz(pmu_conf->ovfl_val));
-
-	/* sanity check */
-	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
-		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
-		pmu_conf = NULL;
-		return -1;
-	}
-
-	/*
-	 * create /proc/perfmon (mostly for debugging purposes)
-	 */
-	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
-	if (perfmon_dir == NULL) {
-		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
-		pmu_conf = NULL;
-		return -1;
-	}
-	/*
-	 * install customized file operations for /proc/perfmon entry
-	 */
-	perfmon_dir->proc_fops = &pfm_proc_fops;
-
-	/*
-	 * create /proc/sys/kernel/perfmon (for debugging purposes)
-	 */
-	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
-
-	/*
-	 * initialize all our spinlocks
-	 */
-	spin_lock_init(&pfm_sessions.pfs_lock);
-	spin_lock_init(&pfm_buffer_fmt_lock);
-
-	init_pfm_fs();
-
-	/* start the per-CPU minimum at the largest value so the first sample replaces it */
-	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;
-
-	return 0;
-}
-
-__initcall(pfm_init);
-
-/*
- * this function is called before pfm_init()
- *
- * Per-CPU PMU setup: stops any active measurement, unfreezes the PMU and
- * programs the PMV register with the perfmon vector. The per-CPU interrupt
- * is registered only on the first invocation.
- */
-void
-pfm_init_percpu (void)
-{
- static int first_time=1;
- /*
- * make sure no measurement is active
- * (may inherit programmed PMCs from EFI).
- */
- pfm_clear_psr_pp();
- pfm_clear_psr_up();
-
- /*
- * we run with the PMU not frozen at all times
- */
- pfm_unfreeze_pmu();
-
- /* first_time is a function-static flag: register the irqaction once only */
- if (first_time) {
- register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
- first_time=0;
- }
-
- ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
- ia64_srlz_d();
-}
-
-/*
- * used for debug purposes only
- *
- * Prints the PMU state of the current CPU (psr/dcr bits, every implemented
- * PMC and PMD next to the values saved in current->thread, and the owning
- * context, if any). @from names the caller and is printed verbatim.
- *
- * Nothing is printed when no perfmon state is active on this CPU.
- * Side effect: the up and pp bits of current's saved psr are cleared.
- */
-void
-dump_pmu_state(const char *from)
-{
-	struct task_struct *task;
-	struct thread_struct *t;
-	struct pt_regs *regs;
-	pfm_context_t *ctx;
-	unsigned long psr, dcr, info, flags;
-	int i, this_cpu;
-
-	local_irq_save(flags);
-
-	this_cpu = smp_processor_id();
-	regs = task_pt_regs(current);
-	info = PFM_CPUINFO_GET();
-	dcr = ia64_getreg(_IA64_REG_CR_DCR);
-
-	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
-		local_irq_restore(flags);
-		return;
-	}
-
-	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
-		this_cpu,
-		from,
-		current->pid,
-		regs->cr_iip,
-		current->comm);
-
-	task = GET_PMU_OWNER();
-	ctx = GET_PMU_CTX();
-
-	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
-
-	psr = pfm_get_psr();
-
-	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n",
-		this_cpu,
-		ia64_get_pmc(0),
-		psr & IA64_PSR_PP ? 1 : 0,
-		psr & IA64_PSR_UP ? 1 : 0,
-		dcr & IA64_DCR_PP ? 1 : 0,
-		info,
-		ia64_psr(regs)->up,
-		ia64_psr(regs)->pp);
-
-	ia64_psr(regs)->up = 0;
-	ia64_psr(regs)->pp = 0;
-
-	t = &current->thread;
-
-	/* register 0 is skipped in both loops; pmc0 was printed above */
-	for (i=1; PMC_IS_LAST(i) == 0; i++) {
-		if (PMC_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
-	}
-
-	for (i=1; PMD_IS_LAST(i) == 0; i++) {
-		if (PMD_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
-	}
-
-	if (ctx) {
-		/*
-		 * the 4th and 5th values are the message queue indices; label
-		 * them as such instead of "fd" / "ctx_task"
-		 */
-		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n",
-			this_cpu,
-			ctx->ctx_state,
-			ctx->ctx_smpl_vaddr,
-			ctx->ctx_smpl_hdr,
-			ctx->ctx_msgq_head,
-			ctx->ctx_msgq_tail,
-			ctx->ctx_saved_psr_up);
-	}
-	local_irq_restore(flags);
-}
-
-/*
- * called from process.c:copy_thread(). task is new child.
- *
- * Detaches the child from any perfmon context inherited from the parent and
- * clears its pending-work flag. @regs is not used here.
- */
-void
-pfm_inherit(struct task_struct *task, struct pt_regs *regs)
-{
- struct thread_struct *thread;
-
- DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
-
- thread = &task->thread;
-
- /*
- * cut links inherited from parent (current)
- */
- thread->pfm_context = NULL;
-
- PFM_SET_WORK_PENDING(task, 0);
-
- /*
- * the psr bits are already set properly in copy_thread()
- */
-}
-#else /* !CONFIG_PERFMON */
-/* stub used when CONFIG_PERFMON is not set: the system call always fails with -ENOSYS */
-asmlinkage long
-sys_perfmonctl (int fd, int cmd, void *arg, int count)
-{
- return -ENOSYS;
-}
-#endif /* CONFIG_PERFMON */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
deleted file mode 100644
index c7af364b42..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
+++ /dev/null
@@ -1,1030 +0,0 @@
-/*
- * Architecture-specific setup.
- *
- * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- * Stephane Eranian <eranian@hpl.hp.com>
- * Copyright (C) 2000, 2004 Intel Corp
- * Rohit Seth <rohit.seth@intel.com>
- * Suresh Siddha <suresh.b.siddha@intel.com>
- * Gordon Jin <gordon.jin@intel.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- *
- * 12/26/04 S.Siddha, G.Jin, R.Seth
- * Add multi-threading and multi-core detection
- * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
- * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
- * 03/31/00 R.Seth cpu_initialized and current->processor fixes
- * 02/04/00 D.Mosberger some more get_cpuinfo fixes...
- * 02/01/00 R.Seth fixed get_cpuinfo for SMP
- * 01/07/99 S.Eranian added the support for command line argument
- * 06/24/99 W.Drummond added boot_cpu_data.
- * 05/28/05 Z. Menyhart Dynamic stride size for "flush_icache_range()"
- */
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/acpi.h>
-#include <linux/bootmem.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/reboot.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/string.h>
-#include <linux/threads.h>
-#include <linux/screen_info.h>
-#include <linux/dmi.h>
-#include <linux/serial.h>
-#include <linux/serial_core.h>
-#include <linux/efi.h>
-#include <linux/initrd.h>
-#include <linux/pm.h>
-#include <linux/cpufreq.h>
-
-#include <asm/ia32.h>
-#include <asm/machvec.h>
-#include <asm/mca.h>
-#include <asm/meminit.h>
-#include <asm/page.h>
-#include <asm/patch.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/sal.h>
-#include <asm/sections.h>
-#include <asm/serial.h>
-#include <asm/setup.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-#include <asm/system.h>
-#ifdef CONFIG_XEN
-#include <asm/hypervisor.h>
-#include <asm/xen/xencomm.h>
-#include <xen/xencons.h>
-#endif
-#include <linux/dma-mapping.h>
-
-#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
-# error "struct cpuinfo_ia64 too big!"
-#endif
-
-#ifdef CONFIG_SMP
-unsigned long __per_cpu_offset[NR_CPUS];
-EXPORT_SYMBOL(__per_cpu_offset);
-#endif
-
-#ifdef CONFIG_XEN
-/*
- * Unwinder callback used on panic: records a kernel stack pointer derived
- * from the unwind frame in current->thread.ksp, then asks the hypervisor to
- * shut the domain down with reason SHUTDOWN_crash. @arg is unused.
- */
-static void
-xen_panic_hypercall(struct unw_frame_info *info, void *arg)
-{
- current->thread.ksp = (__u64)info->sw - 16;
- HYPERVISOR_shutdown(SHUTDOWN_crash);
- /* we're never actually going to get here... */
-}
-
-/*
- * Panic notifier callback: runs xen_panic_hypercall() through
- * unw_init_running(). None of the notifier arguments are used.
- */
-static int
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- unw_init_running(xen_panic_hypercall, NULL);
- /* we're never actually going to get here... */
- return NOTIFY_DONE;
-}
-
-/* Panic notifier registered by setup_arch() when running on Xen. */
-static struct notifier_block xen_panic_block = {
-	.notifier_call	= xen_panic_event,
-	.next		= NULL,
-	.priority	= 0,	/* try to go last */
-};
-
-/*
- * Power-off hook installed as pm_power_off when running on Xen: disables
- * local interrupts and requests a SHUTDOWN_poweroff from the hypervisor.
- */
-void xen_pm_power_off(void)
-{
- local_irq_disable();
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
-#endif
-
-extern void ia64_setup_printk_clock(void);
-
-DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
-DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
-unsigned long ia64_cycles_per_usec;
-struct ia64_boot_param *ia64_boot_param;
-struct screen_info screen_info;
-unsigned long vga_console_iobase;
-unsigned long vga_console_membase;
-
-static struct resource data_resource = {
- .name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource code_resource = {
- .name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-extern void efi_initialize_iomem_resources(struct resource *,
- struct resource *);
-extern char _text[], _end[], _etext[];
-
-unsigned long ia64_max_cacheline_size;
-
-/* Returns ia64_max_cacheline_size (truncated to int) as the DMA cache alignment. */
-int dma_get_cache_alignment(void)
-{
- return ia64_max_cacheline_size;
-}
-EXPORT_SYMBOL(dma_get_cache_alignment);
-
-unsigned long ia64_iobase; /* virtual address for I/O accesses */
-EXPORT_SYMBOL(ia64_iobase);
-struct io_space io_space[MAX_IO_SPACES];
-EXPORT_SYMBOL(io_space);
-unsigned int num_io_spaces;
-
-/*
- * "flush_icache_range()" needs to know what processor dependent stride size to use
- * when it makes i-cache(s) coherent with d-caches.
- */
-#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */
-unsigned long ia64_i_cache_stride_shift = ~0;
-
-/*
- * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
- * mask specifies a mask of address bits that must be 0 in order for two buffers to be
- * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
- * address of the second buffer must be aligned to (merge_mask+1) in order to be
- * mergeable). By default, we assume there is no I/O MMU which can merge physically
- * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu
- * page-size of 2^64.
- */
-unsigned long ia64_max_iommu_merge_mask = ~0UL;
-EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
-
-/*
- * We use a special marker for the end of memory and it uses the extra (+1) slot
- */
-struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
-int num_rsvd_regions __initdata;
-
-
-/*
- * Report the parts of [start, end) that are not covered by rsvd_region[].
- *
- * For every gap between consecutive reserved regions that intersects the
- * incoming segment, the callback passed in @arg is invoked through
- * call_pernode_memory() with the physical start address and the length of
- * the gap. Incoming segments need not arrive sorted. Always returns 0.
- */
-int __init
-filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
-{
-	void (*callback)(unsigned long, unsigned long, int) = arg;
-	unsigned long floor, free_lo, free_hi;
-	int idx;
-
-#if IGNORE_PFN0
-	if (start == PAGE_OFFSET) {
-		printk(KERN_WARNING "warning: skipping physical page 0\n");
-		start += PAGE_SIZE;
-		if (start >= end)
-			return 0;
-	}
-#endif
-	/* lowest possible address (the walker uses virtual addresses) */
-	floor = PAGE_OFFSET;
-
-	for (idx = 0; idx < num_rsvd_regions; idx++) {
-		free_lo = max(start, floor);
-		free_hi = min(end, rsvd_region[idx].start);
-
-		if (free_lo < free_hi)
-			call_pernode_memory(__pa(free_lo), free_hi - free_lo, callback);
-
-		/* the segment is exhausted once the gap reaches its end */
-		if (free_hi == end)
-			break;
-
-		floor = rsvd_region[idx].end;
-	}
-	/* the end-of-memory marker lets the loop body handle the last gap too */
-	return 0;
-}
-
-/*
- * Sort the first @max entries of @rsvd_region by ascending start address
- * (in-place bubble sort).
- */
-static void __init
-sort_regions (struct rsvd_region *rsvd_region, int max)
-{
-	int k;
-
-	/* each pass bubbles the largest remaining start address to slot 'max' */
-	while (max-- != 0) {
-		for (k = 0; k < max; k++) {
-			struct rsvd_region *lo = &rsvd_region[k];
-			struct rsvd_region *hi = &rsvd_region[k + 1];
-
-			if (lo->start > hi->start) {
-				struct rsvd_region swap = *lo;
-
-				*lo = *hi;
-				*hi = swap;
-			}
-		}
-	}
-}
-
-/*
- * Fill in the kernel code ([_text, _etext)) and data ([_etext, _end))
- * resources with physical addresses and hand them to
- * efi_initialize_iomem_resources(). Always returns 0.
- */
-static int __init register_memory(void)
-{
-	/* resource ranges are inclusive, hence the -1 on each end address */
-	data_resource.start = ia64_tpa(_etext);
-	data_resource.end   = ia64_tpa(_end) - 1;
-
-	code_resource.start = ia64_tpa(_text);
-	code_resource.end   = ia64_tpa(_etext) - 1;
-
-	efi_initialize_iomem_resources(&code_resource, &data_resource);
-
-	return 0;
-}
-
-__initcall(register_memory);
-
-/**
- * reserve_memory - setup reserved memory areas
- *
- * Records the areas that must not be handed to the allocator (boot
- * parameters, EFI memory map, command line, kernel image, the Xen start-info
- * page, the initrd and whatever efi_memmap_init() reports), appends the
- * end-of-memory marker and sorts the table by start address.
- * There are currently %IA64_MAX_RSVD_REGIONS defined,
- * see include/asm-ia64/meminit.h if you need to define more.
- */
-void __init
-reserve_memory (void)
-{
-	struct rsvd_region *slot = rsvd_region;
-
-	/*
-	 * none of the entries in this table overlap
-	 */
-
-	/* boot parameter block */
-	slot->start = (unsigned long) ia64_boot_param;
-	slot->end = slot->start + sizeof(*ia64_boot_param);
-	slot++;
-
-	/* EFI memory map */
-	slot->start = (unsigned long) __va(ia64_boot_param->efi_memmap);
-	slot->end = slot->start + ia64_boot_param->efi_memmap_size;
-	slot++;
-
-	/* kernel command line, including its terminating NUL */
-	slot->start = (unsigned long) __va(ia64_boot_param->command_line);
-	slot->end = (slot->start
-		     + strlen(__va(ia64_boot_param->command_line)) + 1);
-	slot++;
-
-	/* kernel image */
-	slot->start = (unsigned long) ia64_imva((void *)KERNEL_START);
-	slot->end = (unsigned long) ia64_imva(_end);
-	slot++;
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		/* one page holding the Xen start info */
-		slot->start = (unsigned long)__va((HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT));
-		slot->end = slot->start + PAGE_SIZE;
-		slot++;
-	}
-#endif
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (ia64_boot_param->initrd_start) {
-		slot->start = (unsigned long)__va(ia64_boot_param->initrd_start);
-		slot->end = slot->start + ia64_boot_param->initrd_size;
-		slot++;
-	}
-#endif
-
-	efi_memmap_init(&slot->start, &slot->end);
-	slot++;
-
-	/* end of memory marker */
-	slot->start = ~0UL;
-	slot->end = ~0UL;
-	slot++;
-
-	num_rsvd_regions = slot - rsvd_region;
-	BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < num_rsvd_regions);
-
-	sort_regions(rsvd_region, num_rsvd_regions);
-}
-
-/**
- * find_initrd - get initrd parameters from the boot parameter structure
- *
- * When the boot loader passed an initrd, publish its virtual start and end
- * addresses in initrd_start/initrd_end and log its location and size.
- * Does nothing without CONFIG_BLK_DEV_INITRD.
- */
-void __init
-find_initrd (void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (!ia64_boot_param->initrd_start)
-		return;
-
-	initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
-	initrd_end = initrd_start + ia64_boot_param->initrd_size;
-
-	printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
-	       initrd_start, ia64_boot_param->initrd_size);
-#endif
-}
-
-/*
- * Set up the legacy I/O port space: determine the physical I/O base, map it,
- * store the physical address of the mapping in the IO_BASE kernel register
- * and describe it as io_space[0].
- */
-static void __init
-io_port_init (void)
-{
- unsigned long phys_iobase;
-
- /*
- * Set `iobase' based on the EFI memory map or, failing that, the
- * value firmware left in ar.k0.
- *
- * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute
- * the port's virtual address, so ia32_load_state() loads it with a
- * user virtual address. But in ia64 mode, glibc uses the
- * *physical* address in ar.k0 to mmap the appropriate area from
- * /dev/mem, and the inX()/outX() interfaces use MMIO. In both
- * cases, user-mode can only use the legacy 0-64K I/O port space.
- *
- * ar.k0 is not involved in kernel I/O port accesses, which can use
- * any of the I/O port spaces and are done via MMIO using the
- * virtual mmio_base from the appropriate io_space[].
- */
- phys_iobase = efi_get_iobase();
- if (!phys_iobase) {
- phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
- printk(KERN_INFO "No I/O port range found in EFI memory map, "
- "falling back to AR.KR0 (0x%lx)\n", phys_iobase);
- }
- ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
- ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
-
- /* setup legacy IO port space */
- io_space[0].mmio_base = ia64_iobase;
- io_space[0].sparse = 1;
- num_io_spaces = 1;
-}
-
-/**
- * early_console_setup - setup debugging console
- * @cmdline: kernel command line handed to the console probes
- *
- * Consoles started here require little enough setup that we can start using
- * them very early in the boot process, either right after the machine
- * vector initialization, or even before if the drivers can detect their hw.
- * Every configured early console is tried; a probe returning 0 counts as a
- * success.
- *
- * Returns 0 if at least one console was set up, -1 otherwise.
- */
-static inline int __init
-early_console_setup (char *cmdline)
-{
-	int nr_found = 0;
-
-#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
-	{
-		extern int sn_serial_console_early_setup(void);
-
-		if (sn_serial_console_early_setup() == 0)
-			nr_found++;
-	}
-#endif
-#ifdef CONFIG_EFI_PCDP
-	if (efi_setup_pcdp_console(cmdline) == 0)
-		nr_found++;
-#endif
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-	if (early_serial_console_init(cmdline) == 0)
-		nr_found++;
-#endif
-
-	if (nr_found)
-		return 0;
-	return -1;
-}
-
-/*
- * Mark the CPU this runs on as online in cpu_online_map (SMP builds only).
- * Called by setup_arch() once an early console has been set up.
- */
-static inline void
-mark_bsp_online (void)
-{
-#ifdef CONFIG_SMP
- /* If we register an early console, allow CPU 0 to printk */
- cpu_set(smp_processor_id(), cpu_online_map);
-#endif
-}
-
-#ifdef CONFIG_SMP
-/*
- * Query PAL for the logical-to-physical processor mapping of the boot CPU
- * and record the threads-per-core and cores-per-package counts in
- * smp_num_siblings / smp_num_cpucores. Both are left untouched when the
- * PAL call is unavailable (-1) or fails.
- */
-static void __init
-check_for_logical_procs (void)
-{
-	pal_logical_to_physical_t map;
-	s64 rc = ia64_pal_logical_to_phys(0, &map);
-
-	if (rc != 0) {
-		if (rc == -1)
-			printk(KERN_INFO "No logical to physical processor mapping "
-			       "available\n");
-		else
-			printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
-			       rc);
-		return;
-	}
-
-	/*
-	 * Total number of siblings that BSP has. Though not all of them
-	 * may have booted successfully. The correct number of siblings
-	 * booted is in info.overview_num_log.
-	 */
-	smp_num_siblings = map.overview_tpc;
-	smp_num_cpucores = map.overview_cpp;
-}
-#endif
-
-/* set by the "nomca" early parameter; setup_arch() then skips ia64_mca_init() */
-static __initdata int nomca;
-/* early_param handler for "nomca": the argument string @s is ignored */
-static __init int setup_nomca(char *s)
-{
- nomca = 1;
- return 0;
-}
-early_param("nomca", setup_nomca);
-
-/*
- * setup_arch - architecture-specific boot-time setup
- * @cmdline_p: out parameter, set to the kernel command line
- *
- * Brings up, in order: the unwinder, Xen support (when running on Xen),
- * virtual-to-physical patching, the command line, EFI, I/O ports, early
- * parameters, the early console, ACPI tables, memory discovery, SAL, the
- * boot CPU, the console switch, machine-check handling, the platform hook
- * and paging.
- */
-void __init
-setup_arch (char **cmdline_p)
-{
-	unw_init();
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		/* Must be done before any hypercall. */
-		xencomm_init();
-
-		setup_xen_features();
-		/* Register a call for panic conditions. */
-		atomic_notifier_chain_register(&panic_notifier_list,
-					       &xen_panic_block);
-		pm_power_off = xen_pm_power_off;
-	}
-#endif
-
-	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
-
-	*cmdline_p = __va(ia64_boot_param->command_line);
-	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
-
-	efi_init();
-	io_port_init();
-
-	parse_early_param();
-
-#ifdef CONFIG_IA64_GENERIC
-	machvec_init(NULL);
-#endif
-
-	if (early_console_setup(*cmdline_p) == 0)
-		mark_bsp_online();
-
-#ifdef CONFIG_ACPI
-	/* Initialize the ACPI boot-time table parser */
-	acpi_table_init();
-# ifdef CONFIG_ACPI_NUMA
-	acpi_numa_init();
-# endif
-#else
-# ifdef CONFIG_SMP
-	smp_build_cpu_map();	/* happens, e.g., with the Ski simulator */
-# endif
-#endif /* CONFIG_ACPI */
-
-	find_memory();
-
-	/* process SAL system table: */
-	ia64_sal_init(__va(efi.sal_systab));
-
-	ia64_setup_printk_clock();
-
-#ifdef CONFIG_SMP
-	cpu_physical_id(0) = hard_smp_processor_id();
-
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
-
-	check_for_logical_procs();
-	if (smp_num_cpucores > 1)
-		printk(KERN_INFO
-		       "cpu package is Multi-Core capable: number of cores=%d\n",
-		       smp_num_cpucores);
-	if (smp_num_siblings > 1)
-		printk(KERN_INFO
-		       "cpu package is Multi-Threading capable: number of siblings=%d\n",
-		       smp_num_siblings);
-#endif
-
-	cpu_init();	/* initialize the bootstrap CPU */
-	mmu_context_init();	/* initialize context_id bitmap */
-
-#ifdef CONFIG_ACPI
-	acpi_boot_init();
-#endif
-
-#ifdef CONFIG_VT
-	if (!conswitchp) {
-# if defined(CONFIG_DUMMY_CONSOLE)
-		conswitchp = &dummy_con;
-# endif
-# if defined(CONFIG_VGA_CONSOLE)
-		/*
-		 * Non-legacy systems may route legacy VGA MMIO range to system
-		 * memory. vga_con probes the MMIO hole, so memory looks like
-		 * a VGA device to it. The EFI memory map can tell us if it's
-		 * memory so we can avoid this problem.
-		 */
-		if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
-			conswitchp = &vga_con;
-# endif
-	}
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		shared_info_t *s = HYPERVISOR_shared_info;
-
-		xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
-
-		printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%ld "
-		       "flags=0x%x\n", s->arch.start_info_pfn,
-		       xen_start_info->nr_pages, xen_start_info->flags);
-
-		if (!is_initial_xendomain()) {
-#if !defined(CONFIG_VT) || !defined(CONFIG_DUMMY_CONSOLE)
-			conswitchp = NULL;
-#endif
-		}
-
-		/*
-		 * If a console= is NOT specified, we assume using the
-		 * xencons console is desired. By default, this is ttyS0
-		 * for dom0 and tty0 for domU.
-		 */
-		if (!strstr(*cmdline_p, "console=")) {
-			char *p, *q, name[5];
-			int offset = 0;
-
-			/*
-			 * name[] is handed to add_preferred_console() as a C
-			 * string, so every copy below must NUL-terminate it
-			 * and must not leave characters of a longer, earlier
-			 * name behind. strlcpy() guarantees both.
-			 */
-			if (is_initial_xendomain())
-				strlcpy(name, "ttyS", sizeof(name));
-			else
-				strlcpy(name, "tty", sizeof(name));
-
-			p = strstr(*cmdline_p, "xencons=");
-
-			if (p) {
-				p += 8;
-				if (!strncmp(p, "ttyS", 4)) {
-					/* keep the 4-character prefix only */
-					strlcpy(name, p, 4 + 1);
-					p += 4;
-					offset = simple_strtol(p, &q, 10);
-					if (p == q)
-						offset = 0;
-				} else if (!strncmp(p, "tty", 3) ||
-					   !strncmp(p, "xvc", 3)) {
-					/* keep the 3-character prefix only */
-					strlcpy(name, p, 3 + 1);
-					p += 3;
-					offset = simple_strtol(p, &q, 10);
-					if (p == q)
-						offset = 0;
-				} else if (!strncmp(p, "off", 3))
-					offset = -1;
-			}
-
-			/* a negative offset (xencons=off) registers nothing */
-			if (offset >= 0)
-				add_preferred_console(name, offset, NULL);
-		}
-	}
-	xencons_early_setup();
-#endif
-#endif
-
-
-	/* enable IA-64 Machine Check Abort Handling unless disabled */
-#ifdef CONFIG_XEN
-	if (is_running_on_xen() && !is_initial_xendomain())
-		nomca = 1;
-#endif
-	if (!nomca)
-		ia64_mca_init();
-
-	platform_setup(cmdline_p);
-#ifdef CONFIG_XEN
-	if (!is_running_on_xen() && !ia64_platform_is("xen")) {
-		extern ia64_mv_setup_t xen_setup;
-		xen_setup(cmdline_p);
-	}
-#endif
-	paging_init();
-#ifdef CONFIG_XEN
-	contiguous_bitmap_init(max_pfn);
-#endif
-}
-
-/*
- * Display cpu info for all cpu's.
- */
-static int
-show_cpuinfo (struct seq_file *m, void *v)
-{
-#ifdef CONFIG_SMP
-# define lpj c->loops_per_jiffy
-# define cpunum c->cpu
-#else
-# define lpj loops_per_jiffy
-# define cpunum 0
-#endif
- static struct {
- unsigned long mask;
- const char *feature_name;
- } feature_bits[] = {
- { 1UL << 0, "branchlong" },
- { 1UL << 1, "spontaneous deferral"},
- { 1UL << 2, "16-byte atomic ops" }
- };
- char family[32], features[128], *cp, sep;
- struct cpuinfo_ia64 *c = v;
- unsigned long mask;
- unsigned long proc_freq;
- int i;
-
- mask = c->features;
-
- switch (c->family) {
- case 0x07: memcpy(family, "Itanium", 8); break;
- case 0x1f: memcpy(family, "Itanium 2", 10); break;
- default: sprintf(family, "%u", c->family); break;
- }
-
- /* build the feature string: */
- memcpy(features, " standard", 10);
- cp = features;
- sep = 0;
- for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
- if (mask & feature_bits[i].mask) {
- if (sep)
- *cp++ = sep;
- sep = ',';
- *cp++ = ' ';
- strcpy(cp, feature_bits[i].feature_name);
- cp += strlen(feature_bits[i].feature_name);
- mask &= ~feature_bits[i].mask;
- }
- }
- if (mask) {
- /* print unknown features as a hex value: */
- if (sep)
- *cp++ = sep;
- sprintf(cp, " 0x%lx", mask);
- }
-
- proc_freq = cpufreq_quick_get(cpunum);
- if (!proc_freq)
- proc_freq = c->proc_freq / 1000;
-
- seq_printf(m,
- "processor : %d\n"
- "vendor : %s\n"
- "arch : IA-64\n"
- "family : %s\n"
- "model : %u\n"
- "revision : %u\n"
- "archrev : %u\n"
- "features :%s\n" /* don't change this---it _is_ right! */
- "cpu number : %lu\n"
- "cpu regs : %u\n"
- "cpu MHz : %lu.%06lu\n"
- "itc MHz : %lu.%06lu\n"
- "BogoMIPS : %lu.%02lu\n",
- cpunum, c->vendor, family, c->model, c->revision, c->archrev,
- features, c->ppn, c->number,
- proc_freq / 1000, proc_freq % 1000,
- c->itc_freq / 1000000, c->itc_freq % 1000000,
- lpj*HZ/500000, (lpj*HZ/5000) % 100);
-#ifdef CONFIG_SMP
- seq_printf(m, "siblings : %u\n", cpus_weight(cpu_core_map[cpunum]));
- if (c->threads_per_core > 1 || c->cores_per_socket > 1)
- seq_printf(m,
- "physical id: %u\n"
- "core id : %u\n"
- "thread id : %u\n",
- c->socket_id, c->core_id, c->thread_id);
-#endif
- seq_printf(m,"\n");
-
- return 0;
-}
-
-static void *
-c_start (struct seq_file *m, loff_t *pos)
-{
-#ifdef CONFIG_SMP
- while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
- ++*pos;
-#endif
- return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
-}
-
-static void *
-c_next (struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-
-static void
-c_stop (struct seq_file *m, void *v)
-{
-}
-
-struct seq_operations cpuinfo_op = {
- .start = c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo
-};
-
-static void __cpuinit
-identify_cpu (struct cpuinfo_ia64 *c)
-{
- union {
- unsigned long bits[5];
- struct {
- /* id 0 & 1: */
- char vendor[16];
-
- /* id 2 */
- u64 ppn; /* processor serial number */
-
- /* id 3: */
- unsigned number : 8;
- unsigned revision : 8;
- unsigned model : 8;
- unsigned family : 8;
- unsigned archrev : 8;
- unsigned reserved : 24;
-
- /* id 4: */
- u64 features;
- } field;
- } cpuid;
- pal_vm_info_1_u_t vm1;
- pal_vm_info_2_u_t vm2;
- pal_status_t status;
- unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */
- int i;
-
- for (i = 0; i < 5; ++i)
- cpuid.bits[i] = ia64_get_cpuid(i);
-
- memcpy(c->vendor, cpuid.field.vendor, 16);
-#ifdef CONFIG_SMP
- c->cpu = smp_processor_id();
-
- /* below default values will be overwritten by identify_siblings()
- * for Multi-Threading/Multi-Core capable cpu's
- */
- c->threads_per_core = c->cores_per_socket = c->num_log = 1;
- c->socket_id = -1;
-
- identify_siblings(c);
-#endif
- c->ppn = cpuid.field.ppn;
- c->number = cpuid.field.number;
- c->revision = cpuid.field.revision;
- c->model = cpuid.field.model;
- c->family = cpuid.field.family;
- c->archrev = cpuid.field.archrev;
- c->features = cpuid.field.features;
-
- status = ia64_pal_vm_summary(&vm1, &vm2);
- if (status == PAL_STATUS_SUCCESS) {
- impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
- phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
- }
- c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
- c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
-}
-
-void
-setup_per_cpu_areas (void)
-{
- /* start_kernel() requires this... */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
- prefill_possible_map();
-#endif
-}
-
-/*
- * Calculate the max. cache line size.
- *
- * In addition, the minimum of the i-cache stride sizes is calculated for
- * "flush_icache_range()".
- */
-static void __cpuinit
-get_max_cacheline_size (void)
-{
- unsigned long line_size, max = 1;
- unsigned int cache_size = 0;
- u64 l, levels, unique_caches;
- pal_cache_config_info_t cci;
- s64 status;
-
- status = ia64_pal_cache_summary(&levels, &unique_caches);
- if (status != 0) {
- printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
- __FUNCTION__, status);
- max = SMP_CACHE_BYTES;
- /* Safest setup for "flush_icache_range()" */
- ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
- goto out;
- }
-
- for (l = 0; l < levels; ++l) {
- status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
- &cci);
- if (status != 0) {
- printk(KERN_ERR
- "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
- __FUNCTION__, l, status);
- max = SMP_CACHE_BYTES;
- /* The safest setup for "flush_icache_range()" */
- cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
- cci.pcci_unified = 1;
- }
- line_size = 1 << cci.pcci_line_size;
- if (line_size > max)
- max = line_size;
- if (cache_size < cci.pcci_cache_size)
- cache_size = cci.pcci_cache_size;
- if (!cci.pcci_unified) {
- status = ia64_pal_cache_config_info(l,
- /* cache_type (instruction)= */ 1,
- &cci);
- if (status != 0) {
- printk(KERN_ERR
- "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
- __FUNCTION__, l, status);
- /* The safest setup for "flush_icache_range()" */
- cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
- }
- }
- if (cci.pcci_stride < ia64_i_cache_stride_shift)
- ia64_i_cache_stride_shift = cci.pcci_stride;
- }
- out:
-#ifdef CONFIG_SMP
- max_cache_size = max(max_cache_size, cache_size);
-#endif
- if (max > ia64_max_cacheline_size)
- ia64_max_cacheline_size = max;
-}
-
-/*
- * cpu_init() initializes state that is per-CPU. This function acts
- * as a 'CPU state barrier', nothing should get across.
- */
-void __cpuinit
-cpu_init (void)
-{
- extern void __cpuinit ia64_mmu_init (void *);
- unsigned long num_phys_stacked;
- pal_vm_info_2_u_t vmi;
- unsigned int max_ctx;
- struct cpuinfo_ia64 *cpu_info;
- void *cpu_data;
-
- cpu_data = per_cpu_init();
-
- /*
- * We set ar.k3 so that assembly code in MCA handler can compute
- * physical addresses of per cpu variables with a simple:
- * phys = ar.k3 + &per_cpu_var
- */
- ia64_set_kr(IA64_KR_PER_CPU_DATA,
- ia64_tpa(cpu_data) - (long) __per_cpu_start);
-
- get_max_cacheline_size();
-
- /*
- * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
- * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it
- * depends on the data returned by identify_cpu(). We break the dependency by
- * accessing cpu_data() through the canonical per-CPU address.
- */
- cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
- identify_cpu(cpu_info);
-
-#ifdef CONFIG_MCKINLEY
- {
-# define FEATURE_SET 16
- struct ia64_pal_retval iprv;
-
- if (cpu_info->family == 0x1f) {
- PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
- if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
- PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
- (iprv.v1 | 0x80), FEATURE_SET, 0);
- }
- }
-#endif
-
- /* Clear the stack memory reserved for pt_regs: */
- memset(task_pt_regs(current), 0, sizeof(struct pt_regs));
-
- ia64_set_kr(IA64_KR_FPU_OWNER, 0);
-
- /*
- * Initialize the page-table base register to a global
- * directory with all zeroes. This ensure that we can handle
- * TLB-misses to user address-space even before we created the
- * first user address-space. This may happen, e.g., due to
- * aggressive use of lfetch.fault.
- */
- ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
-
- /*
- * Initialize default control register to defer speculative faults except
- * for those arising from TLB misses, which are not deferred. The
- * kernel MUST NOT depend on a particular setting of these bits (in other words,
- * the kernel must have recovery code for all speculative accesses). Turn on
- * dcr.lc as per recommendation by the architecture team. Most IA-32 apps
- * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
- * be fine).
- */
- ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
- | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
- atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
- if (current->mm)
- BUG();
-
- ia64_mmu_init(ia64_imva(cpu_data));
- ia64_mca_cpu_init(ia64_imva(cpu_data));
-
-#ifdef CONFIG_IA32_SUPPORT
- ia32_cpu_init();
-#endif
-
- /* Clear ITC to eliminiate sched_clock() overflows in human time. */
- ia64_set_itc(0);
-
- /* disable all local interrupt sources: */
- ia64_set_itv(1 << 16);
- ia64_set_lrr0(1 << 16);
- ia64_set_lrr1(1 << 16);
- ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
- ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
-
- /* clear TPR & XTP to enable all interrupt classes: */
- ia64_setreg(_IA64_REG_CR_TPR, 0);
-#ifdef CONFIG_SMP
- normal_xtp();
-#endif
-
- /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
- if (ia64_pal_vm_summary(NULL, &vmi) == 0)
- max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
- else {
- printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
- max_ctx = (1U << 15) - 1; /* use architected minimum */
- }
- while (max_ctx < ia64_ctx.max_ctx) {
- unsigned int old = ia64_ctx.max_ctx;
- if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
- break;
- }
-
- if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
- printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
- "stacked regs\n");
- num_phys_stacked = 96;
- }
- /* size of physical stacked register partition plus 8 bytes: */
- __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
- platform_cpu_init();
-#ifdef CONFIG_XEN
- if (is_running_on_xen() && !ia64_platform_is("xen")) {
- extern ia64_mv_cpu_init_t xen_cpu_init;
- xen_cpu_init();
- }
-#endif
-
- pm_idle = default_idle;
-}
-
-/*
- * On SMP systems, when the scheduler does migration-cost autodetection,
- * it needs a way to flush as much of the CPU's caches as possible.
- */
-void sched_cacheflush(void)
-{
- ia64_sal_cache_flush(3);
-}
-
-void __init
-check_bugs (void)
-{
- ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
- (unsigned long) __end___mckinley_e9_bundles);
-}
-
-static int __init run_dmi_scan(void)
-{
- dmi_scan_machine();
- return 0;
-}
-core_initcall(run_dmi_scan);
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/time.c b/linux-2.6-xen-sparse/arch/ia64/kernel/time.c
deleted file mode 100644
index b73cffa94f..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/time.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * linux/arch/ia64/kernel/time.c
- *
- * Copyright (C) 1998-2003 Hewlett-Packard Co
- * Stephane Eranian <eranian@hpl.hp.com>
- * David Mosberger <davidm@hpl.hp.com>
- * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
- * Copyright (C) 1999-2000 VA Linux Systems
- * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
- */
-
-#include <linux/cpu.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/profile.h>
-#include <linux/sched.h>
-#include <linux/time.h>
-#include <linux/interrupt.h>
-#include <linux/efi.h>
-#include <linux/profile.h>
-#include <linux/timex.h>
-
-#include <asm/machvec.h>
-#include <asm/delay.h>
-#include <asm/hw_irq.h>
-#include <asm/ptrace.h>
-#include <asm/sal.h>
-#include <asm/sections.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_XEN
-#include <linux/kernel_stat.h>
-#include <linux/posix-timers.h>
-#include <xen/interface/vcpu.h>
-#include <asm/percpu.h>
-#endif
-
-extern unsigned long wall_jiffies;
-
-volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
-
-#ifdef CONFIG_IA64_DEBUG_IRQ
-
-unsigned long last_cli_ip;
-EXPORT_SYMBOL(last_cli_ip);
-
-#endif
-
-#ifdef CONFIG_XEN
-DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
-DEFINE_PER_CPU(unsigned long, processed_stolen_time);
-DEFINE_PER_CPU(unsigned long, processed_blocked_time);
-#define NS_PER_TICK (1000000000LL/HZ)
-#endif
-
-static struct time_interpolator itc_interpolator = {
- .shift = 16,
- .mask = 0xffffffffffffffffLL,
- .source = TIME_SOURCE_CPU
-};
-
-#ifdef CONFIG_XEN
-static unsigned long
-consider_steal_time(unsigned long new_itm, struct pt_regs *regs)
-{
- unsigned long stolen, blocked, sched_time;
- unsigned long delta_itm = 0, stolentick = 0;
- int i, cpu = smp_processor_id();
- struct vcpu_runstate_info *runstate;
- struct task_struct *p = current;
-
- runstate = &per_cpu(runstate, smp_processor_id());
-
- do {
- sched_time = runstate->state_entry_time;
- mb();
- stolen = runstate->time[RUNSTATE_runnable] +
- runstate->time[RUNSTATE_offline] -
- per_cpu(processed_stolen_time, cpu);
- blocked = runstate->time[RUNSTATE_blocked] -
- per_cpu(processed_blocked_time, cpu);
- mb();
- } while (sched_time != runstate->state_entry_time);
-
- /*
- * Check for vcpu migration effect
- * In this case, itc value is reversed.
- * This causes huge stolen value.
- * This function just checks and reject this effect.
- */
- if (!time_after_eq(runstate->time[RUNSTATE_blocked],
- per_cpu(processed_blocked_time, cpu)))
- blocked = 0;
-
- if (!time_after_eq(runstate->time[RUNSTATE_runnable] +
- runstate->time[RUNSTATE_offline],
- per_cpu(processed_stolen_time, cpu)))
- stolen = 0;
-
- if (!time_after(delta_itm + new_itm, ia64_get_itc()))
- stolentick = ia64_get_itc() - delta_itm - new_itm;
-
- do_div(stolentick, NS_PER_TICK);
- stolentick++;
-
- do_div(stolen, NS_PER_TICK);
-
- if (stolen > stolentick)
- stolen = stolentick;
-
- stolentick -= stolen;
- do_div(blocked, NS_PER_TICK);
-
- if (blocked > stolentick)
- blocked = stolentick;
-
- if (stolen > 0 || blocked > 0) {
- account_steal_time(NULL, jiffies_to_cputime(stolen));
- account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked));
- run_local_timers();
-
- if (rcu_pending(cpu))
- rcu_check_callbacks(cpu, user_mode(regs));
-
- scheduler_tick();
- run_posix_cpu_timers(p);
- delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
-
- if (cpu == time_keeper_id) {
- write_seqlock(&xtime_lock);
- for(i = 0; i < stolen + blocked; i++)
- do_timer(regs);
- local_cpu_data->itm_next = delta_itm + new_itm;
- write_sequnlock(&xtime_lock);
- } else {
- local_cpu_data->itm_next = delta_itm + new_itm;
- }
- per_cpu(processed_stolen_time,cpu) += NS_PER_TICK * stolen;
- per_cpu(processed_blocked_time,cpu) += NS_PER_TICK * blocked;
- }
- return delta_itm;
-}
-#else
-#define consider_steal_time(new_itm, regs) (0)
-#endif
-
-static irqreturn_t
-timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
-{
- unsigned long new_itm;
- unsigned long delta_itm; /* XEN */
-
- if (unlikely(cpu_is_offline(smp_processor_id()))) {
- return IRQ_HANDLED;
- }
-
- platform_timer_interrupt(irq, dev_id, regs);
-
- new_itm = local_cpu_data->itm_next;
-
- if (!time_after(ia64_get_itc(), new_itm))
- printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
- ia64_get_itc(), new_itm);
-
- profile_tick(CPU_PROFILING, regs);
-
- if (is_running_on_xen()) {
- delta_itm = consider_steal_time(new_itm, regs);
- new_itm += delta_itm;
- if (time_after(new_itm, ia64_get_itc()) && delta_itm)
- goto skip_process_time_accounting;
- }
-
- while (1) {
- update_process_times(user_mode(regs));
-
- new_itm += local_cpu_data->itm_delta;
-
- if (smp_processor_id() == time_keeper_id) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_seqlock(&xtime_lock);
- do_timer(regs);
- local_cpu_data->itm_next = new_itm;
- write_sequnlock(&xtime_lock);
- } else
- local_cpu_data->itm_next = new_itm;
-
- if (time_after(new_itm, ia64_get_itc()))
- break;
- }
-
-skip_process_time_accounting: /* XEN */
-
- do {
- /*
- * If we're too close to the next clock tick for
- * comfort, we increase the safety margin by
- * intentionally dropping the next tick(s). We do NOT
- * update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
- * too fast (with the potentially devastating effect
- * of losing monotony of time).
- */
- while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
- new_itm += local_cpu_data->itm_delta;
- ia64_set_itm(new_itm);
- /* double check, in case we got hit by a (slow) PMI: */
- } while (time_after_eq(ia64_get_itc(), new_itm));
- return IRQ_HANDLED;
-}
-
-/*
- * Encapsulate access to the itm structure for SMP.
- */
-void
-ia64_cpu_local_tick (void)
-{
- int cpu = smp_processor_id();
- unsigned long shift = 0, delta;
-
- /* arrange for the cycle counter to generate a timer interrupt: */
- ia64_set_itv(IA64_TIMER_VECTOR);
-
- delta = local_cpu_data->itm_delta;
- /*
- * Stagger the timer tick for each CPU so they don't occur all at (almost) the
- * same time:
- */
- if (cpu) {
- unsigned long hi = 1UL << ia64_fls(cpu);
- shift = (2*(cpu - hi) + 1) * delta/hi/2;
- }
- local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
- ia64_set_itm(local_cpu_data->itm_next);
-}
-
-static int nojitter;
-
-static int __init nojitter_setup(char *str)
-{
- nojitter = 1;
- printk("Jitter checking for ITC timers disabled\n");
- return 1;
-}
-
-__setup("nojitter", nojitter_setup);
-
-#ifdef CONFIG_XEN
-/* taken from i386/kernel/time-xen.c */
-static void init_missing_ticks_accounting(int cpu)
-{
- struct vcpu_register_runstate_memory_area area;
- struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
- int rc;
-
- memset(runstate, 0, sizeof(*runstate));
-
- area.addr.v = runstate;
- rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
- WARN_ON(rc && rc != -ENOSYS);
-
- per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
- per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
- + runstate->time[RUNSTATE_offline];
-}
-
-static int xen_ia64_settimefoday_after_resume;
-
-static int __init __xen_ia64_settimeofday_after_resume(char *str)
-{
- xen_ia64_settimefoday_after_resume = 1;
- return 1;
-}
-
-__setup("xen_ia64_settimefoday_after_resume",
- __xen_ia64_settimeofday_after_resume);
-
-/* Called after suspend, to resume time. */
-void
-time_resume(void)
-{
- unsigned int cpu;
-
- /* Just trigger a tick. */
- ia64_cpu_local_tick();
-
- if (xen_ia64_settimefoday_after_resume) {
- /* do_settimeofday() resets timer interplator */
- struct timespec xen_time;
- int ret;
- efi_gettimeofday(&xen_time);
-
- ret = do_settimeofday(&xen_time);
- WARN_ON(ret);
- } else {
-#if 0
- /* adjust EFI time */
- struct timespec my_time = CURRENT_TIME;
- struct timespec xen_time;
- static timespec diff;
- struct xen_domctl domctl;
- int ret;
-
- efi_gettimeofday(&xen_time);
- diff = timespec_sub(&xen_time, &my_time);
- domctl.cmd = XEN_DOMCTL_settimeoffset;
- domctl.domain = DOMID_SELF;
- domctl.u.settimeoffset.timeoffset_seconds = diff.tv_sec;
- ret = HYPERVISOR_domctl_op(&domctl);
- WARN_ON(ret);
-#endif
- /* Time interpolator remembers the last timer status.
- Forget it */
- write_seqlock_irq(&xtime_lock);
- time_interpolator_reset();
- write_sequnlock_irq(&xtime_lock);
- }
-
- for_each_online_cpu(cpu)
- init_missing_ticks_accounting(cpu);
-
- touch_softlockup_watchdog();
-}
-#else
-#define init_missing_ticks_accounting(cpu) do {} while (0)
-#endif
-
-void __devinit
-ia64_init_itm (void)
-{
- unsigned long platform_base_freq, itc_freq;
- struct pal_freq_ratio itc_ratio, proc_ratio;
- long status, platform_base_drift, itc_drift;
-
- /*
- * According to SAL v2.6, we need to use a SAL call to determine the platform base
- * frequency and then a PAL call to determine the frequency ratio between the ITC
- * and the base frequency.
- */
- status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
- &platform_base_freq, &platform_base_drift);
- if (status != 0) {
- printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
- } else {
- status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
- if (status != 0)
- printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
- }
- if (status != 0) {
- /* invent "random" values */
- printk(KERN_ERR
- "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
- platform_base_freq = 100000000;
- platform_base_drift = -1; /* no drift info */
- itc_ratio.num = 3;
- itc_ratio.den = 1;
- }
- if (platform_base_freq < 40000000) {
- printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
- platform_base_freq);
- platform_base_freq = 75000000;
- platform_base_drift = -1;
- }
- if (!proc_ratio.den)
- proc_ratio.den = 1; /* avoid division by zero */
- if (!itc_ratio.den)
- itc_ratio.den = 1; /* avoid division by zero */
-
- itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
-
- local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
- printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, "
- "ITC freq=%lu.%03luMHz", smp_processor_id(),
- platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
- itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
-
- if (platform_base_drift != -1) {
- itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
- printk("+/-%ldppm\n", itc_drift);
- } else {
- itc_drift = -1;
- printk("\n");
- }
-
- local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
- local_cpu_data->itc_freq = itc_freq;
- local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
- local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
- + itc_freq/2)/itc_freq;
-
- if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
- itc_interpolator.frequency = local_cpu_data->itc_freq;
- itc_interpolator.drift = itc_drift;
-#ifdef CONFIG_SMP
- /* On IA64 in an SMP configuration ITCs are never accurately synchronized.
- * Jitter compensation requires a cmpxchg which may limit
- * the scalability of the syscalls for retrieving time.
- * The ITC synchronization is usually successful to within a few
- * ITC ticks but this is not a sure thing. If you need to improve
- * timer performance in SMP situations then boot the kernel with the
- * "nojitter" option. However, doing so may result in time fluctuating (maybe
- * even going backward) if the ITC offsets between the individual CPUs
- * are too large.
- */
- if (!nojitter) itc_interpolator.jitter = 1;
-#endif
- register_time_interpolator(&itc_interpolator);
- }
-
- if (is_running_on_xen())
- init_missing_ticks_accounting(smp_processor_id());
-
- /* avoid softlock up message when cpu is unplug and plugged again. */
- touch_softlockup_watchdog();
-
- /* Setup the CPU local timer tick */
- ia64_cpu_local_tick();
-}
-
-static struct irqaction timer_irqaction = {
- .handler = timer_interrupt,
- .flags = IRQF_DISABLED,
- .name = "timer"
-};
-
-void __devinit ia64_disable_timer(void)
-{
- ia64_set_itv(1 << 16);
-}
-
-void __init
-time_init (void)
-{
- register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
- efi_gettimeofday(&xtime);
- ia64_init_itm();
-
- /*
- * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
- * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
- */
- set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
-}
-
-/*
- * Generic udelay assumes that if preemption is allowed and the thread
- * migrates to another CPU, that the ITC values are synchronized across
- * all CPUs.
- */
-static void
-ia64_itc_udelay (unsigned long usecs)
-{
- unsigned long start = ia64_get_itc();
- unsigned long end = start + usecs*local_cpu_data->cyc_per_usec;
-
- while (time_before(ia64_get_itc(), end))
- cpu_relax();
-}
-
-void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay;
-
-void
-udelay (unsigned long usecs)
-{
- (*ia64_udelay)(usecs);
-}
-EXPORT_SYMBOL(udelay);
-
-static unsigned long long ia64_itc_printk_clock(void)
-{
- if (ia64_get_kr(IA64_KR_PER_CPU_DATA))
- return sched_clock();
- return 0;
-}
-
-static unsigned long long ia64_default_printk_clock(void)
-{
- return (unsigned long long)(jiffies_64 - INITIAL_JIFFIES) *
- (1000000000/HZ);
-}
-
-unsigned long long (*ia64_printk_clock)(void) = &ia64_default_printk_clock;
-
-unsigned long long printk_clock(void)
-{
- return ia64_printk_clock();
-}
-
-void __init
-ia64_setup_printk_clock(void)
-{
- if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT))
- ia64_printk_clock = ia64_itc_printk_clock;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c b/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c
deleted file mode 100644
index c14ac662a3..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
- * Bjorn Helgaas <bjorn.helgaas@hp.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <linux/efi.h>
-#include <asm/io.h>
-#include <asm/meminit.h>
-
-static inline void __iomem *
-__ioremap (unsigned long offset, unsigned long size)
-{
- offset = HYPERVISOR_ioremap(offset, size);
- if (IS_ERR_VALUE(offset))
- return (void __iomem*)offset;
- return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
-}
-
-void __iomem *
-ioremap (unsigned long offset, unsigned long size)
-{
- u64 attr;
- unsigned long gran_base, gran_size;
-
- /*
- * For things in kern_memmap, we must use the same attribute
- * as the rest of the kernel. For more details, see
- * Documentation/ia64/aliasing.txt.
- */
- attr = kern_mem_attribute(offset, size);
- if (attr & EFI_MEMORY_WB)
- return (void __iomem *) phys_to_virt(offset);
- else if (attr & EFI_MEMORY_UC)
- return __ioremap(offset, size);
-
- /*
- * Some chipsets don't support UC access to memory. If
- * WB is supported for the whole granule, we prefer that.
- */
- gran_base = GRANULEROUNDDOWN(offset);
- gran_size = GRANULEROUNDUP(offset + size) - gran_base;
- if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
- return (void __iomem *) phys_to_virt(offset);
-
- return __ioremap(offset, size);
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *
-ioremap_nocache (unsigned long offset, unsigned long size)
-{
- if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB)
- return NULL;
-
- return __ioremap(offset, size);
-}
-EXPORT_SYMBOL(ioremap_nocache);
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile b/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile
deleted file mode 100644
index 555d4a9d7a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-obj-$(CONFIG_OPROFILE) += oprofile.o
-
-DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
- oprof.o cpu_buffer.o buffer_sync.o \
- event_buffer.o oprofile_files.o \
- oprofilefs.o oprofile_stats.o \
- timer_int.o )
-
-oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
-oprofile-$(CONFIG_PERFMON) += perfmon.o
-ifeq ($(CONFIG_XEN), y)
-oprofile-$(CONFIG_PERFMON) += xenoprof.o \
- ../../../drivers/xen/xenoprof/xenoprofile.o
-endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c
deleted file mode 100644
index f218b7eb45..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * @file init.c
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/oprofile.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include "oprofile_perfmon.h"
-
-extern int perfmon_init(struct oprofile_operations * ops);
-extern void perfmon_exit(void);
-extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
-
-int __init oprofile_arch_init(struct oprofile_operations * ops)
-{
- int ret = -ENODEV;
-
- if (is_running_on_xen()) {
- ret = xen_perfmon_init();
- if (ret)
- return ret;
- return xenoprofile_init(ops);
- }
-
-#ifdef CONFIG_PERFMON
- /* perfmon_init() can fail, but we have no way to report it */
- ret = perfmon_init(ops);
-#endif
- ops->backtrace = ia64_backtrace;
-
- return ret;
-}
-
-
-void oprofile_arch_exit(void)
-{
- if (is_running_on_xen()) {
- xenoprofile_exit();
- xen_perfmon_exit();
- return;
- }
-
-#ifdef CONFIG_PERFMON
- perfmon_exit();
-#endif
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h b/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h
deleted file mode 100644
index 6ba1170bd6..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef OPROFILE_PERFMON_H
-#define OPROFILE_PERFMON_H
-
-#ifdef CONFIG_PERFMON
-int __perfmon_init(void);
-void __perfmon_exit(void);
-int perfmon_start(void);
-void perfmon_stop(void);
-#else
-#define __perfmon_init() (-ENOSYS)
-#define __perfmon_exit() do {} while (0)
-#endif /* CONFIG_PERFMON */
-
-#ifdef CONFIG_XEN
-#define STATIC_IF_NO_XEN /* nothing */
-#define xen_perfmon_init() __perfmon_init()
-#define xen_perfmon_exit() __perfmon_exit()
-extern int xenoprofile_init(struct oprofile_operations * ops);
-extern void xenoprofile_exit(void);
-#else
-#define STATIC_IF_NO_XEN static
-#define xen_perfmon_init() (-ENOSYS)
-#define xen_perfmon_exit() do {} while (0)
-#define xenoprofile_init() (-ENOSYS)
-#define xenoprofile_exit() do {} while (0)
-#endif /* CONFIG_XEN */
-
-#endif /* OPROFILE_PERFMON_H */
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c
deleted file mode 100644
index 89dc71f1c4..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- * @file perfmon.c
- *
- * @remark Copyright 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <asm/perfmon.h>
-#include <asm/ptrace.h>
-#include <asm/errno.h>
-#include "oprofile_perfmon.h"
-
-static int allow_ints;
-
-static int
-perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
- struct pt_regs *regs, unsigned long stamp)
-{
- int event = arg->pmd_eventid;
-
- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
-
- /* the owner of the oprofile event buffer may have exited
- * without perfmon being shutdown (e.g. SIGSEGV)
- */
- if (allow_ints)
- oprofile_add_sample(regs, event);
- return 0;
-}
-
-
-STATIC_IF_NO_XEN
-int perfmon_start(void)
-{
- allow_ints = 1;
- return 0;
-}
-
-
-STATIC_IF_NO_XEN
-void perfmon_stop(void)
-{
- allow_ints = 0;
-}
-
-
-#define OPROFILE_FMT_UUID { \
- 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
-
-static pfm_buffer_fmt_t oprofile_fmt = {
- .fmt_name = "oprofile_format",
- .fmt_uuid = OPROFILE_FMT_UUID,
- .fmt_handler = perfmon_handler,
-};
-
-
-static char * get_cpu_type(void)
-{
- __u8 family = local_cpu_data->family;
-
- switch (family) {
- case 0x07:
- return "ia64/itanium";
- case 0x1f:
- return "ia64/itanium2";
- default:
- return "ia64/ia64";
- }
-}
-
-
-/* all the ops are handled via userspace for IA64 perfmon */
-
-static int using_perfmon;
-
-STATIC_IF_NO_XEN
-int __perfmon_init(void)
-{
- int ret = pfm_register_buffer_fmt(&oprofile_fmt);
- if (ret)
- return -ENODEV;
-
- using_perfmon = 1;
- return 0;
-}
-
-STATIC_IF_NO_XEN
-void __perfmon_exit(void)
-{
- if (!using_perfmon)
- return;
-
- pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
-}
-
-int perfmon_init(struct oprofile_operations * ops)
-{
- int ret = __perfmon_init();
- if (ret)
- return -ENODEV;
-
- ops->cpu_type = get_cpu_type();
- ops->start = perfmon_start;
- ops->stop = perfmon_stop;
- printk(KERN_INFO "oprofile: using perfmon.\n");
- return 0;
-}
-
-
-void perfmon_exit(void)
-{
- __perfmon_exit();
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c
deleted file mode 100644
index 998be3e66b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/******************************************************************************
- * xenoprof ia64 specific part
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- * VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-#include <linux/init.h>
-#include <linux/oprofile.h>
-#include <linux/ioport.h>
-
-#include <xen/driver_util.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/xenoprof.h>
-#include <xen/xenoprof.h>
-
-#include "oprofile_perfmon.h"
-
-void __init xenoprof_arch_init_counter(struct xenoprof_init *init)
-{
- init->num_events = 0; /* perfmon manages. */
-}
-
-void xenoprof_arch_counter(void)
-{
- /* nothing. perfmon does. */
-}
-
-void xenoprof_arch_start(void)
-{
- perfmon_start();
-}
-
-void xenoprof_arch_stop(void)
-{
- perfmon_stop();
-}
-
-/* XXX move them to an appropriate header file. */
-struct resource* xen_ia64_allocate_resource(unsigned long size);
-void xen_ia64_release_resource(struct resource* res);
-void xen_ia64_unmap_resource(struct resource* res);
-
-struct resource*
-xenoprof_ia64_allocate_resource(int32_t max_samples)
-{
- unsigned long bufsize;
-
- /* XXX add hypercall to get bufsize? */
- /* this value is taken from alloc_xenoprof_struct(). */
-#if 0
- bufsize = NR_CPUS * (sizeof(struct xenoprof_buf) +
- (max_samples - 1) * sizeof(struct event_log));
- bufsize = PAGE_ALIGN(bufsize) + PAGE_SIZE;
-#else
-#define MAX_OPROF_SHARED_PAGES 32
- bufsize = (MAX_OPROF_SHARED_PAGES + 1) * PAGE_SIZE;
-#endif
- return xen_ia64_allocate_resource(bufsize);
-}
-
-void xenoprof_arch_unmap_shared_buffer(struct xenoprof_shared_buffer* sbuf)
-{
- if (sbuf->buffer) {
- xen_ia64_unmap_resource(sbuf->arch.res);
- sbuf->buffer = NULL;
- sbuf->arch.res = NULL;
- }
-}
-
-int xenoprof_arch_map_shared_buffer(struct xenoprof_get_buffer* get_buffer,
- struct xenoprof_shared_buffer* sbuf)
-{
- int ret;
- struct resource* res;
-
- sbuf->buffer = NULL;
- sbuf->arch.res = NULL;
-
- res = xenoprof_ia64_allocate_resource(get_buffer->max_samples);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- get_buffer->buf_gmaddr = res->start;
-
- ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, get_buffer);
- if (ret) {
- xen_ia64_release_resource(res);
- return ret;
- }
-
- BUG_ON((res->end - res->start + 1) <
- get_buffer->bufsize * get_buffer->nbuf);
-
- sbuf->buffer = __va(res->start);
- sbuf->arch.res = res;
-
- return ret;
-}
-
-int xenoprof_arch_set_passive(struct xenoprof_passive* pdomain,
- struct xenoprof_shared_buffer* sbuf)
-{
- int ret;
- struct resource* res;
-
- sbuf->buffer = NULL;
- sbuf->arch.res = NULL;
-
- res = xenoprof_ia64_allocate_resource(pdomain->max_samples);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- pdomain->buf_gmaddr = res->start;
-
- ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, pdomain);
- if (ret) {
- xen_ia64_release_resource(res);
- return ret;
- }
-
- BUG_ON((res->end - res->start + 1) < pdomain->bufsize * pdomain->nbuf);
-
- sbuf->buffer = __va(res->start);
- sbuf->arch.res = res;
-
- return ret;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/pci/pci.c b/linux-2.6-xen-sparse/arch/ia64/pci/pci.c
deleted file mode 100644
index 4d3684156a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/pci/pci.c
+++ /dev/null
@@ -1,836 +0,0 @@
-/*
- * pci.c - Low-Level PCI Access in IA-64
- *
- * Derived from bios32.c of i386 tree.
- *
- * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P.
- * David Mosberger-Tang <davidm@hpl.hp.com>
- * Bjorn Helgaas <bjorn.helgaas@hp.com>
- * Copyright (C) 2004 Silicon Graphics, Inc.
- *
- * Note: Above list of copyright holders is incomplete...
- */
-
-#include <linux/acpi.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-
-#include <asm/machvec.h>
-#include <asm/page.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/sal.h>
-#include <asm/smp.h>
-#include <asm/irq.h>
-#include <asm/hw_irq.h>
-
-/*
- * Low-level SAL-based PCI configuration access functions. Note that SAL
- * calls are already serialized (via sal_lock), so we don't need another
- * synchronization mechanism here.
- */
-
-#define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \
- (((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg))
-
-/* SAL 3.2 adds support for extended config space. */
-
-#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \
- (((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg))
-
-static int
-pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
- int reg, int len, u32 *value)
-{
- u64 addr, data = 0;
- int mode, result;
-
- if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
- return -EINVAL;
-
- if ((seg | reg) <= 255) {
- addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
- mode = 0;
- } else {
- addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
- mode = 1;
- }
- result = ia64_sal_pci_config_read(addr, mode, len, &data);
- if (result != 0)
- return -EINVAL;
-
- *value = (u32) data;
- return 0;
-}
-
-static int
-pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
- int reg, int len, u32 value)
-{
- u64 addr;
- int mode, result;
-
- if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
- return -EINVAL;
-
- if ((seg | reg) <= 255) {
- addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
- mode = 0;
- } else {
- addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
- mode = 1;
- }
- result = ia64_sal_pci_config_write(addr, mode, len, value);
- if (result != 0)
- return -EINVAL;
- return 0;
-}
-
-static struct pci_raw_ops pci_sal_ops = {
- .read = pci_sal_read,
- .write = pci_sal_write
-};
-
-struct pci_raw_ops *raw_pci_ops = &pci_sal_ops;
-
-static int
-pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
-{
- return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
- devfn, where, size, value);
-}
-
-static int
-pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
-{
- return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
- devfn, where, size, value);
-}
-
-struct pci_ops pci_root_ops = {
- .read = pci_read,
- .write = pci_write,
-};
-
-/* Called by ACPI when it finds a new root bus. */
-
-static struct pci_controller * __devinit
-alloc_pci_controller (int seg)
-{
- struct pci_controller *controller;
-
- controller = kmalloc(sizeof(*controller), GFP_KERNEL);
- if (!controller)
- return NULL;
-
- memset(controller, 0, sizeof(*controller));
- controller->segment = seg;
- controller->node = -1;
- return controller;
-}
-
-struct pci_root_info {
- struct pci_controller *controller;
- char *name;
-};
-
-static unsigned int
-new_space (u64 phys_base, int sparse)
-{
- u64 mmio_base;
- int i;
-
- if (phys_base == 0)
- return 0; /* legacy I/O port space */
-
- mmio_base = (u64) ioremap(phys_base, 0);
- for (i = 0; i < num_io_spaces; i++)
- if (io_space[i].mmio_base == mmio_base &&
- io_space[i].sparse == sparse)
- return i;
-
- if (num_io_spaces == MAX_IO_SPACES) {
- printk(KERN_ERR "PCI: Too many IO port spaces "
- "(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES);
- return ~0;
- }
-
- i = num_io_spaces++;
- io_space[i].mmio_base = mmio_base;
- io_space[i].sparse = sparse;
-
- return i;
-}
-
-static u64 __devinit
-add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr)
-{
- struct resource *resource;
- char *name;
- u64 base, min, max, base_port;
- unsigned int sparse = 0, space_nr, len;
-
- resource = kzalloc(sizeof(*resource), GFP_KERNEL);
- if (!resource) {
- printk(KERN_ERR "PCI: No memory for %s I/O port space\n",
- info->name);
- goto out;
- }
-
- len = strlen(info->name) + 32;
- name = kzalloc(len, GFP_KERNEL);
- if (!name) {
- printk(KERN_ERR "PCI: No memory for %s I/O port space name\n",
- info->name);
- goto free_resource;
- }
-
- min = addr->minimum;
- max = min + addr->address_length - 1;
- if (addr->info.io.translation_type == ACPI_SPARSE_TRANSLATION)
- sparse = 1;
-
- space_nr = new_space(addr->translation_offset, sparse);
- if (space_nr == ~0)
- goto free_name;
-
- base = __pa(io_space[space_nr].mmio_base);
- base_port = IO_SPACE_BASE(space_nr);
- snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->name,
- base_port + min, base_port + max);
-
- /*
- * The SDM guarantees the legacy 0-64K space is sparse, but if the
- * mapping is done by the processor (not the bridge), ACPI may not
- * mark it as sparse.
- */
- if (space_nr == 0)
- sparse = 1;
-
- resource->name = name;
- resource->flags = IORESOURCE_MEM;
- resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min);
- resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max);
- insert_resource(&iomem_resource, resource);
-
- return base_port;
-
-free_name:
- kfree(name);
-free_resource:
- kfree(resource);
-out:
- return ~0;
-}
-
-static acpi_status __devinit resource_to_window(struct acpi_resource *resource,
- struct acpi_resource_address64 *addr)
-{
- acpi_status status;
-
- /*
- * We're only interested in _CRS descriptors that are
- * - address space descriptors for memory or I/O space
- * - non-zero size
- * - producers, i.e., the address space is routed downstream,
- * not consumed by the bridge itself
- */
- status = acpi_resource_to_address64(resource, addr);
- if (ACPI_SUCCESS(status) &&
- (addr->resource_type == ACPI_MEMORY_RANGE ||
- addr->resource_type == ACPI_IO_RANGE) &&
- addr->address_length &&
- addr->producer_consumer == ACPI_PRODUCER)
- return AE_OK;
-
- return AE_ERROR;
-}
-
-static acpi_status __devinit
-count_window (struct acpi_resource *resource, void *data)
-{
- unsigned int *windows = (unsigned int *) data;
- struct acpi_resource_address64 addr;
- acpi_status status;
-
- status = resource_to_window(resource, &addr);
- if (ACPI_SUCCESS(status))
- (*windows)++;
-
- return AE_OK;
-}
-
-static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
-{
- struct pci_root_info *info = data;
- struct pci_window *window;
- struct acpi_resource_address64 addr;
- acpi_status status;
- unsigned long flags, offset = 0;
- struct resource *root;
-
- /* Return AE_OK for non-window resources to keep scanning for more */
- status = resource_to_window(res, &addr);
- if (!ACPI_SUCCESS(status))
- return AE_OK;
-
- if (addr.resource_type == ACPI_MEMORY_RANGE) {
- flags = IORESOURCE_MEM;
- root = &iomem_resource;
- offset = addr.translation_offset;
- } else if (addr.resource_type == ACPI_IO_RANGE) {
- flags = IORESOURCE_IO;
- root = &ioport_resource;
- offset = add_io_space(info, &addr);
- if (offset == ~0)
- return AE_OK;
- } else
- return AE_OK;
-
- window = &info->controller->window[info->controller->windows++];
- window->resource.name = info->name;
- window->resource.flags = flags;
- window->resource.start = addr.minimum + offset;
- window->resource.end = window->resource.start + addr.address_length - 1;
- window->resource.child = NULL;
- window->offset = offset;
-
- if (insert_resource(root, &window->resource)) {
- printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
- window->resource.start, window->resource.end,
- root->name, info->name);
- }
-
- return AE_OK;
-}
-
-static void __devinit
-pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
-{
- int i, j;
-
- j = 0;
- for (i = 0; i < ctrl->windows; i++) {
- struct resource *res = &ctrl->window[i].resource;
- /* HP's firmware has a hack to work around a Windows bug.
- * Ignore these tiny memory ranges */
- if ((res->flags & IORESOURCE_MEM) &&
- (res->end - res->start < 16))
- continue;
- if (j >= PCI_BUS_NUM_RESOURCES) {
- printk("Ignoring range [%lx-%lx] (%lx)\n", res->start,
- res->end, res->flags);
- continue;
- }
- bus->resource[j++] = res;
- }
-}
-
-struct pci_bus * __devinit
-pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
-{
- struct pci_root_info info;
- struct pci_controller *controller;
- unsigned int windows = 0;
- struct pci_bus *pbus;
- char *name;
- int pxm;
-
- controller = alloc_pci_controller(domain);
- if (!controller)
- goto out1;
-
- controller->acpi_handle = device->handle;
-
- pxm = acpi_get_pxm(controller->acpi_handle);
-#ifdef CONFIG_NUMA
- if (pxm >= 0)
- controller->node = pxm_to_node(pxm);
-#endif
-
- acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
- &windows);
- controller->window = kmalloc_node(sizeof(*controller->window) * windows,
- GFP_KERNEL, controller->node);
- if (!controller->window)
- goto out2;
-
- name = kmalloc(16, GFP_KERNEL);
- if (!name)
- goto out3;
-
- sprintf(name, "PCI Bus %04x:%02x", domain, bus);
- info.controller = controller;
- info.name = name;
- acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
- &info);
-
- pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
- if (pbus)
- pcibios_setup_root_windows(pbus, controller);
-
- return pbus;
-
-out3:
- kfree(controller->window);
-out2:
- kfree(controller);
-out1:
- return NULL;
-}
-
-void pcibios_resource_to_bus(struct pci_dev *dev,
- struct pci_bus_region *region, struct resource *res)
-{
- struct pci_controller *controller = PCI_CONTROLLER(dev);
- unsigned long offset = 0;
- int i;
-
- for (i = 0; i < controller->windows; i++) {
- struct pci_window *window = &controller->window[i];
- if (!(window->resource.flags & res->flags))
- continue;
- if (window->resource.start > res->start)
- continue;
- if (window->resource.end < res->end)
- continue;
- offset = window->offset;
- break;
- }
-
- region->start = res->start - offset;
- region->end = res->end - offset;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void pcibios_bus_to_resource(struct pci_dev *dev,
- struct resource *res, struct pci_bus_region *region)
-{
- struct pci_controller *controller = PCI_CONTROLLER(dev);
- unsigned long offset = 0;
- int i;
-
- for (i = 0; i < controller->windows; i++) {
- struct pci_window *window = &controller->window[i];
- if (!(window->resource.flags & res->flags))
- continue;
- if (window->resource.start - window->offset > region->start)
- continue;
- if (window->resource.end - window->offset < region->end)
- continue;
- offset = window->offset;
- break;
- }
-
- res->start = region->start + offset;
- res->end = region->end + offset;
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
-static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
-{
- unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
- struct resource *devr = &dev->resource[idx];
-
- if (!dev->bus)
- return 0;
- for (i=0; i<PCI_BUS_NUM_RESOURCES; i++) {
- struct resource *busr = dev->bus->resource[i];
-
- if (!busr || ((busr->flags ^ devr->flags) & type_mask))
- continue;
- if ((devr->start) && (devr->start >= busr->start) &&
- (devr->end <= busr->end))
- return 1;
- }
- return 0;
-}
-
-static void __devinit
-pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
-{
- struct pci_bus_region region;
- int i;
-
- for (i = start; i < limit; i++) {
- if (!dev->resource[i].flags)
- continue;
- region.start = dev->resource[i].start;
- region.end = dev->resource[i].end;
- pcibios_bus_to_resource(dev, &dev->resource[i], &region);
- if ((is_valid_resource(dev, i)))
- pci_claim_resource(dev, i);
- }
-}
-
-static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
-{
- pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES);
-}
-
-static void __devinit pcibios_fixup_bridge_resources(struct pci_dev *dev)
-{
- pcibios_fixup_resources(dev, PCI_BRIDGE_RESOURCES, PCI_NUM_RESOURCES);
-}
-
-/*
- * Called after each bus is probed, but before its children are examined.
- */
-void __devinit
-pcibios_fixup_bus (struct pci_bus *b)
-{
- struct pci_dev *dev;
-
- if (b->self) {
- pci_read_bridge_bases(b);
- pcibios_fixup_bridge_resources(b->self);
- }
- list_for_each_entry(dev, &b->devices, bus_list)
- pcibios_fixup_device_resources(dev);
-
- return;
-}
-
-void __devinit
-pcibios_update_irq (struct pci_dev *dev, int irq)
-{
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-
- /* ??? FIXME -- record old value for shutdown. */
-}
-
-static inline int
-pcibios_enable_resources (struct pci_dev *dev, int mask)
-{
- u16 cmd, old_cmd;
- int idx;
- struct resource *r;
- unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-
- if (!dev)
- return -EINVAL;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- old_cmd = cmd;
- for (idx=0; idx<PCI_NUM_RESOURCES; idx++) {
- /* Only set up the desired resources. */
- if (!(mask & (1 << idx)))
- continue;
-
- r = &dev->resource[idx];
- if (!(r->flags & type_mask))
- continue;
- if ((idx == PCI_ROM_RESOURCE) &&
- (!(r->flags & IORESOURCE_ROM_ENABLE)))
- continue;
- if (!r->start && r->end) {
- printk(KERN_ERR
- "PCI: Device %s not available because of resource collisions\n",
- pci_name(dev));
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- if (cmd != old_cmd) {
- printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
-int
-pcibios_enable_device (struct pci_dev *dev, int mask)
-{
- int ret;
-
- ret = pcibios_enable_resources(dev, mask);
- if (ret < 0)
- return ret;
-
- return acpi_pci_irq_enable(dev);
-}
-
-void
-pcibios_disable_device (struct pci_dev *dev)
-{
- acpi_pci_irq_disable(dev);
-}
-
-void
-pcibios_align_resource (void *data, struct resource *res,
- resource_size_t size, resource_size_t align)
-{
-}
-
-/*
- * PCI BIOS setup, always defaults to SAL interface
- */
-char * __init
-pcibios_setup (char *str)
-{
- return str;
-}
-
-int
-pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- /*
- * I/O space cannot be accessed via normal processor loads and
- * stores on this platform.
- */
- if (mmap_state == pci_mmap_io)
- /*
- * XXX we could relax this for I/O spaces for which ACPI
- * indicates that the space is 1-to-1 mapped. But at the
- * moment, we don't support multiple PCI address spaces and
- * the legacy I/O space is not 1-to-1 mapped, so this is moot.
- */
- return -EINVAL;
-
- /*
- * Leave vm_pgoff as-is, the PCI space address is the physical
- * address on this platform.
- */
- if (write_combine && efi_range_is_wc(vma->vm_start,
- vma->vm_end - vma->vm_start))
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- else
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (is_initial_xendomain()) {
- unsigned long addr = vma->vm_pgoff << PAGE_SHIFT;
- size_t size = vma->vm_end - vma->vm_start;
- unsigned long offset = HYPERVISOR_ioremap(addr, size);
- if (IS_ERR_VALUE(offset))
- return offset;
- }
-
- if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * ia64_pci_get_legacy_mem - generic legacy mem routine
- * @bus: bus to get legacy memory base address for
- *
- * Find the base of legacy memory for @bus. This is typically the first
- * megabyte of bus address space for @bus or is simply 0 on platforms whose
- * chipsets support legacy I/O and memory routing. Returns the base address
- * or an error pointer if an error occurred.
- *
- * This is the ia64 generic version of this routine. Other platforms
- * are free to override it with a machine vector.
- */
-char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
-{
- return (char *)__IA64_UNCACHED_OFFSET;
-}
-
-/**
- * pci_mmap_legacy_page_range - map legacy memory space to userland
- * @bus: bus whose legacy space we're mapping
- * @vma: vma passed in by mmap
- *
- * Map legacy memory space for this device back to userspace using a machine
- * vector to get the base address.
- */
-int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
-{
- unsigned long size = vma->vm_end - vma->vm_start;
- pgprot_t prot;
- char *addr;
-
- /*
- * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt
- * for more details.
- */
- if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
- return -EINVAL;
- prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
- vma->vm_page_prot);
- if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot)))
- return -EINVAL;
-
- addr = pci_get_legacy_mem(bus);
- if (IS_ERR(addr))
- return PTR_ERR(addr);
-
- vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT;
- vma->vm_page_prot = prot;
-
- if (is_initial_xendomain()) {
- unsigned long addr = vma->vm_pgoff << PAGE_SHIFT;
- size_t size = vma->vm_end - vma->vm_start;
- unsigned long offset = HYPERVISOR_ioremap(addr, size);
- if (IS_ERR_VALUE(offset))
- return offset;
- }
-
- if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- size, vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * ia64_pci_legacy_read - read from legacy I/O space
- * @bus: bus to read
- * @port: legacy port value
- * @val: caller allocated storage for returned value
- * @size: number of bytes to read
- *
- * Simply reads @size bytes from @port and puts the result in @val.
- *
- * Again, this (and the write routine) are generic versions that can be
- * overridden by the platform. This is necessary on platforms that don't
- * support legacy I/O routing or that hard fail on legacy I/O timeouts.
- */
-int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
-{
- int ret = size;
-
- switch (size) {
- case 1:
- *val = inb(port);
- break;
- case 2:
- *val = inw(port);
- break;
- case 4:
- *val = inl(port);
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-/**
- * ia64_pci_legacy_write - perform a legacy I/O write
- * @bus: bus pointer
- * @port: port to write
- * @val: value to write
- * @size: number of bytes to write from @val
- *
- * Simply writes @size bytes of @val to @port.
- */
-int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
-{
- int ret = size;
-
- switch (size) {
- case 1:
- outb(val, port);
- break;
- case 2:
- outw(val, port);
- break;
- case 4:
- outl(val, port);
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-/**
- * pci_cacheline_size - determine cacheline size for PCI devices
- * @dev: void
- *
- * We want to use the line-size of the outer-most cache. We assume
- * that this line-size is the same for all CPUs.
- *
- * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
- *
- * RETURNS: An appropriate -ERRNO error value on eror, or zero for success.
- */
-static unsigned long
-pci_cacheline_size (void)
-{
- u64 levels, unique_caches;
- s64 status;
- pal_cache_config_info_t cci;
- static u8 cacheline_size;
-
- if (cacheline_size)
- return cacheline_size;
-
- status = ia64_pal_cache_summary(&levels, &unique_caches);
- if (status != 0) {
- printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
- __FUNCTION__, status);
- return SMP_CACHE_BYTES;
- }
-
- status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2,
- &cci);
- if (status != 0) {
- printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n",
- __FUNCTION__, status);
- return SMP_CACHE_BYTES;
- }
- cacheline_size = 1 << cci.pcci_line_size;
- return cacheline_size;
-}
-
-/**
- * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi()
- * @dev: the PCI device for which MWI is enabled
- *
- * For ia64, we can get the cacheline sizes from PAL.
- *
- * RETURNS: An appropriate -ERRNO error value on eror, or zero for success.
- */
-int
-pcibios_prep_mwi (struct pci_dev *dev)
-{
- unsigned long desired_linesize, current_linesize;
- int rc = 0;
- u8 pci_linesize;
-
- desired_linesize = pci_cacheline_size();
-
- pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize);
- current_linesize = 4 * pci_linesize;
- if (desired_linesize != current_linesize) {
- printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,",
- pci_name(dev), current_linesize);
- if (current_linesize > desired_linesize) {
- printk(" expected %lu bytes instead\n", desired_linesize);
- rc = -EINVAL;
- } else {
- printk(" correcting to %lu\n", desired_linesize);
- pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4);
- }
- }
- return rc;
-}
-
-int pci_vector_resources(int last, int nr_released)
-{
- int count = nr_released;
-
- count += (IA64_LAST_DEVICE_VECTOR - last);
-
- return count;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
deleted file mode 100644
index 6d19da28df..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for Xen components
-#
-
-obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \
- hypervisor.o util.o xencomm.o xcom_hcall.o xcom_mini.o \
- xcom_privcmd.o mem.o xen_dma.o
-
-obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
deleted file mode 100644
index dc5977886e..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Support routines for Xen hypercalls
- *
- * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/processor.h>
-#include <asm/asmmacro.h>
-
-GLOBAL_ENTRY(xen_get_psr)
- XEN_HYPER_GET_PSR
- br.ret.sptk.many rp
- ;;
-END(xen_get_psr)
-
-GLOBAL_ENTRY(xen_get_ivr)
- XEN_HYPER_GET_IVR
- br.ret.sptk.many rp
- ;;
-END(xen_get_ivr)
-
-GLOBAL_ENTRY(xen_get_tpr)
- XEN_HYPER_GET_TPR
- br.ret.sptk.many rp
- ;;
-END(xen_get_tpr)
-
-GLOBAL_ENTRY(xen_set_tpr)
- mov r8=r32
- XEN_HYPER_SET_TPR
- br.ret.sptk.many rp
- ;;
-END(xen_set_tpr)
-
-GLOBAL_ENTRY(xen_eoi)
- mov r8=r32
- XEN_HYPER_EOI
- br.ret.sptk.many rp
- ;;
-END(xen_eoi)
-
-GLOBAL_ENTRY(xen_thash)
- mov r8=r32
- XEN_HYPER_THASH
- br.ret.sptk.many rp
- ;;
-END(xen_thash)
-
-GLOBAL_ENTRY(xen_set_itm)
- mov r8=r32
- XEN_HYPER_SET_ITM
- br.ret.sptk.many rp
- ;;
-END(xen_set_itm)
-
-GLOBAL_ENTRY(xen_ptcga)
- mov r8=r32
- mov r9=r33
- XEN_HYPER_PTC_GA
- br.ret.sptk.many rp
- ;;
-END(xen_ptcga)
-
-GLOBAL_ENTRY(xen_get_rr)
- mov r8=r32
- XEN_HYPER_GET_RR
- br.ret.sptk.many rp
- ;;
-END(xen_get_rr)
-
-GLOBAL_ENTRY(xen_set_rr)
- mov r8=r32
- mov r9=r33
- XEN_HYPER_SET_RR
- br.ret.sptk.many rp
- ;;
-END(xen_set_rr)
-
-GLOBAL_ENTRY(xen_set_kr)
- mov r8=r32
- mov r9=r33
- XEN_HYPER_SET_KR
- br.ret.sptk.many rp
-END(xen_set_kr)
-
-GLOBAL_ENTRY(xen_fc)
- mov r8=r32
- XEN_HYPER_FC
- br.ret.sptk.many rp
-END(xen_fc)
-
-GLOBAL_ENTRY(xen_get_cpuid)
- mov r8=r32
- XEN_HYPER_GET_CPUID
- br.ret.sptk.many rp
-END(xen_get_cpuid)
-
-GLOBAL_ENTRY(xen_get_pmd)
- mov r8=r32
- XEN_HYPER_GET_PMD
- br.ret.sptk.many rp
-END(xen_get_pmd)
-
-#ifdef CONFIG_IA32_SUPPORT
-GLOBAL_ENTRY(xen_get_eflag)
- XEN_HYPER_GET_EFLAG
- br.ret.sptk.many rp
-END(xen_get_eflag)
-
-// some bits aren't set if pl!=0, see SDM vol1 3.1.8
-GLOBAL_ENTRY(xen_set_eflag)
- mov r8=r32
- XEN_HYPER_SET_EFLAG
- br.ret.sptk.many rp
-END(xen_set_eflag)
-#endif
-
-GLOBAL_ENTRY(xen_send_ipi)
- mov r14=r32
- mov r15=r33
- mov r2=0x400
- break 0x1000
- ;;
- br.ret.sptk.many rp
- ;;
-END(xen_send_ipi)
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-// Those are vdso specialized.
-// In fsys mode, call, ret can't be used.
-
- // see xen_ssm_i() in privop.h
- // r22 = &vcpu->vcpu_info->evtchn_upcall_mask
- // r23 = &vpsr.ic
- // r24 = &vcpu->vcpu_info->evtchn_upcall_pending
- // r25 = tmp
- // r31 = tmp
- // p11 = tmp
- // p14 = tmp
-#define XEN_SET_PSR_I \
- ld1 r31=[r22]; \
- ld1 r25=[r24]; \
- ;; \
- st1 [r22]=r0; \
- cmp.ne.unc p14,p0=r0,r31; \
- ;; \
-(p14) cmp.ne.unc p11,p0=r0,r25; \
- ;; \
-(p11) st1 [r22]=r20; \
-(p11) XEN_HYPER_SSM_I;
-
-GLOBAL_ENTRY(xen_ssm_i_0)
- XEN_SET_PSR_I
- brl.cond.sptk .vdso_ssm_i_0_ret
- ;;
-END(xen_ssm_i_0)
-
-GLOBAL_ENTRY(xen_ssm_i_1)
- XEN_SET_PSR_I
- brl.cond.sptk .vdso_ssm_i_1_ret
- ;;
-END(xen_ssm_i_1)
-
-GLOBAL_ENTRY(__hypercall)
- mov r2=r37
- break 0x1000
- br.ret.sptk.many b0
- ;;
-END(__hypercall)
-#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
deleted file mode 100644
index e895ef0d96..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
+++ /dev/null
@@ -1,1264 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/hypervisor.c
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- * VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-//#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/bootmem.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/efi.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/meminit.h>
-#include <asm/hypervisor.h>
-#include <asm/hypercall.h>
-#include <xen/interface/memory.h>
-#include <xen/xencons.h>
-#include <xen/balloon.h>
-
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
-
-start_info_t *xen_start_info;
-EXPORT_SYMBOL(xen_start_info);
-
-int running_on_xen;
-EXPORT_SYMBOL(running_on_xen);
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
-static int p2m_expose_init(void);
-#else
-#define p2m_expose_init() (-ENOSYS)
-#define p2m_expose_resume() ((void)0)
-#endif
-
-EXPORT_SYMBOL(__hypercall);
-
-/*
- * Platform setup hook.
- *
- * Calls dig_setup() when the platform name is "xen".  When running on Xen
- * as the initial domain it additionally passes the VGA console description
- * embedded in the start info (if large enough) to dom0_init_screen_info()
- * and then zeroes the domU console mfn/evtchn fields.
- */
-void __init
-xen_setup(char **cmdline_p)
-{
-	extern void dig_setup(char **cmdline_p);
-
-	if (ia64_platform_is("xen"))
-		dig_setup(cmdline_p);
-
-	/* The remainder only applies to the initial domain on Xen. */
-	if (!(is_running_on_xen() && is_initial_xendomain()))
-		return;
-
-	if (xen_start_info->console.dom0.info_size >=
-	    sizeof(struct dom0_vga_console_info)) {
-		char *base = (char *)xen_start_info;
-		const struct dom0_vga_console_info *vga =
-			(struct dom0_vga_console_info *)
-			(base + xen_start_info->console.dom0.info_off);
-
-		dom0_init_screen_info(vga);
-	}
-
-	xen_start_info->console.domU.mfn = 0;
-	xen_start_info->console.domU.evtchn = 0;
-}
-
-/* Per-CPU init hook: delegates entirely to xen_smp_intr_init(). */
-void __cpuinit
-xen_cpu_init(void)
-{
-	extern void xen_smp_intr_init(void);
-
-	xen_smp_intr_init();
-}
-
-//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
-// move those to lib/contiguous_bitmap?
-//XXX discontigmem/sparsemem
-
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-/* Following logic is stolen from create_mem_map_table() for virtual memmap */
-/*
- * efi_memmap_walk() callback that backs the part of the virtually mapped
- * contiguous_bitmap which describes the memory range [start, end).
- *
- * start, end: kernel virtual addresses of the range (converted with __pa()).
- * arg:        unused.
- * Returns 0 always.
- *
- * The bitmap holds one bit per page, so the byte offset of a page's bit is
- * (pfn >> 3).  The end offset includes 2 * BITS_PER_LONG extra bits, the same
- * slack contiguous_bitmap_init() adds to the total size.
- */
-static int
-create_contiguous_bitmap(u64 start, u64 end, void *arg)
-{
- unsigned long address, start_page, end_page;
- unsigned long bitmap_start, bitmap_end;
- unsigned char *bitmap;
- int node;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /* Virtual byte addresses inside the bitmap covering [start, end). */
- bitmap_start = (unsigned long)contiguous_bitmap +
- ((__pa(start) >> PAGE_SHIFT) >> 3);
- bitmap_end = (unsigned long)contiguous_bitmap +
- (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3);
-
- /* Widen to whole pages; backing memory comes from the range's node. */
- start_page = bitmap_start & PAGE_MASK;
- end_page = PAGE_ALIGN(bitmap_end);
- node = paddr_to_nid(__pa(start));
-
- bitmap = alloc_bootmem_pages_node(NODE_DATA(node),
- end_page - start_page);
- BUG_ON(!bitmap);
- memset(bitmap, 0, end_page - start_page);
-
- /*
- * Install one kernel PTE per bitmap page, allocating any missing
- * intermediate page-table levels on the way down.
- */
- for (address = start_page; address < end_page; address += PAGE_SIZE) {
- pgd = pgd_offset_k(address);
- if (pgd_none(*pgd))
- pgd_populate(&init_mm, pgd,
- alloc_bootmem_pages_node(NODE_DATA(node),
- PAGE_SIZE));
- pud = pud_offset(pgd, address);
-
- if (pud_none(*pud))
- pud_populate(&init_mm, pud,
- alloc_bootmem_pages_node(NODE_DATA(node),
- PAGE_SIZE));
- pmd = pmd_offset(pud, address);
-
- if (pmd_none(*pmd))
- pmd_populate_kernel(&init_mm, pmd,
- alloc_bootmem_pages_node
- (NODE_DATA(node), PAGE_SIZE));
- pte = pte_offset_kernel(pmd, address);
-
- /*
- * An already-present PTE is left untouched; in that case the
- * corresponding page of the fresh allocation is simply not used.
- */
- if (pte_none(*pte))
- set_pte(pte,
- pfn_pte(__pa(bitmap + (address - start_page))
- >> PAGE_SHIFT, PAGE_KERNEL));
- }
- return 0;
-}
-#endif
-
-/*
- * Allocate "size" bytes of page-aligned boot memory for contiguous_bitmap
- * and clear it.  Allocation failure is fatal (BUG_ON).
- */
-static void
-__contiguous_bitmap_init(unsigned long size)
-{
-	void *mem = alloc_bootmem_pages(size);
-
-	contiguous_bitmap = mem;
-	BUG_ON(mem == NULL);
-	memset(mem, 0, size);
-}
-
-/*
- * Set up contiguous_bitmap for page frame numbers below end_pfn.
- *
- * The size in bytes is one bit per page plus 2 * BITS_PER_LONG bits of
- * slack.  Without CONFIG_VIRTUAL_MEM_MAP, or when the largest hole in the
- * EFI memory map is below LARGE_GAP, the bitmap is a flat bootmem
- * allocation.  Otherwise virtual space is carved off the top of the vmalloc
- * area and populated per memory descriptor by create_contiguous_bitmap().
- */
-void
-contiguous_bitmap_init(unsigned long end_pfn)
-{
-	unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
-#ifndef CONFIG_VIRTUAL_MEM_MAP
-	__contiguous_bitmap_init(size);
-#else
-	unsigned long max_gap = 0;
-
-	efi_memmap_walk(find_largest_hole, (u64*)&max_gap);
-	if (max_gap >= LARGE_GAP) {
-		/* Sparse memory: map the bitmap virtually. */
-		vmalloc_end -= PAGE_ALIGN(size);
-		contiguous_bitmap = (unsigned long*)vmalloc_end;
-		efi_memmap_walk(create_contiguous_bitmap, NULL);
-	} else {
-		__contiguous_bitmap_init(size);
-	}
-#endif
-}
-
-#if 0
-int
-contiguous_bitmap_test(void* p)
-{
- return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
-}
-#endif
-
-/*
- * Set the bits for pages [first_page, first_page + nr_pages) in
- * contiguous_bitmap.
- *
- * curr_idx/start_off locate the first bit (word index / bit within word);
- * end_idx/end_off locate the bit just past the range.
- */
-static void contiguous_bitmap_set(
- unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- /* Range lies within one word: bits start_off .. end_off-1. */
- contiguous_bitmap[curr_idx] |=
- ((1UL<<end_off)-1) & -(1UL<<start_off);
- } else {
- /* First word: bits start_off and above. */
- contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
- /* Whole words in between. */
- while ( ++curr_idx < end_idx )
- contiguous_bitmap[curr_idx] = ~0UL;
- /*
- * Last word: bits below end_off.  When end_off is 0 this ORs in
- * zero but still touches word end_idx.
- */
- contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
- }
-}
-
-/*
- * Clear the bits for pages [first_page, first_page + nr_pages) in
- * contiguous_bitmap.  Mirror image of contiguous_bitmap_set(): the same
- * index/offset computation with the masks inverted.
- */
-static void contiguous_bitmap_clear(
- unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- /* Keep bits at/above end_off and bits below start_off. */
- contiguous_bitmap[curr_idx] &=
- -(1UL<<end_off) | ((1UL<<start_off)-1);
- } else {
- /* First word: keep only bits below start_off. */
- contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
- /* Whole words in between. */
- while ( ++curr_idx != end_idx )
- contiguous_bitmap[curr_idx] = 0;
- /* Last word: keep only bits at/above end_off. */
- contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
- }
-}
-
-// __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
-// are based on i386 xen_create_contiguous_region(),
-// xen_destroy_contiguous_region()
-
-/* Protected by balloon_lock. */
-#define MAX_CONTIG_ORDER 7
-static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
-
-/*
- * Ensure multi-page extents are contiguous in machine memory.
- *
- * vstart:       kernel virtual address of the first page of the extent.
- * order:        log2 of the number of pages; must not exceed
- *               MAX_CONTIG_ORDER because discontig_frames[] holds only
- *               1 << MAX_CONTIG_ORDER entries.
- * address_bits: passed to the hypervisor as the address-width restriction
- *               for the replacement extent.
- *
- * Returns 0 when the 2^order single-page frames were exchanged for one
- * order-sized extent (the pages are then marked in contiguous_bitmap), and
- * -ENOMEM otherwise.
- */
-int
-__xen_create_contiguous_region(unsigned long vstart,
- unsigned int order, unsigned int address_bits)
-{
- unsigned long error = 0;
- unsigned long gphys = __pa(vstart);
- unsigned long start_gpfn = gphys >> PAGE_SHIFT;
- unsigned long num_gpfn = 1 << order;
- unsigned long i;
- unsigned long flags;
-
- unsigned long *in_frames = discontig_frames, out_frame;
- int success;
- /* Trade num_gpfn order-0 extents for a single extent of "order". */
- struct xen_memory_exchange exchange = {
- .in = {
- .nr_extents = num_gpfn,
- .extent_order = 0,
- .domid = DOMID_SELF
- },
- .out = {
- .nr_extents = 1,
- .extent_order = order,
- .address_bits = address_bits,
- .domid = DOMID_SELF
- },
- .nr_exchanged = 0
- };
-
- if (unlikely(order > MAX_CONTIG_ORDER))
- return -ENOMEM;
-
- set_xen_guest_handle(exchange.in.extent_start, in_frames);
- set_xen_guest_handle(exchange.out.extent_start, &out_frame);
-
- scrub_pages(vstart, num_gpfn);
-
- /* discontig_frames[] is shared, hence the balloon lock. */
- balloon_lock(flags);
-
- /* Get a new contiguous memory extent. */
- for (i = 0; i < num_gpfn; i++) {
- in_frames[i] = start_gpfn + i;
- }
- out_frame = start_gpfn;
- error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
- success = (exchange.nr_exchanged == num_gpfn);
- /* The exchange must be all-or-nothing and consistent with "error". */
- BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
- BUG_ON(success && (error != 0));
- if (unlikely(error == -ENOSYS)) {
- /* Compatibility when XENMEM_exchange is unsupported. */
- error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &exchange.in);
- BUG_ON(error != num_gpfn);
- error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &exchange.out);
- if (error != 1) {
- /* Couldn't get special memory: fall back to normal. */
- for (i = 0; i < num_gpfn; i++) {
- in_frames[i] = start_gpfn + i;
- }
- error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &exchange.in);
- BUG_ON(error != num_gpfn);
- success = 0;
- } else
- success = 1;
- }
- if (success)
- contiguous_bitmap_set(start_gpfn, num_gpfn);
-#if 0
- /* Disabled debugging aid: dumps the mfns if they are not consecutive. */
- if (success) {
- unsigned long mfn;
- unsigned long mfn_prev = ~0UL;
- for (i = 0; i < num_gpfn; i++) {
- mfn = pfn_to_mfn_for_dma(start_gpfn + i);
- if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
- xprintk("\n");
- xprintk("%s:%d order %d "
- "start 0x%lx bus 0x%lx "
- "machine 0x%lx\n",
- __func__, __LINE__, order,
- vstart, virt_to_bus((void*)vstart),
- phys_to_machine_for_dma(gphys));
- xprintk("mfn: ");
- for (i = 0; i < num_gpfn; i++) {
- mfn = pfn_to_mfn_for_dma(
- start_gpfn + i);
- xprintk("0x%lx ", mfn);
- }
- xprintk("\n");
- break;
- }
- mfn_prev = mfn;
- }
- }
-#endif
- balloon_unlock(flags);
- return success? 0: -ENOMEM;
-}
-
-/*
- * Undo __xen_create_contiguous_region(): exchange one order-sized extent
- * back for 2^order single-page extents and clear the pages in
- * contiguous_bitmap.
- *
- * Returns silently, doing nothing, when the first page is not marked in
- * contiguous_bitmap or when order exceeds MAX_CONTIG_ORDER.
- */
-void
-__xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
-{
- unsigned long flags;
- unsigned long error = 0;
- unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
- unsigned long num_gpfn = 1UL << order;
- unsigned long i;
-
- unsigned long *out_frames = discontig_frames, in_frame;
- int success;
- /* Trade a single extent of "order" for num_gpfn order-0 extents. */
- struct xen_memory_exchange exchange = {
- .in = {
- .nr_extents = 1,
- .extent_order = order,
- .domid = DOMID_SELF
- },
- .out = {
- .nr_extents = num_gpfn,
- .extent_order = 0,
- .address_bits = 0,
- .domid = DOMID_SELF
- },
- .nr_exchanged = 0
- };
-
-
- /* NOTE(review): this test is made before balloon_lock() is taken. */
- if (!test_bit(start_gpfn, contiguous_bitmap))
- return;
-
- if (unlikely(order > MAX_CONTIG_ORDER))
- return;
-
- set_xen_guest_handle(exchange.in.extent_start, &in_frame);
- set_xen_guest_handle(exchange.out.extent_start, out_frames);
-
- scrub_pages(vstart, num_gpfn);
-
- /* discontig_frames[] is shared, hence the balloon lock. */
- balloon_lock(flags);
-
- contiguous_bitmap_clear(start_gpfn, num_gpfn);
-
- /* Do the exchange for non-contiguous MFNs. */
- in_frame = start_gpfn;
- for (i = 0; i < num_gpfn; i++) {
- out_frames[i] = start_gpfn + i;
- }
- error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
- success = (exchange.nr_exchanged == 1);
- /* The exchange must be all-or-nothing and consistent with "error". */
- BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
- BUG_ON(success && (error != 0));
- if (unlikely(error == -ENOSYS)) {
- /* Compatibility when XENMEM_exchange is unsupported. */
- error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &exchange.in);
- BUG_ON(error != 1);
-
- error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &exchange.out);
- BUG_ON(error != num_gpfn);
- }
- balloon_unlock(flags);
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// grant table hack
-// cmd: GNTTABOP_xxx
-
-#include <linux/mm.h>
-#include <xen/interface/xen.h>
-#include <xen/gnttab.h>
-
-/*
- * Sanity-check one map request before it is handed to the hypervisor.
- * Only plain GNTMAP_host_map requests pass; every other flag combination
- * ends in BUG(), preceded by a diagnostic where one is defined.
- */
-static void
-gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
-{
-	const uint32_t flags = uop->flags;
-
-	if (!(flags & GNTMAP_host_map)) {
-		/* XXX device_map: not yet. actually this flag is not used. */
-		if (flags & GNTMAP_device_map)
-			xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
-		BUG();
-	} else {
-		if (flags & GNTMAP_application_map) {
-			xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags);
-			BUG();
-		}
-		if (flags & GNTMAP_contains_pte) {
-			xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags);
-			BUG();
-		}
-	}
-}
-
-/*
- * Grant-table hypercall wrapper.  For GNTTABOP_map_grant_ref every one of
- * the "count" requests in "uop" is validated first; the call is then
- * forwarded to xencomm_mini_hypercall_grant_table_op() and its result
- * returned.
- */
-int
-HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
-{
-	if (cmd == GNTTABOP_map_grant_ref) {
-		struct gnttab_map_grant_ref *op = uop;
-		unsigned int left;
-
-		for (left = count; left != 0; left--, op++)
-			gnttab_map_grant_ref_pre(op);
-	}
-	return xencomm_mini_hypercall_grant_table_op(cmd, uop, count);
-}
-EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
-
-///////////////////////////////////////////////////////////////////////////
-// foreign mapping
-#include <linux/efi.h>
-#include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()
-
-static unsigned long privcmd_resource_min = 0;
-// Xen/ia64 currently can handle pseudo physical address bits up to
-// (PAGE_SHIFT * 3)
-static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
-static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
-
-/* Exclusive end address of an EFI memory descriptor. */
-static unsigned long
-md_end_addr(const efi_memory_desc_t *md)
-{
-	u64 bytes = md->num_pages << EFI_PAGE_SHIFT;
-
-	return md->phys_addr + bytes;
-}
-
-#define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
-/*
- * Non-zero when [start, end) is a non-empty range strictly larger than
- * XEN_IA64_PRIVCMD_LEAST_GAP_SIZE.
- */
-static int
-xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
-{
-	if (start >= end)
-		return 0;
-	return (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE;
-}
-
-/*
- * Choose the pseudo-physical address window [privcmd_resource_min,
- * privcmd_resource_max] from which privcmd mappings allocate their
- * resources, then start p2m exposure.
- *
- * First choice is the space above the last EFI memory descriptor.  If that
- * is not larger than XEN_IA64_PRIVCMD_LEAST_GAP_SIZE, the largest hole
- * between consecutive descriptors below privcmd_resource_max is used
- * instead, with a warning if even that is too small.
- *
- * Returns -1 when not running on Xen, 0 otherwise.
- */
-static int __init
-xen_ia64_privcmd_init(void)
-{
- void *efi_map_start, *efi_map_end, *p;
- u64 efi_desc_size;
- efi_memory_desc_t *md;
- unsigned long tmp_min;
- unsigned long tmp_max;
- unsigned long gap_size;
- unsigned long prev_end;
-
- if (!is_running_on_xen())
- return -1;
-
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
- // at first check the used highest address
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- // nothing
- }
- // NOTE(review): with an empty memory map this points before
- // efi_map_start -- presumably the map is never empty; confirm.
- md = p - efi_desc_size;
- privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
- if (xen_ia64_privcmd_check_size(privcmd_resource_min,
- privcmd_resource_max)) {
- goto out;
- }
-
- // the used highest address is too large. try to find the largest gap.
- tmp_min = privcmd_resource_max;
- tmp_max = 0;
- gap_size = 0;
- prev_end = 0;
- // stop one descriptor early: each step also reads the next descriptor
- for (p = efi_map_start;
- p < efi_map_end - efi_desc_size;
- p += efi_desc_size) {
- unsigned long end;
- efi_memory_desc_t* next;
- unsigned long next_start;
-
- md = p;
- end = md_end_addr(md);
- if (end > privcmd_resource_max) {
- break;
- }
- if (end < prev_end) {
- // work around.
- // Xen may pass incompletely sorted memory
- // descriptors like
- // [x, x + length]
- // [x, x]
- // this order should be reversed.
- continue;
- }
- next = p + efi_desc_size;
- next_start = next->phys_addr;
- if (next_start > privcmd_resource_max) {
- next_start = privcmd_resource_max;
- }
- // remember the widest hole seen so far
- if (end < next_start && gap_size < (next_start - end)) {
- tmp_min = end;
- tmp_max = next_start;
- gap_size = tmp_max - tmp_min;
- }
- prev_end = end;
- }
-
- // prefer a granule-aligned start if the hole is still big enough
- privcmd_resource_min = GRANULEROUNDUP(tmp_min);
- if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
- privcmd_resource_max = tmp_max;
- goto out;
- }
-
- // otherwise take the hole unaligned
- privcmd_resource_min = tmp_min;
- privcmd_resource_max = tmp_max;
- if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
- privcmd_resource_max)) {
- // Any large enough gap isn't found.
- // go ahead anyway with the warning hoping that large region
- // won't be requested.
- printk(KERN_WARNING "xen privcmd: large enough region for privcmd mmap is not found.\n");
- }
-
-out:
- printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
- privcmd_resource_min, privcmd_resource_max,
- (privcmd_resource_max - privcmd_resource_min) >> 20);
- BUG_ON(privcmd_resource_min >= privcmd_resource_max);
-
- // XXX this should be somewhere appropriate
- (void)p2m_expose_init();
-
- return 0;
-}
-late_initcall(xen_ia64_privcmd_init);
-
-/* Per-page bookkeeping for one slot of a privcmd range. */
-struct xen_ia64_privcmd_entry {
- /* number of vmas currently holding this page mapped */
- atomic_t map_count;
-#define INVALID_GPFN (~0UL)
- /* pseudo-physical frame backing the slot, INVALID_GPFN when unmapped */
- unsigned long gpfn;
-};
-
-/*
- * One privcmd mmap()ed region.  Allocated by privcmd_mmap() with the
- * entries[] array sized for the whole mapping, shared by every vma that
- * covers part of it and freed when ref_count drops to zero.
- */
-struct xen_ia64_privcmd_range {
- atomic_t ref_count;
- unsigned long pgoff; // in PAGE_SIZE
- /* pseudo-physical address window reserved in iomem_resource */
- struct resource* res;
-
- unsigned long num_entries;
- struct xen_ia64_privcmd_entry entries[0];
-};
-
-/* Per-vma state, stored in vma->vm_private_data. */
-struct xen_ia64_privcmd_vma {
- /* set to 1 by privcmd_enforce_singleshot_mapping() */
- int is_privcmd_mmapped;
- struct xen_ia64_privcmd_range* range;
-
- /* the slice of range->entries[] this vma covers */
- unsigned long num_entries;
- struct xen_ia64_privcmd_entry* entries;
-};
-
-/* Put a slot into its initial state: no backing frame, no mappers. */
-static void
-xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
-{
-	entry->gpfn = INVALID_GPFN;
-	atomic_set(&entry->map_count, 0);
-}
-
-/*
- * Map one foreign frame into slot i of a privcmd range.
- *
- * vma, addr:     target vma and page-aligned user virtual address.
- * privcmd_range: range owning the slot.
- * i:             slot index within privcmd_range->entries[].
- * gmfn:          frame of domain "domid" to map.
- * prot:          requested protection; only used to choose between a
- *                read-only and a writable physmap assignment.
- *
- * Returns 0 on success, -EINVAL for an unaligned address or invalid gmfn,
- * -EBUSY when the slot is already in use, or the error reported by the
- * physmap hypercall or by remap_pfn_range().
- */
-static int
-xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
-			    unsigned long addr,
-			    struct xen_ia64_privcmd_range* privcmd_range,
-			    int i,
-			    unsigned long gmfn,
-			    pgprot_t prot,
-			    domid_t domid)
-{
-	int error = 0;
-	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
-	unsigned long gpfn;
-	unsigned long flags;
-
-	if ((addr & ~PAGE_MASK) != 0 || gmfn == INVALID_MFN) {
-		error = -EINVAL;
-		goto out;
-	}
-
-	if (entry->gpfn != INVALID_GPFN) {
-		error = -EBUSY;
-		goto out;
-	}
-	gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
-
-	flags = ASSIGN_writable;
-	if (pgprot_val(prot) == PROT_READ) {
-		flags = ASSIGN_readonly;
-	}
-	error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn, flags, domid);
-	if (error != 0) {
-		goto out;
-	}
-
-	prot = vma->vm_page_prot;
-	error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
-	if (error != 0) {
-		/*
-		 * Undo the physmap assignment but keep the remap error: the
-		 * zap result must not overwrite it, otherwise a failed
-		 * mapping would be reported to the caller as success.
-		 */
-		if (HYPERVISOR_zap_physmap(gpfn, 0)) {
-			BUG();//XXX
-		}
-	} else {
-		atomic_inc(&entry->map_count);
-		entry->gpfn = gpfn;
-	}
-
-out:
-	return error;
-}
-
-/*
- * Release the frame behind slot i: zap its physmap entry (failure is
- * fatal) and mark the slot unused.
- */
-static void
-xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
-			      int i)
-{
-	struct xen_ia64_privcmd_entry* slot = &privcmd_range->entries[i];
-
-	if (HYPERVISOR_zap_physmap(slot->gpfn, 0)) {
-		BUG();//XXX
-	}
-	slot->gpfn = INVALID_GPFN;
-}
-
-/*
- * Account for one more vma referencing slot i.  Unused slots are left
- * alone, but must not carry a stale map count.
- */
-static void
-xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
-			    int i)
-{
-	struct xen_ia64_privcmd_entry* slot = &privcmd_range->entries[i];
-
-	if (slot->gpfn == INVALID_GPFN) {
-		BUG_ON(atomic_read(&slot->map_count) != 0);
-		return;
-	}
-	atomic_inc(&slot->map_count);
-}
-
-/*
- * Drop one vma reference on slot i and unmap the frame once the last
- * reference is gone.  Unused slots are ignored.
- */
-static void
-xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
-			     int i)
-{
-	struct xen_ia64_privcmd_entry* slot = &privcmd_range->entries[i];
-
-	if (slot->gpfn == INVALID_GPFN)
-		return;
-	if (!atomic_dec_and_test(&slot->map_count))
-		return;
-	xen_ia64_privcmd_entry_munmap(privcmd_range, i);
-}
-
-static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
-static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
-
-struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
- .open = &xen_ia64_privcmd_vma_open,
- .close = &xen_ia64_privcmd_vma_close,
-};
-
-/*
- * Attach privcmd_vma to vma as a view onto the part of privcmd_range that
- * the vma covers, and take a map reference on every slot in that part.
- *
- * The vma must lie entirely inside the range; anything else is a BUG.
- */
-static void
-__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
-			    struct xen_ia64_privcmd_vma* privcmd_vma,
-			    struct xen_ia64_privcmd_range* privcmd_range)
-{
-	unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
-	unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-	unsigned long i;
-
-	/*
-	 * entry_offset is unsigned, so a "< 0" test can never fire; compare
-	 * the page offsets directly and phrase the upper bound so that it
-	 * cannot be defeated by wrap-around of entry_offset + num_entries.
-	 */
-	BUG_ON(vma->vm_pgoff < privcmd_range->pgoff);
-	BUG_ON(entry_offset > privcmd_range->num_entries ||
-	       num_entries > privcmd_range->num_entries - entry_offset);
-
-	privcmd_vma->range = privcmd_range;
-	privcmd_vma->num_entries = num_entries;
-	privcmd_vma->entries = &privcmd_range->entries[entry_offset];
-	for (i = 0; i < privcmd_vma->num_entries; i++) {
-		xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
-	}
-
-	vma->vm_private_data = privcmd_vma;
-	vma->vm_ops = &xen_ia64_privcmd_vm_ops;
-}
-
-/*
- * vm_ops->open handler.  On entry vma->vm_private_data still points at the
- * state of the vma this one was derived from; give the new vma its own
- * state object that shares the same range.
- */
-static void
-xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
-{
-	struct xen_ia64_privcmd_vma* parent = vma->vm_private_data;
-	struct xen_ia64_privcmd_range* range = parent->range;
-	struct xen_ia64_privcmd_vma* fresh;
-
-	atomic_inc(&range->ref_count);
-	// vm_op->open() can't fail.
-	fresh = kmalloc(sizeof(*fresh), GFP_KERNEL | __GFP_NOFAIL);
-	// inherit the single-shot marker from the original vma
-	fresh->is_privcmd_mmapped = parent->is_privcmd_mmapped;
-
-	__xen_ia64_privcmd_vma_open(vma, fresh, range);
-}
-
-/*
- * vm_ops->close handler.  Drops this vma's reference on each slot it
- * covers, frees the per-vma state and, when this was the last vma of the
- * range, releases the resource and the range itself.
- */
-static void
-xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
-{
-	struct xen_ia64_privcmd_vma* pvma = vma->vm_private_data;
-	struct xen_ia64_privcmd_range* range = pvma->range;
-	const unsigned long first = vma->vm_pgoff - range->pgoff;
-	const unsigned long count = pvma->num_entries;
-	unsigned long n;
-
-	for (n = 0; n < count; n++)
-		xen_ia64_privcmd_entry_close(range, first + n);
-	vma->vm_private_data = NULL;
-	kfree(pvma);
-
-	if (!atomic_dec_and_test(&range->ref_count))
-		return;
-
-	/* Last user gone: every slot must already be unmapped. */
-	for (n = 0; n < range->num_entries; n++) {
-		BUG_ON(atomic_read(&range->entries[n].map_count) != 0);
-		BUG_ON(range->entries[n].gpfn != INVALID_GPFN);
-	}
-	release_resource(range->res);
-	kfree(range->res);
-	vfree(range);
-}
-
-/*
- * Atomically mark the vma as used for a privcmd mapping.
- * Returns non-zero only for the first caller on a given vma.
- */
-int
-privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
-{
-	struct xen_ia64_privcmd_vma* pvma = vma->vm_private_data;
-	int previous = xchg(&pvma->is_privcmd_mmapped, 1);
-
-	return previous == 0;
-}
-
-/*
- * mmap handler for privcmd.
- *
- * Allocates the range bookkeeping (one slot per page of the vma), the
- * per-vma state and a window of pseudo-physical address space taken from
- * [privcmd_resource_min, privcmd_resource_max], then attaches them to the
- * vma.  No frames are mapped here.
- *
- * Returns 0 on success, -ENOMEM when an allocation fails, or the error
- * from allocate_resource().
- */
-int
-privcmd_mmap(struct file * file, struct vm_area_struct * vma)
-{
-	const unsigned long size = vma->vm_end - vma->vm_start;
-	const unsigned long num_entries = size >> PAGE_SHIFT;
-	struct xen_ia64_privcmd_range* range;
-	struct xen_ia64_privcmd_vma* pvma = NULL;
-	struct resource* res = NULL;
-	unsigned long n;
-	int error = -ENOMEM;
-
-	BUG_ON(!is_running_on_xen());
-	BUG_ON(file->private_data != NULL);
-
-	range = vmalloc(sizeof(*range) +
-			sizeof(range->entries[0]) * num_entries);
-	if (range == NULL)
-		goto out_enomem0;
-
-	pvma = kmalloc(sizeof(*pvma), GFP_KERNEL);
-	if (pvma == NULL)
-		goto out_enomem1;
-	pvma->is_privcmd_mmapped = 0;
-
-	res = kzalloc(sizeof(*res), GFP_KERNEL);
-	if (res == NULL)
-		goto out_enomem1;
-	res->name = "Xen privcmd mmap";
-
-	error = allocate_resource(&iomem_resource, res, size,
-				  privcmd_resource_min, privcmd_resource_max,
-				  privcmd_resource_align, NULL, NULL);
-	if (error)
-		goto out_enomem1;
-	range->res = res;
-
-	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
-
-	atomic_set(&range->ref_count, 1);
-	range->pgoff = vma->vm_pgoff;
-	range->num_entries = num_entries;
-	for (n = 0; n < range->num_entries; n++)
-		xen_ia64_privcmd_init_entry(&range->entries[n]);
-
-	__xen_ia64_privcmd_vma_open(vma, pvma, range);
-	return 0;
-
-out_enomem1:
-	/* kfree(NULL) is a no-op, so partially completed setups are fine. */
-	kfree(res);
-	kfree(pvma);
-out_enomem0:
-	vfree(range);
-	return error;
-}
-
-/*
- * Map "size" bytes worth of consecutive frames of domain "domid",
- * starting at gmfn, into the privcmd vma at "address".
- *
- * Pages are mapped one at a time; on the first failure the loop stops and
- * that error is returned (pages mapped so far stay mapped).  Returns 0
- * when every page was mapped.
- */
-int
-direct_remap_pfn_range(struct vm_area_struct *vma,
-		       unsigned long address,	// process virtual address
-		       unsigned long gmfn,	// gmfn, gmfn + 1, ... gmfn + size/PAGE_SIZE
-		       unsigned long size,
-		       pgprot_t prot,
-		       domid_t domid)		// target domain
-{
-	struct xen_ia64_privcmd_vma* pvma = vma->vm_private_data;
-	struct xen_ia64_privcmd_range* range = pvma->range;
-	unsigned long slot;
-	unsigned long done;
-	int error = 0;
-
-	BUG_ON(!is_running_on_xen());
-
-#if 0
-	if (prot != vm->vm_page_prot) {
-		return -EINVAL;
-	}
-#endif
-
-	/* slot index = vma's offset into the range + page offset in the vma */
-	slot = (vma->vm_pgoff - range->pgoff) +
-		((address - vma->vm_start) >> PAGE_SHIFT);
-	for (done = 0; done < size; done += PAGE_SIZE, slot++, gmfn++) {
-		error = xen_ia64_privcmd_entry_mmap(vma, (address + done) & PAGE_MASK, range, slot, gmfn, prot, domid);
-		if (error != 0)
-			break;
-	}
-
-	return error;
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// expose p2m table
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
-#include <linux/cpu.h>
-#include <asm/uaccess.h>
-
-int p2m_initialized __read_mostly = 0;
-
-unsigned long p2m_min_low_pfn __read_mostly;
-unsigned long p2m_max_low_pfn __read_mostly;
-unsigned long p2m_convert_min_pfn __read_mostly;
-unsigned long p2m_convert_max_pfn __read_mostly;
-
-static struct resource p2m_resource = {
- .name = "Xen p2m table",
- .flags = IORESOURCE_MEM,
-};
-static unsigned long p2m_assign_start_pfn __read_mostly;
-static unsigned long p2m_assign_end_pfn __read_mostly;
-static unsigned long p2m_expose_size; // this is referenced only when resume.
- // so __read_mostly doesn't make sense.
-volatile const pte_t* p2m_pte __read_mostly;
-
-#define GRNULE_PFN PTRS_PER_PTE
-static unsigned long p2m_granule_pfn __read_mostly = GRNULE_PFN;
-
-#define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
-#define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1))
-
-#define P2M_PREFIX "Xen p2m: "
-
-static int xen_ia64_p2m_expose __read_mostly = 1;
-module_param(xen_ia64_p2m_expose, int, 0);
-MODULE_PARM_DESC(xen_ia64_p2m_expose,
- "enable/disable xen/ia64 p2m exposure optimization\n");
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
-module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
-MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
- "use/unuse dtr to map exposed p2m table\n");
-
-static const int p2m_page_shifts[] = {
- _PAGE_SIZE_4K,
- _PAGE_SIZE_8K,
- _PAGE_SIZE_16K,
- _PAGE_SIZE_64K,
- _PAGE_SIZE_256K,
- _PAGE_SIZE_1M,
- _PAGE_SIZE_4M,
- _PAGE_SIZE_16M,
- _PAGE_SIZE_64M,
- _PAGE_SIZE_256M,
-};
-
-struct p2m_itr_arg {
- unsigned long vaddr;
- unsigned long pteval;
- unsigned long log_page_size;
-};
-static struct p2m_itr_arg p2m_itr_arg __read_mostly;
-
-// This should be in asm-ia64/kregs.h
-#define IA64_TR_P2M_TABLE 3
-
-/*
- * Insert the translation described by a struct p2m_itr_arg into
- * translation register IA64_TR_P2M_TABLE on the calling CPU and serialize.
- * The void * signature lets it be used with smp_call_function*().
- */
-static void
-p2m_itr(void* info)
-{
-	struct p2m_itr_arg* tr = info;
-
-	ia64_itr(0x2, IA64_TR_P2M_TABLE,
-		 tr->vaddr, tr->pteval, tr->log_page_size);
-	ia64_srlz_d();
-}
-
-/*
- * CPU hotplug notifier: when a CPU comes online and the p2m table is
- * exposed through a translation register, install that translation on the
- * new CPU.  All other events are ignored.  Always returns 0.
- */
-static int
-p2m_expose_dtr_call(struct notifier_block *self,
-		    unsigned long event, void* ptr)
-{
-	unsigned int cpu = (unsigned int)(long)ptr;
-	unsigned int me;
-
-	if (event != CPU_ONLINE)
-		return 0;
-	if (!p2m_initialized || !xen_ia64_p2m_expose_use_dtr)
-		return 0;
-
-	me = get_cpu();
-	if (cpu != me)
-		smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg,
-					 1, 1);
-	else
-		p2m_itr(&p2m_itr_arg);
-	put_cpu();
-	return 0;
-}
-
-static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
- .notifier_call = p2m_expose_dtr_call,
- .next = NULL,
- .priority = 0
-};
-#endif
-
-/*
- * Ask the hypervisor to expose the physical-to-machine table and record
- * where it was placed.
- *
- * Computes the pfn window to convert (rounded to the granule), reserves a
- * matching window of pseudo-physical address space from the privcmd
- * region, issues HYPERVISOR_expose_p2m() and, when translation registers
- * are in use, installs the mapping on every CPU.  p2m_initialized is set
- * last, after a memory barrier.
- *
- * Returns 0 on success or if already initialized, -ENOSYS when the feature
- * is disabled by module parameter, otherwise the error of the failing step.
- *
- * NOTE(review): once register_cpu_notifier() has succeeded, none of the
- * later "goto out" failure paths unregisters the notifier.
- */
-static int
-p2m_expose_init(void)
-{
- unsigned long num_pfn;
- unsigned long p2m_size = 0;
- unsigned long align = ~0UL;
- int error = 0;
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
- int i;
- unsigned long page_size;
- unsigned long log_page_size = 0;
-#endif
-
- if (!xen_ia64_p2m_expose)
- return -ENOSYS;
- if (p2m_initialized)
- return 0;
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
- error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
- if (error < 0)
- return error;
-#endif
-
- lock_cpu_hotplug();
- /* re-check now that the hotplug lock is held */
- if (p2m_initialized)
- goto out;
-
-#ifdef CONFIG_DISCONTIGMEM
- p2m_min_low_pfn = min_low_pfn;
- p2m_max_low_pfn = max_low_pfn;
-#else
- p2m_min_low_pfn = 0;
- p2m_max_low_pfn = max_pfn;
-#endif
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
- if (xen_ia64_p2m_expose_use_dtr) {
- unsigned long granule_pfn = 0;
- p2m_size = p2m_max_low_pfn - p2m_min_low_pfn;
- /*
- * Walk the candidate page sizes from small to large and stop at
- * the first one for which the rounded table size equals the page
- * size exactly.
- */
- for (i = 0;
- i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
- i++) {
- log_page_size = p2m_page_shifts[i];
- page_size = 1UL << log_page_size;
- if (page_size < p2m_size)
- continue;
-
- granule_pfn = max(page_size >> PAGE_SHIFT,
- p2m_granule_pfn);
- p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
- granule_pfn);
- p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
- granule_pfn);
- num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
- p2m_expose_size = num_pfn << PAGE_SHIFT;
- p2m_size = num_pfn / PTRS_PER_PTE;
- p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT);
- if (p2m_size == page_size)
- break;
- }
- if (p2m_size != page_size) {
- printk(KERN_ERR "p2m_size != page_size\n");
- error = -EINVAL;
- goto out;
- }
- align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
- } else
-#endif
- {
- /* ROUNDDOWN/ROUNDUP require a power-of-two granule */
- BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
- p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
- p2m_granule_pfn);
- p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn);
- num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
- p2m_expose_size = num_pfn << PAGE_SHIFT;
- p2m_size = num_pfn / PTRS_PER_PTE;
- p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
- align = max(privcmd_resource_align,
- p2m_granule_pfn << PAGE_SHIFT);
- }
-
- // use privcmd region
- error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
- privcmd_resource_min, privcmd_resource_max,
- align, NULL, NULL);
- if (error) {
- printk(KERN_ERR P2M_PREFIX
- "can't allocate region for p2m exposure "
- "[0x%016lx, 0x%016lx) 0x%016lx\n",
- p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
- goto out;
- }
-
- p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
- p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
-
- error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
- p2m_assign_start_pfn,
- p2m_expose_size, p2m_granule_pfn);
- if (error) {
- printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
- error);
- printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
- "expose_size 0x%016lx granule 0x%016lx\n",
- p2m_convert_min_pfn, p2m_assign_start_pfn,
- p2m_expose_size, p2m_granule_pfn);;
- release_resource(&p2m_resource);
- goto out;
- }
- p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
- if (xen_ia64_p2m_expose_use_dtr) {
- p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
- << PAGE_SHIFT);
- p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
- PAGE_KERNEL));
- p2m_itr_arg.log_page_size = log_page_size;
- smp_mb();
- /* install on all other CPUs, then on this one */
- smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
- p2m_itr(&p2m_itr_arg);
- }
-#endif
- /* publish the table state before raising the initialized flag */
- smp_mb();
- p2m_initialized = 1;
- printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
- p2m_convert_min_pfn << PAGE_SHIFT,
- p2m_convert_max_pfn << PAGE_SHIFT);
- printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
- p2m_assign_start_pfn << PAGE_SHIFT,
- p2m_assign_end_pfn << PAGE_SHIFT,
- p2m_size / 1024);
-out:
- unlock_cpu_hotplug();
- return error;
-}
-
-#ifdef notyet
-/*
- * Tear-down counterpart of p2m_expose_init(); compiled out unless "notyet"
- * is defined.  Unregisters the hotplug notifier (DTR builds only) and
- * releases the p2m resource.  Calling it while not initialized is a BUG.
- */
-void
-p2m_expose_cleanup(void)
-{
- BUG_ON(!p2m_initialized);
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
- unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
-#endif
- release_resource(&p2m_resource);
-}
-#endif
-
-/*
- * Re-establish p2m exposure after resume by repeating the expose
- * hypercall with the parameters recorded by p2m_expose_init().
- *
- * Does nothing when the feature is disabled or was never initialized.  On
- * failure the table is marked unusable (p2m_initialized = 0) and, if the
- * table was mapped through a translation register, that translation is
- * purged on this CPU.
- */
-static void
-p2m_expose_resume(void)
-{
-	int error;
-
-	if (!xen_ia64_p2m_expose || !p2m_initialized)
-		return;
-
-	/*
-	 * We can't call {lock, unlock}_cpu_hotplug() because
-	 * they require process context.
-	 * We don't need them because we're the only one cpu and
-	 * interrupts are masked when resume.
-	 */
-	error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
-				      p2m_assign_start_pfn,
-				      p2m_expose_size, p2m_granule_pfn);
-	if (error) {
-		printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
-		       error);
-		printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
-		       "expose_size 0x%016lx granule 0x%016lx\n",
-		       p2m_convert_min_pfn, p2m_assign_start_pfn,
-		       p2m_expose_size, p2m_granule_pfn);
-		p2m_initialized = 0;
-		smp_mb();
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-		/*
-		 * p2m_itr_arg only exists in DTR builds and is only filled
-		 * in when DTR use is enabled, so the purge must be guarded
-		 * by both the config option and the runtime switch.
-		 */
-		if (xen_ia64_p2m_expose_use_dtr)
-			ia64_ptr(0x2, p2m_itr_arg.vaddr,
-				 p2m_itr_arg.log_page_size);
-#endif
-
-		/*
-		 * We can't call those clean up functions because they
-		 * require process context.
-		 */
-#if 0
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-		if (xen_ia64_p2m_expose_use_dtr)
-			unregister_cpu_notifier(
-				&p2m_expose_dtr_hotplug_notifier);
-#endif
-		release_resource(&p2m_resource);
-#endif
-	}
-}
-
-//XXX inlinize?
-/*
- * Translate a pseudo-physical frame number to a machine frame number by
- * reading the exposed p2m table.
- *
- * Returns INVALID_MFN when the table is not initialized, gpfn is outside
- * [p2m_min_low_pfn, p2m_max_low_pfn], the table entry cannot be read, is
- * not present, or itself holds the invalid frame.
- *
- * NOTE(review): the upper bound test uses ">" so gpfn == p2m_max_low_pfn
- * is accepted -- confirm whether that bound is meant to be inclusive.
- */
-//XXX inlinize?
-unsigned long
-p2m_phystomach(unsigned long gpfn)
-{
-	volatile const pte_t* slot;
-	unsigned long pteval;
-
-	if (!p2m_initialized)
-		return INVALID_MFN;
-	if (gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
-	    /* || !pfn_valid(gpfn) */)
-		return INVALID_MFN;
-
-	slot = p2m_pte + (gpfn - p2m_convert_min_pfn);
-
-	/* The table page may be unmapped, so read it fault-tolerantly. */
-	if (unlikely(__get_user(pteval, (unsigned long __user *)slot) != 0))
-		return INVALID_MFN;
-	if (unlikely(!pte_present(__pte(pteval))))
-		return INVALID_MFN;
-	if (unlikely(pte_pfn(__pte(pteval)) == (INVALID_MFN >> PAGE_SHIFT)))
-		return INVALID_MFN;
-
-	return (pteval & _PFN_MASK) >> PAGE_SHIFT;
-}
-
-EXPORT_SYMBOL_GPL(p2m_initialized);
-EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
-EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
-EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
-EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
-EXPORT_SYMBOL_GPL(p2m_pte);
-EXPORT_SYMBOL_GPL(p2m_phystomach);
-#endif
-
-///////////////////////////////////////////////////////////////////////////
-// for xenoprof
-
-/*
- * Reserve PAGE_ALIGN(size) bytes of pseudo-physical address space from the
- * privcmd window, aligned to IA64_GRANULE_SIZE.
- *
- * Returns the new resource (to be handed back through
- * xen_ia64_release_resource() or xen_ia64_unmap_resource()), or an
- * ERR_PTR() carrying -ENOMEM or the allocate_resource() error.
- */
-struct resource*
-xen_ia64_allocate_resource(unsigned long size)
-{
-	struct resource* res;
-	int error;
-
-	/*
-	 * Zero the structure, as privcmd_mmap() does: only name and flags
-	 * are set here, and the resource tree must not see garbage in the
-	 * remaining link fields.
-	 */
-	res = kzalloc(sizeof(*res), GFP_KERNEL);
-	if (res == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	res->name = "Xen";
-	res->flags = IORESOURCE_MEM;
-	error = allocate_resource(&iomem_resource, res, PAGE_ALIGN(size),
-				  privcmd_resource_min, privcmd_resource_max,
-				  IA64_GRANULE_SIZE, NULL, NULL);
-	if (error) {
-		kfree(res);
-		return ERR_PTR(error);
-	}
-	return res;
-}
-EXPORT_SYMBOL_GPL(xen_ia64_allocate_resource);
-
-/*
- * Give back a resource obtained from xen_ia64_allocate_resource():
- * remove it from the resource tree, then free the structure.
- */
-void
-xen_ia64_release_resource(struct resource* res)
-{
-	release_resource(res);
-	kfree(res);
-}
-EXPORT_SYMBOL_GPL(xen_ia64_release_resource);
-
-/*
- * Zap the physmap entry of every page covered by res, logging (but not
- * aborting on) individual failures, then release the resource.
- */
-void
-xen_ia64_unmap_resource(struct resource* res)
-{
-	unsigned long gpfn = res->start >> PAGE_SHIFT;
-	/* res->end is the last byte of the region (inclusive), hence + 1. */
-	unsigned long nr_pages = (res->end - res->start + 1) >> PAGE_SHIFT;
-	unsigned long i;
-
-	for (i = 0; i < nr_pages; i++) {
-		int error = HYPERVISOR_zap_physmap(gpfn + i, 0);
-		if (error)
-			printk(KERN_ERR
-			       "%s:%d zap_phsymap failed %d gpfn %lx\n",
-			       __func__, __LINE__, error, gpfn + i);
-	}
-	xen_ia64_release_resource(res);
-}
-EXPORT_SYMBOL_GPL(xen_ia64_unmap_resource);
-
-///////////////////////////////////////////////////////////////////////////
-// suspend/resume
-/*
- * Post-suspend hook.  When the suspend actually happened (was not
- * cancelled), re-establish p2m exposure.
- */
-void
-xen_post_suspend(int suspend_cancelled)
-{
-	if (!suspend_cancelled) {
-		p2m_expose_resume();
-		/* add more if necessary */
-	}
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c b/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c
deleted file mode 100644
index 4ad588a7c2..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define MACHVEC_PLATFORM_NAME xen
-#define MACHVEC_PLATFORM_HEADER <asm/machvec_xen.h>
-#include <asm/machvec_init.h>
-
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/mem.c b/linux-2.6-xen-sparse/arch/ia64/xen/mem.c
deleted file mode 100644
index dc93097c70..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/mem.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Originally from linux/drivers/char/mem.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Added devfs support.
- * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
- * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
- */
-/*
- * taken from
- * linux/drivers/char/mem.c and linux-2.6-xen-sparse/drivers/xen/char/mem.c.
- * adjusted for IA64 and made transparent.
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- * VA Linux Systems Japan K.K.
- */
-
-#include <linux/mm.h>
-#include <linux/efi.h>
-
-/*
- * Architectures vary in how they handle caching for addresses
- * outside of main memory.
- *
- */
-static inline int uncached_access(struct file *file, unsigned long addr)
-{
- /*
- * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
- */
- return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-}
-
-int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
-{
- unsigned long addr = vma->vm_pgoff << PAGE_SHIFT;
- size_t size = vma->vm_end - vma->vm_start;
-
-
-#if 0
- /*
- *XXX FIXME: linux-2.6.16.29, linux-2.6.17
- * valid_mmap_phys_addr_range() in linux/arch/ia64/kernel/efi.c
- * fails checks.
- * linux-2.6.18.1's returns always 1.
- * Its comments says
- *
- * MMIO regions are often missing from the EFI memory map.
- * We must allow mmap of them for programs like X, so we
- * currently can't do any useful validation.
- */
- if (!valid_mmap_phys_addr_range(addr, &size))
- return -EINVAL;
- if (size < vma->vm_end - vma->vm_start)
- return -EINVAL;
-#endif
-
- if (is_running_on_xen()) {
- unsigned long offset = HYPERVISOR_ioremap(addr, size);
- if (IS_ERR_VALUE(offset))
- return offset;
- }
-
- if (uncached_access(file, vma->vm_pgoff << PAGE_SHIFT))
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
- if (remap_pfn_range(vma,
- vma->vm_start,
- vma->vm_pgoff,
- size,
- vma->vm_page_prot))
- return -EAGAIN;
- return 0;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c b/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c
deleted file mode 100644
index fbc4664bbd..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c
+++ /dev/null
@@ -1,882 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is for IA-64 and EM64T platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
- * Copyright (C) 2000, 2003 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
- * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
- * unnecessary i-cache flushing.
- * 04/07/.. ak Better overflow handling. Assorted fixes.
- * 05/09/10 linville Add support for syncing ranges, support syncing for
- * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
- */
-
-#include <linux/cache.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ctype.h>
-
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <asm/scatterlist.h>
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-
-#ifdef CONFIG_XEN
-/*
- * What DMA mask should Xen use to remap the bounce buffer pool? Most
- * reports seem to indicate 30 bits is sufficient, except maybe for old
- * sound cards that we probably don't care about anyway. If we need to,
- * we could put in some smarts to try to lower, but hopefully it's not
- * necessary.
- */
-#define DMA_BITS (30)
-#endif
-
-#define OFFSET(val,align) ((unsigned long) \
- ( (val) & ( (align) - 1)))
-
-#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
-#define SG_ENT_PHYS_ADDRESS(SG) virt_to_bus(SG_ENT_VIRT_ADDRESS(SG))
-
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2. What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define IO_TLB_SEGSIZE 128
-
-/*
- * log of the size of each IO TLB slab. The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
-#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-
-/*
- * Minimum IO TLB size to bother booting with. Systems with mainly
- * 64bit capable cards will only lightly use the swiotlb. If we can't
- * allocate a contiguous 1MB, we're probably in trouble anyway.
- */
-#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
- SYNC_FOR_CPU = 0,
- SYNC_FOR_DEVICE = 1,
-};
-
-int swiotlb_force;
-
-/*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static char *io_tlb_start, *io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
- * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-void *io_tlb_overflow_buffer;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
-
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-static unsigned char **io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
-static int __init
-setup_io_tlb_npages(char *str)
-{
- if (isdigit(*str)) {
- io_tlb_nslabs = simple_strtoul(str, &str, 0);
- /* avoid tail segment of size < IO_TLB_SEGSIZE */
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
- if (*str == ',')
- ++str;
- if (!strcmp(str, "force"))
- swiotlb_force = 1;
- return 1;
-}
-__setup("swiotlb=", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
-
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void
-swiotlb_init_with_default_size (size_t default_size)
-{
- unsigned long i;
-
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
-
-#ifdef CONFIG_XEN
- if (is_running_on_xen())
- io_tlb_nslabs = roundup_pow_of_two(io_tlb_nslabs);
-#endif
- /*
- * Get IO TLB memory from the low pages
- */
- io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
- if (!io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer");
- io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
-
-#ifdef CONFIG_XEN
- for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) {
- if (xen_create_contiguous_region(
- (unsigned long)io_tlb_start +
- (i << IO_TLB_SHIFT),
- get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
- DMA_BITS))
- panic("Failed to setup Xen contiguous region");
- }
-#endif
-
- /*
- * Allocate and initialize the free list array. This array is used
- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
- */
- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
- for (i = 0; i < io_tlb_nslabs; i++)
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
- io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
-
- /*
- * Get the overflow emergency buffer
- */
- io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
-#ifdef CONFIG_XEN
- if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer,
- get_order(io_tlb_overflow), DMA_BITS))
- panic("Failed to setup Xen contiguous region for overflow");
-#endif
- printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
- virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
-}
-
-void
-swiotlb_init (void)
-{
- swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */
-}
-
-/*
- * Systems with larger DMA zones (those that don't support ISA) can
- * initialize the swiotlb later using the slab allocator if needed.
- * This should be just like above, but with some error catching.
- */
-int
-swiotlb_late_init_with_default_size (size_t default_size)
-{
- unsigned long i, req_nslabs = io_tlb_nslabs;
- unsigned int order;
-
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
-
-#ifdef CONFIG_XEN
- if (is_running_on_xen())
- io_tlb_nslabs = roundup_pow_of_two(io_tlb_nslabs);
-#endif
- /*
- * Get IO TLB memory from the low pages
- */
- order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
- io_tlb_nslabs = SLABS_PER_PAGE << order;
-
- while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
- order);
- if (io_tlb_start)
- break;
- order--;
- }
-
- if (!io_tlb_start)
- goto cleanup1;
-
- if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
- printk(KERN_WARNING "Warning: only able to allocate %ld MB "
- "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
- io_tlb_nslabs = SLABS_PER_PAGE << order;
- }
- io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
- memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
-
-#ifdef CONFIG_XEN
- for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) {
- if (xen_create_contiguous_region(
- (unsigned long)io_tlb_start +
- (i << IO_TLB_SHIFT),
- get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
- DMA_BITS))
- panic("Failed to setup Xen contiguous region");
- }
-#endif
- /*
- * Allocate and initialize the free list array. This array is used
- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
- */
- io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(int)));
- if (!io_tlb_list)
- goto cleanup2;
-
- for (i = 0; i < io_tlb_nslabs; i++)
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
- io_tlb_index = 0;
-
- io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(char *)));
- if (!io_tlb_orig_addr)
- goto cleanup3;
-
- memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
-
- /*
- * Get the overflow emergency buffer
- */
- io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
- get_order(io_tlb_overflow));
- if (!io_tlb_overflow_buffer)
- goto cleanup4;
-
-#ifdef CONFIG_XEN
- if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer,
- get_order(io_tlb_overflow), DMA_BITS))
- panic("Failed to setup Xen contiguous region for overflow");
-#endif
- printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
- "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
- virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
-
- return 0;
-
-cleanup4:
- free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
- sizeof(char *)));
- io_tlb_orig_addr = NULL;
-cleanup3:
- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
- sizeof(int)));
- io_tlb_list = NULL;
- io_tlb_end = NULL;
-cleanup2:
- free_pages((unsigned long)io_tlb_start, order);
- io_tlb_start = NULL;
-cleanup1:
- io_tlb_nslabs = req_nslabs;
- return -ENOMEM;
-}
-
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
-{
- dma_addr_t mask = 0xffffffff;
- /* If the device has a mask, use it, otherwise default to 32 bits */
- if (hwdev && hwdev->dma_mask)
- mask = *hwdev->dma_mask;
- return (addr & ~mask) != 0;
-}
-
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-map_single(struct device *hwdev, char *buffer, size_t size, int dir)
-{
- unsigned long flags;
- char *dma_addr;
- unsigned int nslots, stride, index, wrap;
- int i;
-
- /*
- * For mappings greater than a page, we limit the stride (and
- * hence alignment) to a page size.
- */
- nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- if (size > PAGE_SIZE)
- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
- else
- stride = 1;
-
- BUG_ON(!nslots);
-
- /*
- * Find suitable number of IO TLB entries size that will fit this
- * request and allocate a buffer from that IO TLB pool.
- */
- spin_lock_irqsave(&io_tlb_lock, flags);
- {
- wrap = index = ALIGN(io_tlb_index, stride);
-
- if (index >= io_tlb_nslabs)
- wrap = index = 0;
-
- do {
- /*
- * If we find a slot that indicates we have 'nslots'
- * number of contiguous buffers, we allocate the
- * buffers from that slot and mark the entries as '0'
- * indicating unavailable.
- */
- if (io_tlb_list[index] >= nslots) {
- int count = 0;
-
- for (i = index; i < (int) (index + nslots); i++)
- io_tlb_list[i] = 0;
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
- dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
- /*
- * Update the indices to avoid searching in
- * the next round.
- */
- io_tlb_index = ((index + nslots) < io_tlb_nslabs
- ? (index + nslots) : 0);
-
- goto found;
- }
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- } while (index != wrap);
-
- spin_unlock_irqrestore(&io_tlb_lock, flags);
- return NULL;
- }
- found:
- spin_unlock_irqrestore(&io_tlb_lock, flags);
-
- /*
- * Save away the mapping from the original address to the DMA address.
- * This is needed when we sync the memory. Then we sync the buffer if
- * needed.
- */
- io_tlb_orig_addr[index] = buffer;
- if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- memcpy(dma_addr, buffer, size);
-
- return dma_addr;
-}
-
-/*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
- */
-static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
-{
- unsigned long flags;
- int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
-
- /*
- * First, sync the memory before unmapping the entry
- */
- if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- /*
- * bounce... copy the data back into the original buffer * and
- * delete the bounce buffer.
- */
- memcpy(buffer, dma_addr, size);
-
- /*
- * Return the buffer to the free list by setting the corresponding
- * entries to indicate the number of contigous entries available.
- * While returning the entries to the free list, we merge the entries
- * with slots below and above the pool being returned.
- */
- spin_lock_irqsave(&io_tlb_lock, flags);
- {
- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
- io_tlb_list[index + nslots] : 0);
- /*
- * Step 1: return the slots to the free list, merging the
- * slots with superceeding slots
- */
- for (i = index + nslots - 1; i >= index; i--)
- io_tlb_list[i] = ++count;
- /*
- * Step 2: merge the returned slots with the preceding slots,
- * if available (non zero)
- */
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
- }
- spin_unlock_irqrestore(&io_tlb_lock, flags);
-}
-
-static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size,
- int dir, int target)
-{
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
-
- switch (target) {
- case SYNC_FOR_CPU:
- if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(buffer, dma_addr, size);
- else
- BUG_ON(dir != DMA_TO_DEVICE);
- break;
- case SYNC_FOR_DEVICE:
- if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(dma_addr, buffer, size);
- else
- BUG_ON(dir != DMA_FROM_DEVICE);
- break;
- default:
- BUG();
- }
-}
-
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
-{
- unsigned long dev_addr;
- void *ret;
- int order = get_order(size);
-
- /*
- * XXX fix me: the DMA API should pass us an explicit DMA mask
- * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
- * bit range instead of a 16MB one).
- */
- flags |= GFP_DMA;
-
- ret = (void *)__get_free_pages(flags, order);
-#ifdef CONFIG_XEN
- if (ret && is_running_on_xen()) {
- if (xen_create_contiguous_region((unsigned long)ret, order,
- fls64(hwdev->coherent_dma_mask))) {
- free_pages((unsigned long)ret, order);
- ret = NULL;
- } else {
- /*
- * Short circuit the rest, xen_create_contiguous_region
- * should fail if it didn't give us an address within
- * the mask requested.
- */
- memset(ret, 0, size);
- *dma_handle = virt_to_bus(ret);
- return ret;
- }
- }
-#endif
- if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
- /*
- * The allocated memory isn't reachable by the device.
- * Fall back on swiotlb_map_single().
- */
- free_pages((unsigned long) ret, order);
- ret = NULL;
- }
- if (!ret) {
- /*
- * We are either out of memory or the device can't DMA
- * to GFP_DMA memory; fall back on
- * swiotlb_map_single(), which will grab memory from
- * the lowest available address range.
- */
- dma_addr_t handle;
- handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
- if (swiotlb_dma_mapping_error(handle))
- return NULL;
-
- ret = bus_to_virt(handle);
- }
-
- memset(ret, 0, size);
- dev_addr = virt_to_bus(ret);
-
- /* Confirm address can be DMA'd by device */
- if (address_needs_mapping(hwdev, dev_addr)) {
- printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
- (unsigned long long)*hwdev->dma_mask, dev_addr);
- panic("swiotlb_alloc_coherent: allocated memory is out of "
- "range for device");
- }
- *dma_handle = dev_addr;
- return ret;
-}
-
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- dma_addr_t dma_handle)
-{
- if (!(vaddr >= (void *)io_tlb_start
- && vaddr < (void *)io_tlb_end)) {
-#ifdef CONFIG_XEN
- xen_destroy_contiguous_region((unsigned long)vaddr,
- get_order(size));
-#endif
- free_pages((unsigned long) vaddr, get_order(size));
- } else
- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
- swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
-{
- /*
- * Ran out of IOMMU space for this operation. This is very bad.
- * Unfortunately the drivers cannot handle this operation properly.
- * unless they check for dma_mapping_error (most don't)
- * When the mapping is small enough return a static buffer to limit
- * the damage, or panic when the transfer is too big.
- */
- printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at "
- "device %s\n", size, dev ? dev->bus_id : "?");
-
- if (size > io_tlb_overflow && do_panic) {
- if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
- panic("DMA: Memory would be corrupted\n");
- if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- panic("DMA: Random memory would be DMAed\n");
- }
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode. The
- * physical address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t
-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
-{
- unsigned long dev_addr = virt_to_bus(ptr);
- void *map;
-
- BUG_ON(dir == DMA_NONE);
- /*
- * If the pointer passed in happens to be in the device's DMA window,
- * we can safely return the device addr and not worry about bounce
- * buffering it.
- */
- if (!range_straddles_page_boundary(ptr, size) &&
- !address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
- return dev_addr;
-
- /*
- * Oh well, have to allocate and map a bounce buffer.
- */
- map = map_single(hwdev, ptr, size, dir);
- if (!map) {
- swiotlb_full(hwdev, size, dir, 1);
- map = io_tlb_overflow_buffer;
- }
-
- dev_addr = virt_to_bus(map);
-
- /*
- * Ensure that the address returned is DMA'ble
- */
- if (address_needs_mapping(hwdev, dev_addr))
- panic("map_single: bounce buffer is not DMA'ble");
-
- return dev_addr;
-}
-
-/*
- * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
- * flush them when they get mapped into an executable vm-area.
- */
-static void
-mark_clean(void *addr, size_t size)
-{
- unsigned long pg_addr, end;
-
-#ifdef CONFIG_XEN
- /* XXX: Bad things happen when starting domUs if this is enabled. */
- if (is_running_on_xen())
- return;
-#endif
-
- pg_addr = PAGE_ALIGN((unsigned long) addr);
- end = (unsigned long) addr + size;
- while (pg_addr + PAGE_SIZE <= end) {
- struct page *page = virt_to_page(pg_addr);
- set_bit(PG_arch_1, &page->flags);
- pg_addr += PAGE_SIZE;
- }
-}
-
-/*
- * Unmap a single streaming mode DMA translation. The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call. All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-void
-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
- int dir)
-{
- char *dma_addr = bus_to_virt(dev_addr);
-
- BUG_ON(dir == DMA_NONE);
- if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
- unmap_single(hwdev, dma_addr, size, dir);
- else if (dir == DMA_FROM_DEVICE)
- mark_clean(dma_addr, size);
-}
-
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the dma mapping, you must
- * call this function before doing so. At the next point you give the dma
- * address back to the card, you must first perform a
- * swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
-static inline void
-swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir, int target)
-{
- char *dma_addr = bus_to_virt(dev_addr);
-
- BUG_ON(dir == DMA_NONE);
- if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
- sync_single(hwdev, dma_addr, size, dir, target);
- else if (dir == DMA_FROM_DEVICE)
- mark_clean(dma_addr, size);
-}
-
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
-{
- swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
-{
- swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
-}
-
-/*
- * Same as above, but for a sub-range of the mapping.
- */
-static inline void
-swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size,
- int dir, int target)
-{
- char *dma_addr = bus_to_virt(dev_addr) + offset;
-
- BUG_ON(dir == DMA_NONE);
- if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
- sync_single(hwdev, dma_addr, size, dir, target);
- else if (dir == DMA_FROM_DEVICE)
- mark_clean(dma_addr, size);
-}
-
-void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size, int dir)
-{
- swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
- SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size, int dir)
-{
- swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
- SYNC_FOR_DEVICE);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
- * interface. Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length. They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
- * same here.
- */
-int
-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
- int dir)
-{
- void *addr;
- unsigned long dev_addr;
- int i;
-
- BUG_ON(dir == DMA_NONE);
-
- for (i = 0; i < nelems; i++, sg++) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- dev_addr = virt_to_bus(addr);
- if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
- void *map = map_single(hwdev, addr, sg->length, dir);
- sg->dma_address = virt_to_bus(map);
- if (!map) {
- /* Don't panic here, we expect map_sg users
- to do proper error handling. */
- swiotlb_full(hwdev, sg->length, dir, 0);
- swiotlb_unmap_sg(hwdev, sg - i, i, dir);
- sg[0].dma_length = 0;
- return 0;
- }
- } else
- sg->dma_address = dev_addr;
- sg->dma_length = sg->length;
- }
- return nelems;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations. Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
- */
-void
-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
- int dir)
-{
- int i;
-
- BUG_ON(dir == DMA_NONE);
-
- for (i = 0; i < nelems; i++, sg++)
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
- unmap_single(hwdev, (void *) bus_to_virt(sg->dma_address), sg->dma_length, dir);
- else if (dir == DMA_FROM_DEVICE)
- mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
-static inline void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg,
- int nelems, int dir, int target)
-{
- int i;
-
- BUG_ON(dir == DMA_NONE);
-
- for (i = 0; i < nelems; i++, sg++)
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
- sync_single(hwdev, (void *) sg->dma_address,
- sg->dma_length, dir, target);
-}
-
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
- int nelems, int dir)
-{
- swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
- int nelems, int dir)
-{
- swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
-}
-
-int
-swiotlb_dma_mapping_error(dma_addr_t dma_addr)
-{
- return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
-}
-
-/*
- * Return whether the given device DMA address mask can be supported
- * properly. For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported (struct device *hwdev, u64 mask)
-{
- return (virt_to_bus(io_tlb_end) - 1) <= mask;
-}
-
-EXPORT_SYMBOL(swiotlb_init);
-EXPORT_SYMBOL(swiotlb_map_single);
-EXPORT_SYMBOL(swiotlb_unmap_single);
-EXPORT_SYMBOL(swiotlb_map_sg);
-EXPORT_SYMBOL(swiotlb_unmap_sg);
-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_single_for_device);
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
-EXPORT_SYMBOL(swiotlb_dma_mapping_error);
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-EXPORT_SYMBOL(swiotlb_free_coherent);
-EXPORT_SYMBOL(swiotlb_dma_supported);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/util.c b/linux-2.6-xen-sparse/arch/ia64/xen/util.c
deleted file mode 100644
index 387a1c3368..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/util.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/util.c
- * This file is the ia64 counterpart of drivers/xen/util.c
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- * VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <asm/uaccess.h>
-#include <xen/driver_util.h>
-#include <xen/interface/memory.h>
-#include <asm/hypercall.h>
-
-struct vm_struct *alloc_vm_area(unsigned long size)
-{
- int order;
- unsigned long virt;
- unsigned long nr_pages;
- struct vm_struct* area;
-
- order = get_order(size);
- virt = __get_free_pages(GFP_KERNEL, order);
- if (virt == 0) {
- goto err0;
- }
- nr_pages = 1 << order;
- scrub_pages(virt, nr_pages);
-
- area = kmalloc(sizeof(*area), GFP_KERNEL);
- if (area == NULL) {
- goto err1;
- }
-
- area->flags = VM_IOREMAP;//XXX
- area->addr = (void*)virt;
- area->size = size;
- area->pages = NULL; //XXX
- area->nr_pages = nr_pages;
- area->phys_addr = 0; /* xenbus_map_ring_valloc uses this field! */
-
- return area;
-
-err1:
- free_pages(virt, order);
-err0:
- return NULL;
-
-}
-EXPORT_SYMBOL_GPL(alloc_vm_area);
-
-void free_vm_area(struct vm_struct *area)
-{
- unsigned int order = get_order(area->size);
- unsigned long i;
- unsigned long phys_addr = __pa(area->addr);
-
- // This area is used for foreign page mappping.
- // So underlying machine page may not be assigned.
- for (i = 0; i < (1 << order); i++) {
- unsigned long ret;
- unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .address_bits = 0,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &gpfn);
- ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &reservation);
- BUG_ON(ret != 1);
- }
- free_pages((unsigned long)area->addr, order);
- kfree(area);
-}
-EXPORT_SYMBOL_GPL(free_vm_area);
-
-/*
- * Local variables:
- * c-file-style: "linux"
- * indent-tabs-mode: t
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
deleted file mode 100644
index 4c90b5b01e..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Tristan Gingold <tristan.gingold@bull.net>
- */
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/module.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/platform.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/xencomm.h>
-#include <xen/interface/version.h>
-#include <xen/interface/sched.h>
-#include <xen/interface/event_channel.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/grant_table.h>
-#include <xen/interface/callback.h>
-#include <xen/interface/acm_ops.h>
-#include <xen/interface/hvm/params.h>
-#include <xen/interface/xenoprof.h>
-#include <xen/interface/vcpu.h>
-#include <asm/hypercall.h>
-#include <asm/page.h>
-#include <asm/uaccess.h>
-#include <asm/xen/xencomm.h>
-#include <asm/perfmon.h>
-
-/* Xencomm notes:
- * This file defines hypercalls to be used by xencomm. The hypercalls simply
- * create inline descriptors for pointers and then call the raw arch hypercall
- * xencomm_arch_hypercall_XXX
- *
- * If the arch wants to directly use these hypercalls, simply define macros
- * in asm/hypercall.h, eg:
- * #define HYPERVISOR_sched_op xencomm_hypercall_sched_op
- *
- * The arch may also define HYPERVISOR_xxx as a function and do more operations
- * before/after doing the hypercall.
- *
- * Note: because only inline descriptors are created these functions must only
- * be called with in-kernel memory parameters.
- */
-
-/*
- * Console I/O hypercall: wrap @str in an inline xencomm descriptor and
- * forward @cmd, @count and the descriptor to the arch-level hypercall.
- * Returns whatever xencomm_arch_hypercall_console_io() returns.
- */
-int
-xencomm_hypercall_console_io(int cmd, int count, char *str)
-{
- return xencomm_arch_hypercall_console_io
- (cmd, count, xencomm_create_inline(str));
-}
-
-/*
- * Event-channel hypercall: wrap @op in an inline xencomm descriptor and
- * forward it with @cmd.  No validation of @cmd is done here.
- */
-int
-xencomm_hypercall_event_channel_op(int cmd, void *op)
-{
- return xencomm_arch_hypercall_event_channel_op
- (cmd, xencomm_create_inline(op));
-}
-
-/*
- * Version hypercall.  Only the XENVER_* commands listed in the switch are
- * forwarded (with @arg wrapped in an inline descriptor); any other @cmd is
- * logged with printk() and rejected with -ENOSYS.
- */
-int
-xencomm_hypercall_xen_version(int cmd, void *arg)
-{
- switch (cmd) {
- case XENVER_version:
- case XENVER_extraversion:
- case XENVER_compile_info:
- case XENVER_capabilities:
- case XENVER_changeset:
- case XENVER_platform_parameters:
- case XENVER_pagesize:
- case XENVER_get_features:
- break;
- default:
- printk("%s: unknown version cmd %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- return xencomm_arch_hypercall_xen_version
- (cmd, xencomm_create_inline(arg));
-}
-
-/*
- * Physdev hypercall: wrap @op in an inline xencomm descriptor and forward
- * it with @cmd.  No validation of @cmd is done here.
- */
-int
-xencomm_hypercall_physdev_op(int cmd, void *op)
-{
- return xencomm_arch_hypercall_physdev_op
- (cmd, xencomm_create_inline(op));
-}
-
-/*
- * Prepare a grant-table operation argument for the hypervisor.
- *
- * For GNTTABOP_setup_table the nested frame_list handle inside @op is
- * replaced in place by its inline descriptor; the other known commands need
- * no nested conversion.  An unknown @cmd is logged and hits BUG().
- *
- * Returns the inline descriptor for @op itself.  @count is not used by this
- * function.
- */
-static void *
-xencommize_grant_table_op(unsigned int cmd, void *op, unsigned int count)
-{
- switch (cmd) {
- case GNTTABOP_map_grant_ref:
- case GNTTABOP_unmap_grant_ref:
- break;
- case GNTTABOP_setup_table:
- {
- struct gnttab_setup_table *setup = op;
- struct xencomm_handle *frame_list;
-
- frame_list = xencomm_create_inline
- (xen_guest_handle(setup->frame_list));
-
- /* Overwrites the caller's handle; it is not restored here. */
- set_xen_guest_handle(setup->frame_list, (void *)frame_list);
- break;
- }
- case GNTTABOP_dump_table:
- case GNTTABOP_transfer:
- case GNTTABOP_copy:
- break;
- default:
- printk("%s: unknown grant table op %d\n", __func__, cmd);
- BUG();
- }
-
- return xencomm_create_inline(op);
-}
-
-/*
- * Grant-table hypercall: convert @op with xencommize_grant_table_op() and
- * pass the resulting descriptor, together with @cmd and @count, to the
- * arch-level hypercall.
- */
-int
-xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, unsigned int count)
-{
- void *desc = xencommize_grant_table_op (cmd, op, count);
-
- return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
-}
-
-/*
- * Scheduler hypercall.  @arg is wrapped in an inline descriptor for every
- * accepted command; an unknown @cmd is logged and rejected with -ENOSYS.
- *
- * For SCHEDOP_poll the nested ports handle inside @arg is additionally
- * replaced in place by its inline descriptor.
- */
-int
-xencomm_hypercall_sched_op(int cmd, void *arg)
-{
- switch (cmd) {
- case SCHEDOP_yield:
- case SCHEDOP_block:
- case SCHEDOP_shutdown:
- case SCHEDOP_remote_shutdown:
- break;
- case SCHEDOP_poll:
- {
- sched_poll_t *poll = arg;
- struct xencomm_handle *ports;
-
- ports = xencomm_create_inline(xen_guest_handle(poll->ports));
-
- /*
-  * NOTE(review): poll->ports is left holding the descriptor after
-  * the hypercall; this function does not restore the original
-  * handle - confirm callers do not reuse the structure.
-  */
- set_xen_guest_handle(poll->ports, (void *)ports);
- break;
- }
- default:
- printk("%s: unknown sched op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- return xencomm_arch_hypercall_sched_op(cmd, xencomm_create_inline(arg));
-}
-
-/*
- * Multicall hypercall.  Walks the @nr_calls entries of @call_list (treated
- * as an array of multicall_entry_t) and converts the pointer argument of
- * each grant-table entry before issuing the batch.
- *
- * update_va_mapping and mmu_update entries are left untouched.  A memory_op
- * entry, or any other op, is logged and makes the function return -ENOSYS
- * without issuing the multicall; entries converted before that point keep
- * their rewritten args[1].
- */
-int
-xencomm_hypercall_multicall(void *call_list, int nr_calls)
-{
- int i;
- multicall_entry_t *mce;
-
- for (i = 0; i < nr_calls; i++) {
- mce = (multicall_entry_t *)call_list + i;
-
- switch (mce->op) {
- case __HYPERVISOR_update_va_mapping:
- case __HYPERVISOR_mmu_update:
- /* No-op on ia64. */
- break;
- case __HYPERVISOR_grant_table_op:
- /* args[0] = cmd, args[1] = op pointer, args[2] = count. */
- mce->args[1] = (unsigned long)xencommize_grant_table_op
- (mce->args[0], (void *)mce->args[1],
- mce->args[2]);
- break;
- case __HYPERVISOR_memory_op:
- default:
- printk("%s: unhandled multicall op entry op %lu\n",
- __func__, mce->op);
- return -ENOSYS;
- }
- }
-
- return xencomm_arch_hypercall_multicall
- (xencomm_create_inline(call_list), nr_calls);
-}
-
-/*
- * Callback hypercall.  CALLBACKOP_register and CALLBACKOP_unregister are
- * forwarded with @arg wrapped in an inline descriptor; any other @cmd is
- * logged and rejected with -ENOSYS.
- */
-int
-xencomm_hypercall_callback_op(int cmd, void *arg)
-{
- switch (cmd)
- {
- case CALLBACKOP_register:
- case CALLBACKOP_unregister:
- break;
- default:
- printk("%s: unknown callback op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- return xencomm_arch_hypercall_callback_op
- (cmd, xencomm_create_inline(arg));
-}
-
-/*
- * Replace the extent_start handle of @mop, in place, with the inline
- * xencomm descriptor of the buffer it currently points to.  The original
- * handle value is not saved here; callers that need it must keep a copy.
- */
-static void
-xencommize_memory_reservation (xen_memory_reservation_t *mop)
-{
- struct xencomm_handle *desc;
-
- desc = xencomm_create_inline(xen_guest_handle(mop->extent_start));
- set_xen_guest_handle(mop->extent_start, (void *)desc);
-}
-
-/*
- * Memory hypercall.
- *
- * Works in three phases:
- *   1. save the nested guest handles found in @arg and replace them with
- *      inline descriptors (which handles depends on @cmd);
- *   2. issue the hypercall with @arg itself wrapped in an inline descriptor;
- *   3. put the saved handles back so the caller sees its original pointers.
- *
- * An unknown @cmd is logged and rejected with -ENOSYS before anything is
- * modified.  Otherwise returns the hypercall result.
- */
-int
-xencomm_hypercall_memory_op(unsigned int cmd, void *arg)
-{
- /* Saved copies of the original extent_start handles ([1] only for exchange). */
- XEN_GUEST_HANDLE(xen_pfn_t) extent_start_va[2];
- xen_memory_reservation_t *xmr = NULL, *xme_in = NULL, *xme_out = NULL;
- xen_memory_map_t *memmap = NULL;
- /* Saved copy of the original memmap->buffer handle. */
- XEN_GUEST_HANDLE(void) buffer;
- int rc;
-
- switch (cmd) {
- case XENMEM_increase_reservation:
- case XENMEM_decrease_reservation:
- case XENMEM_populate_physmap:
- xmr = (xen_memory_reservation_t *)arg;
- xen_guest_handle(extent_start_va[0]) =
- xen_guest_handle(xmr->extent_start);
- xencommize_memory_reservation((xen_memory_reservation_t *)arg);
- break;
-
- case XENMEM_maximum_ram_page:
- /* No nested handle to convert. */
- break;
-
- case XENMEM_exchange:
- xme_in = &((xen_memory_exchange_t *)arg)->in;
- xme_out = &((xen_memory_exchange_t *)arg)->out;
- xen_guest_handle(extent_start_va[0]) =
- xen_guest_handle(xme_in->extent_start);
- xen_guest_handle(extent_start_va[1]) =
- xen_guest_handle(xme_out->extent_start);
- xencommize_memory_reservation
- (&((xen_memory_exchange_t *)arg)->in);
- xencommize_memory_reservation
- (&((xen_memory_exchange_t *)arg)->out);
- break;
-
- case XENMEM_machine_memory_map:
- memmap = (xen_memory_map_t *)arg;
- xen_guest_handle(buffer) = xen_guest_handle(memmap->buffer);
- set_xen_guest_handle(memmap->buffer,
- (void *)xencomm_create_inline(
- xen_guest_handle(memmap->buffer)));
- break;
-
- default:
- printk("%s: unknown memory op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- rc = xencomm_arch_hypercall_memory_op(cmd, xencomm_create_inline(arg));
-
- /* Restore the handles that were replaced above. */
- switch (cmd) {
- case XENMEM_increase_reservation:
- case XENMEM_decrease_reservation:
- case XENMEM_populate_physmap:
- xen_guest_handle(xmr->extent_start) =
- xen_guest_handle(extent_start_va[0]);
- break;
-
- case XENMEM_exchange:
- xen_guest_handle(xme_in->extent_start) =
- xen_guest_handle(extent_start_va[0]);
- xen_guest_handle(xme_out->extent_start) =
- xen_guest_handle(extent_start_va[1]);
- break;
-
- case XENMEM_machine_memory_map:
- xen_guest_handle(memmap->buffer) = xen_guest_handle(buffer);
- break;
- }
-
- return rc;
-}
-
-/*
- * HVM hypercall.  HVMOP_set_param and HVMOP_get_param are forwarded with
- * @arg wrapped in an inline descriptor; any other @cmd is logged and
- * rejected.
- *
- * NOTE(review): the return type is unsigned long, so the -ENOSYS returned
- * for an unknown @cmd reaches the caller as a large positive value -
- * confirm how callers test for failure.
- */
-unsigned long
-xencomm_hypercall_hvm_op(int cmd, void *arg)
-{
- switch (cmd) {
- case HVMOP_set_param:
- case HVMOP_get_param:
- break;
- default:
- printk("%s: unknown hvm op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- return xencomm_arch_hypercall_hvm_op(cmd, xencomm_create_inline(arg));
-}
-
-/*
- * Suspend hypercall: builds a sched_shutdown argument on the stack with
- * reason SHUTDOWN_suspend and passes its inline descriptor to the
- * arch-level suspend hypercall.  @srec is not used by this function.
- */
-int
-xencomm_hypercall_suspend(unsigned long srec)
-{
- struct sched_shutdown arg;
-
- arg.reason = SHUTDOWN_suspend;
-
- return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg));
-}
-
-/*
- * Xenoprof hypercall.
- *
- * The first group of ops is forwarded with @arg wrapped in an inline
- * descriptor.  The second group is forwarded with @arg passed through
- * unchanged.  Any other @op is logged and rejected with -ENOSYS.
- */
-int
-xencomm_hypercall_xenoprof_op(int op, void *arg)
-{
- switch (op) {
- case XENOPROF_init:
- case XENOPROF_set_active:
- case XENOPROF_set_passive:
- case XENOPROF_counter:
- case XENOPROF_get_buffer:
- break;
-
- case XENOPROF_reset_active_list:
- case XENOPROF_reset_passive_list:
- case XENOPROF_reserve_counters:
- case XENOPROF_setup_events:
- case XENOPROF_enable_virq:
- case XENOPROF_start:
- case XENOPROF_stop:
- case XENOPROF_disable_virq:
- case XENOPROF_release_counters:
- case XENOPROF_shutdown:
- return xencomm_arch_hypercall_xenoprof_op(op, arg);
- /* Not reached: the return above always leaves the function. */
- break;
-
- default:
- printk("%s: op %d isn't supported\n", __func__, op);
- return -ENOSYS;
- }
- return xencomm_arch_hypercall_xenoprof_op(op,
- xencomm_create_inline(arg));
-}
-
-/*
- * Perfmon hypercall.
- *
- * The first group of commands is forwarded with @arg wrapped in an inline
- * descriptor; the second group is forwarded with @arg passed through
- * unchanged.  An unsupported @cmd is logged and hits BUG().
- * @count is handed to the arch-level hypercall as is.
- */
-int
-xencomm_hypercall_perfmon_op(unsigned long cmd, void* arg, unsigned long count)
-{
- switch (cmd) {
- case PFM_GET_FEATURES:
- case PFM_CREATE_CONTEXT:
- case PFM_WRITE_PMCS:
- case PFM_WRITE_PMDS:
- case PFM_LOAD_CONTEXT:
- break;
-
- case PFM_DESTROY_CONTEXT:
- case PFM_UNLOAD_CONTEXT:
- case PFM_START:
- case PFM_STOP:
- return xencomm_arch_hypercall_perfmon_op(cmd, arg, count);
-
- default:
- printk("%s:%d cmd %ld isn't supported\n",
- __func__,__LINE__, cmd);
- BUG();
- }
-
- return xencomm_arch_hypercall_perfmon_op(cmd,
- xencomm_create_inline(arg),
- count);
-}
-
-/*
- * VCPU hypercall.  Only VCPUOP_register_runstate_memory_area is accepted;
- * any other @cmd is logged and rejected with -ENOSYS.  @arg is wrapped in
- * an inline descriptor and forwarded together with @cmd and @cpu.
- */
-long
-xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg)
-{
- switch (cmd) {
- case VCPUOP_register_runstate_memory_area:
- /*
-  * NOTE(review): @arg is cast to xen_memory_reservation_t only to
-  * reuse the helper that converts the extent_start handle.
-  * Presumably this relies on the runstate-area argument having
-  * its guest handle at the same offset - confirm against the
-  * structure definitions.
-  */
- xencommize_memory_reservation((xen_memory_reservation_t *)arg);
- break;
-
- default:
- printk("%s: unknown vcpu op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- return xencomm_arch_hypercall_vcpu_op(cmd, cpu,
- xencomm_create_inline(arg));
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
deleted file mode 100644
index 3c0baff1f0..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Tristan Gingold <tristan.gingold@bull.net>
- */
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/platform.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/xencomm.h>
-#include <xen/interface/version.h>
-#include <xen/interface/event_channel.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/grant_table.h>
-#include <xen/interface/hvm/params.h>
-#include <xen/interface/xenoprof.h>
-#ifdef CONFIG_VMX_GUEST
-#include <asm/hypervisor.h>
-#else
-#include <asm/hypercall.h>
-#endif
-#include <asm/xen/xencomm.h>
-#include <asm/perfmon.h>
-
-/*
- * Event-channel hypercall using a "mini" xencomm descriptor.
- *
- * The descriptor for @op (sizeof(evtchn_op_t) bytes) is built by
- * xencomm_create_mini() in a two-entry on-stack area.  If that fails its
- * error code is returned; otherwise the hypercall result is returned.
- */
-int
-xencomm_mini_hypercall_event_channel_op(int cmd, void *op)
-{
- struct xencomm_mini xc_area[2];
- int nbr_area = 2;
- struct xencomm_handle *desc;
- int rc;
-
- rc = xencomm_create_mini(xc_area, &nbr_area,
- op, sizeof(evtchn_op_t), &desc);
- if (rc)
- return rc;
-
- return xencomm_arch_hypercall_event_channel_op(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_event_channel_op);
-
-/*
- * Build mini descriptors for a grant-table operation.
- *
- * @xc_area/@nbr_area: descriptor storage handed on to xencomm_create_mini().
- * @cmd/@op/@count:    the grant-table command, its argument array and the
- *                     number of elements in it.
- * @desc:              receives the descriptor covering count * argsize
- *                     bytes of @op.
- *
- * The per-element size is chosen from @cmd.  For GNTTABOP_setup_table,
- * @count must be 1 (-EINVAL otherwise) and the nested frame_list handle is
- * replaced in place by a descriptor covering nr_frames entries.  An unknown
- * @cmd is logged and hits BUG().
- *
- * Returns 0 on success or the error from xencomm_create_mini().
- */
-static int
-xencommize_mini_grant_table_op(struct xencomm_mini *xc_area, int *nbr_area,
- unsigned int cmd, void *op, unsigned int count,
- struct xencomm_handle **desc)
-{
- struct xencomm_handle *desc1;
- unsigned int argsize;
- int rc;
-
- switch (cmd) {
- case GNTTABOP_map_grant_ref:
- argsize = sizeof(struct gnttab_map_grant_ref);
- break;
- case GNTTABOP_unmap_grant_ref:
- argsize = sizeof(struct gnttab_unmap_grant_ref);
- break;
- case GNTTABOP_setup_table:
- {
- struct gnttab_setup_table *setup = op;
-
- argsize = sizeof(*setup);
-
- if (count != 1)
- return -EINVAL;
- rc = xencomm_create_mini
- (xc_area, nbr_area,
- xen_guest_handle(setup->frame_list),
- setup->nr_frames
- * sizeof(*xen_guest_handle(setup->frame_list)),
- &desc1);
- if (rc)
- return rc;
- /* Overwrites the caller's handle; it is not restored here. */
- set_xen_guest_handle(setup->frame_list, (void *)desc1);
- break;
- }
- case GNTTABOP_dump_table:
- argsize = sizeof(struct gnttab_dump_table);
- break;
- case GNTTABOP_transfer:
- argsize = sizeof(struct gnttab_transfer);
- break;
- case GNTTABOP_copy:
- argsize = sizeof(struct gnttab_copy);
- break;
- case GNTTABOP_query_size:
- argsize = sizeof(struct gnttab_query_size);
- break;
- default:
- printk("%s: unknown mini grant table op %d\n", __func__, cmd);
- BUG();
- }
-
- rc = xencomm_create_mini(xc_area, nbr_area, op, count * argsize, desc);
- if (rc)
- return rc;
-
- return 0;
-}
-
-/*
- * Grant-table hypercall using mini descriptors built in a two-entry
- * on-stack area.  Returns the error from xencommize_mini_grant_table_op()
- * if descriptor creation fails, otherwise the hypercall result.
- */
-int
-xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op,
- unsigned int count)
-{
- int rc;
- struct xencomm_handle *desc;
- int nbr_area = 2;
- struct xencomm_mini xc_area[2];
-
- rc = xencommize_mini_grant_table_op(xc_area, &nbr_area,
- cmd, op, count, &desc);
- if (rc)
- return rc;
-
- return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_grant_table_op);
-
-/*
- * Multicall hypercall using mini descriptors.
- *
- * Each grant-table entry of @call_list has its args[1] pointer replaced by
- * a mini descriptor; update_va_mapping and mmu_update entries are left
- * untouched; a memory_op entry or any other op is logged and makes the
- * function return -ENOSYS.  Finally @call_list itself is described and the
- * batch is issued.
- *
- * Returns an error from descriptor creation, -ENOSYS as above, or the
- * hypercall result.
- */
-int
-xencomm_mini_hypercall_multicall(void *call_list, int nr_calls)
-{
- int i;
- multicall_entry_t *mce;
- int nbr_area = 2 + nr_calls * 3;
- /*
-  * NOTE(review): variable-length array on the stack sized from
-  * @nr_calls, which is not range-checked in this function - confirm
-  * callers bound it.
-  */
- struct xencomm_mini xc_area[nbr_area];
- struct xencomm_handle *desc;
- int rc;
-
- for (i = 0; i < nr_calls; i++) {
- mce = (multicall_entry_t *)call_list + i;
-
- switch (mce->op) {
- case __HYPERVISOR_update_va_mapping:
- case __HYPERVISOR_mmu_update:
- /* No-op on ia64. */
- break;
- case __HYPERVISOR_grant_table_op:
- /* args[0] = cmd, args[1] = op pointer, args[2] = count. */
- rc = xencommize_mini_grant_table_op
- (xc_area, &nbr_area,
- mce->args[0], (void *)mce->args[1],
- mce->args[2], &desc);
- if (rc)
- return rc;
- mce->args[1] = (unsigned long)desc;
- break;
- case __HYPERVISOR_memory_op:
- default:
- printk("%s: unhandled multicall op entry op %lu\n",
- __func__, mce->op);
- return -ENOSYS;
- }
- }
-
- rc = xencomm_create_mini(xc_area, &nbr_area, call_list,
- nr_calls * sizeof(multicall_entry_t), &desc);
- if (rc)
- return rc;
-
- return xencomm_arch_hypercall_multicall(desc, nr_calls);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_multicall);
-
-/*
- * Replace the extent_start handle of @mop, in place, with a mini
- * descriptor covering nr_extents entries of the buffer it points to.
- * Descriptor storage comes from @area / @nbr_area.
- *
- * Returns 0 on success, or the error from xencomm_create_mini(), in which
- * case @mop is left unmodified.
- */
-static int
-xencommize_mini_memory_reservation(struct xencomm_mini *area, int *nbr_area,
- xen_memory_reservation_t *mop)
-{
- struct xencomm_handle *desc;
- int rc;
-
- rc = xencomm_create_mini
- (area, nbr_area,
- xen_guest_handle(mop->extent_start),
- mop->nr_extents
- * sizeof(*xen_guest_handle(mop->extent_start)),
- &desc);
- if (rc)
- return rc;
-
- set_xen_guest_handle(mop->extent_start, (void *)desc);
-
- return 0;
-}
-
-/*
- * Memory hypercall using mini descriptors built in a four-entry on-stack
- * area.
- *
- * Depending on @cmd the nested handles inside @arg are first replaced by
- * mini descriptors, then @arg itself (argsize bytes) is described and the
- * hypercall is issued.  Unlike the nested handles' original values, nothing
- * is restored afterwards in this function.
- *
- * Returns an error from descriptor creation, -ENOSYS for an unknown @cmd,
- * or the hypercall result.
- */
-int
-xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg)
-{
- int nbr_area = 4;
- struct xencomm_mini xc_area[4];
- struct xencomm_handle *desc;
- int rc;
- unsigned int argsize;
-
- switch (cmd) {
- case XENMEM_increase_reservation:
- case XENMEM_decrease_reservation:
- case XENMEM_populate_physmap:
- argsize = sizeof(xen_memory_reservation_t);
- rc = xencommize_mini_memory_reservation
- (xc_area, &nbr_area, (xen_memory_reservation_t *)arg);
- if (rc)
- return rc;
- break;
-
- case XENMEM_maximum_ram_page:
- argsize = 0;
- break;
-
- case XENMEM_exchange:
- argsize = sizeof(xen_memory_exchange_t);
- rc = xencommize_mini_memory_reservation
- (xc_area, &nbr_area,
- &((xen_memory_exchange_t *)arg)->in);
- if (rc)
- return rc;
- rc = xencommize_mini_memory_reservation
- (xc_area, &nbr_area,
- &((xen_memory_exchange_t *)arg)->out);
- if (rc)
- return rc;
- break;
-
- case XENMEM_add_to_physmap:
- argsize = sizeof (xen_add_to_physmap_t);
- break;
-
- case XENMEM_machine_memory_map:
- {
- xen_memory_map_t *memmap = (xen_memory_map_t *)arg;
- argsize = sizeof(*memmap);
- /*
-  * NOTE(review): nr_entries is passed unscaled as the buffer
-  * size, whereas the reservation helper multiplies by the
-  * element size - confirm whether a per-entry size is missing.
-  */
- rc = xencomm_create_mini(xc_area, &nbr_area,
- xen_guest_handle(memmap->buffer),
- memmap->nr_entries, &desc);
- if (rc)
- return rc;
- set_xen_guest_handle(memmap->buffer, (void *)desc);
- break;
- }
-
- default:
- printk("%s: unknown mini memory op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
- if (rc)
- return rc;
-
- return xencomm_arch_hypercall_memory_op(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_memory_op);
-
-/*
- * HVM hypercall using a mini descriptor of sizeof(xen_hvm_param_t) bytes
- * for @arg.  Only HVMOP_get_param and HVMOP_set_param are accepted.
- *
- * NOTE(review): an unknown @cmd yields -EINVAL here, while
- * xencomm_hypercall_hvm_op() uses -ENOSYS; both are returned through an
- * unsigned long, as is any error from xencomm_create_mini() - confirm how
- * callers test for failure.
- */
-unsigned long
-xencomm_mini_hypercall_hvm_op(int cmd, void *arg)
-{
- struct xencomm_handle *desc;
- int nbr_area = 2;
- struct xencomm_mini xc_area[2];
- unsigned int argsize;
- int rc;
-
- switch (cmd) {
- case HVMOP_get_param:
- case HVMOP_set_param:
- argsize = sizeof(xen_hvm_param_t);
- break;
- default:
- printk("%s: unknown HVMOP %d\n", __func__, cmd);
- return -EINVAL;
- }
-
- rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
- if (rc)
- return rc;
-
- return xencomm_arch_hypercall_hvm_op(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_hvm_op);
-
-/*
- * Version hypercall using a mini descriptor for @arg.
- *
- * The descriptor size is chosen from @cmd.  XENVER_version is issued
- * immediately with a zero argument.  For XENVER_pagesize and
- * XENVER_get_features a NULL @arg gives a size of 0.  An unknown @cmd is
- * logged and rejected with -ENOSYS.
- *
- * Returns an error from xencomm_create_mini() or the hypercall result.
- */
-int
-xencomm_mini_hypercall_xen_version(int cmd, void *arg)
-{
- struct xencomm_handle *desc;
- int nbr_area = 2;
- struct xencomm_mini xc_area[2];
- unsigned int argsize;
- int rc;
-
- switch (cmd) {
- case XENVER_version:
- /* do not actually pass an argument */
- return xencomm_arch_hypercall_xen_version(cmd, 0);
- case XENVER_extraversion:
- argsize = sizeof(xen_extraversion_t);
- break;
- case XENVER_compile_info:
- argsize = sizeof(xen_compile_info_t);
- break;
- case XENVER_capabilities:
- argsize = sizeof(xen_capabilities_info_t);
- break;
- case XENVER_changeset:
- argsize = sizeof(xen_changeset_info_t);
- break;
- case XENVER_platform_parameters:
- argsize = sizeof(xen_platform_parameters_t);
- break;
- case XENVER_pagesize:
- argsize = (arg == NULL) ? 0 : sizeof(void *);
- break;
- case XENVER_get_features:
- argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t);
- break;
-
- default:
- printk("%s: unknown version op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
- if (rc)
- return rc;
-
- return xencomm_arch_hypercall_xen_version(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_xen_version);
-
-/*
- * Xenoprof hypercall using a mini descriptor for @arg.
- *
- * Ops in the first group get a descriptor whose size is the matching
- * argument type.  Ops in the second group are forwarded at once with @arg
- * passed through unchanged.  Any other @op is logged and rejected with
- * -ENOSYS.
- *
- * Returns an error from xencomm_create_mini() or the hypercall result.
- */
-int
-xencomm_mini_hypercall_xenoprof_op(int op, void *arg)
-{
- unsigned int argsize;
- struct xencomm_mini xc_area[2];
- int nbr_area = 2;
- struct xencomm_handle *desc;
- int rc;
-
- switch (op) {
- case XENOPROF_init:
- argsize = sizeof(xenoprof_init_t);
- break;
- case XENOPROF_set_active:
- argsize = sizeof(domid_t);
- break;
- case XENOPROF_set_passive:
- argsize = sizeof(xenoprof_passive_t);
- break;
- case XENOPROF_counter:
- argsize = sizeof(xenoprof_counter_t);
- break;
- case XENOPROF_get_buffer:
- argsize = sizeof(xenoprof_get_buffer_t);
- break;
-
- case XENOPROF_reset_active_list:
- case XENOPROF_reset_passive_list:
- case XENOPROF_reserve_counters:
- case XENOPROF_setup_events:
- case XENOPROF_enable_virq:
- case XENOPROF_start:
- case XENOPROF_stop:
- case XENOPROF_disable_virq:
- case XENOPROF_release_counters:
- case XENOPROF_shutdown:
- return xencomm_arch_hypercall_xenoprof_op(op, arg);
-
- default:
- printk("%s: op %d isn't supported\n", __func__, op);
- return -ENOSYS;
- }
- rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
- if (rc)
- return rc;
- return xencomm_arch_hypercall_xenoprof_op(op, desc);
-}
-EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_xenoprof_op);
-
-/*
- * Perfmon hypercall using a mini descriptor for @arg.
- *
- * The descriptor size is chosen from @cmd; for PFM_WRITE_PMCS and
- * PFM_WRITE_PMDS it is @count elements of pfarg_reg_t.  Commands in the
- * second group are forwarded at once with @arg passed through unchanged.
- * An unsupported @cmd is logged and hits BUG().
- *
- * Returns an error from xencomm_create_mini() or the hypercall result.
- */
-int
-xencomm_mini_hypercall_perfmon_op(unsigned long cmd, void* arg,
- unsigned long count)
-{
- unsigned int argsize;
- struct xencomm_mini xc_area[2];
- int nbr_area = 2;
- struct xencomm_handle *desc;
- int rc;
-
- switch (cmd) {
- case PFM_GET_FEATURES:
- argsize = sizeof(pfarg_features_t);
- break;
- case PFM_CREATE_CONTEXT:
- argsize = sizeof(pfarg_context_t);
- break;
- case PFM_LOAD_CONTEXT:
- argsize = sizeof(pfarg_load_t);
- break;
- case PFM_WRITE_PMCS:
- case PFM_WRITE_PMDS:
- argsize = sizeof(pfarg_reg_t) * count;
- break;
-
- case PFM_DESTROY_CONTEXT:
- case PFM_UNLOAD_CONTEXT:
- case PFM_START:
- case PFM_STOP:
- return xencomm_arch_hypercall_perfmon_op(cmd, arg, count);
-
- default:
- printk("%s:%d cmd %ld isn't supported\n",
- __func__, __LINE__, cmd);
- BUG();
- }
-
- rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
- if (rc)
- return rc;
- return xencomm_arch_hypercall_perfmon_op(cmd, desc, count);
-}
-EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_perfmon_op);
-
-/*
- * Scheduler hypercall using a mini descriptor for @arg.
- *
- * The descriptor size is chosen from @cmd (0 for yield and block).  An
- * unknown @cmd is logged and rejected with -ENOSYS.
- *
- * NOTE(review): for SCHEDOP_poll only the sched_poll_t itself is
- * described; the nested ports handle is not converted here, unlike in
- * xencomm_hypercall_sched_op() - confirm this is intended.
- *
- * Returns an error from xencomm_create_mini() or the hypercall result.
- */
-int
-xencomm_mini_hypercall_sched_op(int cmd, void *arg)
-{
- int rc, nbr_area = 2;
- struct xencomm_mini xc_area[2];
- struct xencomm_handle *desc;
- unsigned int argsize;
-
- switch (cmd) {
- case SCHEDOP_yield:
- case SCHEDOP_block:
- argsize = 0;
- break;
- case SCHEDOP_shutdown:
- argsize = sizeof(sched_shutdown_t);
- break;
- case SCHEDOP_poll:
- argsize = sizeof(sched_poll_t);
- break;
- case SCHEDOP_remote_shutdown:
- argsize = sizeof(sched_remote_shutdown_t);
- break;
-
- default:
- printk("%s: unknown sched op %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
- if (rc)
- return rc;
-
- return xencomm_arch_hypercall_sched_op(cmd, desc);
-}
-EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_sched_op);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c
deleted file mode 100644
index 7c67373ccc..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- * Tristan Gingold <tristan.gingold@bull.net>
- */
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/module.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/platform.h>
-#define __XEN__
-#include <xen/interface/domctl.h>
-#include <xen/interface/sysctl.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/version.h>
-#include <xen/interface/event_channel.h>
-#include <xen/interface/acm_ops.h>
-#include <xen/interface/hvm/params.h>
-#include <xen/public/privcmd.h>
-#include <asm/hypercall.h>
-#include <asm/page.h>
-#include <asm/uaccess.h>
-#include <asm/xen/xencomm.h>
-
-#define ROUND_DIV(v,s) (((v) + (s) - 1) / (s))
-
-/*
- * privcmd handler for the platform_op hypercall.
- *
- * Copies the user's xen_platform_op (hypercall->arg[0]) into the kernel and
- * checks its interface version (-EFAULT / -EACCES on failure).
- *
- * The switch below has only a default case, so every command that passes
- * the version check is logged and rejected with -ENOSYS; the code after
- * the switch is not reached as the function stands.
- */
-static int
-xencomm_privcmd_platform_op(privcmd_hypercall_t *hypercall)
-{
- struct xen_platform_op kern_op;
- struct xen_platform_op __user *user_op = (struct xen_platform_op __user *)hypercall->arg[0];
- struct xencomm_handle *op_desc;
- struct xencomm_handle *desc = NULL;
- int ret = 0;
-
- if (copy_from_user(&kern_op, user_op, sizeof(struct xen_platform_op)))
- return -EFAULT;
-
- if (kern_op.interface_version != XENPF_INTERFACE_VERSION)
- return -EACCES;
-
- op_desc = xencomm_create_inline(&kern_op);
-
- switch (kern_op.cmd) {
- default:
- printk("%s: unknown platform cmd %d\n", __func__, kern_op.cmd);
- return -ENOSYS;
- }
-
- if (ret) {
- /* error mapping the nested pointer */
- return ret;
- }
-
- ret = xencomm_arch_hypercall_platform_op(op_desc);
-
- /* FIXME: should we restore the handle? */
- if (copy_to_user(user_op, &kern_op, sizeof(struct xen_platform_op)))
- ret = -EFAULT;
-
- if (desc)
- xencomm_free(desc);
- return ret;
-}
-
-/*
- * Temporarily disable the NUMA PHYSINFO code until the rest of the
- * changes are upstream.
- */
-#undef IA64_NUMA_PHYSINFO
-
-/*
- * privcmd handler for the sysctl hypercall.
- *
- * Copies the user's xen_sysctl_t (hypercall->arg[0]) into the kernel,
- * checks its interface version, converts any nested user buffer for the
- * given command into a xencomm descriptor (stored back into kern_op),
- * issues the hypercall, copies kern_op back to user space and frees the
- * descriptors.
- *
- * Returns -EFAULT on a failed user copy, -EACCES on a version mismatch,
- * -ENOSYS for an unknown command, -EINVAL for an inconsistent perfc_op
- * request, an error from xencomm_create(), or the hypercall result.
- */
-static int
-xencomm_privcmd_sysctl(privcmd_hypercall_t *hypercall)
-{
- xen_sysctl_t kern_op;
- xen_sysctl_t __user *user_op;
- struct xencomm_handle *op_desc;
- /* Descriptors for up to two nested buffers; freed at the end if set. */
- struct xencomm_handle *desc = NULL;
- struct xencomm_handle *desc1 = NULL;
- int ret = 0;
-
- user_op = (xen_sysctl_t __user *)hypercall->arg[0];
-
- if (copy_from_user(&kern_op, user_op, sizeof(xen_sysctl_t)))
- return -EFAULT;
-
- if (kern_op.interface_version != XEN_SYSCTL_INTERFACE_VERSION)
- return -EACCES;
-
- op_desc = xencomm_create_inline(&kern_op);
-
- switch (kern_op.cmd) {
- case XEN_SYSCTL_readconsole:
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.readconsole.buffer),
- kern_op.u.readconsole.count,
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.readconsole.buffer,
- (void *)desc);
- break;
- case XEN_SYSCTL_tbuf_op:
-#ifndef IA64_NUMA_PHYSINFO
- case XEN_SYSCTL_physinfo:
-#endif
- case XEN_SYSCTL_sched_id:
- /* No nested buffer to convert. */
- break;
- case XEN_SYSCTL_perfc_op:
- {
- struct xencomm_handle *tmp_desc;
- /* Scratch request used only to ask Xen for the counter counts. */
- xen_sysctl_t tmp_op = {
- .cmd = XEN_SYSCTL_perfc_op,
- .interface_version = XEN_SYSCTL_INTERFACE_VERSION,
- .u.perfc_op = {
- .cmd = XEN_SYSCTL_PERFCOP_query,
- // .desc.p = NULL,
- // .val.p = NULL,
- },
- };
-
- /* A NULL desc buffer is only valid together with a NULL val buffer. */
- if (xen_guest_handle(kern_op.u.perfc_op.desc) == NULL) {
- if (xen_guest_handle(kern_op.u.perfc_op.val) != NULL)
- return -EINVAL;
- break;
- }
-
- /* query the buffer size for xencomm */
- tmp_desc = xencomm_create_inline(&tmp_op);
- ret = xencomm_arch_hypercall_sysctl(tmp_desc);
- if (ret)
- return ret;
-
- ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.desc),
- tmp_op.u.perfc_op.nr_counters *
- sizeof(xen_sysctl_perfc_desc_t),
- &desc, GFP_KERNEL);
- if (ret)
- return ret;
-
- set_xen_guest_handle(kern_op.u.perfc_op.desc, (void *)desc);
-
- ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.val),
- tmp_op.u.perfc_op.nr_vals *
- sizeof(xen_sysctl_perfc_val_t),
- &desc1, GFP_KERNEL);
- /*
-  * On failure the first descriptor is released here; the error
-  * itself is returned by the "if (ret)" check after the switch.
-  */
- if (ret)
- xencomm_free(desc);
-
- set_xen_guest_handle(kern_op.u.perfc_op.val, (void *)desc1);
- break;
- }
- case XEN_SYSCTL_getdomaininfolist:
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.getdomaininfolist.buffer),
- kern_op.u.getdomaininfolist.max_domains *
- sizeof(xen_domctl_getdomaininfo_t),
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.getdomaininfolist.buffer,
- (void *)desc);
- break;
- case XEN_SYSCTL_debug_keys:
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.debug_keys.keys),
- kern_op.u.debug_keys.nr_keys,
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.debug_keys.keys,
- (void *)desc);
- break;
-
-#ifdef IA64_NUMA_PHYSINFO
- case XEN_SYSCTL_physinfo:
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.physinfo.memory_chunks),
- PUBLIC_MAXCHUNKS * sizeof(node_data_t),
- &desc, GFP_KERNEL);
- if (ret)
- return ret;
- set_xen_guest_handle(kern_op.u.physinfo.memory_chunks,
- (void *)desc);
-
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.physinfo.cpu_to_node),
- PUBLIC_MAX_NUMNODES * sizeof(u64),
- &desc1, GFP_KERNEL);
- if (ret)
- xencomm_free(desc);
- set_xen_guest_handle(kern_op.u.physinfo.cpu_to_node,
- (void *)desc1);
- break;
-#endif
- default:
- printk("%s: unknown sysctl cmd %d\n", __func__, kern_op.cmd);
- return -ENOSYS;
- }
-
- if (ret) {
- /* error mapping the nested pointer */
- return ret;
- }
-
- ret = xencomm_arch_hypercall_sysctl(op_desc);
-
- /*
-  * NOTE(review): kern_op still holds the descriptor pointers written
-  * by set_xen_guest_handle() above, so they are what gets copied back
-  * over the user's buffer pointers - see the FIXME below.
-  */
- /* FIXME: should we restore the handles? */
- if (copy_to_user(user_op, &kern_op, sizeof(xen_sysctl_t)))
- ret = -EFAULT;
-
- if (desc)
- xencomm_free(desc);
- if (desc1)
- xencomm_free(desc1);
- return ret;
-}
-
-/*
- * privcmd handler for the domctl hypercall.
- *
- * Copies the user's xen_domctl_t (hypercall->arg[0]) into the kernel,
- * checks its interface version, converts the nested user buffer (if the
- * command has one) into a xencomm descriptor stored back into kern_op,
- * issues the hypercall, copies kern_op back to user space and frees the
- * descriptor.
- *
- * Returns -EFAULT on a failed user copy, -EACCES on a version mismatch,
- * -ENOSYS for an unknown command, an error from xencomm_create(), or the
- * hypercall result.
- */
-static int
-xencomm_privcmd_domctl(privcmd_hypercall_t *hypercall)
-{
- xen_domctl_t kern_op;
- xen_domctl_t __user *user_op;
- struct xencomm_handle *op_desc;
- struct xencomm_handle *desc = NULL;
- int ret = 0;
-
- user_op = (xen_domctl_t __user *)hypercall->arg[0];
-
- if (copy_from_user(&kern_op, user_op, sizeof(xen_domctl_t)))
- return -EFAULT;
-
- if (kern_op.interface_version != XEN_DOMCTL_INTERFACE_VERSION)
- return -EACCES;
-
- op_desc = xencomm_create_inline(&kern_op);
-
- switch (kern_op.cmd) {
- case XEN_DOMCTL_createdomain:
- case XEN_DOMCTL_destroydomain:
- case XEN_DOMCTL_pausedomain:
- case XEN_DOMCTL_unpausedomain:
- case XEN_DOMCTL_getdomaininfo:
- break;
- case XEN_DOMCTL_getmemlist:
- {
- unsigned long nr_pages = kern_op.u.getmemlist.max_pfns;
-
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.getmemlist.buffer),
- nr_pages * sizeof(unsigned long),
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.getmemlist.buffer,
- (void *)desc);
- break;
- }
- case XEN_DOMCTL_getpageframeinfo:
- break;
- case XEN_DOMCTL_getpageframeinfo2:
- /*
-  * NOTE(review): num is used unscaled as the buffer size -
-  * confirm against the element type of the array handle.
-  */
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.getpageframeinfo2.array),
- kern_op.u.getpageframeinfo2.num,
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.getpageframeinfo2.array,
- (void *)desc);
- break;
- case XEN_DOMCTL_shadow_op:
- /* Buffer size is pages / 8, rounded up by ROUND_DIV. */
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap),
- ROUND_DIV(kern_op.u.shadow_op.pages, 8),
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap,
- (void *)desc);
- break;
- case XEN_DOMCTL_max_mem:
- break;
- case XEN_DOMCTL_setvcpucontext:
- case XEN_DOMCTL_getvcpucontext:
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.vcpucontext.ctxt),
- sizeof(vcpu_guest_context_t),
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.vcpucontext.ctxt, (void *)desc);
- break;
- case XEN_DOMCTL_getvcpuinfo:
- break;
- case XEN_DOMCTL_setvcpuaffinity:
- case XEN_DOMCTL_getvcpuaffinity:
- /* Buffer size is nr_cpus / 8, rounded up by ROUND_DIV. */
- ret = xencomm_create(
- xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap),
- ROUND_DIV(kern_op.u.vcpuaffinity.cpumap.nr_cpus, 8),
- &desc, GFP_KERNEL);
- set_xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap,
- (void *)desc);
- break;
- case XEN_DOMCTL_max_vcpus:
- case XEN_DOMCTL_scheduler_op:
- case XEN_DOMCTL_setdomainhandle:
- case XEN_DOMCTL_setdebugging:
- case XEN_DOMCTL_irq_permission:
- case XEN_DOMCTL_iomem_permission:
- case XEN_DOMCTL_ioport_permission:
- case XEN_DOMCTL_hypercall_init:
- case XEN_DOMCTL_arch_setup:
- case XEN_DOMCTL_settimeoffset:
- case XEN_DOMCTL_sendtrigger:
- /* No nested buffer to convert. */
- break;
- default:
- printk("%s: unknown domctl cmd %d\n", __func__, kern_op.cmd);
- return -ENOSYS;
- }
-
- if (ret) {
- /* error mapping the nested pointer */
- return ret;
- }
-
- ret = xencomm_arch_hypercall_domctl (op_desc);
-
- /*
-  * NOTE(review): kern_op still holds the descriptor pointer written by
-  * set_xen_guest_handle() above, so that is what gets copied back over
-  * the user's buffer pointer - see the FIXME below.
-  */
- /* FIXME: should we restore the handle? */
- if (copy_to_user(user_op, &kern_op, sizeof(xen_domctl_t)))
- ret = -EFAULT;
-
- if (desc)
- xencomm_free(desc);
- return ret;
-}
-
-/*
- * privcmd handler for the acm_op hypercall.
- *
- * hypercall->arg[0] is the command and arg[1] the user pointer to its
- * argument.  Only ACMOP_getssid is handled: the argument is copied in, its
- * ssidbuf buffer is described with xencomm_create(), the hypercall is
- * issued, the descriptor is freed and the argument is copied back out.
- *
- * Returns -EFAULT on a failed user copy, an error from xencomm_create(),
- * -ENOSYS for any other command, or the hypercall result.
- */
-static int
-xencomm_privcmd_acm_op(privcmd_hypercall_t *hypercall)
-{
- int cmd = hypercall->arg[0];
- void __user *arg = (void __user *)hypercall->arg[1];
- struct xencomm_handle *op_desc;
- struct xencomm_handle *desc = NULL;
- int ret;
-
- switch (cmd) {
- case ACMOP_getssid:
- {
- struct acm_getssid kern_arg;
-
- if (copy_from_user(&kern_arg, arg, sizeof (kern_arg)))
- return -EFAULT;
-
- op_desc = xencomm_create_inline(&kern_arg);
-
- ret = xencomm_create(xen_guest_handle(kern_arg.ssidbuf),
- kern_arg.ssidbuf_size, &desc, GFP_KERNEL);
- if (ret)
- return ret;
-
- set_xen_guest_handle(kern_arg.ssidbuf, (void *)desc);
-
- ret = xencomm_arch_hypercall_acm_op(cmd, op_desc);
-
- xencomm_free(desc);
-
- /*
-  * NOTE(review): kern_arg.ssidbuf still holds the (now freed)
-  * descriptor pointer when it is copied back to user space.
-  */
- if (copy_to_user(arg, &kern_arg, sizeof (kern_arg)))
- return -EFAULT;
-
- return ret;
- }
- default:
- printk("%s: unknown acm_op cmd %d\n", __func__, cmd);
- return -ENOSYS;
- }
-
- /* Not reached: every switch arm above returns. */
- return ret;
-}
-
-/*
- * Forward a XENMEM_* hypercall issued through privcmd.
- *
- * hypercall->arg[0] is the memory sub-command and hypercall->arg[1] the
- * user pointer to its argument structure.  The structure is copied into
- * the kernel and passed inline; each nested guest handle is replaced by a
- * xencomm descriptor for the duration of the hypercall.
- *
- * Returns the hypercall result, -EFAULT on a failed user copy, the
- * xencomm_create() error, or -ENOSYS for an unsupported command.
- */
-static int
-xencomm_privcmd_memory_op(privcmd_hypercall_t *hypercall)
-{
-	const unsigned long cmd = hypercall->arg[0];
-	int ret = 0;
-
-	switch (cmd) {
-	case XENMEM_increase_reservation:
-	case XENMEM_decrease_reservation:
-	case XENMEM_populate_physmap:
-	{
-		xen_memory_reservation_t kern_op;
-		xen_memory_reservation_t __user *user_op;
-		struct xencomm_handle *desc = NULL;
-		struct xencomm_handle *desc_op;
-		void *user_extents;
-
-		user_op = (xen_memory_reservation_t __user *)hypercall->arg[1];
-		if (copy_from_user(&kern_op, user_op,
-				   sizeof(xen_memory_reservation_t)))
-			return -EFAULT;
-		desc_op = xencomm_create_inline(&kern_op);
-
-		/* Remember the caller's extent list so it can be restored. */
-		user_extents = xen_guest_handle(kern_op.extent_start);
-		if (user_extents) {
-			ret = xencomm_create
-				(user_extents,
-				 kern_op.nr_extents *
-				 sizeof(*xen_guest_handle
-					(kern_op.extent_start)),
-				 &desc, GFP_KERNEL);
-			if (ret)
-				return ret;
-			set_xen_guest_handle(kern_op.extent_start,
-					     (void *)desc);
-		}
-
-		ret = xencomm_arch_hypercall_memory_op(cmd, desc_op);
-
-		if (desc)
-			xencomm_free(desc);
-
-		if (ret != 0)
-			return ret;
-
-		/*
-		 * Put the caller's own pointer back before copying out, so
-		 * the freed descriptor address is not written to user space.
-		 */
-		if (user_extents)
-			set_xen_guest_handle(kern_op.extent_start,
-					     user_extents);
-
-		if (copy_to_user(user_op, &kern_op,
-				 sizeof(xen_memory_reservation_t)))
-			return -EFAULT;
-
-		return ret;
-	}
-	case XENMEM_translate_gpfn_list:
-	{
-		xen_translate_gpfn_list_t kern_op;
-		xen_translate_gpfn_list_t __user *user_op;
-		struct xencomm_handle *desc_gpfn = NULL;
-		struct xencomm_handle *desc_mfn = NULL;
-		struct xencomm_handle *desc_op;
-		void *addr;
-
-		user_op = (xen_translate_gpfn_list_t __user *)
-			hypercall->arg[1];
-		if (copy_from_user(&kern_op, user_op,
-				   sizeof(xen_translate_gpfn_list_t)))
-			return -EFAULT;
-		desc_op = xencomm_create_inline(&kern_op);
-
-		if (kern_op.nr_gpfns) {
-			/* gpfn_list. */
-			addr = xen_guest_handle(kern_op.gpfn_list);
-
-			ret = xencomm_create(addr, kern_op.nr_gpfns *
-					     sizeof(*xen_guest_handle
-						    (kern_op.gpfn_list)),
-					     &desc_gpfn, GFP_KERNEL);
-			if (ret)
-				return ret;
-			set_xen_guest_handle(kern_op.gpfn_list,
-					     (void *)desc_gpfn);
-
-			/* mfn_list. */
-			addr = xen_guest_handle(kern_op.mfn_list);
-
-			ret = xencomm_create(addr, kern_op.nr_gpfns *
-					     sizeof(*xen_guest_handle
-						    (kern_op.mfn_list)),
-					     &desc_mfn, GFP_KERNEL);
-			if (ret) {
-				/* Do not leak the gpfn_list descriptor. */
-				xencomm_free(desc_gpfn);
-				return ret;
-			}
-			set_xen_guest_handle(kern_op.mfn_list,
-					     (void *)desc_mfn);
-		}
-
-		ret = xencomm_arch_hypercall_memory_op(cmd, desc_op);
-
-		if (desc_gpfn)
-			xencomm_free(desc_gpfn);
-
-		if (desc_mfn)
-			xencomm_free(desc_mfn);
-
-		return ret;
-	}
-	default:
-		printk("%s: unknown memory op %lu\n", __func__, cmd);
-		ret = -ENOSYS;
-	}
-	return ret;
-}
-
-/*
- * Forward a XENVER_* hypercall issued through privcmd.
- *
- * The sub-command selects how many bytes of the user buffer in
- * hypercall->arg[1] are described to the hypervisor.  Unknown
- * sub-commands are logged and rejected with -ENOSYS.
- */
-static int
-xencomm_privcmd_xen_version(privcmd_hypercall_t *hypercall)
-{
-	void __user *arg = (void __user *)hypercall->arg[1];
-	int cmd = hypercall->arg[0];
-	struct xencomm_handle *handle;
-	size_t len;
-	int err;
-
-	/* XENVER_version is issued without any argument buffer. */
-	if (cmd == XENVER_version)
-		return xencomm_arch_hypercall_xen_version(cmd, 0);
-
-	switch (cmd) {
-	case XENVER_extraversion:
-		len = sizeof(xen_extraversion_t);
-		break;
-	case XENVER_compile_info:
-		len = sizeof(xen_compile_info_t);
-		break;
-	case XENVER_capabilities:
-		len = sizeof(xen_capabilities_info_t);
-		break;
-	case XENVER_changeset:
-		len = sizeof(xen_changeset_info_t);
-		break;
-	case XENVER_platform_parameters:
-		len = sizeof(xen_platform_parameters_t);
-		break;
-	case XENVER_pagesize:
-		/* the buffer is optional for this sub-command */
-		len = arg ? sizeof(void *) : 0;
-		break;
-	case XENVER_get_features:
-		/* the buffer is optional for this sub-command */
-		len = arg ? sizeof(xen_feature_info_t) : 0;
-		break;
-	default:
-		printk("%s: unknown version op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	err = xencomm_create(arg, len, &handle, GFP_KERNEL);
-	if (err != 0)
-		return err;
-
-	err = xencomm_arch_hypercall_xen_version(cmd, handle);
-	xencomm_free(handle);
-
-	return err;
-}
-
-/*
- * Forward an event-channel hypercall issued through privcmd.
- *
- * Only EVTCHNOP_alloc_unbound and EVTCHNOP_status are accepted; any other
- * sub-command is logged and rejected with -EINVAL.
- */
-static int
-xencomm_privcmd_event_channel_op(privcmd_hypercall_t *hypercall)
-{
-	struct xencomm_handle *handle;
-	unsigned int len;
-	int cmd = hypercall->arg[0];
-	int rc;
-
-	/* Size of the argument structure for the requested sub-command. */
-	if (cmd == EVTCHNOP_alloc_unbound) {
-		len = sizeof(evtchn_alloc_unbound_t);
-	} else if (cmd == EVTCHNOP_status) {
-		len = sizeof(evtchn_status_t);
-	} else {
-		printk("%s: unknown EVTCHNOP %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-
-	rc = xencomm_create((void *)hypercall->arg[1], len,
-			    &handle, GFP_KERNEL);
-	if (rc != 0)
-		return rc;
-
-	rc = xencomm_arch_hypercall_event_channel_op(cmd, handle);
-	xencomm_free(handle);
-
-	return rc;
-}
-
-/*
- * Forward an HVM hypercall issued through privcmd.
- *
- * The sub-command determines the size of the argument structure that is
- * described to the hypervisor; unknown sub-commands are logged and
- * rejected with -EINVAL.
- */
-static int
-xencomm_privcmd_hvm_op(privcmd_hypercall_t *hypercall)
-{
-	struct xencomm_handle *handle;
-	unsigned int len;
-	int cmd = hypercall->arg[0];
-	int rc;
-
-	if (cmd == HVMOP_get_param || cmd == HVMOP_set_param) {
-		len = sizeof(xen_hvm_param_t);
-	} else if (cmd == HVMOP_set_pci_intx_level) {
-		len = sizeof(xen_hvm_set_pci_intx_level_t);
-	} else if (cmd == HVMOP_set_isa_irq_level) {
-		len = sizeof(xen_hvm_set_isa_irq_level_t);
-	} else if (cmd == HVMOP_set_pci_link_route) {
-		len = sizeof(xen_hvm_set_pci_link_route_t);
-	} else {
-		printk("%s: unknown HVMOP %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-
-	rc = xencomm_create((void *)hypercall->arg[1], len,
-			    &handle, GFP_KERNEL);
-	if (rc != 0)
-		return rc;
-
-	rc = xencomm_arch_hypercall_hvm_op(cmd, handle);
-	xencomm_free(handle);
-
-	return rc;
-}
-
-/*
- * Forward a scheduler hypercall issued through privcmd.
- *
- * SCHEDOP_remote_shutdown is the only sub-command accepted; anything else
- * is logged and rejected with -EINVAL.
- */
-static int
-xencomm_privcmd_sched_op(privcmd_hypercall_t *hypercall)
-{
-	struct xencomm_handle *handle;
-	unsigned int len;
-	int cmd = hypercall->arg[0];
-	int rc;
-
-	if (cmd != SCHEDOP_remote_shutdown) {
-		printk("%s: unknown SCHEDOP %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-	len = sizeof(sched_remote_shutdown_t);
-
-	rc = xencomm_create((void *)hypercall->arg[1], len,
-			    &handle, GFP_KERNEL);
-	if (rc != 0)
-		return rc;
-
-	rc = xencomm_arch_hypercall_sched_op(cmd, handle);
-	xencomm_free(handle);
-
-	return rc;
-}
-
-/*
- * Dispatch a privcmd hypercall request to the xencomm wrapper matching
- * hypercall->op.  Hypercall numbers without a wrapper are logged and
- * rejected with -ENOSYS.
- */
-int
-privcmd_hypercall(privcmd_hypercall_t *hypercall)
-{
-	int rc;
-
-	switch (hypercall->op) {
-	case __HYPERVISOR_platform_op:
-		rc = xencomm_privcmd_platform_op(hypercall);
-		break;
-	case __HYPERVISOR_domctl:
-		rc = xencomm_privcmd_domctl(hypercall);
-		break;
-	case __HYPERVISOR_sysctl:
-		rc = xencomm_privcmd_sysctl(hypercall);
-		break;
-	case __HYPERVISOR_acm_op:
-		rc = xencomm_privcmd_acm_op(hypercall);
-		break;
-	case __HYPERVISOR_xen_version:
-		rc = xencomm_privcmd_xen_version(hypercall);
-		break;
-	case __HYPERVISOR_memory_op:
-		rc = xencomm_privcmd_memory_op(hypercall);
-		break;
-	case __HYPERVISOR_event_channel_op:
-		rc = xencomm_privcmd_event_channel_op(hypercall);
-		break;
-	case __HYPERVISOR_hvm_op:
-		rc = xencomm_privcmd_hvm_op(hypercall);
-		break;
-	case __HYPERVISOR_sched_op:
-		rc = xencomm_privcmd_sched_op(hypercall);
-		break;
-	default:
-		printk("%s: unknown hcall (%ld)\n", __func__, hypercall->op);
-		rc = -ENOSYS;
-		break;
-	}
-
-	return rc;
-}
-
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c b/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c
deleted file mode 100644
index 5962e73aa6..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
- * Alex Williamson <alex.williamson@hp.com>
- *
- * Basic DMA mapping services for Xen guests.
- * Based on arch/i386/kernel/pci-dma-xen.c.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <asm/scatterlist.h>
-
-/* Log an alert and BUG() when a DMA sanity check (test) is true. */
-#define IOMMU_BUG_ON(test)					\
-do {								\
-	if (likely(!(test)))					\
-		break;						\
-	printk(KERN_ALERT "Fatal DMA error!\n");		\
-	BUG();							\
-} while (0)
-
-
-/*
- * Report whether addr has bits set outside the device's DMA mask.
- * A missing device, or a device without a mask, is treated as 32-bit.
- *
- * This should be broken out of swiotlb and put in a common place
- * when merged with upstream Linux.
- */
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
-{
-	dma_addr_t limit;
-
-	if (hwdev && hwdev->dma_mask)
-		limit = *hwdev->dma_mask;
-	else
-		limit = 0xffffffff;
-
-	return (addr & ~limit) != 0;
-}
-
-/*
- * Fill in the bus address and DMA length of each of the nents entries
- * of sg.  An entry whose bus address falls outside the device mask is
- * fatal (IOMMU_BUG_ON).  Returns nents.
- */
-int
-xen_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-	   int direction)
-{
-	struct scatterlist *ent = sg;
-	int left;
-
-	for (left = nents; left > 0; left--, ent++) {
-		ent->dma_address = page_to_bus(ent->page) + ent->offset;
-		ent->dma_length = ent->length;
-
-		IOMMU_BUG_ON(address_needs_mapping(hwdev, ent->dma_address));
-	}
-
-	return nents;
-}
-EXPORT_SYMBOL(xen_map_sg);
-
-void
-xen_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
- int direction)
-{ /* Intentionally empty: xen_map_sg() only fills in fields of the
-   * scatterlist entries, so there is nothing to release here. */
-}
-EXPORT_SYMBOL(xen_unmap_sg);
-
-int
-xen_dma_mapping_error(dma_addr_t dma_addr)
-{
- return 0; /* never reports an error: dma_addr is ignored, and the map routines in this file BUG() on a bad address instead */
-}
-EXPORT_SYMBOL(xen_dma_mapping_error);
-
-int
-xen_dma_supported(struct device *dev, u64 mask)
-{
- return 1; /* every device/mask combination is accepted; neither argument is examined */
-}
-EXPORT_SYMBOL(xen_dma_supported);
-
-/*
- * Allocate a zeroed buffer of at least size bytes for coherent DMA.
- *
- * The pages come from __get_free_pages() and are then handed to
- * xen_create_contiguous_region() together with the device's coherent DMA
- * mask.  On success the bus address is stored in *dma_handle and the
- * kernel virtual address is returned; NULL is returned on failure.
- */
-void *
-xen_alloc_coherent(struct device *dev, size_t size,
-		   dma_addr_t *dma_handle, gfp_t gfp)
-{
-	unsigned int order = get_order(size);
-	unsigned long pages = __get_free_pages(gfp, order);
-	void *cpu_addr;
-
-	if (pages == 0)
-		return NULL;
-
-	if (xen_create_contiguous_region(pages, order,
-					 dev->coherent_dma_mask) != 0) {
-		free_pages(pages, order);
-		return NULL;
-	}
-
-	cpu_addr = (void *)pages;
-	memset(cpu_addr, 0, size);
-	*dma_handle = virt_to_bus(cpu_addr);
-
-	return cpu_addr;
-}
-EXPORT_SYMBOL(xen_alloc_coherent);
-
-/*
- * Release a buffer obtained from xen_alloc_coherent(): undo the
- * contiguous region, then return the pages to the page allocator.
- * size must be the size that was originally requested.
- */
-void
-xen_free_coherent(struct device *dev, size_t size,
-		  void *vaddr, dma_addr_t dma_handle)
-{
-	unsigned long pages = (unsigned long)vaddr;
-	unsigned int order = get_order(size);
-
-	xen_destroy_contiguous_region(pages, order);
-	free_pages(pages, order);
-}
-EXPORT_SYMBOL(xen_free_coherent);
-
-/*
- * Return the bus address of the buffer [ptr, ptr + size).
- *
- * No bouncing is done: a buffer that straddles a page boundary, or whose
- * bus address lies outside the device mask, is fatal (IOMMU_BUG_ON).
- */
-dma_addr_t
-xen_map_single(struct device *dev, void *ptr, size_t size,
-	       int direction)
-{
-	dma_addr_t bus;
-
-	IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
-
-	bus = virt_to_bus(ptr);
-	IOMMU_BUG_ON(address_needs_mapping(dev, bus));
-
-	return bus;
-}
-EXPORT_SYMBOL(xen_map_single);
-
-void
-xen_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- int direction)
-{ /* Intentionally empty: xen_map_single() only computes a bus address,
-   * so there is nothing to tear down here. */
-}
-EXPORT_SYMBOL(xen_unmap_single);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c b/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c
deleted file mode 100644
index 367b6b32de..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <xen/interface/xen.h>
-#include <asm/page.h>
-
-#ifdef HAVE_XEN_PLATFORM_COMPAT_H
-#include <xen/platform-compat.h>
-#endif
-
-#include <asm/xen/xencomm.h>
-
-static int xencomm_debug = 0;
-
-static unsigned long kernel_start_pa;
-
-void
-xencomm_init (void)
-{ /* Record the virtual-minus-physical offset of the kernel image.
-   * Despite its name, kernel_start_pa is not a physical address: it is
-   * the value xencomm_vaddr_to_paddr() subtracts from a kernel-image
-   * virtual address to obtain the physical one. */
- kernel_start_pa = KERNEL_START - ia64_tpa(KERNEL_START);
-}
-
-/*
- * Translate a virtual address to a physical address.
- *
- * Returns 0 when vaddr is 0 and ~0UL when no translation can be found.
- *
- * Three kinds of address are handled:
- *  - ia64 region 5: the kernel image itself is translated with the offset
- *    recorded by xencomm_init(); other region-5 addresses are resolved by
- *    walking the kernel page tables;
- *  - addresses above TASK_SIZE: translated with __pa();
- *  - user addresses: resolved through the current mm (not available when
- *    built with CONFIG_VMX_GUEST, where ~0UL is returned).
- */
-unsigned long
-xencomm_vaddr_to_paddr(unsigned long vaddr)
-{
-#ifndef CONFIG_VMX_GUEST
-	struct page *page;
-	struct vm_area_struct *vma;
-#endif
-
-	if (vaddr == 0)
-		return 0;
-
-#ifdef __ia64__
-	if (REGION_NUMBER(vaddr) == 5) {
-		pgd_t *pgd;
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *ptep;
-
-		/* On ia64, TASK_SIZE refers to current.  It is not initialized
-		   during boot.
-		   Furthermore the kernel is relocatable and __pa() doesn't
-		   work on addresses.  */
-		if (vaddr >= KERNEL_START
-		    && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE)) {
-			return vaddr - kernel_start_pa;
-		}
-
-		/* In kernel area -- virtually mapped.  */
-		pgd = pgd_offset_k(vaddr);
-		if (pgd_none(*pgd) || pgd_bad(*pgd))
-			return ~0UL;
-
-		pud = pud_offset(pgd, vaddr);
-		if (pud_none(*pud) || pud_bad(*pud))
-			return ~0UL;
-
-		pmd = pmd_offset(pud, vaddr);
-		if (pmd_none(*pmd) || pmd_bad(*pmd))
-			return ~0UL;
-
-		/*
-		 * A non-NULL pte pointer does not mean the page is mapped:
-		 * reject entries that are not present rather than building
-		 * a physical address out of their contents.
-		 */
-		ptep = pte_offset_kernel(pmd, vaddr);
-		if (!ptep || !pte_present(*ptep))
-			return ~0UL;
-
-		return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
-	}
-#endif
-
-	if (vaddr > TASK_SIZE) {
-		/* kernel address */
-		return __pa(vaddr);
-	}
-
-
-#ifdef CONFIG_VMX_GUEST
-	/* No privcmd within vmx guest.  */
-	return ~0UL;
-#else
-	/* XXX double-check (lack of) locking */
-	vma = find_extend_vma(current->mm, vaddr);
-	if (!vma)
-		return ~0UL;
-
-	/* We assume the page is modified.  */
-	page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
-	if (!page)
-		return ~0UL;
-
-	return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
-#endif
-}
-
-/*
- * Fill desc with the physical addresses that back buffer[0..bytes).
- *
- * One address is recorded per page touched, up to desc->nr_addrs entries;
- * unused entries are set to XENCOMM_INVALID and the magic is written last.
- *
- * Returns 0 on success, -EINVAL when an address cannot be translated and
- * -ENOSPC when the descriptor is too small for the buffer.
- */
-static int
-xencomm_init_desc(struct xencomm_desc *desc, void *buffer, unsigned long bytes)
-{
-	unsigned long done;
-	int slot;
-
-	BUG_ON((buffer == NULL) && (bytes > 0));
-
-	/* a NULL buffer is described by an empty address list */
-	if (buffer == NULL)
-		desc->nr_addrs = 0;
-
-	/* record the physical pages used */
-	for (done = 0, slot = 0;
-	     (done < bytes) && (slot < desc->nr_addrs);
-	     slot++) {
-		unsigned long va = (unsigned long)buffer + done;
-		unsigned long pa;
-		int in_page;
-		int step;
-
-		in_page = va % PAGE_SIZE; /* handle partial pages */
-		step = min(PAGE_SIZE - in_page, bytes - done);
-
-		pa = xencomm_vaddr_to_paddr(va);
-		if (pa == ~0UL) {
-			printk("%s: couldn't translate vaddr %lx\n",
-			       __func__, va);
-			return -EINVAL;
-		}
-
-		desc->address[slot] = pa;
-		done += step;
-	}
-
-	if (done < bytes) {
-		printk("%s: could only translate %ld of %ld bytes\n",
-		       __func__, done, bytes);
-		return -ENOSPC;
-	}
-
-	/* mark remaining addresses invalid (just for safety) */
-	for (; slot < desc->nr_addrs; slot++)
-		desc->address[slot] = XENCOMM_INVALID;
-
-	desc->magic = XENCOMM_MAGIC;
-
-	return 0;
-}
-
-/*
- * Allocate one page for a xencomm descriptor and set nr_addrs to the
- * number of address slots that fit in the page after the header.
- *
- * Returns NULL when the page allocation fails.  The caller,
- * xencomm_create(), turns that into -ENOMEM; an allocation failure here
- * is recoverable and must not bring the whole system down.
- */
-static struct xencomm_desc *
-xencomm_alloc(gfp_t gfp_mask)
-{
-	struct xencomm_desc *desc;
-
-	desc = (struct xencomm_desc *)__get_free_page(gfp_mask);
-	if (desc == NULL)
-		return NULL;
-
-	desc->nr_addrs = (PAGE_SIZE - sizeof(struct xencomm_desc)) /
-			 sizeof(*desc->address);
-
-	return desc;
-}
-
-/*
- * Free the descriptor page behind a handle produced by xencomm_create().
- * The handle holds the physical address of that page; a NULL handle is
- * ignored.
- */
-void
-xencomm_free(struct xencomm_handle *desc)
-{
-	if (!desc)
-		return;
-
-	free_page((unsigned long)__va(desc));
-}
-
-/*
- * Build a xencomm descriptor for buffer[0..bytes) and return, through
- * *ret, a handle holding the descriptor's physical address.
- *
- * A NULL buffer or a zero length yields a NULL handle and success.
- * Returns 0 on success, -ENOMEM when no descriptor page is available,
- * or the error reported by xencomm_init_desc().
- */
-int
-xencomm_create(void *buffer, unsigned long bytes,
-	       struct xencomm_handle **ret, gfp_t gfp_mask)
-{
-	struct xencomm_desc *page;
-	struct xencomm_handle *h;
-	int err;
-
-	if (xencomm_debug)
-		printk("%s: %p[%ld]\n", __func__, buffer, bytes);
-
-	/* nothing to describe */
-	if (!buffer || !bytes) {
-		*ret = (struct xencomm_handle *)NULL;
-		return 0;
-	}
-
-	page = xencomm_alloc(gfp_mask);
-	if (page == NULL) {
-		printk("%s failure\n", "xencomm_alloc");
-		return -ENOMEM;
-	}
-	h = (struct xencomm_handle *)__pa(page);
-
-	err = xencomm_init_desc(page, buffer, bytes);
-	if (err != 0) {
-		printk("%s failure: %d\n", "xencomm_init_desc", err);
-		xencomm_free(h);
-		return err;
-	}
-
-	*ret = h;
-	return 0;
-}
-
-/* "mini" routines, for stack-based communications: */
-
-/*
- * Hand out one of the caller-provided mini descriptors.
- *
- * area is an array of which *nbr_area entries are still unused; entries
- * are consumed from the end and *nbr_area is decremented for each entry
- * tried.  An entry that crosses a page boundary is skipped.
- *
- * Returns the chosen entry, or NULL once no entry is left.
- */
-static void *
-xencomm_alloc_mini(struct xencomm_mini *area, int *nbr_area)
-{
-	unsigned long base;
-	unsigned int pageoffset;
-
-	/*
-	 * Loop only while an entry remains.  Testing ">= 0" before the
-	 * decrement would let the index reach -1 and hand out area[-1].
-	 */
-	while (*nbr_area > 0) {
-		/* Allocate an area.  */
-		(*nbr_area)--;
-
-		base = (unsigned long)(area + *nbr_area);
-		pageoffset = base % PAGE_SIZE;
-
-		/* If the area does not cross a page, use it.  */
-		if ((PAGE_SIZE - pageoffset) >= sizeof(struct xencomm_mini))
-			return &area[*nbr_area];
-	}
-	/* No more area.  */
-	return NULL;
-}
-
-/*
- * Build a xencomm descriptor for buffer[0..bytes) inside one of the
- * caller-provided mini areas and return its physical address through *ret.
- *
- * Returns 0 on success, -ENOMEM when no mini area is left, -EINVAL when
- * the descriptor's own address cannot be translated, or the error
- * reported by xencomm_init_desc().
- */
-int
-xencomm_create_mini(struct xencomm_mini *area, int *nbr_area,
-		    void *buffer, unsigned long bytes,
-		    struct xencomm_handle **ret)
-{
-	struct xencomm_desc *mini;
-	unsigned long pa;
-	int err;
-
-	mini = xencomm_alloc_mini(area, nbr_area);
-	if (mini == NULL)
-		return -ENOMEM;
-	mini->nr_addrs = XENCOMM_MINI_ADDRS;
-
-	err = xencomm_init_desc(mini, buffer, bytes);
-	if (err != 0)
-		return err;
-
-	pa = xencomm_vaddr_to_paddr((unsigned long)mini);
-	if (pa == ~0UL)
-		return -EINVAL;
-
-	*ret = (struct xencomm_handle*)pa;
-	return 0;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
deleted file mode 100644
index b9394ab050..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
+++ /dev/null
@@ -1,931 +0,0 @@
-/*
- * ia64/xen/entry.S
- *
- * Alternate kernel routines for Xen. Heavily leveraged from
- * ia64/kernel/entry.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/kregs.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
-#include <asm/percpu.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-
-#ifdef CONFIG_XEN
-#include "xenminstate.h"
-#else
-#include "minstate.h"
-#endif
-
-/*
- * prev_task <- ia64_switch_to(struct task_struct *next)
- * With Ingo's new scheduler, interrupts are disabled when this routine gets
- * called. The code starting at .map relies on this. The rest of the code
- * doesn't care about the interrupt masking status.
- *
- * When built with CONFIG_XEN the entry point is xen_switch_to. It first
- * loads the running_on_xen flag and, if that is zero, branches to the
- * native __ia64_switch_to. Otherwise it follows the same steps as the
- * native code, with the privileged operations replaced as follows:
- *   - kernel registers are updated with XEN_HYPER_SET_KR after loading the
- *     register index into r8 and the new value into r9;
- *   - psr.ic is cleared and set by storing 0 or 1 to XSI_PSR_IC;
- *   - cr.itir and cr.ifa are set by storing to XSI_ITIR and XSI_IFA.
- */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_switch_to)
- .prologue
- alloc r16=ar.pfs,1,0,0,0
- movl r22=running_on_xen;;
- ld4 r22=[r22];;
- cmp.eq p7,p0=r22,r0
-(p7) br.cond.sptk.many __ia64_switch_to;;
-#else
-GLOBAL_ENTRY(ia64_switch_to)
- .prologue
- alloc r16=ar.pfs,1,0,0,0
-#endif
- DO_SAVE_SWITCH_STACK
- .body
-
- adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
- movl r25=init_task
- mov r27=IA64_KR(CURRENT_STACK)
- adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
- dep r20=0,in0,61,3 // physical address of "next"
- ;;
- st8 [r22]=sp // save kernel stack pointer of old task
- shr.u r26=r20,IA64_GRANULE_SHIFT
- cmp.eq p7,p6=r25,in0
- ;;
- /*
- * If we've already mapped this task's page, we can skip doing it again.
- */
-(p6) cmp.eq p7,p6=r26,r27
-(p6) br.cond.dpnt .map
- ;;
-.done:
- ld8 sp=[r21] // load kernel stack pointer of new task
-#ifdef CONFIG_XEN
- // update "current" application register
- mov r8=IA64_KR_CURRENT
- mov r9=in0;;
- XEN_HYPER_SET_KR
-#else
- mov IA64_KR(CURRENT)=in0 // update "current" application register
-#endif
- mov r8=r13 // return pointer to previously running task
- mov r13=in0 // set "current" pointer
- ;;
- DO_LOAD_SWITCH_STACK
-
-#ifdef CONFIG_SMP
- sync.i // ensure "fc"s done by this CPU are visible on other CPUs
-#endif
- br.ret.sptk.many rp // boogie on out in new context
-
-.map:
-#ifdef CONFIG_XEN
- movl r25=XSI_PSR_IC // clear psr.ic
- ;;
- st4 [r25]=r0
- ;;
-#else
- rsm psr.ic // interrupts (psr.i) are already disabled here
-#endif
- movl r25=PAGE_KERNEL
- ;;
- srlz.d
- or r23=r25,r20 // construct PA | page properties
- mov r25=IA64_GRANULE_SHIFT<<2
- ;;
-#ifdef CONFIG_XEN
- movl r8=XSI_ITIR
- ;;
- st8 [r8]=r25
- ;;
- movl r8=XSI_IFA
- ;;
- st8 [r8]=in0 // VA of next task...
- ;;
- mov r25=IA64_TR_CURRENT_STACK
- // remember last page we mapped...
- mov r8=IA64_KR_CURRENT_STACK
- mov r9=r26;;
- XEN_HYPER_SET_KR;;
-#else
- mov cr.itir=r25
- mov cr.ifa=in0 // VA of next task...
- ;;
- mov r25=IA64_TR_CURRENT_STACK
- mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
-#endif
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
-#ifdef CONFIG_XEN
- ;;
- srlz.d
- mov r9=1
- movl r8=XSI_PSR_IC
- ;;
- st4 [r8]=r9
- ;;
-#else
- ssm psr.ic // reenable the psr.ic bit
- ;;
- srlz.d
-#endif
- br.cond.sptk .done
-#ifdef CONFIG_XEN
-END(xen_switch_to)
-#else
-END(ia64_switch_to)
-#endif
-
- /*
- * Invoke a system call, but do some tracing before and after the call.
- * We MUST preserve the current register frame throughout this routine
- * because some system calls (such as ia64_execve) directly
- * manipulate ar.pfs.
- *
- * When built with CONFIG_XEN the entry point is xen_trace_syscall; it
- * loads the running_on_xen flag and branches to the native
- * __ia64_trace_syscall if the flag is zero. The remainder of the routine
- * is shared by both builds.
- */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_trace_syscall)
- PT_REGS_UNWIND_INFO(0)
- movl r16=running_on_xen;;
- ld4 r16=[r16];;
- cmp.eq p7,p0=r16,r0
-(p7) br.cond.sptk.many __ia64_trace_syscall;;
-#else
-GLOBAL_ENTRY(ia64_trace_syscall)
- PT_REGS_UNWIND_INFO(0)
-#endif
- /*
- * We need to preserve the scratch registers f6-f11 in case the system
- * call is sigreturn.
- */
- adds r16=PT(F6)+16,sp
- adds r17=PT(F7)+16,sp
- ;;
- stf.spill [r16]=f6,32
- stf.spill [r17]=f7,32
- ;;
- stf.spill [r16]=f8,32
- stf.spill [r17]=f9,32
- ;;
- stf.spill [r16]=f10
- stf.spill [r17]=f11
- br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
- adds r16=PT(F6)+16,sp
- adds r17=PT(F7)+16,sp
- ;;
- ldf.fill f6=[r16],32
- ldf.fill f7=[r17],32
- ;;
- ldf.fill f8=[r16],32
- ldf.fill f9=[r17],32
- ;;
- ldf.fill f10=[r16]
- ldf.fill f11=[r17]
- // the syscall number may have changed, so re-load it and re-calculate the
- // syscall entry-point:
- adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #)
- ;;
- ld8 r15=[r15]
- mov r3=NR_syscalls - 1
- ;;
- adds r15=-1024,r15
- movl r16=sys_call_table
- ;;
- shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
- cmp.leu p6,p7=r15,r3
- ;;
-(p6) ld8 r20=[r20] // load address of syscall entry point
-(p7) movl r20=sys_ni_syscall
- ;;
- mov b6=r20
- br.call.sptk.many rp=b6 // do the syscall
-.strace_check_retval:
- cmp.lt p6,p0=r8,r0 // syscall failed?
- adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
- adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
- mov r10=0
-(p6) br.cond.sptk strace_error // syscall failed ->
- ;; // avoid RAW on r10
-.strace_save_retval:
-.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8
-.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10
- br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-.ret3:
-(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
- br.cond.sptk .work_pending_syscall_end
-
-strace_error:
- ld8 r3=[r2] // load pt_regs.r8
- sub r9=0,r8 // negate return value to get errno value
- ;;
- cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0?
- adds r3=16,r2 // r3=&pt_regs.r10
- ;;
-(p6) mov r10=-1
-(p6) mov r8=r9
- br.cond.sptk .strace_save_retval
-#ifdef CONFIG_XEN
-END(xen_trace_syscall)
-#else
-END(ia64_trace_syscall)
-#endif
-
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_ret_from_clone)
- PT_REGS_UNWIND_INFO(0)
- movl r16=running_on_xen;; // r16 = &running_on_xen
- ld4 r16=[r16];; // r16 = running_on_xen
- cmp.eq p7,p0=r16,r0
-(p7) br.cond.sptk.many __ia64_ret_from_clone;; // flag is zero: use the native routine
-#else
-GLOBAL_ENTRY(ia64_ret_from_clone)
- PT_REGS_UNWIND_INFO(0)
-#endif
-{ /*
- * Some versions of gas generate bad unwind info if the first instruction of a
- * procedure doesn't go into the first slot of a bundle. This is a workaround.
- */
- nop.m 0
- nop.i 0
- /*
- * We need to call schedule_tail() to complete the scheduling process.
- * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the
- * address of the previously executing task.
- */
- br.call.sptk.many rp=ia64_invoke_schedule_tail
-}
-.ret8:
- adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- ld4 r2=[r2]
- ;;
- mov r8=0
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2
- ;;
- cmp.ne p6,p0=r2,r0
-(p6) br.cond.spnt .strace_check_retval
- ;; // added stop bits to prevent r8 dependency
-#ifdef CONFIG_XEN
- br.cond.sptk ia64_ret_from_syscall // explicit branch under CONFIG_XEN; the #else build has no branch here
-END(xen_ret_from_clone)
-#else
-END(ia64_ret_from_clone)
-#endif
-/*
- * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
- * need to switch to bank 0 and doesn't restore the scratch registers.
- * To avoid leaking kernel bits, the scratch registers are set to
- * the following known-to-be-safe values:
- *
- * r1: restored (global pointer)
- * r2: cleared
- * r3: 1 (when returning to user-level)
- * r8-r11: restored (syscall return value(s))
- * r12: restored (user-level stack pointer)
- * r13: restored (user-level thread pointer)
- * r14: set to __kernel_syscall_via_epc
- * r15: restored (syscall #)
- * r16-r17: cleared
- * r18: user-level b6
- * r19: cleared
- * r20: user-level ar.fpsr
- * r21: user-level b0
- * r22: cleared
- * r23: user-level ar.bspstore
- * r24: user-level ar.rnat
- * r25: user-level ar.unat
- * r26: user-level ar.pfs
- * r27: user-level ar.rsc
- * r28: user-level ip
- * r29: user-level psr
- * r30: user-level cfm
- * r31: user-level pr
- * f6-f11: cleared
- * pr: restored (user-level pr)
- * b0: restored (user-level rp)
- * b6: restored
- * b7: set to __kernel_syscall_via_epc
- * ar.unat: restored (user-level ar.unat)
- * ar.pfs: restored (user-level ar.pfs)
- * ar.rsc: restored (user-level ar.rsc)
- * ar.rnat: restored (user-level ar.rnat)
- * ar.bspstore: restored (user-level ar.bspstore)
- * ar.fpsr: restored (user-level ar.fpsr)
- * ar.ccv: cleared
- * ar.csd: cleared
- * ar.ssd: cleared
- *
- * When built with CONFIG_XEN the entry point is xen_leave_syscall; it loads
- * the running_on_xen flag and branches to the native __ia64_leave_syscall
- * if the flag is zero. In the Xen path the privileged instructions of the
- * native code are replaced as follows:
- *   - "rsm psr.i" becomes a one-byte store of 1 through the pointer read
- *     from XSI_PSR_I_ADDR;
- *   - "rsm psr.i | psr.ic" additionally stores 0 to XSI_PSR_IC;
- *   - "mov r22=psr" becomes XEN_HYPER_GET_PSR, with r8 saved and restored
- *     around it;
- *   - "cover" becomes XEN_HYPER_COVER.
- *
- * NOTE(review): the CONFIG_PREEMPT variant below still uses a raw
- * "rsm psr.i" and has no CONFIG_XEN alternative -- confirm whether
- * CONFIG_PREEMPT is meant to be supported together with CONFIG_XEN.
- */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_leave_syscall)
- PT_REGS_UNWIND_INFO(0)
- movl r22=running_on_xen;;
- ld4 r22=[r22];;
- cmp.eq p7,p0=r22,r0
-(p7) br.cond.sptk.many __ia64_leave_syscall;;
-#else
-ENTRY(ia64_leave_syscall)
- PT_REGS_UNWIND_INFO(0)
-#endif
- /*
- * work.need_resched etc. mustn't get changed by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on.
- *
- * p6 controls whether current_thread_info()->flags needs to be checked for
- * extra work. We always check for extra work when returning to user-level.
- * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
- * is 0. After extra work processing has been completed, execution
- * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
- * needs to be redone.
- */
-#ifdef CONFIG_PREEMPT
- rsm psr.i // disable interrupts
- cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
- .pred.rel.mutex pUStk,pKStk
-(pKStk) ld4 r21=[r20] // r21 <- preempt_count
-(pUStk) mov r21=0 // r21 <- 0
- ;;
- cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
-#else /* !CONFIG_PREEMPT */
-#ifdef CONFIG_XEN
- movl r2=XSI_PSR_I_ADDR
- mov r18=1
- ;;
- ld8 r2=[r2]
- ;;
-(pUStk) st1 [r2]=r18
-#else
-(pUStk) rsm psr.i
-#endif
- cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
-(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
-#endif
-.work_processed_syscall:
- adds r2=PT(LOADRS)+16,r12
- adds r3=PT(AR_BSPSTORE)+16,r12
- adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-(p6) ld4 r31=[r18] // load current_thread_info()->flags
- ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
- nop.i 0
- ;;
- mov r16=ar.bsp // M2 get existing backing store pointer
- ld8 r18=[r2],PT(R9)-PT(B6) // load b6
-(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
- ;;
- ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
-(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
-(p6) br.cond.spnt .work_pending_syscall
- ;;
- // start restoring the state saved on the kernel stack (struct pt_regs):
- ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
- ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE!
- ;;
- invala // M0|1 invalidate ALAT
-#ifdef CONFIG_XEN
- movl r28=XSI_PSR_I_ADDR
- movl r29=XSI_PSR_IC
- ;;
- ld8 r28=[r28]
- mov r30=1
- ;;
- st1 [r28]=r30
- st4 [r29]=r0 // note: clears both vpsr.i and vpsr.ic!
- ;;
-#else
- rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection
-#endif
- cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs
-
- ld8 r29=[r2],16 // M0|1 load cr.ipsr
- ld8 r28=[r3],16 // M0|1 load cr.iip
- mov r22=r0 // A clear r22
- ;;
- ld8 r30=[r2],16 // M0|1 load cr.ifs
- ld8 r25=[r3],16 // M0|1 load ar.unat
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
- ;;
- ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
-#ifdef CONFIG_XEN
-(pKStk) mov r21=r8 // save r8 (restored two bundles below)
-(pKStk) XEN_HYPER_GET_PSR
- ;;
-(pKStk) mov r22=r8 // r22 <- value left in r8 (native path: mov r22=psr)
-(pKStk) mov r8=r21 // put the saved r8 back
- ;;
-#else
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
-#endif
- nop 0
- ;;
- ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
- ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc
- mov f6=f0 // F clear f6
- ;;
- ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage)
- ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates
- mov f7=f0 // F clear f7
- ;;
- ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr
- ld8.fill r1=[r3],16 // M0|1 load r1
-(pUStk) mov r17=1 // A
- ;;
-(pUStk) st1 [r14]=r17 // M2|3
- ld8.fill r13=[r3],16 // M0|1
- mov f8=f0 // F clear f8
- ;;
- ld8.fill r12=[r2] // M0|1 restore r12 (sp)
- ld8.fill r15=[r3] // M0|1 restore r15
- mov b6=r18 // I0 restore b6
-
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
- mov f9=f0 // F clear f9
-(pKStk) br.cond.dpnt.many skip_rbs_switch // B
-
- srlz.d // M0 ensure interruption collection is off (for cover)
- shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
-#ifdef CONFIG_XEN
- XEN_HYPER_COVER;
-#else
- cover // B add current frame into dirty partition & set cr.ifs
-#endif
- ;;
-(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
- mov r19=ar.bsp // M2 get new backing store pointer
- mov f10=f0 // F clear f10
-
- nop.m 0
- movl r14=__kernel_syscall_via_epc // X
- ;;
- mov.m ar.csd=r0 // M2 clear ar.csd
- mov.m ar.ccv=r0 // M2 clear ar.ccv
- mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
-
- mov.m ar.ssd=r0 // M2 clear ar.ssd
- mov f11=f0 // F clear f11
- br.cond.sptk.many rbs_switch // B
-#ifdef CONFIG_XEN
-END(xen_leave_syscall)
-#else
-END(ia64_leave_syscall)
-#endif
-
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_leave_kernel)
- PT_REGS_UNWIND_INFO(0)
- movl r22=running_on_xen;; // r22 <- &running_on_xen
- ld4 r22=[r22];; // r22 <- running_on_xen
- cmp.eq p7,p0=r22,r0 // p7 <- (running_on_xen == 0)
-(p7) br.cond.sptk.many __ia64_leave_kernel;; // not running on Xen: branch to __ia64_leave_kernel instead
-#else
-GLOBAL_ENTRY(ia64_leave_kernel)
- PT_REGS_UNWIND_INFO(0)
-#endif
- /*
- * work.need_resched etc. mustn't get changed by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on.
- *
- * p6 controls whether current_thread_info()->flags needs to be checked for
- * extra work. We always check for extra work when returning to user-level.
- * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
- * is 0. After extra work processing has been completed, execution
- * resumes at .work_processed_kernel (or, when pLvSys is set, at
- * .work_processed_syscall) with p6 set to 1 if the extra-work-check
- * needs to be redone.
- */
-#ifdef CONFIG_PREEMPT
- rsm psr.i // disable interrupts
- cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
- .pred.rel.mutex pUStk,pKStk
-(pKStk) ld4 r21=[r20] // r21 <- preempt_count
-(pUStk) mov r21=0 // r21 <- 0
- ;;
- cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
-#else
-#ifdef CONFIG_XEN
-(pUStk) movl r17=XSI_PSR_I_ADDR // Xen counterpart of "rsm psr.i" (see #else branch)
-(pUStk) mov r31=1
- ;;
-(pUStk) ld8 r17=[r17] // r17 <- pointer stored at XSI_PSR_I_ADDR
- ;;
-(pUStk) st1 [r17]=r31 // store 1 through it: virtual interrupts masked
- ;;
-#else
-(pUStk) rsm psr.i
-#endif
- cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
-(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
-#endif
-.work_processed_kernel:
- adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-(p6) ld4 r31=[r17] // load current_thread_info()->flags
- adds r21=PT(PR)+16,r12
- ;;
-
- lfetch [r21],PT(CR_IPSR)-PT(PR)
- adds r2=PT(B6)+16,r12
- adds r3=PT(R16)+16,r12
- ;;
- lfetch [r21]
- ld8 r28=[r2],8 // load b6
- adds r29=PT(R24)+16,r12
-
- ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
- adds r30=PT(AR_CCV)+16,r12
-(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
- ;;
- ld8.fill r24=[r29]
- ld8 r15=[r30] // load ar.ccv
-(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending?
- ;;
- ld8 r29=[r2],16 // load b7
- ld8 r30=[r3],16 // load ar.csd
-(p6) br.cond.spnt .work_pending
- ;;
- ld8 r31=[r2],16 // load ar.ssd
- ld8.fill r8=[r3],16
- ;;
- ld8.fill r9=[r2],16
- ld8.fill r10=[r3],PT(R17)-PT(R10)
- ;;
- ld8.fill r11=[r2],PT(R18)-PT(R11)
- ld8.fill r17=[r3],16
- ;;
- ld8.fill r18=[r2],16
- ld8.fill r19=[r3],16
- ;;
- ld8.fill r20=[r2],16
- ld8.fill r21=[r3],16
- mov ar.csd=r30
- mov ar.ssd=r31
- ;;
-#ifdef CONFIG_XEN
- movl r23=XSI_PSR_I_ADDR // Xen counterpart of "rsm psr.i | psr.ic" (see #else branch)
- movl r22=XSI_PSR_IC
- ;;
- ld8 r23=[r23] // r23 <- pointer stored at XSI_PSR_I_ADDR
- mov r25=1
- ;;
- st1 [r23]=r25 // store 1 through it: virtual interrupts masked
- st4 [r22]=r0 // note: clears both vpsr.i and vpsr.ic!
- ;;
-#else
- rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
-#endif
- invala // invalidate ALAT
- ;;
- ld8.fill r22=[r2],24
- ld8.fill r23=[r3],24
- mov b6=r28
- ;;
- ld8.fill r25=[r2],16
- ld8.fill r26=[r3],16
- mov b7=r29
- ;;
- ld8.fill r27=[r2],16
- ld8.fill r28=[r3],16
- ;;
- ld8.fill r29=[r2],16
- ld8.fill r30=[r3],24
- ;;
- ld8.fill r31=[r2],PT(F9)-PT(R31)
- adds r3=PT(F10)-PT(F6),r3
- ;;
- ldf.fill f9=[r2],PT(F6)-PT(F9)
- ldf.fill f10=[r3],PT(F8)-PT(F10)
- ;;
- ldf.fill f6=[r2],PT(F7)-PT(F6)
- ;;
- ldf.fill f7=[r2],PT(F11)-PT(F7)
- ldf.fill f8=[r3],32
- ;;
- srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned)
- mov ar.ccv=r15
- ;;
- ldf.fill f11=[r2]
-#ifdef CONFIG_XEN
- ;;
- // r16-r31 all now hold bank1 values
- mov r15=ar.unat // keep ar.unat in r15 across the spills (restored below)
- movl r2=XSI_BANK1_R16 // r2/r3 walk the even/odd 8-byte slots starting at XSI_BANK1_R16
- movl r3=XSI_BANK1_R16+8
- ;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,16
- ;;
- mov r3=ar.unat // r3 <- ar.unat as left by the spills above
- movl r2=XSI_B1NAT
- ;;
- st8 [r2]=r3 // XSI_B1NAT <- that ar.unat value
- mov ar.unat=r15 // put back the ar.unat saved in r15
- movl r2=XSI_BANKNUM;;
- st4 [r2]=r0; // XSI_BANKNUM <- 0 (this path replaces "bsw.0", see #else branch)
-#else
- bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
-#endif
- ;;
-(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
- adds r16=PT(CR_IPSR)+16,r12
- adds r17=PT(CR_IIP)+16,r12
-
-#ifdef CONFIG_XEN
-(pKStk) mov r29=r8 // park r8 in r29 around XEN_HYPER_GET_PSR
-(pKStk) XEN_HYPER_GET_PSR
- ;;
-(pKStk) mov r22=r8 // r22 <- value left in r8 (takes the place of "mov r22=psr")
-(pKStk) mov r8=r29 // put r8 back
- ;;
-#else
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
-#endif
- nop.i 0
- nop.i 0
- ;;
- ld8 r29=[r16],16 // load cr.ipsr
- ld8 r28=[r17],16 // load cr.iip
- ;;
- ld8 r30=[r16],16 // load cr.ifs
- ld8 r25=[r17],16 // load ar.unat
- ;;
- ld8 r26=[r16],16 // load ar.pfs
- ld8 r27=[r17],16 // load ar.rsc
- cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
- ;;
- ld8 r24=[r16],16 // load ar.rnat (may be garbage)
- ld8 r23=[r17],16 // load ar.bspstore (may be garbage)
- ;;
- ld8 r31=[r16],16 // load predicates
- ld8 r21=[r17],16 // load b0
- ;;
- ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
- ld8.fill r1=[r17],16 // load r1
- ;;
- ld8.fill r12=[r16],16
- ld8.fill r13=[r17],16
-(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
- ;;
- ld8 r20=[r16],16 // ar.fpsr
- ld8.fill r15=[r17],16
- ;;
- ld8.fill r14=[r16],16
- ld8.fill r2=[r17]
-(pUStk) mov r17=1
- ;;
- ld8.fill r3=[r16]
-(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
- shr.u r18=r19,16 // get byte size of existing "dirty" partition
- ;;
- mov r16=ar.bsp // get existing backing store pointer
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
- ;;
- ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
-(pKStk) br.cond.dpnt skip_rbs_switch
-
- /*
- * Restore user backing store.
- *
- * NOTE: alloc, loadrs, and cover can't be predicated.
- */
-(pNonSys) br.cond.dpnt dont_preserve_current_frame
-
-#ifdef CONFIG_XEN
- XEN_HYPER_COVER;
-#else
- cover // add current frame into dirty partition and set cr.ifs
-#endif
- ;;
- mov r19=ar.bsp // get new backing store pointer
-rbs_switch:
- sub r16=r16,r18 // krbs = old bsp - size of dirty partition
- cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
- ;;
- sub r19=r19,r16 // calculate total byte size of dirty partition
- add r18=64,r18 // don't force in0-in7 into memory...
- ;;
- shl r19=r19,16 // shift size of dirty partition into loadrs position
- ;;
-dont_preserve_current_frame:
- /*
- * To prevent leaking bits between the kernel and user-space,
- * we must clear the stacked registers in the "invalid" partition here.
- * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
- * 5 registers/cycle on McKinley).
- */
-# define pRecurse p6
-# define pReturn p7
-#ifdef CONFIG_ITANIUM
-# define Nregs 10
-#else
-# define Nregs 14
-#endif
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
- sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize
- ;;
- mov ar.rsc=r19 // load ar.rsc to be used for "loadrs"
- shladd in0=loc1,3,r17
- mov in1=0
- ;;
- TEXT_ALIGN(32)
-rse_clear_invalid:
-#ifdef CONFIG_ITANIUM
- // cycle 0
- { .mii
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
-}{ .mfb
- add out1=1,in1 // increment recursion count
- nop.f 0
- nop.b 0 // can't do br.call here because of alloc (WAW on CFM)
- ;;
-}{ .mfi // cycle 1
- mov loc1=0
- nop.f 0
- mov loc2=0
-}{ .mib
- mov loc3=0
- mov loc4=0
-(pRecurse) br.call.sptk.many b0=rse_clear_invalid
-
-}{ .mfi // cycle 2
- mov loc5=0
- nop.f 0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
-}{ .mib
- mov loc6=0
- mov loc7=0
-(pReturn) br.ret.sptk.many b0
-}
-#else /* !CONFIG_ITANIUM */
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
- add out1=1,in1 // increment recursion count
- mov loc1=0
- mov loc2=0
- ;;
- mov loc3=0
- mov loc4=0
- mov loc5=0
- mov loc6=0
- mov loc7=0
-(pRecurse) br.call.dptk.few b0=rse_clear_invalid
- ;;
- mov loc8=0
- mov loc9=0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
- mov loc10=0
- mov loc11=0
-(pReturn) br.ret.dptk.many b0
-#endif /* !CONFIG_ITANIUM */
-# undef pRecurse
-# undef pReturn
- ;;
- alloc r17=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- loadrs
- ;;
-skip_rbs_switch:
- mov ar.unat=r25 // M2
-(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22
-(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise
- ;;
-(pUStk) mov ar.bspstore=r23 // M2
-(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
-(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
- ;;
-#ifdef CONFIG_XEN
- movl r25=XSI_IPSR // Xen: ipsr is stored at XSI_IPSR instead of written to cr.ipsr
- ;;
- st8[r25]=r29,XSI_IFS_OFS-XSI_IPSR_OFS // post-increment moves r25 on by (XSI_IFS_OFS - XSI_IPSR_OFS)
- ;;
-#else
- mov cr.ipsr=r29 // M2
-#endif
- mov ar.pfs=r26 // I0
-(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise
-
-#ifdef CONFIG_XEN
-(p9) st8 [r25]=r30 // Xen counterpart of "mov cr.ifs=r30" (see #else branch)
- ;;
- adds r25=XSI_IIP_OFS-XSI_IFS_OFS,r25 // move r25 on by (XSI_IIP_OFS - XSI_IFS_OFS)
- ;;
-#else
-(p9) mov cr.ifs=r30 // M2
-#endif
- mov b0=r21 // I0
-(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise
-
- mov ar.fpsr=r20 // M2
-#ifdef CONFIG_XEN
- st8 [r25]=r28 // Xen counterpart of "mov cr.iip=r28" (see #else branch)
-#else
- mov cr.iip=r28 // M2
-#endif
- nop 0
- ;;
-(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
- nop 0
-(pLvSys)mov r2=r0
-
- mov ar.rsc=r27 // M2
- mov pr=r31,-1 // I0
-#ifdef CONFIG_XEN
- ;;
- XEN_HYPER_RFI;
-#else
- rfi // B
-#endif
-
- /*
- * On entry:
- * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
- * r31 = current->thread_info->flags
- * On exit:
- * p6 = TRUE if work-pending-check needs to be redone
- */
-.work_pending_syscall:
- add r2=-8,r2
- add r3=-8,r3
- ;;
- st8 [r2]=r8
- st8 [r3]=r10
-.work_pending:
- tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
-(p6) br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
- ;;
-(pKStk) st4 [r20]=r21
- ssm psr.i // enable interrupts
-#endif
- br.call.spnt.many rp=schedule
-.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
-#ifdef CONFIG_XEN
- movl r2=XSI_PSR_I_ADDR // Xen counterpart of "rsm psr.i" (see #else branch)
- mov r20=1
- ;;
- ld8 r2=[r2] // r2 <- pointer stored at XSI_PSR_I_ADDR
- ;;
- st1 [r2]=r20 // store 1 through it: virtual interrupts masked
-#else
- rsm psr.i // disable interrupts
-#endif
- ;;
-#ifdef CONFIG_PREEMPT
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
-(pKStk) st4 [r20]=r0 // preempt_count() <- 0
-#endif
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
-.notify:
-(pUStk) br.call.spnt.many rp=notify_resume_user
-.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // don't re-check
-
-.work_pending_syscall_end:
- adds r2=PT(R8)+16,r12
- adds r3=PT(R10)+16,r12
- ;;
- ld8 r8=[r2]
- ld8 r10=[r3]
- br.cond.sptk.many .work_processed_syscall // re-check
-
-#ifdef CONFIG_XEN
-END(xen_leave_kernel)
-#else
-END(ia64_leave_kernel)
-#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c b/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c
deleted file mode 100644
index 3bc6cdbf7e..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c
+++ /dev/null
@@ -1,19 +0,0 @@
-
-extern unsigned long xen_get_cpuid(int);
-/*
- * running_on_sim - compare CPUID values against one fixed signature.
- *
- * Reads xen_get_cpuid(0) .. xen_get_cpuid(4) and returns 1 only when the
- * low byte of value 0 is 'H' and value 3 has byte 0 == 0x4, byte 1 == 0,
- * byte 2 == 0 and bits 24..26 all set; otherwise returns 0.
- *
- * NOTE(review): presumably this signature identifies the HP Ski simulator
- * (the file is named xenhpski.c) -- confirm against the simulator docs.
- */
-int
-running_on_sim(void)
-{
- int i;
- long cpuid[6]; /* only indices 0..4 are filled below; index 5 is never used */
-
- for (i = 0; i < 5; ++i)
- cpuid[i] = xen_get_cpuid(i);
- if ((cpuid[0] & 0xff) != 'H') return 0; /* low byte of value 0 */
- if ((cpuid[3] & 0xff) != 0x4) return 0; /* value 3, bits 0..7 */
- if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0; /* value 3, bits 8..15 */
- if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0; /* value 3, bits 16..23 */
- if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0; /* value 3, bits 24..26 */
- return 1;
-}
-
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
deleted file mode 100644
index a411bb3a4a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
+++ /dev/null
@@ -1,2177 +0,0 @@
-/*
- * arch/ia64/xen/xenivt.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@hp.com>
- */
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for critical
- * interruptions like TLB misses.
- *
- * For each entry, the comment is as follows:
- *
- * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
- * entry offset ----/ / / / /
- * entry number ---------/ / / /
- * size of the entry -------------/ / /
- * vector name -------------------------------------/ /
- * interruptions triggering this vector ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
-#include <asm/asmmacro.h>
-#include <asm/break.h>
-#include <asm/ia32.h>
-#include <asm/kregs.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-#include <asm/errno.h>
-
-#ifdef CONFIG_XEN
-#define ia64_ivt xen_ivt
-#endif
-
-#if 1
-# define PSR_DEFAULT_BITS psr.ac
-#else
-# define PSR_DEFAULT_BITS 0
-#endif
-
-#if 0
- /*
- * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't
- * needed for something else before enabling this...
- */
-# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
-#else
-# define DBG_FAULT(i)
-#endif
-
-#define MINSTATE_VIRT /* needed by minstate.h */
-#include "xenminstate.h"
-
-#define FAULT(n) \
- mov r31=pr; /* save predicates in r31 */ \
- mov r19=n;; /* r19 <- n, the argument given to FAULT() */ \
- br.sptk.many dispatch_to_fault_handler
-
- .section .text.ivt,"ax"
-
- .align 32768 // align on 32KB boundary
- .global ia64_ivt
-ia64_ivt:
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(vhpt_miss)
- DBG_FAULT(0)
- /*
- * The VHPT vector is invoked when the TLB entry for the virtual page table
- * is missing. This happens only as a result of a previous
- * (the "original") TLB miss, which may either be caused by an instruction
- * fetch or a data access (or non-access).
- *
- * What we do here is normal TLB miss handling for the _original_ miss,
- * followed by inserting the TLB entry for the virtual page table page
- * that the VHPT walker was attempting to access. The latter gets
- * inserted as long as page table entry above pte level have valid
- * mappings for the faulting address. The TLB entry for the original
- * miss gets inserted only if the pte entry indicates that the page is
- * present.
- *
- * do_page_fault gets invoked in the following cases:
- * - the faulting virtual address uses unimplemented address bits
- * - the faulting virtual address has no valid page table mapping
- *
- * Under CONFIG_XEN the cr.* reads are replaced by loads from the
- * XSI_* locations and the itc/rsm/rfi instructions by XEN_HYPER_*
- * macros; each #else branch shows the instruction being replaced.
- */
-#ifdef CONFIG_XEN
- movl r16=XSI_IFA
- ;;
- ld8 r16=[r16] // r16 <- value at XSI_IFA (takes the place of "mov r16=cr.ifa")
-#ifdef CONFIG_HUGETLB_PAGE
- movl r18=PAGE_SHIFT
- movl r25=XSI_ITIR
- ;;
- ld8 r25=[r25] // r25 <- value at XSI_ITIR (takes the place of "mov r25=cr.itir")
-#endif
- ;;
-#else
- mov r16=cr.ifa // get address that caused the TLB miss
-#ifdef CONFIG_HUGETLB_PAGE
- movl r18=PAGE_SHIFT
- mov r25=cr.itir
-#endif
-#endif
- ;;
-#ifdef CONFIG_XEN
- XEN_HYPER_RSM_PSR_DT;
-#else
- rsm psr.dt // use physical addressing for data
-#endif
- mov r31=pr // save the predicate registers
- mov r19=IA64_KR(PT_BASE) // get page table base address
- shl r21=r16,3 // shift bit 60 into sign bit
- shr.u r17=r16,61 // get the region number into r17
- ;;
- shr.u r22=r21,3
-#ifdef CONFIG_HUGETLB_PAGE
- extr.u r26=r25,2,6
- ;;
- cmp.ne p8,p0=r18,r26
- sub r27=r26,r18
- ;;
-(p8) dep r25=r18,r25,2,6
-(p8) shr r22=r22,r27
-#endif
- ;;
- cmp.eq p6,p7=5,r17 // is IFA pointing into region 5?
- shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit
- ;;
-(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
-
- srlz.d
- LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
-
- .pred.rel "mutex", p6, p7
-(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
-(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
- ;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4]
- cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
- shr.u r28=r22,PUD_SHIFT // shift pud index into position
-#else
- shr.u r18=r22,PMD_SHIFT // shift pmd index into position
-#endif
- ;;
- ld8 r17=[r17] // get *pgd (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL?
-#ifdef CONFIG_PGTABLE_4
- dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr)
- ;;
- shr.u r18=r22,PMD_SHIFT // shift pmd index into position
-(p7) ld8 r29=[r28] // get *pud (may be 0)
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL?
- dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr)
-#else
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr)
-#endif
- ;;
-(p7) ld8 r20=[r17] // get *pmd (may be 0)
- shr.u r19=r22,PAGE_SHIFT // shift pte index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL?
- dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr)
- ;;
-(p7) ld8 r18=[r21] // read *pte
-#ifdef CONFIG_XEN
- movl r19=XSI_ISR
- ;;
- ld8 r19=[r19] // r19 <- value at XSI_ISR (takes the place of "mov r19=cr.isr")
-#else
- mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss
-#endif
- ;;
-(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
-#ifdef CONFIG_XEN
- movl r22=XSI_IHA
- ;;
- ld8 r22=[r22] // r22 <- value at XSI_IHA (takes the place of "mov r22=cr.iha")
-#else
- mov r22=cr.iha // get the VHPT address that caused the TLB miss
-#endif
- ;; // avoid RAW on p7
-(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
- dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address
- ;;
-#ifdef CONFIG_XEN
- mov r24=r8 // park r8 in r24 while r8 carries the PTE
- mov r8=r18 // r8 <- *pte for XEN_HYPER_ITC_I / XEN_HYPER_ITC_D
- ;;
-(p10) XEN_HYPER_ITC_I
- ;;
-(p11) XEN_HYPER_ITC_D
- ;;
- mov r8=r24 // put r8 back
- ;;
-#else
-(p10) itc.i r18 // insert the instruction TLB entry
-(p11) itc.d r18 // insert the data TLB entry
-#endif
-(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
-#ifdef CONFIG_XEN
- movl r24=XSI_IFA
- ;;
- st8 [r24]=r22 // store r22 at XSI_IFA (takes the place of "mov cr.ifa=r22")
- ;;
-#else
- mov cr.ifa=r22
-#endif
-
-#ifdef CONFIG_HUGETLB_PAGE
-(p8) mov cr.itir=r25 // change to default page-size for VHPT; NOTE(review): written to cr.itir even under CONFIG_XEN although r25 was loaded from XSI_ITIR there -- confirm intended
-#endif
-
- /*
- * Now compute and insert the TLB entry for the virtual page table. We never
- * execute in a page table page so there is no need to set the exception deferral
- * bit.
- */
- adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
- ;;
-#ifdef CONFIG_XEN
-(p7) mov r25=r8 // park r8 in r25 while r8 carries the PTE
-(p7) mov r8=r24 // r8 <- page-table-page PTE for XEN_HYPER_ITC_D
- ;;
-(p7) XEN_HYPER_ITC_D
- ;;
-(p7) mov r8=r25 // put r8 back
- ;;
-#else
-(p7) itc.d r24
-#endif
- ;;
-#ifdef CONFIG_SMP
- /*
- * Tell the assemblers dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- /*
- * Re-check pagetable entry. If they changed, we may have received a ptc.g
- * between reading the pagetable and the "itc". If so, flush the entry we
- * inserted and retry. At this point, we have:
- *
- * r28 = equivalent of pud_offset(pgd, ifa)
- * r17 = equivalent of pmd_offset(pud, ifa)
- * r21 = equivalent of pte_offset(pmd, ifa)
- *
- * r29 = *pud
- * r20 = *pmd
- * r18 = *pte
- */
- ld8 r25=[r21] // read *pte again
- ld8 r26=[r17] // read *pmd again
-#ifdef CONFIG_PGTABLE_4
- ld8 r19=[r28] // read *pud again
-#endif
- cmp.ne p6,p7=r0,r0
- ;;
- cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change
-#ifdef CONFIG_PGTABLE_4
- cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change
-#endif
- mov r27=PAGE_SHIFT<<2
- ;;
-(p6) ptc.l r22,r27 // purge PTE page translation
-(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change
- ;;
-(p6) ptc.l r16,r27 // purge translation
-#endif
-
- mov pr=r31,-1 // restore predicate registers
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI
- dv_serialize_data
-#else
- rfi
-#endif
-END(vhpt_miss)
-
- .org ia64_ivt+0x400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(itlb_miss)
- DBG_FAULT(1)
- /*
- * The ITLB handler accesses the PTE via the virtually mapped linear
- * page table. If a nested TLB miss occurs, we switch into physical
- * mode, walk the page table, and then re-execute the PTE read and
- * go on normally after that.
- *
- * Register use below: r16 = faulting address, r17 = address of the
- * PTE, r18 = *pte, r29 = saved b0, r30 = continuation point (label 1)
- * for a nested fault, r31 = saved predicates.
- */
-#ifdef CONFIG_XEN
- movl r16=XSI_IFA
- ;;
- ld8 r16=[r16] // r16 <- value at XSI_IFA (takes the place of "mov r16=cr.ifa")
-#else
- mov r16=cr.ifa // get virtual address
-#endif
- mov r29=b0 // save b0
- mov r31=pr // save predicates
-.itlb_fault:
-#ifdef CONFIG_XEN
- movl r17=XSI_IHA
- ;;
- ld8 r17=[r17] // get virtual address of L3 PTE
-#else
- mov r17=cr.iha // get virtual address of PTE
-#endif
- movl r30=1f // load nested fault continuation point
- ;;
-1: ld8 r18=[r17] // read *pte
- ;;
- mov b0=r29
- tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
-(p6) br.cond.spnt page_fault
- ;;
-#ifdef CONFIG_XEN
- mov r19=r8 // park r8 in r19 while r8 carries the PTE
- mov r8=r18 // r8 <- *pte for XEN_HYPER_ITC_I
- ;;
- XEN_HYPER_ITC_I
- ;;
- mov r8=r19 // put r8 back
-#else
- itc.i r18
-#endif
- ;;
-#ifdef CONFIG_SMP
- /*
- * Tell the assemblers dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r19=[r17] // read *pte again and see if same
- mov r20=PAGE_SHIFT<<2 // setup page size for purge
- ;;
- cmp.ne p7,p0=r18,r19
- ;;
-(p7) ptc.l r16,r20 // *pte changed since the first read: purge the translation for r16
-#endif
- mov pr=r31,-1 // restore predicates
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI
- dv_serialize_data
-#else
- rfi
-#endif
-END(itlb_miss)
-
- .org ia64_ivt+0x0800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(dtlb_miss)
- DBG_FAULT(2)
- /*
- * The DTLB handler accesses the PTE via the virtually mapped linear
- * page table. If a nested TLB miss occurs, we switch into physical
- * mode, walk the page table, and then re-execute the PTE read and
- * go on normally after that.
- *
- * Register use below: r16 = faulting address, r17 = address of the
- * PTE, r18 = *pte, r29 = saved b0, r30 = continuation point (label 1)
- * for a nested fault, r31 = saved predicates.
- */
-#ifdef CONFIG_XEN
- movl r16=XSI_IFA
- ;;
- ld8 r16=[r16] // r16 <- value at XSI_IFA (takes the place of "mov r16=cr.ifa")
-#else
- mov r16=cr.ifa // get virtual address
-#endif
- mov r29=b0 // save b0
- mov r31=pr // save predicates
-dtlb_fault:
-#ifdef CONFIG_XEN
- movl r17=XSI_IHA
- ;;
- ld8 r17=[r17] // get virtual address of L3 PTE
-#else
- mov r17=cr.iha // get virtual address of PTE
-#endif
- movl r30=1f // load nested fault continuation point
- ;;
-1: ld8 r18=[r17] // read *pte
- ;;
- mov b0=r29
- tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
-(p6) br.cond.spnt page_fault
- ;;
-#ifdef CONFIG_XEN
- mov r19=r8 // park r8 in r19 while r8 carries the PTE
- mov r8=r18 // r8 <- *pte for XEN_HYPER_ITC_D
- ;;
- XEN_HYPER_ITC_D
- ;;
- mov r8=r19 // put r8 back
- ;;
-#else
- itc.d r18
-#endif
- ;;
-#ifdef CONFIG_SMP
- /*
- * Tell the assemblers dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r19=[r17] // read *pte again and see if same
- mov r20=PAGE_SHIFT<<2 // setup page size for purge
- ;;
- cmp.ne p7,p0=r18,r19
- ;;
-(p7) ptc.l r16,r20 // *pte changed since the first read: purge the translation for r16
-#endif
- mov pr=r31,-1 // restore predicates
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI
- dv_serialize_data
-#else
- rfi
-#endif
-END(dtlb_miss)
-
- .org ia64_ivt+0x0c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(alt_itlb_miss)
- DBG_FAULT(3)
-#ifdef CONFIG_XEN
- movl r31=XSI_IPSR // r31 is a scratch pointer here; it is reloaded with pr below
- ;;
- ld8 r21=[r31],XSI_IFA_OFS-XSI_IPSR_OFS // get ipsr, point to ifa
- movl r17=PAGE_KERNEL
- ;;
- ld8 r16=[r31] // get ifa
-#else
- mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=PAGE_KERNEL
- mov r21=cr.ipsr
-#endif
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r31=pr // save predicates
- ;;
-#ifdef CONFIG_DISABLE_VHPT
- shr.u r22=r16,61 // get the region number into r22
- ;;
- cmp.gt p8,p0=6,r22 // user mode
- ;;
-#ifndef CONFIG_XEN
-(p8) thash r17=r16
- ;;
-(p8) mov cr.iha=r17
-#endif
-(p8) mov r29=b0 // save b0
-(p8) br.cond.dptk .itlb_fault
-#endif
- extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- shr.u r18=r16,57 // move address bit 61 to bit 4
- ;;
- andcm r18=0x10,r18 // bit 4=~address-bit(61)
- cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
- or r19=r17,r19 // insert PTE control bits into r19
- ;;
- or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
-(p8) br.cond.spnt page_fault
- ;;
-#ifdef CONFIG_XEN
- mov r18=r8 // park r8 in r18 while r8 carries the PTE
- mov r8=r19 // r8 <- PTE built above, for XEN_HYPER_ITC_I
- ;;
- XEN_HYPER_ITC_I
- ;;
- mov r8=r18 // put r8 back
- ;;
- mov pr=r31,-1 // restore predicates
- ;;
- XEN_HYPER_RFI;
-#else
- itc.i r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
-#endif
-END(alt_itlb_miss)
-
- .org ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(alt_dtlb_miss)
- DBG_FAULT(4)
-#ifdef CONFIG_XEN
- movl r31=XSI_IPSR // r31 is a scratch pointer here; it is reloaded with pr below
- ;;
- ld8 r21=[r31],XSI_ISR_OFS-XSI_IPSR_OFS // get ipsr, point to isr
- movl r17=PAGE_KERNEL
- ;;
- ld8 r20=[r31],XSI_IFA_OFS-XSI_ISR_OFS // get isr, point to ifa
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- ;;
- ld8 r16=[r31] // get ifa
-#else
- mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=PAGE_KERNEL
- mov r20=cr.isr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r21=cr.ipsr
-#endif
- mov r31=pr // save predicates
- ;;
-#ifdef CONFIG_DISABLE_VHPT
- shr.u r22=r16,61 // get the region number into r22
- ;;
- cmp.gt p8,p0=6,r22 // access to region 0-5
- ;;
-#ifndef CONFIG_XEN
-(p8) thash r17=r16
- ;;
-(p8) mov cr.iha=r17
-#endif
-(p8) mov r29=b0 // save b0
-(p8) br.cond.dptk dtlb_fault
-#endif
- extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
- and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
- tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
- shr.u r18=r16,57 // move address bit 61 to bit 4
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
- ;;
- andcm r18=0x10,r18 // bit 4=~address-bit(61)
- cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
-(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
-(p8) br.cond.spnt page_fault
-
- dep r21=-1,r21,IA64_PSR_ED_BIT,1 // r21 <- ipsr with the ED bit set
- or r19=r19,r17 // insert PTE control bits into r19
- ;;
- or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
-(p6) mov cr.ipsr=r21 // NOTE(review): written to cr.ipsr even under CONFIG_XEN although ipsr was loaded from XSI_IPSR there -- confirm intended
- ;;
-#ifdef CONFIG_XEN
-(p7) mov r18=r8 // park r8 in r18 while r8 carries the PTE
-(p7) mov r8=r19 // r8 <- PTE built above, for XEN_HYPER_ITC_D
- ;;
-(p7) XEN_HYPER_ITC_D
- ;;
-(p7) mov r8=r18 // put r8 back
- ;;
- mov pr=r31,-1 // restore predicates
- ;;
- XEN_HYPER_RFI;
-#else
-(p7) itc.d r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
-#endif
-END(alt_dtlb_miss)
-
- .org ia64_ivt+0x1400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(nested_dtlb_miss)
- /*
- * In the absence of kernel bugs, we get here when the virtually mapped linear
- * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
- * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page
- * table is missing, a nested TLB miss fault is triggered and control is
- * transferred to this point. When this happens, we lookup the pte for the
- * faulting address by walking the page table in physical mode and return to the
- * continuation point passed in register r30 (or call page_fault if the address is
- * not mapped).
- *
- * Input: r16: faulting address
- * r29: saved b0
- * r30: continuation address
- * r31: saved pr
- *
- * Output: r17: physical address of PTE of faulting address
- * r29: saved b0
- * r30: continuation address
- * r31: saved pr
- *
- * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared)
- */
-#ifdef CONFIG_XEN
- XEN_HYPER_RSM_PSR_DT;
-#else
- rsm psr.dt // switch to using physical data addressing
-#endif
- mov r19=IA64_KR(PT_BASE) // get the page table base address
- shl r21=r16,3 // shift bit 60 into sign bit
-#ifdef CONFIG_XEN
- movl r18=XSI_ITIR
- ;;
- ld8 r18=[r18] // r18 <- value at XSI_ITIR (takes the place of "mov r18=cr.itir")
-#else
- mov r18=cr.itir
-#endif
- ;;
- shr.u r17=r16,61 // get the region number into r17
- extr.u r18=r18,2,6 // get the faulting page size
- ;;
- cmp.eq p6,p7=5,r17 // is faulting address in region 5?
- add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address
- add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
- ;;
- shr.u r22=r16,r22
- shr.u r18=r16,r18
-(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
-
- srlz.d
- LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
-
- .pred.rel "mutex", p6, p7
-(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
-(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
- ;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4]
- cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
- shr.u r18=r22,PUD_SHIFT // shift pud index into position
-#else
- shr.u r18=r22,PMD_SHIFT // shift pmd index into position
-#endif
- ;;
- ld8 r17=[r17] // get *pgd (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr)
- ;;
-#ifdef CONFIG_PGTABLE_4
-(p7) ld8 r17=[r17] // get *pud (may be 0)
- shr.u r18=r22,PMD_SHIFT // shift pmd index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr)
- ;;
-#endif
-(p7) ld8 r17=[r17] // get *pmd (may be 0)
- shr.u r19=r22,PAGE_SHIFT // shift pte index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL?
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr);
-(p6) br.cond.spnt page_fault // some level of the walk was not present
- mov b0=r30 // b0 <- continuation address supplied by the caller in r30
- br.sptk.many b0 // return to continuation point
-END(nested_dtlb_miss)
-
- .org ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(ikey_miss)
- DBG_FAULT(6)
- FAULT(6) // no inline handling: save pr in r31, set r19=6, branch to dispatch_to_fault_handler
-END(ikey_miss)
-
- //-----------------------------------------------------------------------------------
- // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
-#ifdef CONFIG_XEN
- XEN_HYPER_SSM_PSR_DT
-#else
- ssm psr.dt
- ;;
- srlz.i
-#endif
- ;;
- SAVE_MIN_WITH_COVER
- alloc r15=ar.pfs,0,0,3,0 // three output registers: out0=ifa, out1=isr, out2=&pt_regs
-#ifdef CONFIG_XEN
- movl r3=XSI_ISR
- ;;
- ld8 out1=[r3],XSI_IFA_OFS-XSI_ISR_OFS // get vcr.isr, point to ifa
- ;;
- ld8 out0=[r3] // get vcr.ifa
- mov r14=1
- ;;
- add r3=XSI_PSR_IC_OFS-XSI_IFA_OFS, r3 // point to vpsr.ic
- ;;
- st4 [r3]=r14 // vpsr.ic = 1
- adds r3=8,r2 // set up second base pointer
- ;;
-#else
- mov out0=cr.ifa
- mov out1=cr.isr
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-#endif
-#ifdef CONFIG_XEN
-
-#define MASK_TO_PEND_OFS (-1) /* post-increment used below to step from the evtchn_upcall_mask byte to evtchn_upcall_pending */
-
-(p15) movl r14=XSI_PSR_I_ADDR
- ;;
-(p15) ld8 r14=[r14] // r14 <- pointer stored at XSI_PSR_I_ADDR
- ;;
-(p15) st1 [r14]=r0,MASK_TO_PEND_OFS // if (p15) vpsr.i = 1
- ;; // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
-(p15) ld1 r14=[r14] // if (vcpu->vcpu_info->evtchn_upcall_pending)
- ;;
-(p15) cmp.ne p15,p0=r14,r0 // keep p15 set only if the pending byte is non-zero
- ;;
-(p15) XEN_HYPER_SSM_I
-#else
-(p15) ssm psr.i // restore psr.i
-#endif
- movl r14=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r14 // ia64_do_page_fault will return to ia64_leave_kernel
- ;;
- adds out2=16,r12 // out2 = pointer to pt_regs
- br.call.sptk.many b6=ia64_do_page_fault // ignore return address
-END(page_fault)
-
- .org ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(dkey_miss)
- DBG_FAULT(7)
- FAULT(7) // no inline handling: save pr in r31, set r19=7, branch to dispatch_to_fault_handler
-END(dkey_miss)
-
- .org ia64_ivt+0x2000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(dirty_bit)
- DBG_FAULT(8)
- /*
- * What we do here is to simply turn on the dirty bit in the PTE. We need to
- * update both the page-table and the TLB entry. To efficiently access the PTE,
- * we address it through the virtual page table. Most likely, the TLB entry for
- * the relevant virtual page table page is still present in the TLB so we can
- * normally do this without additional TLB misses. In case the necessary virtual
- * page table TLB entry isn't present, we take a nested TLB miss hit where we look
- * up the physical address of the L3 PTE and then continue at label 1 below.
- */
-#ifdef CONFIG_XEN
- movl r16=XSI_IFA
- ;;
- ld8 r16=[r16] // r16 <- value at XSI_IFA (takes the place of "mov r16=cr.ifa")
- ;;
-#else
- mov r16=cr.ifa // get the address that caused the fault
-#endif
- movl r30=1f // load continuation point in case of nested fault
- ;;
-#ifdef CONFIG_XEN
- mov r18=r8; // park r8 in r18 around XEN_HYPER_THASH
- mov r8=r16; // r8 <- faulting address
- XEN_HYPER_THASH;;
- mov r17=r8; // r17 <- value left in r8 (takes the place of "thash r17=r16")
- mov r8=r18;; // put r8 back
-#else
- thash r17=r16 // compute virtual address of L3 PTE
-#endif
- mov r29=b0 // save b0 in case of nested fault
- mov r31=pr // save pr
-#ifdef CONFIG_SMP
- mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- mov ar.ccv=r18 // set compare value for cmpxchg
- or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
- tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
- ;;
-(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present
- mov r24=PAGE_SHIFT<<2
- ;;
-(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present
- ;;
-#ifdef CONFIG_XEN
-(p6) mov r18=r8 // park r8 in r18 (the old PTE value in r18 is no longer needed)
-(p6) mov r8=r25 // r8 <- updated PTE for XEN_HYPER_ITC_D
- ;;
-(p6) XEN_HYPER_ITC_D
- ;;
-(p6) mov r8=r18 // put r8 back
-#else
-(p6) itc.d r25 // install updated PTE
-#endif
- ;;
- /*
- * Tell the assemblers dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r18=[r17] // read PTE again
- ;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly installed
- ;;
-(p7) ptc.l r16,r24 // PTE differs from what was installed: purge the translation for r16
- mov b0=r29 // restore b0
- mov ar.ccv=r28 // restore ar.ccv
-#else
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
- mov b0=r29 // restore b0
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE; NOTE(review): plain itc.d even under CONFIG_XEN, unlike the SMP path above -- confirm intended
-#endif
- mov pr=r31,-1 // restore pr
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI
- dv_serialize_data
-#else
- rfi
-#endif
-END(dirty_bit)
-
- .org ia64_ivt+0x2400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(iaccess_bit)
- DBG_FAULT(9)
- // Like Entry 8, except for instruction access
-#ifdef CONFIG_XEN
- movl r16=XSI_IFA // Xen: the faulting address is read from memory at XSI_IFA instead of cr.ifa
- ;;
- ld8 r16=[r16] // r16 = address that caused the fault
- ;;
-#else
- mov r16=cr.ifa // get the address that caused the fault
-#endif
- movl r30=1f // load continuation point in case of nested fault
- mov r31=pr // save predicates
-#ifdef CONFIG_ITANIUM
- /*
- * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
- */
- mov r17=cr.ipsr
- ;;
- mov r18=cr.iip
- tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
- ;;
-(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa
-#endif /* CONFIG_ITANIUM */
- ;;
-#ifdef CONFIG_XEN
- mov r18=r8; // stash r8 in r18 while r8 is borrowed for XEN_HYPER_THASH
- mov r8=r16; // r8 = faulting address (presumably the operand of XEN_HYPER_THASH)
- XEN_HYPER_THASH;; // stands in for the native "thash" of the #else branch
- mov r17=r8; // r17 = result taken from r8 (plays the role of the thash output)
- mov r8=r18;; // put the original r8 back
-#else
- thash r17=r16 // compute virtual address of L3 PTE
-#endif
- mov r29=b0 // save b0 in case of nested fault
-#ifdef CONFIG_SMP
- mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
- ;;
- mov ar.ccv=r18 // set compare value for cmpxchg
- or r25=_PAGE_A,r18 // set the accessed bit
- tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
- ;;
-(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present
- mov r24=PAGE_SHIFT<<2 // r24 = second operand of the ptc.l below
- ;;
-(p6) cmp.eq p6,p7=r26,r18 // Only if page present
- ;;
-#ifdef CONFIG_XEN
- mov r26=r8 // stash r8 in r26 (the cmpxchg result in r26 has already been consumed above)
- mov r8=r25 // r8 = updated PTE (presumably the operand of XEN_HYPER_ITC_I)
- ;;
-(p6) XEN_HYPER_ITC_I // stands in for the native "itc.i r25" of the #else branch
- ;;
- mov r8=r26 // put the original r8 back
- ;;
-#else
-(p6) itc.i r25 // install updated PTE
-#endif
- ;;
- /*
- * Tell the assembler's dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r18=[r17] // read PTE again
- ;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly installed
- ;;
-(p7) ptc.l r16,r24 // PTE differs from what was installed: purge the entry for the faulting address
- mov b0=r29 // restore b0
- mov ar.ccv=r28 // restore ar.ccv
-#else /* !CONFIG_SMP */
- ;;
-1: ld8 r18=[r17]
- ;;
- or r18=_PAGE_A,r18 // set the accessed bit
- mov b0=r29 // restore b0
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.i r18 // install updated PTE; NOTE(review): native itc.i even with CONFIG_XEN, unlike the SMP branch -- confirm intended
-#endif /* !CONFIG_SMP */
- mov pr=r31,-1 // restore predicates
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI // stands in for the native "rfi" of the #else branch
- dv_serialize_data
-#else
- rfi
-#endif
-END(iaccess_bit)
-
- .org ia64_ivt+0x2800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(daccess_bit)
- DBG_FAULT(10)
- // Like Entry 8, except for data access
-#ifdef CONFIG_XEN
- movl r16=XSI_IFA // Xen: the faulting address is read from memory at XSI_IFA instead of cr.ifa
- ;;
- ld8 r16=[r16] // r16 = address that caused the fault
- ;;
-#else
- mov r16=cr.ifa // get the address that caused the fault
-#endif
- movl r30=1f // load continuation point in case of nested fault
- ;;
-#ifdef CONFIG_XEN
- mov r18=r8 // stash r8 in r18 while r8 is borrowed for XEN_HYPER_THASH
- mov r8=r16 // r8 = faulting address (presumably the operand of XEN_HYPER_THASH)
- XEN_HYPER_THASH // stands in for the native "thash" of the #else branch
- ;;
- mov r17=r8 // r17 = result taken from r8 (plays the role of the thash output)
- mov r8=r18 // put the original r8 back
- ;;
-#else
- thash r17=r16 // compute virtual address of L3 PTE
-#endif
- mov r31=pr // save predicates
- mov r29=b0 // save b0 in case of nested fault
-#ifdef CONFIG_SMP
- mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- mov ar.ccv=r18 // set compare value for cmpxchg
- or r25=_PAGE_A,r18 // set the accessed bit
- tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
- ;;
-(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present
- mov r24=PAGE_SHIFT<<2 // r24 = second operand of the ptc.l below
- ;;
-(p6) cmp.eq p6,p7=r26,r18 // Only if page is present
- ;;
-#ifdef CONFIG_XEN
- mov r26=r8 // stash r8 in r26 (the cmpxchg result in r26 has already been consumed above)
- mov r8=r25 // r8 = updated PTE (presumably the operand of XEN_HYPER_ITC_D)
- ;;
-(p6) XEN_HYPER_ITC_D // stands in for the native "itc.d r25" of the #else branch
- ;;
- mov r8=r26 // put the original r8 back
- ;;
-#else
-(p6) itc.d r25 // install updated PTE
-#endif
- /*
- * Tell the assembler's dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
- ;;
- ld8 r18=[r17] // read PTE again
- ;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly installed
- ;;
-(p7) ptc.l r16,r24 // PTE differs from what was installed: purge the entry for the faulting address
- mov ar.ccv=r28 // restore ar.ccv
-#else
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- or r18=_PAGE_A,r18 // set the accessed bit
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE; NOTE(review): native itc.d even with CONFIG_XEN, unlike the SMP branch -- confirm intended
-#endif
- mov b0=r29 // restore b0
- mov pr=r31,-1 // restore predicates
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI // stands in for the native "rfi" of the #else branch
- dv_serialize_data
-#else
- rfi
-#endif
-END(daccess_bit)
-
- .org ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(break_fault)
- /*
- * The streamlined system call entry/exit paths only save/restore the initial part
- * of pt_regs. This implies that the callers of system-calls must adhere to the
- * normal procedure calling conventions.
- *
- * Registers to be saved & restored:
- * CR registers: cr.ipsr, cr.iip, cr.ifs
- * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
- * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
- * Registers to be restored only:
- * r8-r11: output value from the system call.
- *
- * During system call exit, scratch registers (including r15) are modified/cleared
- * to prevent leaking bits from kernel to user level.
- */
- DBG_FAULT(11)
- mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc)
-#ifdef CONFIG_XEN
- movl r22=XSI_IPSR // Xen: ipsr/iim/iip are read from memory starting at XSI_IPSR
- ;;
- ld8 r29=[r22],XSI_IIM_OFS-XSI_IPSR_OFS // get ipsr, point to iim
-#else
- mov r29=cr.ipsr // M2 (12 cyc)
-#endif
- mov r31=pr // I0 (2 cyc)
-
-#ifdef CONFIG_XEN
- ;;
- ld8 r17=[r22],XSI_IIP_OFS-XSI_IIM_OFS // get iim, point to iip
-#else
- mov r17=cr.iim // M2 (2 cyc)
-#endif
- mov.m r27=ar.rsc // M2 (12 cyc)
- mov r18=__IA64_BREAK_SYSCALL // A
-
- mov.m ar.rsc=0 // M2
- mov.m r21=ar.fpsr // M2 (12 cyc)
- mov r19=b6 // I0 (2 cyc)
- ;;
- mov.m r23=ar.bspstore // M2 (12 cyc)
- mov.m r24=ar.rnat // M2 (5 cyc)
- mov.i r26=ar.pfs // I0 (2 cyc)
-
- invala // M0|1
- nop.m 0 // M
- mov r20=r1 // A save r1
-
- nop.m 0
- movl r30=sys_call_table // X
-
-#ifdef CONFIG_XEN
- ld8 r28=[r22] // get iip (r22 was advanced to it by the previous load)
-#else
- mov r28=cr.iip // M2 (2 cyc)
-#endif
- cmp.eq p0,p7=r18,r17 // I0 is this a system call?
-(p7) br.cond.spnt non_syscall // B no ->
- //
- // From this point on, we are definitely on the syscall-path
- // and we can use (non-banked) scratch registers.
- //
-///////////////////////////////////////////////////////////////////////
- mov r1=r16 // A move task-pointer to "addl"-addressable reg
- mov r2=r16 // A setup r2 for ia64_syscall_setup
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = &current_thread_info()->flags
-
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
- adds r15=-1024,r15 // A subtract 1024 from syscall number
- mov r3=NR_syscalls - 1
- ;;
- ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag
- ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags
- extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr
-
- shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024)
- addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS
- cmp.leu p6,p7=r15,r3 // A syscall number in range?
- ;;
-
- lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS
-(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point
- tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT?
-
- mov.m ar.bspstore=r22 // M2 switch to kernel RBS
- cmp.eq p8,p9=2,r8 // A ipsr.ei==2?
- ;;
-
-(p8) mov r8=0 // A clear ei to 0
-(p7) movl r30=sys_ni_syscall // X
-
-(p8) adds r28=16,r28 // A switch cr.iip to next bundle
-(p9) adds r8=1,r8 // A increment ei to next slot
- nop.i 0
- ;;
-
- mov.m r25=ar.unat // M2 (5 cyc)
- dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
- adds r15=1024,r15 // A restore original syscall number
- //
- // If any of the above loads miss in L1D, we'll stall here until
- // the data arrives.
- //
-///////////////////////////////////////////////////////////////////////
- st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
- mov b6=r30 // I0 setup syscall handler branch reg early
- cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
-
- and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
- mov r18=ar.bsp // M2 (12 cyc)
-(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS
- ;;
-.back_from_break_fixup:
-(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack
- cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
- br.call.sptk.many b7=ia64_syscall_setup // B
-1:
- mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
- nop 0
-#ifdef CONFIG_XEN
- mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;; // call xen_bsw1 in place of the native bsw.1; b0 is preserved across the call in r2
-#else
- bsw.1 // B (6 cyc) regs are saved, switch to bank 1
-#endif
- ;;
-
-#ifdef CONFIG_XEN
- movl r16=XSI_PSR_IC
- mov r3=1
- ;;
- st4 [r16]=r3,XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS // vpsr.ic = 1
-#else
- ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection
-#endif
- movl r3=ia64_ret_from_syscall // X
- ;;
-
- srlz.i // M0 ensure interruption collection is on
- mov rp=r3 // I0 set the real return addr
-(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
-
-#ifdef CONFIG_XEN
-(p15) ld8 r16=[r16] // vpsr.i
- ;;
-(p15) st1 [r16]=r0,MASK_TO_PEND_OFS // if (p15) vpsr.i = 1
- ;; // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
-(p15) ld1 r2=[r16] // if (vcpu->vcpu_info->evtchn_upcall_pending)
- ;;
-(p15) cmp.ne.unc p6,p0=r2,r0
- ;;
-(p6) XEN_HYPER_SSM_I // do a real ssm psr.i
-#else
-(p15) ssm psr.i // M2 restore psr.i
-#endif
-(p14) br.call.sptk.many b6=b6 // B invoke syscall-handler (ignore return addr)
- br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic
- // NOT REACHED
-///////////////////////////////////////////////////////////////////////
- // On entry, we optimistically assumed that we're coming from user-space.
- // For the rare cases where a system-call is done from within the kernel,
- // we fix things up at this point:
-.break_fixup:
- add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure
- mov ar.rnat=r24 // M2 restore kernel's AR.RNAT
- ;;
- mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE
- br.cond.sptk .back_from_break_fixup
-END(break_fault)
-
- .org ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(interrupt)
- DBG_FAULT(12)
- mov r31=pr // prepare to save predicates
- ;;
- SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
-#ifdef CONFIG_XEN
- movl r3=XSI_PSR_IC // Xen: write 1 to the XSI_PSR_IC location in place of the native "ssm psr.ic"
- mov r14=1
- ;;
- st4 [r3]=r14
-#else
- ssm psr.ic | PSR_DEFAULT_BITS
-#endif
- ;;
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic is back on
- ;;
- SAVE_REST
- ;;
- alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
-#ifdef CONFIG_XEN
- ;;
- br.call.sptk.many rp=xen_get_ivr // Xen: call xen_get_ivr in place of reading cr.ivr; result is taken from r8
- ;;
- mov out0=r8 // pass cr.ivr as first arg
-#else
- mov out0=cr.ivr // pass cr.ivr as first arg
-#endif
- add out1=16,sp // pass pointer to pt_regs as second arg
- ;;
- srlz.d // make sure we see the effect of cr.ivr
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14 // ia64_handle_irq will return to ia64_leave_kernel
- br.call.sptk.many b6=ia64_handle_irq // link goes to b6, so rp set above stays the return address
-END(interrupt)
-
- .org ia64_ivt+0x3400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
- DBG_FAULT(13) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(13) // hand vector number 13 to the FAULT macro
-
- .org ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
- DBG_FAULT(14) // same pattern as entry 13
- FAULT(14) // hand vector number 14 to the FAULT macro
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply move the following code to a more
- * suitable spot...
- *
- * ia64_syscall_setup() is a separate subroutine so that it can
- * allocate stacked registers so it can safely demine any
- * potential NaT values from the input registers.
- *
- * On entry:
- * - executing on bank 0 or bank 1 register set (doesn't matter)
- * - r1: stack pointer
- * - r2: current task pointer
- * - r3: preserved
- * - r11: original contents (saved ar.pfs to be saved)
- * - r12: original contents (sp to be saved)
- * - r13: original contents (tp to be saved)
- * - r15: original contents (syscall # to be saved)
- * - r18: saved bsp (after switching to kernel stack)
- * - r19: saved b6
- * - r20: saved r1 (gp)
- * - r21: saved ar.fpsr
- * - r22: kernel's register backing store base (krbs_base)
- * - r23: saved ar.bspstore
- * - r24: saved ar.rnat
- * - r25: saved ar.unat
- * - r26: saved ar.pfs
- * - r27: saved ar.rsc
- * - r28: saved cr.iip
- * - r29: saved cr.ipsr
- * - r31: saved pr
- * - b0: original contents (to be saved)
- * On exit:
- * - p10: TRUE if syscall is invoked with more than 8 out
- * registers or r15's Nat is true
- * - r1: kernel's gp
- * - r3: preserved (same as on entry)
- * - r8: -EINVAL if p10 is true
- * - r12: points to kernel stack
- * - r13: points to current task
- * - r14: preserved (same as on entry)
- * - p13: preserved
- * - p15: TRUE if interrupts need to be re-enabled
- * - ar.fpsr: set to kernel settings
- * - b6: preserved (same as on entry)
- */
-#ifndef CONFIG_XEN /* this copy is only assembled for non-Xen builds */
-GLOBAL_ENTRY(ia64_syscall_setup)
-#if PT(B6) != 0
-# error This code assumes that b6 is the first field in pt_regs.
-#endif
- st8 [r1]=r19 // save b6
- add r16=PT(CR_IPSR),r1 // initialize first base pointer
- add r17=PT(R11),r1 // initialize second base pointer
- ;;
- alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
- st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
- tnat.nz p8,p0=in0 // p8 = in0 is NaT
-
- st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
- tnat.nz p9,p0=in1 // p9 = in1 is NaT
-(pKStk) mov r18=r0 // make sure r18 isn't NaT
- ;;
-
- st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
- st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
- mov r28=b0 // save b0 (2 cyc)
- ;;
-
- st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
- dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
-(p8) mov in0=-1 // overwrite a NaT in0 with -1
- ;;
-
- st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
- extr.u r11=r19,7,7 // I0 // get sol of ar.pfs
- and r8=0x7f,r19 // A // get sof of ar.pfs
-
- st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
- tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 p15 = psr.i was set in the saved cr.ipsr
-(p9) mov in1=-1 // overwrite a NaT in1 with -1
- ;;
-
-(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
- tnat.nz p10,p0=in2 // p10 = in2 is NaT
- add r11=8,r11 // r11 = sol + 8, the frame-size limit checked below
- ;;
-(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field
-(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
- tnat.nz p11,p0=in3 // p11 = in3 is NaT
- ;;
-(p10) mov in2=-1 // overwrite a NaT in2 with -1
- tnat.nz p12,p0=in4 // [I0]
-(p11) mov in3=-1 // overwrite a NaT in3 with -1
- ;;
-(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
-(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
- shl r18=r18,16 // compute ar.rsc to be used for "loadrs"
- ;;
- st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
- st8 [r17]=r28,PT(R1)-PT(B0) // save b0
- tnat.nz p13,p0=in5 // [I0]
- ;;
- st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs"
- st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
-(p12) mov in4=-1 // overwrite a NaT in4 with -1
- ;;
-
-.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
-.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
-(p13) mov in5=-1 // overwrite a NaT in5 with -1
- ;;
- st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
- tnat.nz p13,p0=in6 // p13 = in6 is NaT
- cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
- ;;
- mov r8=1 // value stored into pt_regs.r8 below
-(p9) tnat.nz p10,p0=r15 // frame size OK: p10 = syscall number (r15) is NaT
- adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
-
- st8.spill [r17]=r15 // save r15
- tnat.nz p8,p0=in7 // p8 = in7 is NaT
- nop.i 0
-
- mov r13=r2 // establish `current'
- movl r1=__gp // establish kernel global pointer
- ;;
- st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
-(p13) mov in6=-1 // overwrite a NaT in6 with -1
-(p8) mov in7=-1 // overwrite a NaT in7 with -1
-
- cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
- movl r17=FPSR_DEFAULT
- ;;
- mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
-(p10) mov r8=-EINVAL // error return value when p10 is set (see "On exit" above)
- br.ret.sptk.many b7 // return through b7, the link register used by the caller's br.call
-END(ia64_syscall_setup)
-#endif
-
- .org ia64_ivt+0x3c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
- DBG_FAULT(15) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(15) // hand vector number 15 to the FAULT macro
-
- /*
- * Squatting in this space ...
- *
- * This special case dispatcher for illegal operation faults allows preserved
- * registers to be modified through a callback function (asm only) that is handed
- * back from the fault handler in r8. Up to three arguments can be passed to the
- * callback function by returning an aggregate with the callback as its first
- * element, followed by the arguments.
- */
-ENTRY(dispatch_illegal_op_fault)
- .prologue
- .body
- SAVE_MIN_WITH_COVER
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- ;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
- mov out0=ar.ec // pass ar.ec as the only argument
- ;;
- SAVE_REST
- PT_REGS_UNWIND_INFO(0)
- ;;
- br.call.sptk.many rp=ia64_illegal_op_fault
-.ret0: ;;
- alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
- mov out0=r9 // r9-r11 become the (up to three) callback arguments
- mov out1=r10
- mov out2=r11
- movl r15=ia64_leave_kernel
- ;;
- mov rp=r15 // the callback, if any, returns to ia64_leave_kernel
- mov b6=r8 // r8 = callback handed back by the fault handler (0 if none)
- ;;
- cmp.ne p6,p0=0,r8 // p6 = a callback was returned
-(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
- br.sptk.many ia64_leave_kernel // no callback: go straight to ia64_leave_kernel
-END(dispatch_illegal_op_fault)
-
- .org ia64_ivt+0x4000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
- DBG_FAULT(16) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(16) // hand vector number 16 to the FAULT macro
-
- .org ia64_ivt+0x4400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
- DBG_FAULT(17) // same pattern as entry 16
- FAULT(17) // hand vector number 17 to the FAULT macro
-
-ENTRY(non_syscall) // branched to from break_fault when the break immediate is not __IA64_BREAK_SYSCALL
- mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER
- ;;
- SAVE_MIN_WITH_COVER
-
- // There is no particular reason for this code to be here, other than that
- // there happens to be space here that would go unused otherwise. If this
- // fault ever gets "unreserved", simply move the following code to a more
- // suitable spot...
-
- alloc r14=ar.pfs,0,0,2,0
- mov out0=cr.iim // first arg: cr.iim; NOTE(review): read natively, no CONFIG_XEN variant here -- confirm intended
- add out1=16,sp // second arg: sp+16 (other handlers in this file pass this as the pt_regs pointer)
- adds r3=8,r2 // set up second base pointer for SAVE_REST
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- movl r15=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r15 // ia64_bad_break will return to ia64_leave_kernel
- ;;
- br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
-END(non_syscall)
-
- .org ia64_ivt+0x4800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
- DBG_FAULT(18) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(18) // hand vector number 18 to the FAULT macro
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply move the following code to a more
- * suitable spot...
- */
-
-ENTRY(dispatch_unaligned_handler) // branched to from unaligned_access with the predicates already saved in r31
- SAVE_MIN_WITH_COVER
- ;;
- alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
- mov out0=cr.ifa // first arg: cr.ifa; NOTE(review): read natively, no CONFIG_XEN variant here -- confirm intended
- adds out1=16,sp // second arg: sp+16 (other handlers in this file pass this as the pt_regs pointer)
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14 // the handler will return to ia64_leave_kernel
- br.sptk.many ia64_prepare_handle_unaligned // plain branch (not a call), so rp set above is the return address
-END(dispatch_unaligned_handler)
-
- .org ia64_ivt+0x4c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
- DBG_FAULT(19) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(19) // hand vector number 19 to the FAULT macro
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply move the following code to a more
- * suitable spot...
- */
-
-ENTRY(dispatch_to_fault_handler)
- /*
- * Input:
- * psr.ic: off
- * r19: fault vector number (e.g., 24 for General Exception)
- * r31: contains saved predicates (pr)
- */
- SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,5,0 // five outputs: out0..out4 are the arguments to ia64_fault
- mov out0=r15 // out0 = r15 (presumably the vector number moved there by SAVE_MIN_WITH_COVER_R19 -- confirm)
-#ifdef CONFIG_XEN
- movl out1=XSI_ISR // Xen: isr/ifa/iim/itir are read from memory; out1..out4 first hold their addresses
- ;;
- adds out2=XSI_IFA-XSI_ISR,out1 // out2 = &ifa
- adds out3=XSI_IIM-XSI_ISR,out1 // out3 = &iim
- adds out4=XSI_ITIR-XSI_ISR,out1 // out4 = &itir
- ;;
- ld8 out1=[out1] // out1 = isr
- ld8 out2=[out2] // out2 = ifa
- ld8 out3=[out3] // out3 = iim (was loaded through out4, which passed itir in the iim slot)
- ld8 out4=[out4] // out4 = itir
- ;;
-#else
- mov out1=cr.isr
- mov out2=cr.ifa
- mov out3=cr.iim
- mov out4=cr.itir
- ;;
-#endif
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14 // ia64_fault will return to ia64_leave_kernel
- br.call.sptk.many b6=ia64_fault // link goes to b6, so rp set above stays the return address
-END(dispatch_to_fault_handler)
-
-//
-// --- End of long entries, Beginning of short entries
-//
-
- .org ia64_ivt+0x5000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
-ENTRY(page_not_present)
- DBG_FAULT(20)
- mov r16=cr.ifa // r16 = faulting address; NOTE(review): native read, unlike the XSI_IFA load in daccess_rights -- confirm intended
- rsm psr.dt
- /*
- * The Linux page fault handler doesn't expect non-present pages to be in
- * the TLB. Flush the existing entry now, so we meet that expectation.
- */
- mov r17=PAGE_SHIFT<<2 // r17 = second operand of the ptc.l below
- ;;
- ptc.l r16,r17 // purge the entry for the faulting address
- ;;
- mov r31=pr // save predicates
- srlz.d
- br.sptk.many page_fault // continue in page_fault (defined outside this chunk)
-END(page_not_present)
-
- .org ia64_ivt+0x5100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
-ENTRY(key_permission)
- DBG_FAULT(21)
- mov r16=cr.ifa // r16 = faulting address; NOTE(review): native read, unlike the XSI_IFA load in daccess_rights -- confirm intended
- rsm psr.dt
- mov r31=pr // save predicates
- ;;
- srlz.d
- br.sptk.many page_fault // continue in page_fault (defined outside this chunk)
-END(key_permission)
-
- .org ia64_ivt+0x5200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(iaccess_rights)
- DBG_FAULT(22)
- mov r16=cr.ifa // r16 = faulting address; NOTE(review): native read, unlike the XSI_IFA load in daccess_rights -- confirm intended
- rsm psr.dt
- mov r31=pr // save predicates
- ;;
- srlz.d
- br.sptk.many page_fault // continue in page_fault (defined outside this chunk)
-END(iaccess_rights)
-
- .org ia64_ivt+0x5300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(daccess_rights)
- DBG_FAULT(23)
-#ifdef CONFIG_XEN
- movl r16=XSI_IFA // Xen: the faulting address is read from memory at XSI_IFA instead of cr.ifa
- ;;
- ld8 r16=[r16] // r16 = faulting address
- ;;
- XEN_HYPER_RSM_PSR_DT // stands in for the native "rsm psr.dt" of the #else branch
-#else
- mov r16=cr.ifa
- rsm psr.dt
-#endif
- mov r31=pr // save predicates
- ;;
- srlz.d
- br.sptk.many page_fault // continue in page_fault (defined outside this chunk)
-END(daccess_rights)
-
- .org ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(general_exception)
- DBG_FAULT(24)
- mov r16=cr.isr
- mov r31=pr // save predicates
- ;;
- cmp4.eq p6,p0=0,r16 // p6 = low 32 bits of cr.isr are all zero
-(p6) br.sptk.many dispatch_illegal_op_fault // that case is handled by the illegal-op dispatcher
- ;;
- mov r19=24 // fault number
- br.sptk.many dispatch_to_fault_handler // everything else goes to the generic dispatcher
-END(general_exception)
-
- .org ia64_ivt+0x5500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(disabled_fp_reg)
- DBG_FAULT(25)
- rsm psr.dfh // ensure we can access fph
- ;;
- srlz.d
- mov r31=pr // save predicates (input expected by dispatch_to_fault_handler)
- mov r19=25 // fault number
- br.sptk.many dispatch_to_fault_handler
-END(disabled_fp_reg)
-
- .org ia64_ivt+0x5600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(nat_consumption)
- DBG_FAULT(26)
-
- mov r16=cr.ipsr
- mov r17=cr.isr
- mov r31=pr // save PR
- ;;
- and r18=0xf,r17 // r18 = cr.isr.code{3:0}
- tbit.z p6,p0=r17,IA64_ISR_NA_BIT // p6 = cr.isr.na is clear
- ;;
- cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 // p6 |= code{3:0} is not LFETCH
- dep r16=-1,r16,IA64_PSR_ED_BIT,1 // set the ED bit in the ipsr copy
-(p6) br.cond.spnt 1f // branch if (cr.isr.na == 0 || cr.isr.code{3:0} != LFETCH)
- ;;
- mov cr.ipsr=r16 // set cr.ipsr.ed
- mov pr=r31,-1 // restore PR
- ;;
- rfi // NOTE(review): native rfi; other handlers in this file use XEN_HYPER_RFI under CONFIG_XEN -- confirm intended
-
-1: mov pr=r31,-1 // restore PR before taking the generic fault path
- ;;
- FAULT(26)
-END(nat_consumption)
-
- .org ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(speculation_vector)
- DBG_FAULT(27)
- /*
- * A [f]chk.[as] instruction needs to take the branch to the recovery code but
- * this part of the architecture is not implemented in hardware on some CPUs, such
- * as Itanium. Thus, in general we need to emulate the behavior. IIM contains
- * the relative target (not yet sign extended). So after sign extending it we
- * simply add it to IIP. We also need to reset the EI field of the IPSR to zero,
- * i.e., the slot to restart into.
- *
- * cr.iim contains zero_ext(imm21)
- */
- mov r18=cr.iim
- ;;
- mov r17=cr.iip
- shl r18=r18,43 // put sign bit in position (43=64-21)
- ;;
-
- mov r16=cr.ipsr
- shr r18=r18,39 // sign extend (39=43-4); shifting back 4 less leaves imm21 multiplied by 16
- ;;
-
- add r17=r17,r18 // now add the offset
- ;;
- mov cr.iip=r17 // NOTE(review): cr.iip/cr.ipsr are accessed natively even though the return below has a Xen variant -- confirm intended
- dep r16=0,r16,41,2 // clear EI
- ;;
-
- mov cr.ipsr=r16
- ;;
-
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI; // stands in for the native "rfi" of the #else branch
-#else
- rfi // and go back
-#endif
-END(speculation_vector)
-
- .org ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
- DBG_FAULT(28) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(28) // hand vector number 28 to the FAULT macro
-
- .org ia64_ivt+0x5900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(debug_vector) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(29) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(29) // hand vector number 29 to the FAULT macro (defined outside this chunk)
-END(debug_vector)
-
- .org ia64_ivt+0x5a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(unaligned_access)
- DBG_FAULT(30)
- mov r31=pr // prepare to save predicates
- ;;
- br.sptk.many dispatch_unaligned_handler // the real work is done in dispatch_unaligned_handler
-END(unaligned_access)
-
- .org ia64_ivt+0x5b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(unsupported_data_reference) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(31) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(31) // hand vector number 31 to the FAULT macro (defined outside this chunk)
-END(unsupported_data_reference)
-
- .org ia64_ivt+0x5c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
-ENTRY(floating_point_fault) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(32) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(32) // hand vector number 32 to the FAULT macro (defined outside this chunk)
-END(floating_point_fault)
-
- .org ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(floating_point_trap) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(33) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(33) // hand vector number 33 to the FAULT macro (defined outside this chunk)
-END(floating_point_trap)
-
- .org ia64_ivt+0x5e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(lower_privilege_trap) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(34) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(34) // hand vector number 34 to the FAULT macro (defined outside this chunk)
-END(lower_privilege_trap)
-
- .org ia64_ivt+0x5f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(taken_branch_trap) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(35) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(35) // hand vector number 35 to the FAULT macro (defined outside this chunk)
-END(taken_branch_trap)
-
- .org ia64_ivt+0x6000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(single_step_trap) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(36) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(36) // hand vector number 36 to the FAULT macro (defined outside this chunk)
-END(single_step_trap)
-
- .org ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Reserved
- DBG_FAULT(37) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(37) // hand the vector number to the FAULT macro; entries 38-44 below repeat this pattern at 0x100 spacing
-
- .org ia64_ivt+0x6200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
- DBG_FAULT(38)
- FAULT(38)
-
- .org ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
- DBG_FAULT(39)
- FAULT(39)
-
- .org ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
- DBG_FAULT(40)
- FAULT(40)
-
- .org ia64_ivt+0x6500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
- DBG_FAULT(41)
- FAULT(41)
-
- .org ia64_ivt+0x6600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
- DBG_FAULT(42)
- FAULT(42)
-
- .org ia64_ivt+0x6700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
- DBG_FAULT(43)
- FAULT(43)
-
- .org ia64_ivt+0x6800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
- DBG_FAULT(44)
- FAULT(44)
-
- .org ia64_ivt+0x6900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(ia32_exception) // no dedicated handling: the slot only invokes the two macros below
- DBG_FAULT(45) // debug hook macro (defined outside this chunk), tagged with the vector number
- FAULT(45) // hand vector number 45 to the FAULT macro (defined outside this chunk)
-END(ia32_exception)
-
- .org ia64_ivt+0x6a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
-ENTRY(ia32_intercept)
- DBG_FAULT(46)
-#ifdef CONFIG_IA32_SUPPORT
- mov r31=pr // save predicates
- mov r16=cr.isr
- ;;
- extr.u r17=r16,16,8 // get ISR.code
- mov r18=ar.eflag // current eflag value
- mov r19=cr.iim // old eflag value
- ;;
- cmp.ne p6,p0=2,r17 // p6 = ISR.code is not 2
-(p6) br.cond.spnt 1f // not a system flag fault
- xor r16=r18,r19 // r16 = bits that differ between old and current eflag
- ;;
- extr.u r17=r16,18,1 // get the eflags.ac bit
- ;;
- cmp.eq p6,p0=0,r17 // p6 = the ac bit is the same in both values
-(p6) br.cond.spnt 1f // eflags.ac bit didn't change
- ;;
- mov pr=r31,-1 // restore predicate registers
-#ifdef CONFIG_XEN
- XEN_HYPER_RFI; // stands in for the native "rfi" of the #else branch
-#else
- rfi
-#endif
-
-1:
-#endif // CONFIG_IA32_SUPPORT
- FAULT(46) // all other cases (and builds without CONFIG_IA32_SUPPORT) go to the FAULT macro
-END(ia32_intercept)
-
- .org ia64_ivt+0x6b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
-ENTRY(ia32_interrupt)
- DBG_FAULT(47)
-#ifdef CONFIG_IA32_SUPPORT
- mov r31=pr // save predicates
- br.sptk.many dispatch_to_ia32_handler // the real work is done in dispatch_to_ia32_handler (defined outside this chunk)
-#else
- FAULT(47) // without IA-32 support the vector number just goes to the FAULT macro
-#endif
-END(ia32_interrupt)
-
- .org ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
- DBG_FAULT(48) // reserved slot: only the two macros (both defined outside this chunk) are invoked
- FAULT(48) // hand the vector number to the FAULT macro; entries 49-63 below repeat this pattern at 0x100 spacing
-
- .org ia64_ivt+0x6d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
- DBG_FAULT(49)
- FAULT(49)
-
- .org ia64_ivt+0x6e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
- DBG_FAULT(50)
- FAULT(50)
-
- .org ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
- DBG_FAULT(51)
- FAULT(51)
-
- .org ia64_ivt+0x7000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7000 Entry 52 (size 16 bundles) Reserved
- DBG_FAULT(52)
- FAULT(52)
-
- .org ia64_ivt+0x7100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
- DBG_FAULT(53)
- FAULT(53)
-
- .org ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
- DBG_FAULT(54)
- FAULT(54)
-
- .org ia64_ivt+0x7300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
- DBG_FAULT(55)
- FAULT(55)
-
- .org ia64_ivt+0x7400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
- DBG_FAULT(56)
- FAULT(56)
-
- .org ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
- DBG_FAULT(57)
- FAULT(57)
-
- .org ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
- DBG_FAULT(58)
- FAULT(58)
-
- .org ia64_ivt+0x7700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
- DBG_FAULT(59)
- FAULT(59)
-
- .org ia64_ivt+0x7800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
- DBG_FAULT(60)
- FAULT(60)
-
- .org ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
- DBG_FAULT(61)
- FAULT(61)
-
- .org ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
- DBG_FAULT(62)
- FAULT(62)
-
- .org ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
- DBG_FAULT(63)
- FAULT(63)
-
- .org ia64_ivt+0x7c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
- DBG_FAULT(64)
- FAULT(64)
-
- .org ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
- DBG_FAULT(65)
- FAULT(65)
-
- .org ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
- DBG_FAULT(66)
- FAULT(66)
-
- .org ia64_ivt+0x7f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
- DBG_FAULT(67)
- FAULT(67)
-
-#ifdef CONFIG_IA32_SUPPORT
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply move the following code to a more
- * suitable spot...
- */
-
- // IA32 interrupt entry point
-
-ENTRY(dispatch_to_ia32_handler)
- SAVE_MIN
- ;;
- mov r14=cr.isr // r14 = interruption status register
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i
- adds r3=8,r2 // Base pointer for SAVE_REST
- ;;
- SAVE_REST
- ;;
- mov r15=0x80 // only interrupt number 0x80 is dispatched through ia32_syscall_table
- shr r14=r14,16 // Get interrupt number
- ;;
- cmp.ne p6,p0=r14,r15 // p6 = interrupt number != 0x80
-(p6) br.call.dpnt.many b6=non_ia32_syscall
-
- adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
- adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
- ;;
- cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
- ld8 r8=[r14] // get r8
- ;;
- st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP)
- ;;
- alloc r15=ar.pfs,0,0,6,0 // must be first in an insn group
- ;;
- ld4 r8=[r14],8 // r8 == eax (syscall number)
- mov r15=IA32_NR_syscalls
- ;;
- cmp.ltu.unc p6,p7=r8,r15 // p6 = syscall number is below IA32_NR_syscalls
- ld4 out1=[r14],8 // r9 == ecx
- ;;
- ld4 out2=[r14],8 // r10 == edx
- ;;
- ld4 out0=[r14] // r11 == ebx
- adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
- ;;
- ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp
- ;;
- ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi
- adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- ld4 out4=[r14] // r15 == edi
- movl r16=ia32_syscall_table
- ;;
-(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
- ld4 r2=[r2] // r2 = current_thread_info()->flags
- ;;
- ld8 r16=[r16] // r16 = handler address read from the selected table slot
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
- ;;
- mov b6=r16
- movl r15=ia32_ret_from_syscall
- cmp.eq p8,p0=r2,r0 // p8 = neither trace nor audit flag is set
- ;;
- mov rp=r15 // rp = ia32_ret_from_syscall
-(p8) br.call.sptk.many b6=b6
- br.cond.sptk ia32_trace_syscall // reached only when p8 is clear (trace/audit flag set)
-
-non_ia32_syscall:
- alloc r15=ar.pfs,0,0,2,0
- mov out0=r14 // interrupt #
- add out1=16,sp // pointer to pt_regs
- ;; // avoid WAW on CFM
- br.call.sptk.many rp=ia32_bad_interrupt
-.ret1: movl r15=ia64_leave_kernel
- ;;
- mov rp=r15 // leave through ia64_leave_kernel
- br.ret.sptk.many rp
-END(dispatch_to_ia32_handler)
-#endif /* CONFIG_IA32_SUPPORT */
-
-#ifdef CONFIG_XEN
- .section .text,"ax"
-GLOBAL_ENTRY(xen_event_callback)
- mov r31=pr // prepare to save predicates
- ;;
- SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
- ;;
- movl r3=XSI_PSR_IC
- mov r14=1
- ;;
- st4 [r3]=r14 // write 1 to the XSI_PSR_IC word
- ;;
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic is back on
- ;;
- SAVE_REST
- ;;
-1:
- alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
- add out0=16,sp // pass pointer to pt_regs as first arg
- ;;
- br.call.sptk.many b0=evtchn_do_upcall
- ;;
- movl r20=XSI_PSR_I_ADDR
- ;;
- ld8 r20=[r20] // r20 = pointer stored at XSI_PSR_I_ADDR
- ;;
- adds r20=-1,r20 // vcpu_info->evtchn_upcall_pending
- ;;
- ld1 r20=[r20] // r20 = the pending byte just before that pointer
- ;;
- cmp.ne p6,p0=r20,r0 // if there are pending events,
- (p6) br.spnt.few 1b // call evtchn_do_upcall again.
- br.sptk.many ia64_leave_kernel
-END(xen_event_callback)
-
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply move the following code to a more
- * suitable spot...
- */
-
-GLOBAL_ENTRY(xen_bsw1)
- /* FIXME: THIS CODE IS NOT NaT SAFE! */
- mov r14=ar.unat // keep the caller ar.unat in r14; it is written back before returning
- movl r30=XSI_B1NAT
- ;;
- ld8 r30=[r30];;
- mov ar.unat=r30 // ar.unat = word stored at XSI_B1NAT, in effect for the ld8.fill sequence below
- movl r30=XSI_BANKNUM;
- mov r31=1;;
- st4 [r30]=r31; // write 1 to the XSI_BANKNUM word
- movl r30=XSI_BANK1_R16; // r30 walks the even slots (r16, r18, ...)
- movl r31=XSI_BANK1_R16+8;; // r31 walks the odd slots (r17, r19, ...)
- ld8.fill r16=[r30],16; ld8.fill r17=[r31],16;;
- ld8.fill r18=[r30],16; ld8.fill r19=[r31],16;;
- ld8.fill r20=[r30],16; ld8.fill r21=[r31],16;;
- ld8.fill r22=[r30],16; ld8.fill r23=[r31],16;;
- ld8.fill r24=[r30],16; ld8.fill r25=[r31],16;;
- ld8.fill r26=[r30],16; ld8.fill r27=[r31],16;;
- ld8.fill r28=[r30],16; ld8.fill r29=[r31],16;;
- ld8.fill r30=[r30]; ld8.fill r31=[r31];; // r30/r31 are the walking pointers, so they are reloaded last
- mov ar.unat=r14 // put back the ar.unat saved on entry
- br.ret.sptk.many b0
-END(xen_bsw1)
-
-
-#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
deleted file mode 100644
index 5741b4e75d..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
+++ /dev/null
@@ -1,358 +0,0 @@
-#include <asm/cache.h>
-
-#ifdef CONFIG_XEN
-#include "../kernel/entry.h"
-#else
-#include "entry.h"
-#endif
-
-/*
- * For ivt.s we want to access the stack virtually so we don't have to disable translation
- * on interrupts.
- *
- * On entry:
- * r1: pointer to current task (ar.k6)
- */
-#define MINSTATE_START_SAVE_MIN_VIRT \
-(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
- ;; \
-(pUStk) mov.m r24=ar.rnat; \
-(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
-(pKStk) mov r1=sp; /* get sp */ \
- ;; \
-(pUStk) lfetch.fault.excl.nt1 [r22]; \
-(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
-(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
- ;; \
-(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
- ;; \
-(pUStk) mov r18=ar.bsp; \
-(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
-
-#define MINSTATE_END_SAVE_MIN_VIRT \
- bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
- ;;
-
-/*
- * For mca_asm.S we want to access the stack physically since the state is saved before we
- * go virtual and don't want to destroy the iip or ipsr.
- */
-#define MINSTATE_START_SAVE_MIN_PHYS \
-(pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \
-(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \
-(pKStk) ld8 r3 = [r3];; \
-(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \
-(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \
-(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
-(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
- ;; \
-(pUStk) mov r24=ar.rnat; \
-(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
-(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
-(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
- ;; \
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
-(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
- ;; \
-(pUStk) mov r18=ar.bsp; \
-(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
-
-#define MINSTATE_END_SAVE_MIN_PHYS \
- dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \
- ;;
-
-#ifdef MINSTATE_VIRT
-# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT)
-# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT
-# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT
-#endif
-
-#ifdef MINSTATE_PHYS
-# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg
-# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS
-# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS
-#endif
-
-/*
- * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- * psr.ic: off
- * r31: contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- * psr.ic: off
- * r2 = points to &pt_regs.r16
- * r8 = contents of ar.ccv
- * r9 = contents of ar.csd
- * r10 = contents of ar.ssd
- * r11 = FPSR_DEFAULT
- * r12 = kernel sp (kernel virtual address)
- * r13 = points to current task_struct (kernel virtual address)
- * p15 = TRUE if psr.i is set in cr.ipsr
- * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- * preserved
- * CONFIG_XEN note: p6/p7 are not preserved
- *
- * Note that psr.ic is NOT turned on by this macro. This is so that
- * we can pass interruption state as arguments to a handler.
- */
-#ifdef CONFIG_XEN
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
- MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
- mov r27=ar.rsc; /* M */ \
- mov r20=r1; /* A */ \
- mov r25=ar.unat; /* M */ \
- /* replaces: mov r29=cr.ipsr (M); the value is loaded from XSI_IPSR instead */ \
- movl r29=XSI_IPSR;; \
- ld8 r29=[r29];; \
- mov r26=ar.pfs; /* I */ \
- /* replaces: mov r28=cr.iip (M); the value is loaded from XSI_IIP instead */ \
- movl r28=XSI_IIP;; \
- ld8 r28=[r28];; \
- mov r21=ar.fpsr; /* M */ \
- COVER; /* B;; (or nothing) */ \
- ;; \
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
- ;; \
- ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
- st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
- adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
- /* switch from user to kernel RBS: */ \
- ;; \
- invala; /* M */ \
- /* SAVE_IFS is not expanded here; see xen special handling below */ \
- cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
- ;; \
- MINSTATE_START_SAVE_MIN \
- adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
- adds r16=PT(CR_IPSR),r1; \
- ;; \
- lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
- st8 [r16]=r29; /* save cr.ipsr */ \
- ;; \
- lfetch.fault.excl.nt1 [r17]; \
- tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
- mov r29=b0 \
- ;; \
- adds r16=PT(R8),r1; /* initialize first base pointer */ \
- adds r17=PT(R9),r1; /* initialize second base pointer */ \
-(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r8,16; \
-.mem.offset 8,0; st8.spill [r17]=r9,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r10,24; \
-.mem.offset 8,0; st8.spill [r17]=r11,24; \
- ;; \
- /* xen special handling for possibly lazy cover */ \
- movl r8=XSI_PRECOVER_IFS; \
- ;; \
- ld8 r30=[r8]; \
- ;; \
- st8 [r16]=r28,16; /* save cr.iip */ \
- st8 [r17]=r30,16; /* save cr.ifs */ \
-(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
- mov r8=ar.ccv; \
- mov r9=ar.csd; \
- mov r10=ar.ssd; \
- movl r11=FPSR_DEFAULT; /* L-unit */ \
- ;; \
- st8 [r16]=r25,16; /* save ar.unat */ \
- st8 [r17]=r26,16; /* save ar.pfs */ \
- shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
- ;; \
- st8 [r16]=r27,16; /* save ar.rsc */ \
-(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
-(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
- ;; /* avoid RAW on r16 & r17 */ \
-(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
- st8 [r17]=r31,16; /* save predicates */ \
-(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
- ;; \
- st8 [r16]=r29,16; /* save b0 */ \
- st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
- cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
-.mem.offset 8,0; st8.spill [r17]=r12,16; \
- adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r13,16; \
-.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
- mov r13=IA64_KR(CURRENT); /* establish `current' */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r15,16; \
-.mem.offset 8,0; st8.spill [r17]=r14,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r2,16; \
-.mem.offset 8,0; st8.spill [r17]=r3,16; \
- ;; \
- EXTRA; \
- mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2; /* call xen_bsw1, keeping b0 in r2 across the call */ \
- adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
- ;; \
- movl r1=__gp; /* establish kernel global pointer */ \
- ;; \
- /* MINSTATE_END_SAVE_MIN is intentionally not expanded in the Xen variant */
-#else
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
- MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
- mov r27=ar.rsc; /* M */ \
- mov r20=r1; /* A */ \
- mov r25=ar.unat; /* M */ \
- mov r29=cr.ipsr; /* M */ \
- mov r26=ar.pfs; /* I */ \
- mov r28=cr.iip; /* M */ \
- mov r21=ar.fpsr; /* M */ \
- COVER; /* B;; (or nothing) */ \
- ;; \
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
- ;; \
- ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
- st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
- adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
- /* switch from user to kernel RBS: */ \
- ;; \
- invala; /* M */ \
- SAVE_IFS; \
- cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
- ;; \
- MINSTATE_START_SAVE_MIN \
- adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
- adds r16=PT(CR_IPSR),r1; \
- ;; \
- lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
- st8 [r16]=r29; /* save cr.ipsr */ \
- ;; \
- lfetch.fault.excl.nt1 [r17]; \
- tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
- mov r29=b0 \
- ;; \
- adds r16=PT(R8),r1; /* initialize first base pointer */ \
- adds r17=PT(R9),r1; /* initialize second base pointer */ \
-(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r8,16; \
-.mem.offset 8,0; st8.spill [r17]=r9,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r10,24; \
-.mem.offset 8,0; st8.spill [r17]=r11,24; \
- ;; \
- st8 [r16]=r28,16; /* save cr.iip */ \
- st8 [r17]=r30,16; /* save cr.ifs */ \
-(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
- mov r8=ar.ccv; \
- mov r9=ar.csd; \
- mov r10=ar.ssd; \
- movl r11=FPSR_DEFAULT; /* L-unit */ \
- ;; \
- st8 [r16]=r25,16; /* save ar.unat */ \
- st8 [r17]=r26,16; /* save ar.pfs */ \
- shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
- ;; \
- st8 [r16]=r27,16; /* save ar.rsc */ \
-(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
-(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
- ;; /* avoid RAW on r16 & r17 */ \
-(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
- st8 [r17]=r31,16; /* save predicates */ \
-(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
- ;; \
- st8 [r16]=r29,16; /* save b0 */ \
- st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
- cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
-.mem.offset 8,0; st8.spill [r17]=r12,16; \
- adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r13,16; \
-.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
- mov r13=IA64_KR(CURRENT); /* establish `current' */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r15,16; \
-.mem.offset 8,0; st8.spill [r17]=r14,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r2,16; \
-.mem.offset 8,0; st8.spill [r17]=r3,16; \
- adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
- ;; \
- EXTRA; \
- movl r1=__gp; /* establish kernel global pointer */ \
- ;; \
- MINSTATE_END_SAVE_MIN
-#endif
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- * psr.ic: on
- * r2: points to &pt_regs.r16
- * r3: points to &pt_regs.r17
- * r8: contents of ar.ccv
- * r9: contents of ar.csd
- * r10: contents of ar.ssd
- * r11: FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define SAVE_REST \
-.mem.offset 0,0; st8.spill [r2]=r16,16; \
-.mem.offset 8,0; st8.spill [r3]=r17,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r2]=r18,16; \
-.mem.offset 8,0; st8.spill [r3]=r19,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r2]=r20,16; \
-.mem.offset 8,0; st8.spill [r3]=r21,16; \
- mov r18=b6; \
- ;; \
-.mem.offset 0,0; st8.spill [r2]=r22,16; \
-.mem.offset 8,0; st8.spill [r3]=r23,16; \
- mov r19=b7; \
- ;; \
-.mem.offset 0,0; st8.spill [r2]=r24,16; \
-.mem.offset 8,0; st8.spill [r3]=r25,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r2]=r26,16; \
-.mem.offset 8,0; st8.spill [r3]=r27,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r2]=r28,16; \
-.mem.offset 8,0; st8.spill [r3]=r29,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r2]=r30,16; \
-.mem.offset 8,0; st8.spill [r3]=r31,32; \
- ;; \
- mov ar.fpsr=r11; /* M-unit */ \
- st8 [r2]=r8,8; /* ar.ccv */ \
- adds r24=PT(B6)-PT(F7),r3; \
- ;; \
- stf.spill [r2]=f6,32; \
- stf.spill [r3]=f7,32; \
- ;; \
- stf.spill [r2]=f8,32; \
- stf.spill [r3]=f9,32; \
- ;; \
- stf.spill [r2]=f10; \
- stf.spill [r3]=f11; \
- adds r25=PT(B7)-PT(F11),r3; \
- ;; \
- st8 [r24]=r18,16; /* b6 */ \
- st8 [r25]=r19,16; /* b7 */ \
- ;; \
- st8 [r24]=r9; /* ar.csd */ \
- st8 [r25]=r10; /* ar.ssd */ \
- ;;
-
-#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,)
-#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
-#ifdef CONFIG_XEN
-#define SAVE_MIN break 0;; /* FIXME: non-cover version only for ia32 support? */
-#else
-#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, )
-#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
deleted file mode 100644
index d8ebfb994a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * ia64/xen/xenpal.S
- *
- * Alternate PAL routines for Xen. Heavily leveraged from
- * ia64/kernel/pal.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-
-GLOBAL_ENTRY(xen_pal_call_static)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
- alloc loc1 = ar.pfs,5,5,0,0
-#ifdef CONFIG_XEN
- movl r22=running_on_xen;;
- ld4 r22=[r22];;
- cmp.eq p7,p0=r22,r0 // p7 = running_on_xen is zero
-(p7) br.cond.spnt.many __ia64_pal_call_static;; // not on Xen: use __ia64_pal_call_static instead
-#endif
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0
- mov r29 = in1
- mov r8 = ip
- }
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- tbit.nz p6,p7 = in4, 0 // p6 = bit 0 of in4 set, p7 = bit 0 clear
- adds r8 = 1f-1b,r8 // r8 = address of the second 1: label (the return point below)
- mov loc4=ar.rsc // save RSE configuration
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
-#ifdef CONFIG_XEN
- mov r9 = r8 // stash the return address; r8 is read as the XEN_HYPER_GET_PSR result below
- XEN_HYPER_GET_PSR
- ;;
- mov loc3 = r8 // loc3 = psr value (presumably returned in r8 by XEN_HYPER_GET_PSR -- confirm)
- mov r8 = r9 // r8 = return address again
- ;;
-#else
- mov loc3 = psr
-#endif
- mov loc0 = rp
- .body
- mov r30 = in2
-
-#ifdef CONFIG_XEN
- // this is low priority for paravirtualization, but is called
- // from the idle loop so confuses privop counting
- movl r31=XSI_PSR_I_ADDR
- ;;
- ld8 r31=[r31]
- mov r22=1
- ;;
- st1 [r31]=r22 // write 1 through the pointer held at XSI_PSR_I_ADDR (the #else branch does rsm psr.i here)
- ;;
-(p6) movl r31=XSI_PSR_IC
- ;;
-(p6) st4.rel [r31]=r0 // p6 only: write 0 to XSI_PSR_IC (the #else branch also clears psr.ic under p6)
- ;;
- mov r31 = in3
- mov b7 = loc2
- ;;
-#else
-(p6) rsm psr.i | psr.ic
- mov r31 = in3
- mov b7 = loc2
-
-(p7) rsm psr.i
- ;;
-(p6) srlz.i
-#endif
- mov rp = r8 // rp = address of the 1: label below
- br.cond.sptk.many b7 // jump to the PAL entry point loaded into loc2
-1: mov psr.l = loc3
- mov ar.rsc = loc4 // restore RSE configuration
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(xen_pal_call_static)
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
deleted file mode 100644
index 469f39e226..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Support routines for Xen
- *
- * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/processor.h>
-#include <asm/asmmacro.h>
-
-#define isBP p3 // are we the Bootstrap Processor?
-
- .text
-GLOBAL_ENTRY(early_xen_setup)
- mov r8=ar.rsc // Initialized in head.S
-(isBP) movl r9=running_on_xen;;
- extr.u r8=r8,2,2;; // Extract pl fields
- cmp.eq p7,p0=r8,r0 // p7: !running on xen
- mov r8=1 // booleanize.
-(p7) br.ret.sptk.many rp;;
-(isBP) st4 [r9]=r8 // boot processor only: running_on_xen = 1
- movl r10=xen_ivt;;
-
- mov cr.iva=r10 // cr.iva = xen_ivt
-
- /* Set xsi base. */
-#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600
-(isBP) mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA
-(isBP) movl r28=XSI_BASE;;
-(isBP) break 0x1000;; // boot processor only: issue the request set up in r2/r28
-
- br.ret.sptk.many rp
- ;;
-END(early_xen_setup)
-
-#include <xen/interface/xen.h>
-
-/* Stub for suspend.
- Just force the stacked registers to be written in memory. */
-GLOBAL_ENTRY(xencomm_arch_hypercall_suspend)
- ;;
- alloc r20=ar.pfs,0,0,6,0
- mov r2=__HYPERVISOR_sched_op // r2 = hypercall number
- ;;
- /* We don't want to deal with RSE. */
- flushrs
- mov r33=r32 // move the incoming r32 value into the second slot
- mov r32=2 // SCHEDOP_shutdown
- ;;
- break 0x1000
- ;;
- br.ret.sptk.many b0
-END(xencomm_arch_hypercall_suspend)