aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvh249@kneesaa.uk.xensource.com <vh249@kneesaa.uk.xensource.com>2005-07-11 09:35:19 -0500
committervh249@kneesaa.uk.xensource.com <vh249@kneesaa.uk.xensource.com>2005-07-11 09:35:19 -0500
commita7585e4041167bd489707e7c7bb1e54718888568 (patch)
treebb2f2b8c211a034f6378f19b77c4b97bfc91799f
parent105077619922d8c782b74491afd1b406dc654fa7 (diff)
downloadxen-a7585e4041167bd489707e7c7bb1e54718888568.tar.gz
xen-a7585e4041167bd489707e7c7bb1e54718888568.tar.bz2
xen-a7585e4041167bd489707e7c7bb1e54718888568.zip
upgrade linux sparse tree from 2.6.11 to 2.6.12
Signed-off-by: Vincent Hanquez <vincent@xensource.com> --HG-- rename : patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch => patches/linux-2.6.12/i386-cpu-hotplug-updated-for-mm.patch rename : patches/linux-2.6.11/net-csum.patch => patches/linux-2.6.12/net-csum.patch rename : patches/linux-2.6.11/rcu-nohz.patch => patches/linux-2.6.12/rcu-nohz.patch rename : patches/linux-2.6.11/smp-alts.patch => patches/linux-2.6.12/smp-alts.patch rename : patches/linux-2.6.11/x86_64-linux.patch => patches/linux-2.6.12/x86_64-linux.patch
-rw-r--r--buildconfigs/mk.linux-2.6-xen02
-rw-r--r--buildconfigs/mk.linux-2.6-xenU2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/Kconfig2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/Kconfig.debug129
-rw-r--r--linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_3271
-rw-r--r--linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_6477
-rw-r--r--linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_3265
-rw-r--r--linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_6442
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/Kconfig116
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/Makefile5
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile15
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c6
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c52
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S292
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c11
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c20
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c5
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c11
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c54
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c82
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c23
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c71
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c118
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c70
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/mm/init.c63
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c81
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig67
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile6
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S8
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c92
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile1
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c101
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c2
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S96
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S11
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c7
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c102
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c5
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c27
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c59
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c192
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c3
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c78
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c35
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c781
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c250
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c119
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c18
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c35
-rw-r--r--linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c26
-rw-r--r--linux-2.6-xen-sparse/drivers/Makefile3
-rw-r--r--linux-2.6-xen-sparse/drivers/char/mem.c293
-rw-r--r--linux-2.6-xen-sparse/drivers/char/tty_io.c7
-rw-r--r--linux-2.6-xen-sparse/include/asm-generic/pgtable.h148
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h4
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h2
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h2
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h11
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h6
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h1
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h3
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h25
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h23
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h5
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h2
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h8
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h3
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h15
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h4
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h3
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h29
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h8
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h5
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h7
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h31
-rw-r--r--linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h2
-rw-r--r--linux-2.6-xen-sparse/include/linux/gfp.h44
-rw-r--r--linux-2.6-xen-sparse/include/linux/mm.h27
-rw-r--r--linux-2.6-xen-sparse/include/linux/skbuff.h122
-rw-r--r--linux-2.6-xen-sparse/mm/highmem.c12
-rw-r--r--linux-2.6-xen-sparse/mm/memory.c987
-rw-r--r--linux-2.6-xen-sparse/mm/mmap.c312
-rw-r--r--linux-2.6-xen-sparse/mm/page_alloc.c171
-rw-r--r--linux-2.6-xen-sparse/net/core/dev.c84
-rw-r--r--linux-2.6-xen-sparse/net/core/skbuff.c169
-rw-r--r--patches/linux-2.6.11/agpgart.patch437
-rw-r--r--patches/linux-2.6.11/iomap.patch120
-rw-r--r--patches/linux-2.6.11/linux-2.6.11.12.patch2579
-rw-r--r--patches/linux-2.6.11/udp-frag.patch55
-rw-r--r--patches/linux-2.6.12/i386-cpu-hotplug-updated-for-mm.patch (renamed from patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch)257
-rw-r--r--patches/linux-2.6.12/net-csum.patch (renamed from patches/linux-2.6.11/net-csum.patch)11
-rw-r--r--patches/linux-2.6.12/rcu-nohz.patch (renamed from patches/linux-2.6.11/rcu-nohz.patch)0
-rw-r--r--patches/linux-2.6.12/smp-alts.patch (renamed from patches/linux-2.6.11/smp-alts.patch)0
-rw-r--r--patches/linux-2.6.12/x86_64-linux.patch (renamed from patches/linux-2.6.11/x86_64-linux.patch)0
100 files changed, 3548 insertions, 6107 deletions
diff --git a/buildconfigs/mk.linux-2.6-xen0 b/buildconfigs/mk.linux-2.6-xen0
index 72f49267ca..b06f289078 100644
--- a/buildconfigs/mk.linux-2.6-xen0
+++ b/buildconfigs/mk.linux-2.6-xen0
@@ -2,7 +2,7 @@
OS = linux
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.11
+LINUX_VER = 2.6.12
EXTRAVERSION = xen0
diff --git a/buildconfigs/mk.linux-2.6-xenU b/buildconfigs/mk.linux-2.6-xenU
index bd9856c39b..c98e296742 100644
--- a/buildconfigs/mk.linux-2.6-xenU
+++ b/buildconfigs/mk.linux-2.6-xenU
@@ -2,7 +2,7 @@
OS = linux
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.11
+LINUX_VER = 2.6.12
EXTRAVERSION = xenU
diff --git a/linux-2.6-xen-sparse/arch/xen/Kconfig b/linux-2.6-xen-sparse/arch/xen/Kconfig
index 480c4e8fd1..9ff5a27ad1 100644
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig
@@ -194,3 +194,5 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "arch/xen/Kconfig.debug"
diff --git a/linux-2.6-xen-sparse/arch/xen/Kconfig.debug b/linux-2.6-xen-sparse/arch/xen/Kconfig.debug
new file mode 100644
index 0000000000..663eacc9e5
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig.debug
@@ -0,0 +1,129 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+# X86
+config EARLY_PRINTK
+ bool "Early printk" if EMBEDDED && DEBUG_KERNEL
+ default y
+ depends on X86
+ help
+ Write kernel log output directly into the VGA buffer or to a serial
+ port.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized. For normal operation
+ it is not recommended because it looks ugly and doesn't cooperate
+ with klogd/syslogd or the X server. You should normally say N here,
+ unless you want to debug such a crash.
+
+config DEBUG_STACKOVERFLOW
+ bool "Check for stack overflows"
+ depends on DEBUG_KERNEL && X86
+
+config KPROBES
+ bool "Kprobes"
+ depends on DEBUG_KERNEL && X86
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
+config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL && X86
+ help
+ Enables the display of the minimum amount of free stack which each
+ task has ever had available in the sysrq-T and sysrq-P debug output.
+
+ This option will slow down process creation somewhat.
+
+comment "Page alloc debug is incompatible with Software Suspend on i386"
+ depends on DEBUG_KERNEL && SOFTWARE_SUSPEND && X86
+
+config DEBUG_PAGEALLOC
+ bool "Page alloc debugging"
+ depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND && X86
+ help
+ Unmap pages from the kernel linear mapping after free_pages().
+ This results in a large slowdown, but helps to find certain types
+ of memory corruptions.
+
+config 4KSTACKS
+ bool "Use 4Kb for kernel stacks instead of 8Kb"
+ depends on DEBUG_KERNEL && X86
+ help
+ If you say Y here the kernel will use a 4Kb stacksize for the
+ kernel stack attached to each process/thread. This facilitates
+ running more threads on a system and also reduces the pressure
+ on the VM subsystem for higher order allocations. This option
+ will also use IRQ stacks to compensate for the reduced stackspace.
+
+config X86_FIND_SMP_CONFIG
+ bool
+ depends on X86_LOCAL_APIC || X86_VOYAGER && X86
+ default y
+
+config X86_MPPARSE
+ bool
+ depends on X86_LOCAL_APIC && !X86_VISWS && X86
+ default y
+
+# X86_64
+
+# !SMP for now because the context switch early causes GPF in segment reloading
+# and the GS base checking does the wrong thing then, causing a hang.
+config CHECKING
+ bool "Additional run-time checks"
+ depends on DEBUG_KERNEL && !SMP && X86_64
+ help
+ Enables some internal consistency checks for kernel debugging.
+ You should normally say N.
+
+config INIT_DEBUG
+ bool "Debug __init statements"
+ depends on DEBUG_KERNEL && X86_64
+ help
+ Fill __init and __initdata at the end of boot. This helps debugging
+ illegal uses of __init and __initdata after initialization.
+
+config IOMMU_DEBUG
+ depends on GART_IOMMU && DEBUG_KERNEL && X86_64
+ bool "Enable IOMMU debugging"
+ help
+ Force the IOMMU to on even when you have less than 4GB of
+ memory and add debugging code. On overflow always panic. Also
+ allows IOMMU leak tracing to be enabled. Can be disabled at boot
+ time with iommu=noforce. This will also enable scatter gather
+ list merging. Currently not recommended for production
+ code. When you use it make sure you have a big enough
+ IOMMU/AGP aperture. Most of the options enabled by this can
+ be set more finegrained using the iommu= command line
+ options. See Documentation/x86_64/boot-options.txt for more
+ details.
+
+config IOMMU_LEAK
+ bool "IOMMU leak tracing"
+ depends on DEBUG_KERNEL && X86_64
+ depends on IOMMU_DEBUG
+ help
+ Add a simple leak tracer to the IOMMU code. This is useful when you
+ are debugging a buggy device driver that leaks IOMMU mappings.
+
+#config X86_REMOTE_DEBUG
+# bool "kgdb debugging stub"
+
+# X86 & X86_64
+config KPROBES
+ bool "Kprobes"
+ depends on DEBUG_KERNEL
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
+endmenu
diff --git a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
index 4df0524b23..ba014eac18 100644
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11.12-xen0
-# Wed Jul 6 18:26:29 2005
+# Linux kernel version: 2.6.12-xen0
+# Sat Jul 9 09:19:47 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -34,6 +34,7 @@ CONFIG_EXPERIMENTAL=y
CONFIG_BROKEN=y
CONFIG_BROKEN_ON_SMP=y
CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
@@ -45,7 +46,6 @@ CONFIG_SYSVIPC=y
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
@@ -53,15 +53,18 @@ CONFIG_KOBJECT_UEVENT=y
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
#
# Loadable module support
@@ -101,6 +104,7 @@ CONFIG_MPENTIUM4=y
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
@@ -121,6 +125,7 @@ CONFIG_X86_USE_PPRO_CHECKSUM=y
# CONFIG_SMP is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=y
CONFIG_X86_CPUID=y
@@ -154,6 +159,8 @@ CONFIG_PCI_MMCONFIG=y
# CONFIG_PCI_MSI is not set
CONFIG_PCI_LEGACY_PROC=y
# CONFIG_PCI_NAMES is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_ISA_DMA_API=y
CONFIG_ISA=y
# CONFIG_EISA is not set
# CONFIG_MCA is not set
@@ -165,11 +172,6 @@ CONFIG_ISA=y
# CONFIG_PCCARD is not set
#
-# PC-card bridges
-#
-CONFIG_PCMCIA_PROBE=y
-
-#
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set
@@ -177,8 +179,10 @@ CONFIG_PCMCIA_PROBE=y
#
# Kernel hacking
#
+# CONFIG_PRINTK_TIME is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
# CONFIG_SCHEDSTATS is not set
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_PREEMPT is not set
@@ -202,6 +206,7 @@ CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_PC=y
+CONFIG_SECCOMP=y
#
# Executable file formats
@@ -358,7 +363,7 @@ CONFIG_BLK_DEV_SD=y
#
# SCSI Transport Attributes
#
-# CONFIG_SCSI_SPI_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
@@ -435,6 +440,7 @@ CONFIG_SCSI_QLA2XXX=y
# CONFIG_SCSI_QLA2300 is not set
# CONFIG_SCSI_QLA2322 is not set
# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_SEAGATE is not set
# CONFIG_SCSI_SYM53C416 is not set
# CONFIG_SCSI_DC395x is not set
@@ -468,6 +474,7 @@ CONFIG_BLK_DEV_DM=y
CONFIG_DM_SNAPSHOT=y
CONFIG_DM_MIRROR=y
# CONFIG_DM_ZERO is not set
+# CONFIG_DM_MULTIPATH is not set
#
# Fusion MPT device support
@@ -496,7 +503,6 @@ CONFIG_NET=y
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
@@ -676,7 +682,6 @@ CONFIG_PCNET32=y
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
CONFIG_E100=y
-# CONFIG_E100_NAPI is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
CONFIG_NE2K_PCI=y
@@ -709,6 +714,7 @@ CONFIG_E1000=y
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
+# CONFIG_BNX2 is not set
#
# Ethernet (10000 Mbit)
@@ -766,19 +772,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_EVBUG is not set
#
-# Input I/O drivers
-#
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
-
-#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
@@ -799,6 +792,18 @@ CONFIG_MOUSE_PS2=y
# CONFIG_INPUT_MISC is not set
#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
# Character devices
#
CONFIG_VT=y
@@ -814,6 +819,7 @@ CONFIG_HW_CONSOLE=y
#
# Non-8250 serial port support
#
+# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
@@ -846,7 +852,6 @@ CONFIG_AGP_ATI=m
CONFIG_AGP_AMD=m
CONFIG_AGP_AMD64=m
CONFIG_AGP_INTEL=m
-CONFIG_AGP_INTEL_MCH=m
CONFIG_AGP_NVIDIA=m
CONFIG_AGP_SIS=m
CONFIG_AGP_SWORKS=m
@@ -868,6 +873,11 @@ CONFIG_DRM_SIS=m
# CONFIG_HANGCHECK_TIMER is not set
#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
+#
# I2C support
#
# CONFIG_I2C is not set
@@ -913,6 +923,8 @@ CONFIG_DUMMY_CONSOLE=y
#
# USB support
#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set
@@ -923,14 +935,14 @@ CONFIG_USB=y
# CONFIG_USB_BANDWIDTH is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_OTG is not set
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
#
# USB Host Controller Drivers
#
# CONFIG_USB_EHCI_HCD is not set
CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
@@ -967,7 +979,6 @@ CONFIG_USB_HIDINPUT=y
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
-# CONFIG_USB_HPUSBSCSI is not set
#
# USB Multimedia devices
@@ -986,6 +997,7 @@ CONFIG_USB_HIDINPUT=y
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
#
# USB port drivers
@@ -1232,6 +1244,7 @@ CONFIG_CRYPTO_SHA1=m
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
CONFIG_CRYPTO_DES=m
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
diff --git a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
index 2150f4381b..b1e78d5503 100644
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11.1-xen0
-# Tue May 10 11:07:02 2005
+# Linux kernel version: 2.6.12-xen0
+# Wed Jun 29 10:01:20 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -33,6 +33,7 @@ CONFIG_EXPERIMENTAL=y
# CONFIG_CLEAN_COMPILE is not set
CONFIG_BROKEN=y
CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
@@ -44,22 +45,24 @@ CONFIG_SYSVIPC=y
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
# CONFIG_HOTPLUG is not set
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
#
# Loadable module support
@@ -74,6 +77,7 @@ CONFIG_KMOD=y
CONFIG_XENARCH="x86_64"
CONFIG_X86=y
CONFIG_MMU=y
+CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_X86_CMPXCHG=y
@@ -93,15 +97,17 @@ CONFIG_X86_IO_APIC=y
CONFIG_PCI=y
CONFIG_PCI_DIRECT=y
# CONFIG_PCI_MMCONFIG is not set
-CONFIG_EARLY_PRINTK=y
+CONFIG_ISA_DMA_API=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_SECCOMP=y
#
# X86_64 processor configuration
#
CONFIG_X86_64=y
CONFIG_64BIT=y
+CONFIG_EARLY_PRINTK=y
#
# Processor type and features
@@ -135,6 +141,9 @@ CONFIG_DUMMY_IOMMU=y
#
CONFIG_IA32_EMULATION=y
# CONFIG_IA32_AOUT is not set
+CONFIG_COMPAT=y
+CONFIG_SYSVIPC_COMPAT=y
+
#
# Executable file formats
#
@@ -285,7 +294,7 @@ CONFIG_BLK_DEV_SD=y
#
# SCSI Transport Attributes
#
-# CONFIG_SCSI_SPI_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
@@ -352,6 +361,7 @@ CONFIG_SCSI_QLA2XXX=y
# CONFIG_SCSI_QLA2300 is not set
# CONFIG_SCSI_QLA2322 is not set
# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_DEBUG is not set
@@ -388,7 +398,6 @@ CONFIG_NET=y
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
@@ -553,7 +562,6 @@ CONFIG_PCNET32=y
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
CONFIG_E100=y
-# CONFIG_E100_NAPI is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
CONFIG_NE2K_PCI=y
@@ -584,6 +592,7 @@ CONFIG_E1000=y
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
+# CONFIG_BNX2 is not set
#
# Ethernet (10000 Mbit)
@@ -641,19 +650,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_EVBUG is not set
#
-# Input I/O drivers
-#
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
-
-#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
@@ -671,6 +667,18 @@ CONFIG_MOUSE_PS2=y
# CONFIG_INPUT_MISC is not set
#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
# Character devices
#
CONFIG_VT=y
@@ -686,6 +694,7 @@ CONFIG_HW_CONSOLE=y
#
# Non-8250 serial port support
#
+# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
@@ -702,7 +711,6 @@ CONFIG_LEGACY_PTY_COUNT=256
# CONFIG_HW_RANDOM is not set
# CONFIG_NVRAM is not set
CONFIG_RTC=y
-# CONFIG_GEN_RTC is not set
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
@@ -713,7 +721,7 @@ CONFIG_RTC=y
# CONFIG_FTAPE is not set
CONFIG_AGP=m
CONFIG_AGP_AMD64=m
-CONFIG_AGP_INTEL_MCH=m
+# CONFIG_AGP_INTEL is not set
CONFIG_DRM=m
CONFIG_DRM_TDFX=m
# CONFIG_DRM_GAMMA is not set
@@ -727,6 +735,11 @@ CONFIG_DRM_SIS=m
# CONFIG_HANGCHECK_TIMER is not set
#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
+#
# I2C support
#
# CONFIG_I2C is not set
@@ -771,13 +784,9 @@ CONFIG_DUMMY_CONSOLE=y
#
# USB support
#
-# CONFIG_USB is not set
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
-#
+# CONFIG_USB is not set
#
# USB Gadget Support
@@ -994,6 +1003,7 @@ CONFIG_CRYPTO_SHA1=m
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
CONFIG_CRYPTO_DES=m
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
@@ -1019,5 +1029,14 @@ CONFIG_CRYPTO_CRC32C=m
#
# CONFIG_CRC_CCITT is not set
CONFIG_CRC32=y
-CONFIG_LIBCRC32C=y
+CONFIG_LIBCRC32C=m
CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_DEBUG_KERNEL is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
diff --git a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
index c2011b63b2..10d3dac92b 100644
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11.12-xenU
-# Wed Jul 6 22:40:19 2005
+# Linux kernel version: 2.6.12-xenU
+# Sun Jul 10 17:32:04 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -29,6 +29,7 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
CONFIG_EXPERIMENTAL=y
CONFIG_CLEAN_COMPILE=y
CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
@@ -40,23 +41,26 @@ CONFIG_SYSVIPC=y
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
#
# Loadable module support
@@ -97,6 +101,7 @@ CONFIG_MPENTIUM4=y
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
@@ -120,6 +125,7 @@ CONFIG_NR_CPUS=8
# CONFIG_SCHED_SMT is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_X86_CPUID=y
#
@@ -132,35 +138,14 @@ CONFIG_HIGHMEM=y
CONFIG_HAVE_DEC_LOCK=y
# CONFIG_REGPARM is not set
CONFIG_HOTPLUG_CPU=y
-
-#
-# Kernel hacking
-#
-CONFIG_DEBUG_KERNEL=y
-CONFIG_MAGIC_SYSRQ=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_PREEMPT is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_HIGHMEM is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_FRAME_POINTER is not set
-CONFIG_EARLY_PRINTK=y
-# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
-# CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_4KSTACKS is not set
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_SMP=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_PC=y
+CONFIG_SECCOMP=y
+CONFIG_EARLY_PRINTK=y
#
# Executable file formats
@@ -256,7 +241,6 @@ CONFIG_NET=y
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
@@ -523,6 +507,7 @@ CONFIG_CRYPTO_MD5=m
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_DES is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
@@ -551,3 +536,27 @@ CONFIG_CRYPTO_CRC32C=m
# CONFIG_CRC32 is not set
CONFIG_LIBCRC32C=m
CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
diff --git a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
index de7cc84038..68364af0d0 100644
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11.10-xenU
-# Mon May 23 15:07:58 2005
+# Linux kernel version: 2.6.12-xenU
+# Thu Jul 7 11:43:14 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -29,6 +29,7 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
CONFIG_EXPERIMENTAL=y
CONFIG_CLEAN_COMPILE=y
CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
@@ -42,22 +43,24 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_SYSCTL=y
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
-CONFIG_LOG_BUF_SHIFT=14
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
#
# Loadable module support
@@ -72,6 +75,7 @@ CONFIG_KMOD=y
CONFIG_XENARCH="x86_64"
CONFIG_X86=y
CONFIG_MMU=y
+CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_X86_CMPXCHG=y
@@ -89,15 +93,17 @@ CONFIG_X86_CPUID=y
# CONFIG_X86_LOCAL_APIC is not set
# CONFIG_X86_IO_APIC is not set
# CONFIG_PCI is not set
-CONFIG_EARLY_PRINTK=y
+CONFIG_ISA_DMA_API=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_SECCOMP=y
#
# X86_64 processor configuration
#
CONFIG_X86_64=y
CONFIG_64BIT=y
+CONFIG_EARLY_PRINTK=y
#
# Processor type and features
@@ -130,6 +136,9 @@ CONFIG_DUMMY_IOMMU=y
#
CONFIG_IA32_EMULATION=y
# CONFIG_IA32_AOUT is not set
+CONFIG_COMPAT=y
+CONFIG_SYSVIPC_COMPAT=y
+
#
# Executable file formats
#
@@ -226,6 +235,7 @@ CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
CONFIG_DM_MIRROR=m
CONFIG_DM_ZERO=m
+# CONFIG_DM_MULTIPATH is not set
#
# Networking support
@@ -237,7 +247,6 @@ CONFIG_NET=y
#
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
-CONFIG_NETLINK_DEV=y
CONFIG_UNIX=y
CONFIG_NET_KEY=m
CONFIG_INET=y
@@ -246,6 +255,7 @@ CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_FWMARK=y
CONFIG_IP_ROUTE_MULTIPATH=y
+# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
CONFIG_IP_ROUTE_VERBOSE=y
# CONFIG_IP_PNP is not set
CONFIG_NET_IPIP=m
@@ -373,7 +383,7 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
#
-# IPv6: Netfilter Configuration
+# IPv6: Netfilter Configuration (EXPERIMENTAL)
#
# CONFIG_IP6_NF_QUEUE is not set
CONFIG_IP6_NF_IPTABLES=m
@@ -480,6 +490,7 @@ CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_QOS=y
CONFIG_NET_ESTIMATOR=y
CONFIG_NET_CLS=y
+# CONFIG_NET_CLS_BASIC is not set
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_ROUTE=y
@@ -490,6 +501,7 @@ CONFIG_NET_CLS_IND=y
# CONFIG_CLS_U32_MARK is not set
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
+# CONFIG_NET_EMATCH is not set
# CONFIG_NET_CLS_ACT is not set
CONFIG_NET_CLS_POLICE=y
@@ -554,6 +566,11 @@ CONFIG_ACT200L_DONGLE=m
#
# FIR device drivers
#
+# CONFIG_NSC_FIR is not set
+# CONFIG_WINBOND_FIR is not set
+# CONFIG_SMC_IRCC_FIR is not set
+# CONFIG_ALI_FIR is not set
+# CONFIG_VIA_FIR is not set
CONFIG_BT=m
CONFIG_BT_L2CAP=m
CONFIG_BT_SCO=m
@@ -577,7 +594,6 @@ CONFIG_DUMMY=m
CONFIG_BONDING=m
CONFIG_EQUALIZER=m
CONFIG_TUN=m
-CONFIG_ETHERTAP=m
#
# Ethernet (10 or 100Mbit)
@@ -853,7 +869,7 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
-# CONFIG_SECURITY_SELINUX_MLS is not set
+CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
#
# Cryptographic options
@@ -867,6 +883,7 @@ CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_WP512=m
+# CONFIG_CRYPTO_TGR192 is not set
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_TWOFISH=m
@@ -895,3 +912,10 @@ CONFIG_CRC32=y
CONFIG_LIBCRC32C=m
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=m
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_DEBUG_KERNEL is not set
+CONFIG_LOG_BUF_SHIFT=14
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig
index dec06cdfd1..f0cd7eac8f 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig
@@ -74,6 +74,7 @@ config M386
- "Winchip-C6" for original IDT Winchip.
- "Winchip-2" for IDT Winchip 2.
- "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
+ - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
@@ -201,6 +202,11 @@ config MWINCHIP3D
stores for this CPU, which can increase performance of some
operations.
+config MGEODEGX1
+ bool "GeodeGX1"
+ help
+ Select this for a Geode GX1 (Cyrix MediaGX) chip.
+
config MCYRIXIII
bool "CyrixIII/VIA-C3"
help
@@ -249,7 +255,7 @@ config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || X86_GENERIC
default "4" if X86_ELAN || M486 || M386
- default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
+ default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
default "6" if MK7 || MK8 || MPENTIUMM
config RWSEM_GENERIC_SPINLOCK
@@ -268,7 +274,7 @@ config GENERIC_CALIBRATE_DELAY
config X86_PPRO_FENCE
bool
- depends on M686 || M586MMX || M586TSC || M586 || M486 || M386
+ depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
default y
config X86_F00F_BUG
@@ -298,7 +304,7 @@ config X86_POPAD_OK
config X86_ALIGNMENT_16
bool
- depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
default y
config X86_GOOD_APIC
@@ -434,7 +440,7 @@ config PREEMPT_BKL
#config X86_TSC
# bool
-# depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
+# depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
# default y
#config X86_MCE
@@ -474,6 +480,24 @@ config PREEMPT_BKL
# Enabling this feature will cause a message to be printed when the P4
# enters thermal throttling.
+config X86_REBOOTFIXUPS
+ bool "Enable X86 board specific fixups for reboot"
+ depends on X86
+ default n
+ ---help---
+ This enables chipset and/or board specific fixups to be done
+ in order to get reboot to work correctly. This is only needed on
+ some combinations of hardware and BIOS. The symptom, for which
+ this config is intended, is when reboot ends with a stalled/hung
+ system.
+
+ Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1
+ combination.
+
+ Say Y if you want to enable the fixup. Currently, it's safe to
+ enable this option even if you don't need it.
+ Say N otherwise.
+
config MICROCODE
tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
depends on XEN_PRIVILEGED_GUEST
@@ -599,6 +623,16 @@ config HAVE_ARCH_BOOTMEM_NODE
depends on NUMA
default y
+config HAVE_MEMORY_PRESENT
+ bool
+ depends on DISCONTIGMEM
+ default y
+
+config NEED_NODE_MEMMAP_SIZE
+ bool
+ depends on DISCONTIGMEM
+ default y
+
#config HIGHPTE
# bool "Allocate 3rd-level pagetables from highmem"
# depends on HIGHMEM4G || HIGHMEM64G
@@ -682,14 +716,19 @@ config REGPARM
config X86_LOCAL_APIC
bool
- depends on !SMP && X86_UP_APIC
+ depends on XEN_PRIVILEGED_GUEST && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER))
default y
config X86_IO_APIC
bool
- depends on !SMP && X86_UP_IOAPIC
+ depends on XEN_PRIVILEGED_GUEST && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)))
default y
+config X86_VISWS_APIC
+ bool
+ depends on X86_VISWS
+ default y
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
depends on SMP && HOTPLUG && EXPERIMENTAL
@@ -704,20 +743,10 @@ if XEN_PHYSDEV_ACCESS
menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
-config X86_VISWS_APIC
- bool
- depends on X86_VISWS
- default y
-
-config X86_LOCAL_APIC
- bool
- depends on (X86_VISWS || SMP) && !X86_VOYAGER
- default y
-
config X86_UP_APIC
- bool "Local APIC support on uniprocessors" if !SMP
- depends on !(X86_VISWS || X86_VOYAGER)
- ---help---
+ bool "Local APIC support on uniprocessors"
+ depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+ help
A local APIC (Advanced Programmable Interrupt Controller) is an
integrated interrupt controller in the CPU. If you have a single-CPU
system which has a processor with a local APIC, you can say Y here to
@@ -727,28 +756,18 @@ config X86_UP_APIC
performance counters), and the NMI watchdog which detects hard
lockups.
- If you have a system with several CPUs, you do not need to say Y
- here: the local APIC will be used automatically.
-
config X86_UP_IOAPIC
bool "IO-APIC support on uniprocessors"
- depends on !SMP && X86_UP_APIC
+ depends on X86_UP_APIC
help
An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
SMP-capable replacement for PC-style interrupt controllers. Most
- SMP systems and a small number of uniprocessor systems have one.
+ SMP systems and many recent uniprocessor systems have one.
+
If you have a single-CPU system with an IO-APIC, you can say Y here
to use it. If you say Y here even though your machine doesn't have
an IO-APIC, then the kernel will still run with no slowdown at all.
- If you have a system with several CPUs, you do not need to say Y
- here: the IO-APIC will be used automatically.
-
-config X86_IO_APIC
- bool
- depends on SMP && !(X86_VISWS || X86_VOYAGER)
- default y
-
config PCI
bool "PCI support" if !X86_VISWS
depends on !X86_VOYAGER
@@ -809,7 +828,7 @@ config PCI_DIRECT
config PCI_MMCONFIG
bool
- depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI))
+ depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
select ACPI_BOOT
default y
@@ -817,6 +836,10 @@ source "drivers/pci/pcie/Kconfig"
source "drivers/pci/Kconfig"
+config ISA_DMA_API
+ bool
+ default y
+
config ISA
bool "ISA support"
depends on !(X86_VOYAGER || X86_VISWS)
@@ -846,18 +869,14 @@ config EISA
source "drivers/eisa/Kconfig"
config MCA
- bool "MCA support"
- depends on !(X86_VISWS || X86_VOYAGER)
+ bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+ default y if X86_VOYAGER
help
MicroChannel Architecture is found in some IBM PS/2 machines and
laptops. It is a bus system similar to PCI or ISA. See
<file:Documentation/mca.txt> (and especially the web page given
there) before attempting to build an MCA bus kernel.
-config MCA
- depends on X86_VOYAGER
- default y if X86_VOYAGER
-
source "drivers/mca/Kconfig"
config SCx200
@@ -880,8 +899,6 @@ endmenu
endif
-source "arch/i386/Kconfig.debug"
-
#
# Use the generic interrupt handling code in kernel/irq/:
#
@@ -918,4 +935,21 @@ config PC
depends on X86 && !EMBEDDED
default y
+config SECCOMP
+ bool "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
+ default y
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via /proc/<pid>/seccomp, it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
+ If unsure, say Y. Only embedded systems should say N here.
+
endmenu
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/Makefile b/linux-2.6-xen-sparse/arch/xen/i386/Makefile
index 053c0984ac..313f5708af 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/Makefile
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Makefile
@@ -14,6 +14,8 @@
# 19990713 Artur Skawina <skawina@geocities.com>
# Added '-march' and '-mpreferred-stack-boundary' support
#
+# 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net>
+# Added support for GEODE CPU
XENARCH := $(subst ",,$(CONFIG_XENARCH))
@@ -56,6 +58,9 @@ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
# AMD Elan support
cflags-$(CONFIG_X86_ELAN) += -march=i486
+# Geode GX1 support
+cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486)
+
# -mregparm=3 works ok on gcc-3.0 and later
#
GCC_VERSION := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
index 273a4b9f44..fe6e9db107 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o
c-obj-$(CONFIG_X86_NUMAQ) += numaq.o
c-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
c-obj-$(CONFIG_MODULES) += module.o
@@ -53,11 +54,11 @@ c-obj-$(CONFIG_SCx200) += scx200.o
# Note: kbuild does not track this dependency due to usage of .incbin
$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
-targets += vsyscall.lds
+targets += vsyscall-note.o vsyscall.lds
# The DSO images are built using a special linker script.
quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -nostdlib -m32 $(SYSCFLAGS_$(@F)) \
+ cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
-Wl,-T,$(filter-out FORCE,$^) -o $@
export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
@@ -67,7 +68,8 @@ SYSCFLAGS_vsyscall-sysenter.so = $(vsyscall-flags)
SYSCFLAGS_vsyscall-int80.so = $(vsyscall-flags)
$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
+ $(obj)/vsyscall-%.o FORCE
$(call if_changed,syscall)
# We also create a special relocatable object that should mirror the symbol
@@ -78,17 +80,20 @@ $(obj)/built-in.o: $(obj)/vsyscall-syms.o
$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
SYSCFLAGS_vsyscall-syms.o = -r
-$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
+$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
+ $(obj)/vsyscall-sysenter.o FORCE
$(call if_changed,syscall)
c-link := init_task.o
-s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o
+s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
@ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
+
obj-y += $(c-obj-y) $(s-obj-y)
obj-m += $(c-obj-m)
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
index 86ad650024..230673abb6 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
@@ -604,6 +604,12 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
acpi_fadt.sci_int = fadt->sci_int;
#endif
+#ifdef CONFIG_ACPI_BUS
+ /* initialize rev and apic_phys_dest_mode for x86_64 genapic */
+ acpi_fadt.revision = fadt->revision;
+ acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode;
+#endif
+
#ifdef CONFIG_X86_PM_TIMER
/* detect the location of the ACPI PM Timer */
if (fadt->revision >= FADT2_REVISION_ID) {
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
index 197225266d..dd1f64f830 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
@@ -22,6 +22,9 @@
DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
static int cachesize_override __initdata = -1;
static int disable_x86_fxsr __initdata = 0;
static int disable_x86_serial_nr __initdata = 1;
@@ -202,7 +205,7 @@ static inline int flag_is_changeable_p(u32 flag)
/* Probe for the CPUID instruction */
-int __init have_cpuid_p(void)
+static int __init have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -210,7 +213,7 @@ int __init have_cpuid_p(void)
/* Do minimum CPU detection early.
Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
The others are not touched to avoid unwanted side effects. */
-void __init early_cpu_detect(void)
+static void __init early_cpu_detect(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -243,6 +246,10 @@ void __init early_cpu_detect(void)
}
early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+ phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
}
void __init generic_identify(struct cpuinfo_x86 * c)
@@ -431,25 +438,15 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
mcheck_init(c);
#endif
}
-/*
- * Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
- */
-
-void __init dodgy_tsc(void)
-{
- if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) ||
- ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC ))
- cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data);
-}
#ifdef CONFIG_X86_HT
void __init detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
- int index_lsb, index_msb, tmp;
+ int index_msb, tmp;
int cpu = smp_processor_id();
- if (!cpu_has(c, X86_FEATURE_HT))
+ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -458,7 +455,6 @@ void __init detect_ht(struct cpuinfo_x86 *c)
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
} else if (smp_num_siblings > 1 ) {
- index_lsb = 0;
index_msb = 31;
if (smp_num_siblings > NR_CPUS) {
@@ -467,21 +463,34 @@ void __init detect_ht(struct cpuinfo_x86 *c)
return;
}
tmp = smp_num_siblings;
- while ((tmp & 1) == 0) {
- tmp >>=1 ;
- index_lsb++;
- }
- tmp = smp_num_siblings;
while ((tmp & 0x80000000 ) == 0) {
tmp <<=1 ;
index_msb--;
}
- if (index_lsb != index_msb )
+ if (smp_num_siblings & (smp_num_siblings - 1))
index_msb++;
phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
phys_proc_id[cpu]);
+
+ smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+
+ tmp = smp_num_siblings;
+ index_msb = 31;
+ while ((tmp & 0x80000000) == 0) {
+ tmp <<=1 ;
+ index_msb--;
+ }
+
+ if (smp_num_siblings & (smp_num_siblings - 1))
+ index_msb++;
+
+ cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+ if (c->x86_num_cores > 1)
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ cpu_core_id[cpu]);
}
}
#endif
@@ -528,7 +537,6 @@ extern int transmeta_init_cpu(void);
extern int rise_init_cpu(void);
extern int nexgen_init_cpu(void);
extern int umc_init_cpu(void);
-void early_cpu_detect(void);
void __init early_cpu_init(void)
{
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
index c06cf9396c..501ea3fce8 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
@@ -31,7 +31,7 @@ struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
unsigned int num_var_ranges;
unsigned int *usage_table;
-void __init set_num_var_ranges(void)
+static void __init set_num_var_ranges(void)
{
dom0_op_t op;
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
index 064be004e7..1fa27ad04c 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
@@ -752,296 +752,6 @@ ENTRY(fixup_4gb_segment)
pushl $do_fixup_4gb_segment
jmp error_code
-.data
-ENTRY(sys_call_table)
- .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
- .long sys_exit
- .long sys_fork
- .long sys_read
- .long sys_write
- .long sys_open /* 5 */
- .long sys_close
- .long sys_waitpid
- .long sys_creat
- .long sys_link
- .long sys_unlink /* 10 */
- .long sys_execve
- .long sys_chdir
- .long sys_time
- .long sys_mknod
- .long sys_chmod /* 15 */
- .long sys_lchown16
- .long sys_ni_syscall /* old break syscall holder */
- .long sys_stat
- .long sys_lseek
- .long sys_getpid /* 20 */
- .long sys_mount
- .long sys_oldumount
- .long sys_setuid16
- .long sys_getuid16
- .long sys_stime /* 25 */
- .long sys_ptrace
- .long sys_alarm
- .long sys_fstat
- .long sys_pause
- .long sys_utime /* 30 */
- .long sys_ni_syscall /* old stty syscall holder */
- .long sys_ni_syscall /* old gtty syscall holder */
- .long sys_access
- .long sys_nice
- .long sys_ni_syscall /* 35 - old ftime syscall holder */
- .long sys_sync
- .long sys_kill
- .long sys_rename
- .long sys_mkdir
- .long sys_rmdir /* 40 */
- .long sys_dup
- .long sys_pipe
- .long sys_times
- .long sys_ni_syscall /* old prof syscall holder */
- .long sys_brk /* 45 */
- .long sys_setgid16
- .long sys_getgid16
- .long sys_signal
- .long sys_geteuid16
- .long sys_getegid16 /* 50 */
- .long sys_acct
- .long sys_umount /* recycled never used phys() */
- .long sys_ni_syscall /* old lock syscall holder */
- .long sys_ioctl
- .long sys_fcntl /* 55 */
- .long sys_ni_syscall /* old mpx syscall holder */
- .long sys_setpgid
- .long sys_ni_syscall /* old ulimit syscall holder */
- .long sys_olduname
- .long sys_umask /* 60 */
- .long sys_chroot
- .long sys_ustat
- .long sys_dup2
- .long sys_getppid
- .long sys_getpgrp /* 65 */
- .long sys_setsid
- .long sys_sigaction
- .long sys_sgetmask
- .long sys_ssetmask
- .long sys_setreuid16 /* 70 */
- .long sys_setregid16
- .long sys_sigsuspend
- .long sys_sigpending
- .long sys_sethostname
- .long sys_setrlimit /* 75 */
- .long sys_old_getrlimit
- .long sys_getrusage
- .long sys_gettimeofday
- .long sys_settimeofday
- .long sys_getgroups16 /* 80 */
- .long sys_setgroups16
- .long old_select
- .long sys_symlink
- .long sys_lstat
- .long sys_readlink /* 85 */
- .long sys_uselib
- .long sys_swapon
- .long sys_reboot
- .long old_readdir
- .long old_mmap /* 90 */
- .long sys_munmap
- .long sys_truncate
- .long sys_ftruncate
- .long sys_fchmod
- .long sys_fchown16 /* 95 */
- .long sys_getpriority
- .long sys_setpriority
- .long sys_ni_syscall /* old profil syscall holder */
- .long sys_statfs
- .long sys_fstatfs /* 100 */
- .long sys_ioperm
- .long sys_socketcall
- .long sys_syslog
- .long sys_setitimer
- .long sys_getitimer /* 105 */
- .long sys_newstat
- .long sys_newlstat
- .long sys_newfstat
- .long sys_uname
- .long sys_iopl /* 110 */
- .long sys_vhangup
- .long sys_ni_syscall /* old "idle" system call */
- .long sys_vm86old
- .long sys_wait4
- .long sys_swapoff /* 115 */
- .long sys_sysinfo
- .long sys_ipc
- .long sys_fsync
- .long sys_sigreturn
- .long sys_clone /* 120 */
- .long sys_setdomainname
- .long sys_newuname
- .long sys_modify_ldt
- .long sys_adjtimex
- .long sys_mprotect /* 125 */
- .long sys_sigprocmask
- .long sys_ni_syscall /* old "create_module" */
- .long sys_init_module
- .long sys_delete_module
- .long sys_ni_syscall /* 130: old "get_kernel_syms" */
- .long sys_quotactl
- .long sys_getpgid
- .long sys_fchdir
- .long sys_bdflush
- .long sys_sysfs /* 135 */
- .long sys_personality
- .long sys_ni_syscall /* reserved for afs_syscall */
- .long sys_setfsuid16
- .long sys_setfsgid16
- .long sys_llseek /* 140 */
- .long sys_getdents
- .long sys_select
- .long sys_flock
- .long sys_msync
- .long sys_readv /* 145 */
- .long sys_writev
- .long sys_getsid
- .long sys_fdatasync
- .long sys_sysctl
- .long sys_mlock /* 150 */
- .long sys_munlock
- .long sys_mlockall
- .long sys_munlockall
- .long sys_sched_setparam
- .long sys_sched_getparam /* 155 */
- .long sys_sched_setscheduler
- .long sys_sched_getscheduler
- .long sys_sched_yield
- .long sys_sched_get_priority_max
- .long sys_sched_get_priority_min /* 160 */
- .long sys_sched_rr_get_interval
- .long sys_nanosleep
- .long sys_mremap
- .long sys_setresuid16
- .long sys_getresuid16 /* 165 */
- .long sys_vm86
- .long sys_ni_syscall /* Old sys_query_module */
- .long sys_poll
- .long sys_nfsservctl
- .long sys_setresgid16 /* 170 */
- .long sys_getresgid16
- .long sys_prctl
- .long sys_rt_sigreturn
- .long sys_rt_sigaction
- .long sys_rt_sigprocmask /* 175 */
- .long sys_rt_sigpending
- .long sys_rt_sigtimedwait
- .long sys_rt_sigqueueinfo
- .long sys_rt_sigsuspend
- .long sys_pread64 /* 180 */
- .long sys_pwrite64
- .long sys_chown16
- .long sys_getcwd
- .long sys_capget
- .long sys_capset /* 185 */
- .long sys_sigaltstack
- .long sys_sendfile
- .long sys_ni_syscall /* reserved for streams1 */
- .long sys_ni_syscall /* reserved for streams2 */
- .long sys_vfork /* 190 */
- .long sys_getrlimit
- .long sys_mmap2
- .long sys_truncate64
- .long sys_ftruncate64
- .long sys_stat64 /* 195 */
- .long sys_lstat64
- .long sys_fstat64
- .long sys_lchown
- .long sys_getuid
- .long sys_getgid /* 200 */
- .long sys_geteuid
- .long sys_getegid
- .long sys_setreuid
- .long sys_setregid
- .long sys_getgroups /* 205 */
- .long sys_setgroups
- .long sys_fchown
- .long sys_setresuid
- .long sys_getresuid
- .long sys_setresgid /* 210 */
- .long sys_getresgid
- .long sys_chown
- .long sys_setuid
- .long sys_setgid
- .long sys_setfsuid /* 215 */
- .long sys_setfsgid
- .long sys_pivot_root
- .long sys_mincore
- .long sys_madvise
- .long sys_getdents64 /* 220 */
- .long sys_fcntl64
- .long sys_ni_syscall /* reserved for TUX */
- .long sys_ni_syscall
- .long sys_gettid
- .long sys_readahead /* 225 */
- .long sys_setxattr
- .long sys_lsetxattr
- .long sys_fsetxattr
- .long sys_getxattr
- .long sys_lgetxattr /* 230 */
- .long sys_fgetxattr
- .long sys_listxattr
- .long sys_llistxattr
- .long sys_flistxattr
- .long sys_removexattr /* 235 */
- .long sys_lremovexattr
- .long sys_fremovexattr
- .long sys_tkill
- .long sys_sendfile64
- .long sys_futex /* 240 */
- .long sys_sched_setaffinity
- .long sys_sched_getaffinity
- .long sys_set_thread_area
- .long sys_get_thread_area
- .long sys_io_setup /* 245 */
- .long sys_io_destroy
- .long sys_io_getevents
- .long sys_io_submit
- .long sys_io_cancel
- .long sys_fadvise64 /* 250 */
- .long sys_ni_syscall
- .long sys_exit_group
- .long sys_lookup_dcookie
- .long sys_epoll_create
- .long sys_epoll_ctl /* 255 */
- .long sys_epoll_wait
- .long sys_remap_file_pages
- .long sys_set_tid_address
- .long sys_timer_create
- .long sys_timer_settime /* 260 */
- .long sys_timer_gettime
- .long sys_timer_getoverrun
- .long sys_timer_delete
- .long sys_clock_settime
- .long sys_clock_gettime /* 265 */
- .long sys_clock_getres
- .long sys_clock_nanosleep
- .long sys_statfs64
- .long sys_fstatfs64
- .long sys_tgkill /* 270 */
- .long sys_utimes
- .long sys_fadvise64_64
- .long sys_ni_syscall /* sys_vserver */
- .long sys_mbind
- .long sys_get_mempolicy
- .long sys_set_mempolicy
- .long sys_mq_open
- .long sys_mq_unlink
- .long sys_mq_timedsend
- .long sys_mq_timedreceive /* 280 */
- .long sys_mq_notify
- .long sys_mq_getsetattr
- .long sys_ni_syscall /* reserved for kexec */
- .long sys_waitid
- .long sys_ni_syscall /* 285 */ /* available */
- .long sys_add_key
- .long sys_request_key
- .long sys_keyctl
+#include "syscall_table.S"
syscall_table_size=(.-sys_call_table)
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
index ed58906c1e..d767aa6da2 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
@@ -99,6 +99,11 @@ EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
@@ -114,7 +119,6 @@ EXPORT_SYMBOL(dma_alloc_coherent);
EXPORT_SYMBOL(dma_free_coherent);
#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pcibios_penalize_isa_irq);
EXPORT_SYMBOL(pci_mem_start);
#endif
@@ -146,7 +150,6 @@ EXPORT_SYMBOL(smp_call_function);
/* TLB flushing */
EXPORT_SYMBOL(flush_tlb_page);
-EXPORT_SYMBOL_GPL(flush_tlb_all);
#endif
#ifdef CONFIG_X86_IO_APIC
@@ -168,10 +171,6 @@ EXPORT_SYMBOL(rtc_lock);
EXPORT_SYMBOL_GPL(set_nmi_callback);
EXPORT_SYMBOL_GPL(unset_nmi_callback);
-#undef memcmp
-extern int memcmp(const void *,const void *,__kernel_size_t);
-EXPORT_SYMBOL(memcmp);
-
EXPORT_SYMBOL(register_die_notifier);
#ifdef CONFIG_HAVE_DEC_LOCK
EXPORT_SYMBOL(_atomic_dec_and_lock);
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c
index 882ff3fe9c..0b786dbfc7 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c
@@ -231,7 +231,7 @@ static void unmask_IO_APIC_irq (unsigned int irq)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
unsigned long flags;
@@ -310,7 +310,7 @@ cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
static int physical_balance = 0;
-struct irq_cpu_info {
+static struct irq_cpu_info {
unsigned long * last_irq;
unsigned long * irq_delta;
unsigned long irq;
@@ -321,7 +321,7 @@ struct irq_cpu_info {
#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq])
#define IDLE_ENOUGH(cpu,now) \
- (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+ (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
@@ -332,7 +332,7 @@ struct irq_cpu_info {
#define BALANCED_IRQ_MORE_DELTA (HZ/10)
#define BALANCED_IRQ_LESS_DELTA (HZ)
-long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
+static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
unsigned long now, int direction)
@@ -733,8 +733,8 @@ void fastcall send_IPI_self(int vector)
*/
#define MAX_PIRQS 8
-int pirq_entries [MAX_PIRQS];
-int pirqs_enabled;
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
int skip_ioapic_setup;
static int __init ioapic_setup(char *str)
@@ -1231,7 +1231,7 @@ static inline void ioapic_register_intr(int irq, int vector, unsigned long trigg
#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
#endif
-void __init setup_IO_APIC_irqs(void)
+static void __init setup_IO_APIC_irqs(void)
{
struct IO_APIC_route_entry entry;
int apic, pin, idx, irq, first_notcon = 1, vector;
@@ -1311,7 +1311,7 @@ void __init setup_IO_APIC_irqs(void)
* Set up the 8259A-master output pin:
*/
#ifndef CONFIG_XEN
-void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
{
struct IO_APIC_route_entry entry;
unsigned long flags;
@@ -2234,7 +2234,6 @@ static inline void check_timer(void)
disable_8259A_irq(0);
setup_nmi();
enable_8259A_irq(0);
- check_nmi_watchdog();
}
return;
}
@@ -2257,7 +2256,6 @@ static inline void check_timer(void)
add_pin_to_irq(0, 0, pin2);
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
- check_nmi_watchdog();
}
return;
}
@@ -2363,7 +2361,7 @@ struct sysfs_ioapic_data {
};
static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-static int ioapic_suspend(struct sys_device *dev, u32 state)
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
{
struct IO_APIC_route_entry *entry;
struct sysfs_ioapic_data *data;
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c
index 3565536d6c..9bdd14fc19 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c
@@ -19,6 +19,9 @@
#include <linux/cpu.h>
#include <linux/delay.h>
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
#ifndef CONFIG_X86_LOCAL_APIC
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
@@ -244,7 +247,7 @@ skip:
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
for_each_cpu(j)
- seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c
index 16f2ee8c80..c025cc3d4c 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c
@@ -49,7 +49,7 @@ int mp_bus_id_to_node [MAX_MP_BUSSES];
int mp_bus_id_to_local [MAX_MP_BUSSES];
int quad_local_to_mp_bus_id [NR_CPUS/4][4];
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-int mp_current_pci_id;
+static int mp_current_pci_id;
/* I/O APIC entries */
struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
@@ -120,7 +120,7 @@ static int MP_valid_apicid(int apicid, int version)
#endif
#ifndef CONFIG_XEN
-void __init MP_processor_info (struct mpc_config_processor *m)
+static void __init MP_processor_info (struct mpc_config_processor *m)
{
int ver, apicid;
physid_mask_t tmp;
@@ -871,7 +871,7 @@ void __init mp_register_lapic (
#define MP_ISA_BUS 0
#define MP_MAX_IOAPIC_PIN 127
-struct mp_ioapic_routing {
+static struct mp_ioapic_routing {
int apic_id;
int gsi_base;
int gsi_end;
@@ -989,6 +989,7 @@ void __init mp_override_legacy_irq (
return;
}
+int es7000_plat;
void __init mp_config_acpi_legacy_irqs (void)
{
@@ -1003,9 +1004,9 @@ void __init mp_config_acpi_legacy_irqs (void)
Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
/*
- * ES7000 has no legacy identity mappings
+ * Older generations of ES7000 have no legacy identity mappings
*/
- if (es7000_plat)
+ if (es7000_plat == 1)
return;
/*
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
index dc51c7972a..efd0dab89b 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
@@ -25,7 +25,7 @@ struct dma_coherent_mem {
};
void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, int gfp)
+ dma_addr_t *dma_handle, unsigned int __nocast gfp)
{
void *ret;
struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
index eba38c6579..6cc2be6450 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
@@ -37,6 +37,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -60,7 +61,7 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-int hlt_counter;
+static int hlt_counter;
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
@@ -77,7 +78,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
* Powermanagement idle function, if any..
*/
void (*pm_idle)(void);
-static cpumask_t cpu_idle_map;
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
void disable_hlt(void)
{
@@ -150,8 +151,8 @@ void cpu_idle (void)
while (1) {
while (!need_resched()) {
- if (cpu_isset(cpu, cpu_idle_map))
- cpu_clear(cpu, cpu_idle_map);
+ if (__get_cpu_var(cpu_idle_state))
+ __get_cpu_var(cpu_idle_state) = 0;
rmb();
if (cpu_is_offline(cpu)) {
@@ -162,7 +163,7 @@ void cpu_idle (void)
play_dead();
}
- irq_stat[cpu].idle_timestamp = jiffies;
+ __get_cpu_var(irq_stat).idle_timestamp = jiffies;
xen_idle();
}
schedule();
@@ -171,16 +172,28 @@ void cpu_idle (void)
void cpu_idle_wait(void)
{
- int cpu;
+ unsigned int cpu, this_cpu = get_cpu();
cpumask_t map;
- for_each_online_cpu(cpu)
- cpu_set(cpu, cpu_idle_map);
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+
+ cpus_clear(map);
+ for_each_online_cpu(cpu) {
+ per_cpu(cpu_idle_state, cpu) = 1;
+ cpu_set(cpu, map);
+ }
+
+ __get_cpu_var(cpu_idle_state) = 0;
wmb();
do {
ssleep(1);
- cpus_and(map, cpu_idle_map, cpu_online_map);
+ for_each_online_cpu(cpu) {
+ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+ cpu_clear(cpu, map);
+ }
+ cpus_and(map, map, cpu_online_map);
} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -314,6 +327,17 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
int err;
childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+ /*
+ * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * This is necessary to guarantee that the entire "struct pt_regs"
+ * is accessible even if the CPU hasn't stored the SS/ESP registers
+ * on the stack (interrupt gate does not save these registers
+ * when switching to the same priv ring).
+ * Therefore beware: accessing the xss/esp fields of the
+ * "struct pt_regs" is possible, but they may contain the
+ * completely wrong values.
+ */
+ childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
*childregs = *regs;
childregs->eax = 0;
childregs->esp = esp;
@@ -434,12 +458,6 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
return 1;
}
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
- HYPERVISOR_set_debugreg((register), \
- (thread->debugreg[register]))
/*
* switch_to(x,yn) should switch tasks from x to y.
@@ -767,3 +785,9 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
return 0;
}
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (randomize_va_space)
+ sp -= get_random_int() % 8192;
+ return sp & ~0xf;
+}
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
index 938bcabd86..bc86051db3 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
@@ -40,6 +40,7 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/edd.h>
+#include <linux/nodemask.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
@@ -82,7 +83,6 @@ struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
unsigned long mmu_cr4_features;
-EXPORT_SYMBOL_GPL(mmu_cr4_features);
#ifdef CONFIG_ACPI_INTERPRETER
int acpi_disabled = 0;
@@ -125,8 +125,6 @@ struct edid_info edid_info;
struct ist_info ist_info;
struct e820map e820;
-unsigned char aux_device_present;
-
extern void early_cpu_init(void);
extern void dmi_scan_machine(void);
extern void generic_apic_probe(char *);
@@ -457,10 +455,10 @@ struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
unsigned long long addr; /* address for this change point */
};
-struct change_member change_point_list[2*E820MAX] __initdata;
-struct change_member *change_point[2*E820MAX] __initdata;
-struct e820entry *overlap_list[E820MAX] __initdata;
-struct e820entry new_bios[E820MAX] __initdata;
+static struct change_member change_point_list[2*E820MAX] __initdata;
+static struct change_member *change_point[2*E820MAX] __initdata;
+static struct e820entry *overlap_list[E820MAX] __initdata;
+static struct e820entry new_bios[E820MAX] __initdata;
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
@@ -1000,8 +998,6 @@ unsigned long __init find_max_low_pfn(void)
return max_low_pfn;
}
-#ifndef CONFIG_DISCONTIGMEM
-
/*
* Free all available memory for boot time allocation. Used
* as a callback function by efi_memory_walk()
@@ -1075,15 +1071,16 @@ static void __init reserve_ebda_region(void)
reserve_bootmem(addr, PAGE_SIZE);
}
+#ifndef CONFIG_DISCONTIGMEM
+void __init setup_bootmem_allocator(void);
static unsigned long __init setup_memory(void)
{
- unsigned long bootmap_size, start_pfn, max_low_pfn;
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
- start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
+ min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
find_max_pfn();
@@ -1099,10 +1096,43 @@ static unsigned long __init setup_memory(void)
#endif
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
+
+ setup_bootmem_allocator();
+
+ return max_low_pfn;
+}
+
+void __init zone_sizes_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned int max_dma, low;
+ /* XEN: "DMA memory" is fake under Xen, so all lowmem goes in ZONE_DMA and GFP_DMA users are not starved of RAM. */
+ max_dma = max_low_pfn;
+ low = max_low_pfn;
+ /* max_dma == low, so the else arm always runs: ZONE_DMA = low, ZONE_NORMAL = 0, highmem sized as before. */
+ if (low < max_dma)
+ zones_size[ZONE_DMA] = low;
+ else {
+ zones_size[ZONE_DMA] = max_dma;
+ zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+ zones_size[ZONE_HIGHMEM] = highend_pfn - low;
+#endif
+ }
+ free_area_init(zones_size);
+}
+#else
+extern unsigned long setup_memory(void);
+extern void zone_sizes_init(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+void __init setup_bootmem_allocator(void)
+{
+ unsigned long bootmap_size;
/*
* Initialize the boot-time allocator (with low memory only):
*/
- bootmap_size = init_bootmem(start_pfn, max_low_pfn);
+ bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
register_bootmem_low_pages(max_low_pfn);
@@ -1112,7 +1142,7 @@ static unsigned long __init setup_memory(void)
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+ reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
/* reserve EBDA region, it's a 4K region */
@@ -1159,12 +1189,25 @@ static unsigned long __init setup_memory(void)
#endif
phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
+}
- return max_low_pfn;
+/*
+ * The node 0 pgdat is initialized before all of these because
+ * it's needed for bootmem. node>0 pgdats have their virtual
+ * space allocated before the pagetables are in place to access
+ * them, so they can't be cleared then.
+ *
+ * This should all compile down to nothing when NUMA is off.
+ */
+void __init remapped_pgdat_init(void)
+{
+ int nid;
+
+ for_each_online_node(nid) {
+ if (nid != 0)
+ memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+ }
}
-#else
-extern unsigned long setup_memory(void);
-#endif /* !CONFIG_DISCONTIGMEM */
/*
* Request address space for all standard RAM and ROM resources
@@ -1443,7 +1486,6 @@ void __init setup_arch(char **cmdline_p)
machine_submodel_id = SYS_DESC_TABLE.table[1];
BIOS_revision = SYS_DESC_TABLE.table[2];
}
- aux_device_present = AUX_DEVICE_INFO;
bootloader_type = LOADER_TYPE;
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
@@ -1503,6 +1545,8 @@ void __init setup_arch(char **cmdline_p)
smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
paging_init();
+ remapped_pgdat_init();
+ zone_sizes_init();
#ifdef CONFIG_X86_FIND_SMP_CONFIG
/*
@@ -1586,11 +1630,13 @@ void __init setup_arch(char **cmdline_p)
}
#endif
+#ifdef CONFIG_ACPI_BOOT
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
acpi_boot_table_init();
acpi_boot_init();
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
if (smp_found_config)
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c
index 9e17fc80e9..32925b5e08 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c
@@ -93,7 +93,7 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
if (act) {
old_sigset_t mask;
- if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
return -EFAULT;
@@ -105,7 +105,7 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
- if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
return -EFAULT;
@@ -187,7 +187,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
struct _fpstate __user * buf;
err |= __get_user(buf, &sc->fpstate);
if (buf) {
- if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+ if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
goto badframe;
err |= restore_i387(buf);
} else {
@@ -213,7 +213,7 @@ asmlinkage int sys_sigreturn(unsigned long __unused)
sigset_t set;
int eax;
- if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
|| (_NSIG_WORDS > 1
@@ -243,7 +243,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
sigset_t set;
int eax;
- if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
@@ -557,6 +557,16 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
}
}
+ /*
+ * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
+ * that register information in the sigcontext is correct.
+ */
+ if (unlikely(regs->eflags & TF_MASK)
+ && likely(current->ptrace & PT_DTRACE)) {
+ current->ptrace &= ~PT_DTRACE;
+ regs->eflags &= ~TF_MASK;
+ }
+
/* Set up the stack frame */
if (ka->sa.sa_flags & SA_SIGINFO)
setup_rt_frame(sig, ka, info, oldset, regs);
@@ -608,8 +618,7 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
* inside the kernel.
*/
if (unlikely(current->thread.debugreg[7])) {
- HYPERVISOR_set_debugreg(7,
- current->thread.debugreg[7]);
+ loaddebug(&current->thread, 7);
}
/* Whee! Actually deliver the signal. */
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
index fddadbba25..56729ce885 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
@@ -197,7 +197,7 @@ void send_IPI_mask_bitmask(cpumask_t mask, int vector)
local_irq_restore(flags);
}
-inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+void send_IPI_mask_sequence(cpumask_t mask, int vector)
{
send_IPI_mask_bitmask(mask, vector);
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
index 494befa697..485fc6abcf 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
@@ -69,6 +69,8 @@ static int __initdata smp_b_stepping;
int smp_num_siblings = 1;
int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
EXPORT_SYMBOL(phys_proc_id);
+int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
+EXPORT_SYMBOL(cpu_core_id);
/* bitmap of online cpus */
cpumask_t cpu_online_map;
@@ -84,9 +86,6 @@ u8 x86_cpu_to_apicid[NR_CPUS] =
{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
#if 0
/*
* Trampoline 80x86 program as an array.
@@ -122,6 +121,8 @@ static unsigned long __init setup_trampoline(void)
}
#endif
+static void map_cpu_to_logical_apicid(void);
+
/*
* We are called very early to get the low memory for the
* SMP bootup trampoline page.
@@ -352,7 +353,7 @@ extern void calibrate_delay(void);
static atomic_t init_deasserted;
-void __init smp_callin(void)
+static void __init smp_callin(void)
{
int cpuid, phys_id;
unsigned long timeout;
@@ -449,7 +450,7 @@ void __init smp_callin(void)
#endif
}
-int cpucount;
+static int cpucount;
static irqreturn_t ldebug_interrupt(
@@ -567,7 +568,7 @@ static inline void unmap_cpu_to_node(int cpu)
u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-void map_cpu_to_logical_apicid(void)
+static void map_cpu_to_logical_apicid(void)
{
int cpu = smp_processor_id();
int apicid = smp_processor_id();
@@ -576,7 +577,7 @@ void map_cpu_to_logical_apicid(void)
map_cpu_to_node(cpu, apicid_to_node(apicid));
}
-void unmap_cpu_to_logical_apicid(int cpu)
+static void unmap_cpu_to_logical_apicid(int cpu)
{
cpu_2_logical_apicid[cpu] = BAD_APICID;
unmap_cpu_to_node(cpu);
@@ -861,6 +862,9 @@ static int __init do_boot_cpu(int apicid)
if (cpu_gdt_descr[0].size > PAGE_SIZE)
BUG();
cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+ printk("GDT: copying %d bytes from %lx to %lx\n",
+ cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
+ cpu_gdt_descr[cpu].address);
memcpy((void *)cpu_gdt_descr[cpu].address,
(void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
@@ -916,6 +920,7 @@ static int __init do_boot_cpu(int apicid)
ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(swapper_pg_dir);
boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+ printk("boot error: %ld\n", boot_error);
if (!boot_error) {
/*
@@ -1016,9 +1021,6 @@ static int __init do_boot_cpu(int apicid)
return boot_error;
}
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
-
static void smp_tune_scheduling (void)
{
unsigned long cachesize; /* kB */
@@ -1039,7 +1041,6 @@ static void smp_tune_scheduling (void)
* this basically disables processor-affinity
* scheduling on SMP without a TSC.
*/
- cacheflush_time = 0;
return;
} else {
cachesize = boot_cpu_data.x86_cache_size;
@@ -1047,17 +1048,7 @@ static void smp_tune_scheduling (void)
cachesize = 16; /* Pentiums, 2x8kB cache */
bandwidth = 100;
}
-
- cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
}
-
- cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
-
- printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
- (long)cacheflush_time/(cpu_khz/1000),
- ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
- printk("task migration cache decay timeout: %ld msecs.\n",
- cache_decay_ticks);
}
/*
@@ -1071,6 +1062,8 @@ static int boot_cpu_logical_apicid;
void *xquad_portio;
cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
static void __init smp_boot_cpus(unsigned int max_cpus)
{
@@ -1102,6 +1095,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
cpus_clear(cpu_sibling_map[0]);
cpu_set(0, cpu_sibling_map[0]);
+ cpus_clear(cpu_core_map[0]);
+ cpu_set(0, cpu_core_map[0]);
+
#ifdef CONFIG_X86_IO_APIC
/*
* If we couldn't find an SMP configuration at boot time,
@@ -1119,6 +1115,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
" Using dummy APIC emulation.\n");
#endif
map_cpu_to_logical_apicid();
+ cpu_set(0, cpu_sibling_map[0]);
+ cpu_set(0, cpu_core_map[0]);
return;
}
#endif
@@ -1144,6 +1142,10 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
smpboot_clear_io_apic_irqs();
phys_cpu_present_map = physid_mask_of_physid(0);
+ cpu_set(0, cpu_sibling_map[0]);
+ cpu_set(0, cpu_core_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
+ cpu_set(0, cpu_core_map[0]);
return;
}
@@ -1246,10 +1248,13 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
* construct cpu_sibling_map[], so that we can tell sibling CPUs
* efficiently.
*/
- for (cpu = 0; cpu < NR_CPUS; cpu++)
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
cpus_clear(cpu_sibling_map[cpu]);
+ cpus_clear(cpu_core_map[cpu]);
+ }
for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ struct cpuinfo_x86 *c = cpu_data + cpu;
int siblings = 0;
int i;
if (!cpu_isset(cpu, cpu_callout_map))
@@ -1259,7 +1264,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_isset(i, cpu_callout_map))
continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ if (cpu_core_id[cpu] == cpu_core_id[i]) {
siblings++;
cpu_set(i, cpu_sibling_map[cpu]);
}
@@ -1269,15 +1274,23 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
cpu_set(cpu, cpu_sibling_map[cpu]);
}
- if (siblings != smp_num_siblings)
+ if (siblings != smp_num_siblings) {
printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+ smp_num_siblings = siblings;
+ }
+ if (c->x86_num_cores > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ cpu_set(i, cpu_core_map[cpu]);
+ }
+ }
+ } else {
+ cpu_core_map[cpu] = cpu_sibling_map[cpu];
+ }
}
-#if 0
- if (nmi_watchdog == NMI_LOCAL_APIC)
- check_nmi_watchdog();
-#endif
-
smpboot_setup_io_apic();
#if 0
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
index 821d6905b0..0ca8d7eb64 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
@@ -190,6 +190,35 @@ static void __get_time_values_from_xen(void)
({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
/*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with. It is required for NMI access to the
+ * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+ unsigned char val;
+ lock_cmos_prefix(addr);
+ outb_p(addr, RTC_PORT(0));
+ val = inb_p(RTC_PORT(1));
+ lock_cmos_suffix(addr);
+ return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+ lock_cmos_prefix(addr);
+ outb_p(addr, RTC_PORT(0));
+ outb_p(val, RTC_PORT(1));
+ lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
+/*
* This version of gettimeofday has microsecond resolution
* and better than microsecond precision on fast x86 machines with TSC.
*/
@@ -349,16 +378,23 @@ static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
+ WARN_ON(irqs_disabled());
+
/* gets recalled with irq locally disabled */
- spin_lock(&rtc_lock);
+ spin_lock_irq(&rtc_lock);
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
- spin_unlock(&rtc_lock);
+ spin_unlock_irq(&rtc_lock);
return retval;
}
+#else
+static int set_rtc_mmss(unsigned long nowtime)
+{
+ return 0;
+}
#endif
/* monotonic_clock(): returns # of nanoseconds passed since time_init()
@@ -503,29 +539,6 @@ static inline void do_timer_interrupt(int irq, void *dev_id,
last_update_to_xen = xtime.tv_sec;
}
-
- /*
- * If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- */
- if ((time_status & STA_UNSYNC) == 0 &&
- xtime.tv_sec > last_rtc_update + 660 &&
- (xtime.tv_nsec / 1000)
- >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- (xtime.tv_nsec / 1000)
- <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
- /* horrible...FIXME */
- if (efi_enabled) {
- if (efi_set_rtc_mmss(xtime.tv_sec) == 0)
- last_rtc_update = xtime.tv_sec;
- else
- last_rtc_update = xtime.tv_sec - 600;
- } else if (set_rtc_mmss(xtime.tv_sec) == 0)
- last_rtc_update = xtime.tv_sec;
- else
- last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
- }
#endif
}
@@ -565,10 +578,59 @@ unsigned long get_cmos_time(void)
return retval;
}
+static void sync_cmos_clock(unsigned long dummy);
+
+static struct timer_list sync_cmos_timer =
+ TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
+
+static void sync_cmos_clock(unsigned long dummy)
+{
+ struct timeval now, next;
+ int fail = 1;
+
+ /*
+ * If we have an externally synchronized Linux clock, then update
+ * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+ * called as close as possible to 500 ms before the new second starts.
+ * This code is run on a timer. If the clock is set, that timer
+ * may not expire at the correct time. Thus, we adjust...
+ */
+ if ((time_status & STA_UNSYNC) != 0)
+ /*
+ * Not synced, exit, do not restart a timer (if one is
+ * running, let it run out).
+ */
+ return;
+
+ do_gettimeofday(&now);
+ if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+ now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
+ fail = set_rtc_mmss(now.tv_sec);
+
+ next.tv_usec = USEC_AFTER - now.tv_usec;
+ if (next.tv_usec <= 0)
+ next.tv_usec += USEC_PER_SEC;
+
+ if (!fail)
+ next.tv_sec = 659;
+ else
+ next.tv_sec = 0;
+
+ if (next.tv_usec >= USEC_PER_SEC) {
+ next.tv_sec++;
+ next.tv_usec -= USEC_PER_SEC;
+ }
+ mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
+}
+
+void notify_arch_cmos_timer(void)
+{
+ mod_timer(&sync_cmos_timer, jiffies + 1);
+}
static long clock_cmos_diff, sleep_start;
-static int timer_suspend(struct sys_device *dev, u32 state)
+static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
* Estimate time zone so that set_time can update the clock
@@ -626,14 +688,14 @@ device_initcall(time_init_device);
#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
-void __init hpet_time_init(void)
+static void __init hpet_time_init(void)
{
xtime.tv_sec = get_cmos_time();
xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- if (hpet_enable() >= 0) {
+ if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
}
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
index 539c1d5b7d..d34ca827e6 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
@@ -342,8 +342,7 @@ void die(const char * str, struct pt_regs * regs, long err)
if (panic_on_oops) {
printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(5 * HZ);
+ ssleep(5);
panic("Fatal exception");
}
do_exit(SIGSEGV);
@@ -450,6 +449,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
#ifdef CONFIG_X86_MCE
DO_ERROR(18, SIGBUS, "machine check", machine_check)
#endif
@@ -636,16 +636,15 @@ void unset_nmi_callback(void)
}
#ifdef CONFIG_KPROBES
-fastcall int do_int3(struct pt_regs *regs, long error_code)
+fastcall void do_int3(struct pt_regs *regs, long error_code)
{
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
- return 1;
+ return;
/* This is an interrupt gate, because kprobes wants interrupts
disabled. Normal trap handlers don't. */
restore_interrupts(regs);
do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
- return 0;
}
#endif
@@ -702,8 +701,6 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
/*
* Single-stepping through TF: make sure we ignore any events in
* kernel space (but re-enable TF when returning to user mode).
- * And if the event was due to a debugger (PT_DTRACE), clear the
- * TF flag so that register information is correct.
*/
if (condition & DR_STEP) {
/*
@@ -713,11 +710,6 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
*/
if ((regs->xcs & 2) == 0)
goto clear_TF_reenable;
-
- if (likely(tsk->ptrace & PT_DTRACE)) {
- tsk->ptrace &= ~PT_DTRACE;
- regs->eflags &= ~TF_MASK;
- }
}
/* Ok, finally something we can handle */
@@ -807,7 +799,7 @@ fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
math_error((void __user *)regs->eip);
}
-void simd_math_error(void __user *eip)
+static void simd_math_error(void __user *eip)
{
struct task_struct * task;
siginfo_t info;
@@ -879,6 +871,51 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
}
}
+fastcall void setup_x86_bogus_stack(unsigned char * stk)
+{
+ unsigned long *switch16_ptr, *switch32_ptr;
+ struct pt_regs *regs;
+ unsigned long stack_top, stack_bot;
+ unsigned short iret_frame16_off;
+ int cpu = smp_processor_id();
+ /* reserve the space on 32bit stack for the magic switch16 pointer */
+ memmove(stk, stk + 8, sizeof(struct pt_regs));
+ switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
+ regs = (struct pt_regs *)stk;
+ /* now the switch32 on 16bit stack */
+ stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+ stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+ switch32_ptr = (unsigned long *)(stack_top - 8);
+ iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
+ /* copy iret frame on 16bit stack */
+ memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
+ /* fill in the switch pointers */
+ switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
+ switch16_ptr[1] = __ESPFIX_SS;
+ switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
+ 8 - CPU_16BIT_STACK_SIZE;
+ switch32_ptr[1] = __KERNEL_DS;
+}
+
+fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+{
+ unsigned long *switch32_ptr;
+ unsigned char *stack16, *stack32;
+ unsigned long stack_top, stack_bot;
+ int len;
+ int cpu = smp_processor_id();
+ stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+ stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+ switch32_ptr = (unsigned long *)(stack_top - 8);
+ /* copy the data from 16bit stack to 32bit stack */
+ len = CPU_16BIT_STACK_SIZE - 8 - sp;
+ stack16 = (unsigned char *)(stack_bot + sp);
+ stack32 = (unsigned char *)
+ (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
+ memcpy(stack32, stack16, len);
+ return stack32;
+}
+
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
@@ -980,3 +1017,10 @@ void smp_trap_init(trap_info_t *trap_ctxt)
trap_ctxt[t->vector].address = t->address;
}
}
+
+static int __init kstack_setup(char *s)
+{
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+}
+__setup("kstack=", kstack_setup);
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
index 1bf278733f..1cfe059f8b 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
@@ -77,7 +77,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
* force other mappings to Oops if they'll try to access
* this pte without first remap it
*/
- pte_clear(kmap_pte-idx);
+ pte_clear(&init_mm, vaddr, kmap_pte-idx);
__flush_tlb_one(vaddr);
#endif
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
index 044568c42b..7c8b95a8f2 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
@@ -248,13 +248,10 @@ static inline int page_is_ram(unsigned long pagenr)
pte_t *kmap_pte;
pgprot_t kmap_prot;
-EXPORT_SYMBOL(kmap_prot);
-EXPORT_SYMBOL(kmap_pte);
-
#define kmap_get_fixmap_pte(vaddr) \
pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
-void __init kmap_init(void)
+static void __init kmap_init(void)
{
unsigned long kmap_vstart;
@@ -265,7 +262,7 @@ void __init kmap_init(void)
kmap_prot = PAGE_KERNEL;
}
-void __init permanent_kmaps_init(pgd_t *pgd_base)
+static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
pgd_t *pgd;
pud_t *pud;
@@ -297,7 +294,7 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
}
#ifndef CONFIG_DISCONTIGMEM
-void __init set_highmem_pages_init(int bad_ppro)
+static void __init set_highmem_pages_init(int bad_ppro)
{
int pfn;
for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
@@ -426,38 +423,6 @@ void zap_low_mappings (void)
flush_tlb_all();
}
-#ifndef CONFIG_DISCONTIGMEM
-void __init zone_sizes_init(void)
-{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- unsigned int /*max_dma,*/ high, low;
-
- /*
- * XEN: Our notion of "DMA memory" is fake when running over Xen.
- * We simply put all RAM in the DMA zone so that those drivers which
- * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
- * Those drivers that *do* require lowmem are screwed anyway when
- * running over Xen!
- */
- /*max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;*/
- low = max_low_pfn;
- high = highend_pfn;
-
- /*if (low < max_dma)*/
- zones_size[ZONE_DMA] = low;
- /*else*/ {
- /*zones_size[ZONE_DMA] = max_dma;*/
- /*zones_size[ZONE_NORMAL] = low - max_dma;*/
-#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_HIGHMEM] = high - low;
-#endif
- }
- free_area_init(zones_size);
-}
-#else
-extern void zone_sizes_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
-
static int disable_nx __initdata = 0;
u64 __supported_pte_mask = ~_PAGE_NX;
@@ -560,7 +525,6 @@ void __init paging_init(void)
__flush_tlb_all();
kmap_init();
- zone_sizes_init();
/* Switch to the real shared_info page, and clear the dummy page. */
set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
@@ -586,7 +550,7 @@ void __init paging_init(void)
* but fortunately the switch to using exceptions got rid of all that.
*/
-void __init test_wp_bit(void)
+static void __init test_wp_bit(void)
{
printk("Checking if this processor honours the WP bit even in supervisor mode... ");
@@ -605,20 +569,17 @@ void __init test_wp_bit(void)
}
}
-#ifndef CONFIG_DISCONTIGMEM
static void __init set_max_mapnr_init(void)
{
#ifdef CONFIG_HIGHMEM
- max_mapnr = num_physpages = highend_pfn;
+ num_physpages = highend_pfn;
#else
- max_mapnr = num_physpages = max_low_pfn;
+ num_physpages = max_low_pfn;
+#endif
+#ifndef CONFIG_DISCONTIGMEM
+ max_mapnr = num_physpages;
#endif
}
-#define __free_all_bootmem() free_all_bootmem()
-#else
-#define __free_all_bootmem() free_all_bootmem_node(NODE_DATA(0))
-extern void set_max_mapnr_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
static struct kcore_list kcore_mem, kcore_vmalloc;
@@ -650,16 +611,16 @@ void __init mem_init(void)
set_max_mapnr_init();
#ifdef CONFIG_HIGHMEM
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
printk("vmalloc area: %lx-%lx, maxmem %lx\n",
VMALLOC_START,VMALLOC_END,MAXMEM);
BUG_ON(VMALLOC_START > VMALLOC_END);
/* this will put all low memory onto the freelists */
- totalram_pages += __free_all_bootmem();
+ totalram_pages += free_all_bootmem();
/* XEN: init and count low-mem pages outside initial allocation. */
for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
ClearPageReserved(&mem_map[pfn]);
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
index 06495cd9de..8043cc1c4d 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
@@ -342,7 +342,7 @@ void pgd_free(pgd_t *pgd)
if (PTRS_PER_PMD > 1)
for (i = 0; i < USER_PTRS_PER_PGD; ++i)
kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
- /* in the non-PAE case, clear_page_range() clears user pgd entries */
+ /* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
}
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c
index 7eeea04f72..3b0b096b30 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c
@@ -500,6 +500,9 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
case PCI_DEVICE_ID_INTEL_ICH6_1:
case PCI_DEVICE_ID_INTEL_ICH7_0:
case PCI_DEVICE_ID_INTEL_ICH7_1:
+ case PCI_DEVICE_ID_INTEL_ICH7_30:
+ case PCI_DEVICE_ID_INTEL_ICH7_31:
+ case PCI_DEVICE_ID_INTEL_ESB2_0:
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
@@ -1031,66 +1034,60 @@ void pcibios_penalize_isa_irq(int irq)
static int pirq_enable_irq(struct pci_dev *dev)
{
u8 pin;
- extern int via_interrupt_line_quirk;
struct pci_dev *temp_dev;
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
- char *msg;
- msg = "";
+ char *msg = "";
+
+ pin--; /* interrupt pins are numbered starting from 1 */
+
if (io_apic_assign_pci_irqs) {
int irq;
- if (pin) {
- pin--; /* interrupt pins are numbered starting from 1 */
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
- /*
- * Busses behind bridges are typically not listed in the MP-table.
- * In this case we have to look up the IRQ based on the parent bus,
- * parent slot, and pin number. The SMP code detects such bridged
- * busses itself so we should get into this branch reliably.
- */
- temp_dev = dev;
- while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
- struct pci_dev * bridge = dev->bus->self;
-
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
- PCI_SLOT(bridge->devfn), pin);
- if (irq >= 0)
- printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
- pci_name(bridge), 'A' + pin, irq);
- dev = bridge;
- }
- dev = temp_dev;
- if (irq >= 0) {
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ /*
+ * Busses behind bridges are typically not listed in the MP-table.
+ * In this case we have to look up the IRQ based on the parent bus,
+ * parent slot, and pin number. The SMP code detects such bridged
+ * busses itself so we should get into this branch reliably.
+ */
+ temp_dev = dev;
+ while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+ struct pci_dev * bridge = dev->bus->self;
+
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
+ pci_name(bridge), 'A' + pin, irq);
+ dev = bridge;
+ }
+ dev = temp_dev;
+ if (irq >= 0) {
#ifdef CONFIG_PCI_MSI
- if (!platform_legacy_irq(irq))
- irq = IO_APIC_VECTOR(irq);
+ if (!platform_legacy_irq(irq))
+ irq = IO_APIC_VECTOR(irq);
#endif
- printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
- pci_name(dev), 'A' + pin, irq);
- dev->irq = irq;
- return 0;
- } else
- msg = " Probably buggy MP table.";
- }
+ printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+ pci_name(dev), 'A' + pin, irq);
+ dev->irq = irq;
+ return 0;
+ } else
+ msg = " Probably buggy MP table.";
} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
msg = "";
else
msg = " Please try using pci=biosirq.";
-
+
/* With IDE legacy devices the IRQ lookup failure is not a problem.. */
if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
return 0;
-
+
printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
- 'A' + pin - 1, pci_name(dev), msg);
+ 'A' + pin, pci_name(dev), msg);
}
- /* VIA bridges use interrupt line for apic/pci steering across
- the V-Link */
- else if (via_interrupt_line_quirk)
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15);
return 0;
}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
index 6785cf8e8a..38e37b20b1 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
@@ -66,23 +66,6 @@ config EARLY_PRINTK
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash.
-config HPET_TIMER
- bool
- default n
- help
- Use the IA-PC HPET (High Precision Event Timer) to manage
- time in preference to the PIT and RTC, if a HPET is
- present. The HPET provides a stable time base on SMP
- systems, unlike the RTC, but it is more expensive to access,
- as it is off-chip. You can find the HPET spec at
- <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
-
- If unsure, say Y.
-
-config HPET_EMULATE_RTC
- bool "Provide RTC interrupt"
- depends on HPET_TIMER && RTC=y
-
config GENERIC_ISA_DMA
bool
default y
@@ -255,7 +238,7 @@ config PREEMPT
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on SMP
- default off
+ default n
help
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -312,6 +295,23 @@ config NR_CPUS
This is purely to save memory - each supported CPU requires
memory in the static kernel configuration.
+config HPET_TIMER
+ bool
+ default n
+ help
+ Use the IA-PC HPET (High Precision Event Timer) to manage
+ time in preference to the PIT and RTC, if a HPET is
+ present. The HPET provides a stable time base on SMP
+ systems, unlike the RTC, but it is more expensive to access,
+ as it is off-chip. You can find the HPET spec at
+ <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
+
+ If unsure, say Y.
+
+config HPET_EMULATE_RTC
+ bool "Provide RTC interrupt"
+ depends on HPET_TIMER && RTC=y
+
config GART_IOMMU
bool "IOMMU support"
depends on PCI
@@ -346,6 +346,24 @@ config X86_MCE
machine check error logs. See
ftp://ftp.x86-64.org/pub/linux/tools/mcelog
+config SECCOMP
+ bool "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
+ default y
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via /proc/<pid>/seccomp, it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
+ If unsure, say Y. Only embedded systems should say N here.
+
+
endmenu
#
@@ -359,6 +377,11 @@ config GENERIC_IRQ_PROBE
bool
default y
+# we have no ISA slots, but we do have ISA-style DMA.
+config ISA_DMA_API
+ bool
+ default y
+
menu "Power management options"
source kernel/power/Kconfig
@@ -380,7 +403,7 @@ config PCI_DIRECT
config PCI_MMCONFIG
bool "Support mmconfig PCI config space access"
- depends on PCI
+ depends on PCI && ACPI
select ACPI_BOOT
config UNORDERED_IO
@@ -393,6 +416,8 @@ config UNORDERED_IO
from i386. Requires that the driver writer used memory barriers
properly.
+#source "drivers/pci/pcie/Kconfig"
+
#source "drivers/pci/Kconfig"
#source "drivers/pcmcia/Kconfig"
@@ -444,12 +469,8 @@ endmenu
#source "arch/x86_64/oprofile/Kconfig"
-#source "arch/x86_64/Kconfig.debug"
-
# source "security/Kconfig"
# source "crypto/Kconfig"
-# source "lib/Kconfig"
-
endmenu
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile
index 4dd24552cc..fe88b369a1 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile
@@ -48,10 +48,14 @@ $(obj)/vsyscall.lds:
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
@ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
-$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S ../../i386/kernel/vsyscall-note.S
$(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S
$(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S
+# Link vsyscall-note.S from the i386 tree into the Xen i386 kernel
+# directory, then run make in that directory for the same target.
+# $(MAKE) is used instead of a literal "make" so the sub-make shares the
+# parent's jobserver and honours -n/-t/-q.
+../../i386/kernel/vsyscall-note.S:
+	@ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $(srctree)/arch/xen/i386/kernel/$(notdir $@)
+	$(MAKE) -C arch/xen/i386/kernel vsyscall-note.S
+
obj-y += $(c-obj-y) $(s-obj-y)
clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S
index 521c881c98..388d49b8b7 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S
@@ -96,7 +96,7 @@ ENTRY(ia32_sysenter_target)
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz sysenter_tracesys
sysenter_do_call:
cmpl $(IA32_NR_syscalls),%eax
@@ -184,7 +184,7 @@ ENTRY(ia32_cstar_target)
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz cstar_tracesys
cstar_do_call:
cmpl $IA32_NR_syscalls,%eax
@@ -263,7 +263,7 @@ ENTRY(ia32_syscall)
this could be a problem. */
SAVE_ARGS 0,0,1
GET_THREAD_INFO(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz ia32_tracesys
ia32_do_syscall:
cmpl $(IA32_NR_syscalls),%eax
@@ -617,7 +617,7 @@ ia32_sys_call_table:
.quad compat_sys_mq_notify
.quad compat_sys_mq_getsetattr
.quad quiet_ni_syscall /* reserved for kexec */
- .quad sys32_waitid
+ .quad compat_sys_waitid
.quad quiet_ni_syscall /* sys_altroot */
.quad sys_add_key
.quad sys_request_key
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
index 0f97d1cf54..ecccf66a2e 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
@@ -9,6 +9,7 @@
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/stringify.h>
+#include <linux/security.h>
#include <asm/proto.h>
#include <asm/tlbflush.h>
#include <asm/ia32_unistd.h>
@@ -50,51 +51,57 @@ extern int sysctl_vsyscall32;
char *syscall32_page;
-/*
- * Map the 32bit vsyscall page on demand.
- *
- * RED-PEN: This knows too much about high level VM.
- *
- * Alternative would be to generate a vma with appropriate backing options
- * and let it be handled by generic VM.
- */
-int __map_syscall32(struct mm_struct *mm, unsigned long address)
-{
- pgd_t *pgd;
- pud_t *pud;
- pte_t *pte;
- pmd_t *pmd;
- int err = -ENOMEM;
-
- spin_lock(&mm->page_table_lock);
- pgd = pgd_offset(mm, address);
- pud = pud_alloc(mm, pgd, address);
- if (pud) {
- pmd = pmd_alloc(mm, pud, address);
- if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) {
- if (pte_none(*pte)) {
- set_pte(pte,
- mk_pte(virt_to_page(syscall32_page),
- PAGE_KERNEL_VSYSCALL32));
- }
- /* Flush only the local CPU. Other CPUs taking a fault
- will just end up here again
- This probably not needed and just paranoia. */
- __flush_tlb_one(address);
- err = 0;
- }
- }
- spin_unlock(&mm->page_table_lock);
- return err;
+/*
+ * nopage handler for the 32-bit vsyscall VMA.
+ *
+ * Translates adr into an offset from the start of the VMA, looks up the
+ * page of syscall32_page at that offset, takes a reference on it and
+ * returns it. The type argument is not touched.
+ */
+static struct page *
+syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
+{
+	unsigned long offset = adr - vma->vm_start;
+	struct page *page = virt_to_page(syscall32_page + offset);
+
+	get_page(page);
+	return page;
}
-int map_syscall32(struct mm_struct *mm, unsigned long address)
+/* Prevent VMA merging */
+static void syscall32_vma_close(struct vm_area_struct *vma)
{
- int err;
- down_read(&mm->mmap_sem);
- err = __map_syscall32(mm, address);
- up_read(&mm->mmap_sem);
- return err;
+}
+
+static struct vm_operations_struct syscall32_vm_ops = {
+ .close = syscall32_vma_close,
+ .nopage = syscall32_nopage,
+};
+
+struct linux_binprm;
+
+/*
+ * Set up a VMA for the 32-bit vsyscall page at program startup.
+ *
+ * Allocates a vm_area_struct covering VSYSCALL32_BASE..VSYSCALL32_END,
+ * attaches syscall32_vm_ops to it and inserts it into current->mm.
+ * bprm and exstack are not used here.
+ *
+ * Returns 0 on success, -ENOMEM if the VMA cannot be allocated or the
+ * security_vm_enough_memory() check fails, or the error returned by
+ * insert_vm_struct().
+ */
+int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
+{
+	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma)
+		return -ENOMEM;
+	if (security_vm_enough_memory(npages)) {
+		kmem_cache_free(vm_area_cachep, vma);
+		return -ENOMEM;
+	}
+
+	memset(vma, 0, sizeof(struct vm_area_struct));
+	/* Could randomize here */
+	vma->vm_start = VSYSCALL32_BASE;
+	vma->vm_end = VSYSCALL32_END;
+	/* MAYWRITE to allow gdb to COW and set breakpoints */
+	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+	vma->vm_ops = &syscall32_vm_ops;
+	vma->vm_mm = mm;
+
+	down_write(&mm->mmap_sem);
+	ret = insert_vm_struct(mm, vma);
+	if (ret) {
+		/*
+		 * The VMA was not linked into the mm, so nothing else will
+		 * free it; release it here and do not touch total_vm.
+		 * NOTE(review): the pages checked by
+		 * security_vm_enough_memory() above are presumably still
+		 * charged on this path -- confirm whether they need to be
+		 * uncharged.
+		 */
+		up_write(&mm->mmap_sem);
+		kmem_cache_free(vm_area_cachep, vma);
+		return ret;
+	}
+	mm->total_vm += npages;
+	up_write(&mm->mmap_sem);
+	return 0;
}
static int __init init_syscall32(void)
@@ -102,7 +109,6 @@ static int __init init_syscall32(void)
syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
if (!syscall32_page)
panic("Cannot allocate syscall32 page");
- SetPageReserved(virt_to_page(syscall32_page));
#ifdef USE_INT80
/*
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
index c340cf6b45..8b30e7c99b 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
@@ -38,6 +38,7 @@ c-obj-$(CONFIG_X86_IO_APIC) += genapic.o genapic_cluster.o genapic_flat.o
c-obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
#obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
c-obj-$(CONFIG_MODULES) += module.o
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
index b965d6d1eb..83c8171d5f 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
@@ -63,8 +63,8 @@ int main(void)
offsetof (struct rt_sigframe32, uc.uc_mcontext));
BLANK();
#endif
- DEFINE(SIZEOF_PBE, sizeof(struct pbe));
DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ DEFINE(pbe_next, offsetof(struct pbe, next));
return 0;
}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
index 7c56a9f53c..5751a578a6 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
@@ -2,6 +2,12 @@
* Handle the memory map.
* The functions here do the job until bootmem takes over.
* $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
+ *
+ * Getting sanitize_e820_map() in sync with i386 version by applying change:
+ * - Provisions for empty E820 memory regions (reported by certain BIOSes).
+ * Alex Achenbach <xela@slit.de>, December 2002.
+ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *
*/
#include <linux/config.h>
#include <linux/kernel.h>
@@ -279,7 +285,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
int chgidx, still_changing;
int overlap_entries;
int new_bios_entry;
- int old_nr, new_nr;
+ int old_nr, new_nr, chg_nr;
int i;
/*
@@ -333,20 +339,24 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
for (i=0; i < 2*old_nr; i++)
change_point[i] = &change_point_list[i];
- /* record all known change-points (starting and ending addresses) */
+ /* record all known change-points (starting and ending addresses),
+ omitting those that are for empty memory regions */
chgidx = 0;
for (i=0; i < old_nr; i++) {
- change_point[chgidx]->addr = biosmap[i].addr;
- change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
- change_point[chgidx++]->pbios = &biosmap[i];
+ if (biosmap[i].size != 0) {
+ change_point[chgidx]->addr = biosmap[i].addr;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ }
}
+ chg_nr = chgidx;
/* sort change-point list by memory addresses (low -> high) */
still_changing = 1;
while (still_changing) {
still_changing = 0;
- for (i=1; i < 2*old_nr; i++) {
+ for (i=1; i < chg_nr; i++) {
/* if <current_addr> < <last_addr>, swap */
/* or, if current=<start_addr> & last=<end_addr>, swap */
if ((change_point[i]->addr < change_point[i-1]->addr) ||
@@ -369,7 +379,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
last_type = 0; /* start with undefined memory type */
last_addr = 0; /* start with 0 as last starting address */
/* loop through change-points, determining effect on the new bios map */
- for (chgidx=0; chgidx < 2*old_nr; chgidx++)
+ for (chgidx=0; chgidx < chg_nr; chgidx++)
{
/* keep track of all overlapping bios entries */
if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
@@ -512,22 +522,67 @@ extern unsigned long xen_override_max_pfn;
void __init parse_memopt(char *p, char **from)
{
- /*
- * mem=XXX[kKmM] limits kernel memory to XXX+1MB
- *
- * It would be more logical to count from 0 instead of from
- * HIGH_MEMORY, but we keep that for now for i386 compatibility.
- *
- * No support for custom mapping like i386. The reason is
- * that we need to read the e820 map anyways to handle the
- * ACPI mappings in the direct map. Also on x86-64 there
- * should be always a good e820 map. This is only an upper
- * limit, you cannot force usage of memory not in e820.
- *
- * -AK
- */
- end_user_pfn = memparse(p, from) + HIGH_MEMORY;
+ end_user_pfn = memparse(p, from);
end_user_pfn >>= PAGE_SHIFT;
xen_override_max_pfn = (unsigned long) end_user_pfn;
}
+unsigned long pci_mem_start = 0xaeedbabe;
+
+/*
+ * Search for the biggest gap in the low 32 bits of the e820
+ * memory space. We pass this space to PCI to assign MMIO resources
+ * for hotplug or unconfigured devices in.
+ * Hopefully the BIOS left enough space.
+ */
+__init void e820_setup_gap(void)
+{
+	/* Replaced by a found hole, or by the end_pfn-based fallback below. */
+	unsigned long gapstart = 0x10000000;
+	/* Only holes strictly larger than this are recorded. */
+	unsigned long gapsize = 0x400000;
+	/* Lowest region start seen so far; the scan begins at 4GB. */
+	unsigned long last = 0x100000000ull;
+	int found = 0;
+	int i;
+
+	/* Walk the e820 map from its last entry down to its first. */
+	for (i = e820.nr_map - 1; i >= 0; i--) {
+		unsigned long long start = e820.map[i].addr;
+		unsigned long long end = start + e820.map[i].size;
+
+		/*
+		 * "last" is at most 4GB, so whenever it lies above "end"
+		 * the hole between them fits in 32 bits.
+		 */
+		if (last > end) {
+			unsigned long hole = last - end;
+
+			if (hole > gapsize) {
+				gapsize = hole;
+				gapstart = end;
+				found = 1;
+			}
+		}
+		if (start < last)
+			last = start;
+	}
+
+	if (!found) {
+		/* No usable hole: fall back to 1MB past end_pfn. */
+		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
+		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
+		       KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
+	}
+
+	/*
+	 * Dynamic PCI memory starts at the gap start rounded up to the
+	 * next megabyte.
+	 *
+	 * Open question: should this be padded further into the gap
+	 * (something like " + (gapsize >> 3)")?
+	 */
+	pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+
+	printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
+		pci_mem_start, gapstart, gapsize);
+}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c
index 9abb0406a6..7bc0773a2d 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c
@@ -62,7 +62,7 @@ static struct console early_vga_console = {
#ifndef CONFIG_XEN
/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
-int early_serial_base = 0x3f8; /* ttyS0 */
+static int early_serial_base = 0x3f8; /* ttyS0 */
#define XMTRDY 0x20
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
index 8f20ff647b..8ad08b3c95 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
@@ -54,10 +54,7 @@
.code64
-#ifdef CONFIG_PREEMPT
-#define preempt_stop XEN_BLOCK_EVENTS(%rsi)
-#else
-#define preempt_stop
+#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -157,7 +154,7 @@ ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
call schedule_tail
GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz rff_trace
rff_action:
RESTORE_REST
@@ -207,7 +204,7 @@ ENTRY(system_call)
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
XEN_UNBLOCK_EVENTS(%r11)
GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
@@ -220,7 +217,7 @@ ENTRY(system_call)
*/
.globl ret_from_sys_call
ret_from_sys_call:
- movl $_TIF_WORK_MASK,%edi
+ movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
GET_THREAD_INFO(%rcx)
@@ -316,6 +313,7 @@ int_careful:
pushq %rdi
call schedule
popq %rdi
+ cli
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
@@ -330,9 +328,8 @@ int_very_careful:
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
- btr $TIF_SYSCALL_TRACE,%edi
- btr $TIF_SYSCALL_AUDIT,%edi
- btr $TIF_SINGLESTEP,%edi
+ andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
+ cli
jmp int_restore_rest
int_signal:
@@ -344,6 +341,7 @@ int_signal:
1: movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
RESTORE_REST
+ cli
jmp int_with_check
CFI_ENDPROC
@@ -511,7 +509,7 @@ retint_signal:
RESTORE_REST
XEN_BLOCK_EVENTS(%rsi)
movl $_TIF_NEED_RESCHED,%edi
- GET_THREAD_INFO(%rcx)
+ GET_THREAD_INFO(%rcx)
jmp retint_check
#ifdef CONFIG_PREEMPT
@@ -524,15 +522,8 @@ retint_kernel:
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
- jc retint_restore_args
- movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
-/* sti */
- XEN_UNBLOCK_EVENTS(%rsi)
- call schedule
- XEN_BLOCK_EVENTS(%rsi)
-/* cli */
- GET_THREAD_INFO(%rcx)
- movl $0,threadinfo_preempt_count(%rcx)
+ jnc retint_restore_args
+ call preempt_schedule_irq
jmp retint_kernel /* check again */
#endif
CFI_ENDPROC
@@ -610,6 +601,7 @@ ENTRY(spurious_interrupt)
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
call \sym
+ cli
.endm
/*
@@ -934,8 +926,6 @@ ENTRY(debug)
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug
- /* switch back to process stack to restore the state ptrace touched */
- movq %rax,%rsp
jmp paranoid_exit
CFI_ENDPROC
@@ -946,38 +936,62 @@ ENTRY(nmi)
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi
+ /*
+ * "Paranoid" exit path from exception stack.
+ * Paranoid because this is used by NMIs and cannot take
+ * any kernel state for granted.
+ * We don't do kernel preemption checks here, because only
+ * NMI should be common and it does not enable IRQs and
+ * cannot get reschedule ticks.
+ */
/* ebx: no swapgs flag */
#endif
paranoid_exit:
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
paranoid_swapgs:
-/* cli
- swapgs */
+/* swapgs */
paranoid_restore:
RESTORE_ALL 8
/* iretq */
paranoid_userspace:
-/* cli */
GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%edx
- testl $_TIF_NEED_RESCHED,%edx
- jnz paranoid_resched
- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
- jnz paranoid_signal
- jmp paranoid_swapgs
-paranoid_resched:
+# movl threadinfo_flags(%rcx),%edx
+# testl $_TIF_NEED_RESCHED,%edx
+# jnz paranoid_resched
+# testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+# jnz paranoid_signal
+# jmp paranoid_swapgs
+#paranoid_resched:
+#/* sti */
+# call schedule
+# jmp paranoid_exit
+#paranoid_signal:
+ movl threadinfo_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz paranoid_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz paranoid_schedule
+ movl %ebx,%edx /* arg3: thread flags */
/* sti */
- call schedule
- jmp paranoid_exit
-paranoid_signal:
-/* sti */
- xorl %esi,%esi /* oldset */
- movq %rsp,%rdi /* &pt_regs */
+# xorl %esi,%esi /* oldset */
+# movq %rsp,%rdi /* &pt_regs */
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
- jmp paranoid_exit
+# jmp paranoid_exit
+ cli
+ jmp paranoid_userspace
+paranoid_schedule:
+ sti
+ call schedule
+ cli
+ jmp paranoid_userspace
CFI_ENDPROC
-
+
ENTRY(int3)
zeroentry do_int3
@@ -1000,7 +1014,6 @@ ENTRY(reserved)
ENTRY(double_fault)
CFI_STARTPROC
paranoidentry do_double_fault
- movq %rax,%rsp
jmp paranoid_exit
CFI_ENDPROC
@@ -1014,7 +1027,6 @@ ENTRY(segment_not_present)
ENTRY(stack_segment)
CFI_STARTPROC
paranoidentry do_stack_segment
- movq %rax,%rsp
jmp paranoid_exit
CFI_ENDPROC
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
index d7000838d8..0bc0f62001 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
@@ -39,6 +39,8 @@
.text
.code64
+ .globl startup_64
+startup_64:
ENTRY(_start)
cld
movq init_rsp(%rip),%rsp
@@ -136,9 +138,8 @@ ENTRY(cpu_gdt_table)
.quad 0x00affa000000ffff /* __USER_CS */
.quad 0x00cffa000000ffff /* __KERNEL32_CS */
.quad 0,0 /* TSS */
- .quad 0 /* LDT */
+ .quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
- .quad 0 /* unused now */
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
@@ -184,12 +185,6 @@ gdt:
.endr
#endif
-ENTRY(gdt_table32)
- .quad 0x0000000000000000 /* This one is magic */
- .quad 0x0000000000000000 /* unused */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
-gdt32_end:
-
/* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c
index a4304edf6d..990f8fb190 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c
@@ -36,8 +36,6 @@ static void __init clear_bss(void)
}
#endif
-extern char x86_boot_params[2048];
-
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
#define OLD_CL_MAGIC_ADDR 0x90020
#define OLD_CL_MAGIC 0xA33F
@@ -52,7 +50,7 @@ static void __init copy_bootdata(char *real_mode_data)
int new_data;
char * command_line;
- memcpy(x86_boot_params, real_mode_data, 2048);
+ memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
if (!new_data) {
if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
@@ -105,9 +103,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
#ifdef CONFIG_SMP
cpu_set(0, cpu_online_map);
#endif
- /* default console: */
- if (!strstr(saved_command_line, "console="))
- strcat(saved_command_line, " console=tty0");
#if 0
s = strstr(saved_command_line, "earlyprintk=");
if (s != NULL)
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
index 647f09b32e..4cbb1aed8f 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
@@ -37,11 +37,14 @@
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/mach_apic.h>
+#include <asm/acpi.h>
#define __apicdebuginit __init
int sis_apic_bug; /* not actually supported, dummy for compile */
+static int no_timer_check;
+
static DEFINE_SPINLOCK(ioapic_lock);
/*
@@ -192,7 +195,7 @@ static void unmask_IO_APIC_irq (unsigned int irq)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
unsigned long flags;
@@ -232,8 +235,8 @@ static void clear_IO_APIC (void)
*/
#define MAX_PIRQS 8
-int pirq_entries [MAX_PIRQS];
-int pirqs_enabled;
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
int skip_ioapic_setup;
int ioapic_force;
@@ -758,7 +761,7 @@ static inline void ioapic_register_intr(int irq, int vector, unsigned long trigg
#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
#endif /* !CONFIG_XEN */
-void __init setup_IO_APIC_irqs(void)
+static void __init setup_IO_APIC_irqs(void)
{
struct IO_APIC_route_entry entry;
int apic, pin, idx, irq, first_notcon = 1, vector;
@@ -828,7 +831,7 @@ void __init setup_IO_APIC_irqs(void)
* Set up the 8259A-master output pin as broadcast to all
* CPUs.
*/
-void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
{
struct IO_APIC_route_entry entry;
unsigned long flags;
@@ -1000,6 +1003,8 @@ void __apicdebuginit print_IO_APIC(void)
return;
}
+#if 0
+
static __apicdebuginit void print_APIC_bitfield (int base)
{
unsigned int v;
@@ -1141,6 +1146,8 @@ void __apicdebuginit print_PIC(void)
v = inb(0x4d1) << 8 | inb(0x4d0);
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
+#endif /* 0 */
+
#else
void __init print_IO_APIC(void) { }
#endif /* !CONFIG_XEN */
@@ -1661,13 +1668,12 @@ static inline void check_timer(void)
* Ok, does IRQ0 through the IOAPIC work?
*/
unmask_IO_APIC_irq(0);
- if (timer_irq_works()) {
+ if (!no_timer_check && timer_irq_works()) {
nmi_watchdog_default();
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
setup_nmi();
enable_8259A_irq(0);
- check_nmi_watchdog();
}
return;
}
@@ -1687,7 +1693,6 @@ static inline void check_timer(void)
nmi_watchdog_default();
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
- check_nmi_watchdog();
}
return;
}
@@ -1736,6 +1741,13 @@ static inline void check_timer(void)
#define check_timer() ((void)0)
#endif /* !CONFIG_XEN */
+/*
+ * Handler for the "no_timer_check" boot option.
+ *
+ * Sets no_timer_check, which makes check_timer() skip the
+ * timer_irq_works() test for IRQ0 routed through the IO-APIC.
+ * The option argument s is ignored. Always returns 1.
+ */
+static int __init notimercheck(char *s)
+{
+ no_timer_check = 1;
+ return 1;
+}
+__setup("no_timer_check", notimercheck);
+
/*
*
* IRQ's that are handled by the PIC in the MPS IOAPIC case.
@@ -1777,7 +1789,7 @@ struct sysfs_ioapic_data {
};
static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-static int ioapic_suspend(struct sys_device *dev, u32 state)
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
{
struct IO_APIC_route_entry *entry;
struct sysfs_ioapic_data *data;
@@ -1871,78 +1883,6 @@ device_initcall(ioapic_init_sysfs);
#define IO_APIC_MAX_ID 0xFE
-int __init io_apic_get_unique_id (int ioapic, int apic_id)
-{
-#ifndef CONFIG_XEN
- union IO_APIC_reg_00 reg_00;
- static physid_mask_t apic_id_map;
- unsigned long flags;
- int i = 0;
-
- /*
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
- * supports up to 16 on one shared APIC bus.
- *
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
- * advantage of new APIC bus architecture.
- */
-
- if (physids_empty(apic_id_map))
- apic_id_map = phys_cpu_present_map;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- if (apic_id >= IO_APIC_MAX_ID) {
- apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.bits.ID);
- apic_id = reg_00.bits.ID;
- }
-
- /*
- * Every APIC in a system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (physid_isset(apic_id, apic_id_map)) {
-
- for (i = 0; i < IO_APIC_MAX_ID; i++) {
- if (!physid_isset(i, apic_id_map))
- break;
- }
-
- if (i == IO_APIC_MAX_ID)
- panic("Max apic_id exceeded!\n");
-
- apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
-
- apic_id = i;
- }
-
- physid_set(apic_id, apic_id_map);
-
- if (reg_00.bits.ID != apic_id) {
- reg_00.bits.ID = apic_id;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0, reg_00.raw);
- reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /* Sanity check */
- if (reg_00.bits.ID != apic_id)
- panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
- }
-
- apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-#endif /* !CONFIG_XEN */
-
- return apic_id;
-}
-
-
int __init io_apic_get_version (int ioapic)
{
union IO_APIC_reg_01 reg_01;
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c
index ff96e22341..7d7385cf61 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c
@@ -25,8 +25,7 @@
*
*/
-// asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
-asmlinkage long sys_iopl(unsigned int new_io_pl)
+asmlinkage long sys_iopl(unsigned int new_io_pl, struct pt_regs *regs)
{
unsigned int old_io_pl = current->thread.io_pl;
physdev_op_t op;
@@ -59,5 +58,5 @@ asmlinkage long sys_iopl(unsigned int new_io_pl)
*/
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
- return turn_on ? sys_iopl(3) : 0;
+ return turn_on ? sys_iopl(3, NULL) : 0;
}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
index c2aff7edad..059f276767 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
@@ -30,6 +30,7 @@
#include <asm/pgalloc.h>
#include <asm/io_apic.h>
#include <asm/proto.h>
+#include <asm/acpi.h>
/* Have we found an MP table */
int smp_found_config;
@@ -46,7 +47,7 @@ unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL };
-int mp_current_pci_id = 0;
+static int mp_current_pci_id = 0;
/* I/O APIC entries */
struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
@@ -108,6 +109,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
static void __init MP_processor_info (struct mpc_config_processor *m)
{
int ver;
+ static int found_bsp=0;
if (!(m->mpc_cpuflag & CPU_ENABLED))
return;
@@ -127,11 +129,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
" Processor ignored.\n", NR_CPUS);
return;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
num_processors++;
@@ -151,7 +148,19 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
ver = 0x10;
}
apic_version[m->mpc_apicid] = ver;
- bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ /*
+ * bios_cpu_apicid is required to have processors listed
+ * in same order as logical cpu numbers. Hence the first
+ * entry is BSP, and so on.
+ */
+ bios_cpu_apicid[0] = m->mpc_apicid;
+ x86_cpu_to_apicid[0] = m->mpc_apicid;
+ found_bsp = 1;
+ } else {
+ bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
+ x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
+ }
}
#else
void __init MP_processor_info (struct mpc_config_processor *m)
@@ -714,7 +723,7 @@ void __init mp_register_lapic (
#define MP_ISA_BUS 0
#define MP_MAX_IOAPIC_PIN 127
-struct mp_ioapic_routing {
+static struct mp_ioapic_routing {
int apic_id;
int gsi_start;
int gsi_end;
@@ -764,7 +773,7 @@ void __init mp_register_ioapic (
mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
mp_ioapics[idx].mpc_apicaddr = address;
- mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+ mp_ioapics[idx].mpc_apicid = id;
mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
/*
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
index 48afb51919..a6b1a843cf 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
@@ -37,6 +37,7 @@
#include <linux/irq.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -59,7 +60,7 @@ asmlinkage extern void ret_from_fork(void);
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-atomic_t hlt_counter = ATOMIC_INIT(0);
+static atomic_t hlt_counter = ATOMIC_INIT(0);
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
@@ -68,7 +69,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
* Powermanagement idle function, if any..
*/
void (*pm_idle)(void);
-static cpumask_t cpu_idle_map;
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
void disable_hlt(void)
{
@@ -140,8 +141,8 @@ void cpu_idle (void)
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
- if (cpu_isset(cpu, cpu_idle_map))
- cpu_clear(cpu, cpu_idle_map);
+ if (__get_cpu_var(cpu_idle_state))
+ __get_cpu_var(cpu_idle_state) = 0;
rmb();
if (cpu_is_offline(cpu))
@@ -150,22 +151,35 @@ void cpu_idle (void)
__IRQ_STAT(cpu,idle_timestamp) = jiffies;
xen_idle();
}
+
schedule();
}
}
void cpu_idle_wait(void)
{
- int cpu;
+ unsigned int cpu, this_cpu = get_cpu();
cpumask_t map;
- for_each_online_cpu(cpu)
- cpu_set(cpu, cpu_idle_map);
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); /* NOTE(review): presumably pins the caller to this_cpu so the __get_cpu_var() below hits the same CPU - confirm */
+ put_cpu();
+
+ cpus_clear(map);
+ for_each_online_cpu(cpu) {
+ per_cpu(cpu_idle_state, cpu) = 1; /* cpu_idle() resets a non-zero flag to 0 in its inner loop */
+ cpu_set(cpu, map); /* map collects the CPUs still being waited for */
+ }
+
+ __get_cpu_var(cpu_idle_state) = 0; /* the calling CPU's own flag is cleared right away */
wmb();
do {
ssleep(1);
- cpus_and(map, cpu_idle_map, cpu_online_map);
+ for_each_online_cpu(cpu) {
+ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+ cpu_clear(cpu, map); /* flag is back to 0: stop waiting for this CPU */
+ }
+ cpus_and(map, map, cpu_online_map); /* keep only CPUs that are still online */
} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -199,11 +213,11 @@ void __show_regs(struct pt_regs * regs)
printk("R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);
- asm("movl %%ds,%0" : "=r" (ds));
- asm("movl %%cs,%0" : "=r" (cs));
- asm("movl %%es,%0" : "=r" (es));
- asm("movl %%fs,%0" : "=r" (fsindex));
- asm("movl %%gs,%0" : "=r" (gsindex));
+ asm("mov %%ds,%0" : "=r" (ds));
+ asm("mov %%cs,%0" : "=r" (cs));
+ asm("mov %%es,%0" : "=r" (es));
+ asm("mov %%fs,%0" : "=r" (fsindex));
+ asm("mov %%gs,%0" : "=r" (gsindex));
rdmsrl(MSR_FS_BASE, fs);
rdmsrl(MSR_GS_BASE, gs);
@@ -343,10 +357,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
- asm("movl %%gs,%0" : "=m" (p->thread.gsindex));
- asm("movl %%fs,%0" : "=m" (p->thread.fsindex));
- asm("movl %%es,%0" : "=m" (p->thread.es));
- asm("movl %%ds,%0" : "=m" (p->thread.ds));
+ asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
+ asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
+ asm("mov %%es,%0" : "=m" (p->thread.es));
+ asm("mov %%ds,%0" : "=m" (p->thread.ds));
if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@ -651,7 +665,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
set_32bit_tls(task, FS_TLS, addr);
if (doit) {
load_TLS(&task->thread, cpu);
- asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
+ asm volatile("mov %0,%%fs" :: "r" (FS_TLS_SEL));
}
task->thread.fsindex = FS_TLS_SEL;
task->thread.fs = 0;
@@ -661,7 +675,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
if (doit) {
/* set the selector to 0 to not confuse
__switch_to */
- asm volatile("movl %0,%%fs" :: "r" (0));
+ asm volatile("mov %0,%%fs" :: "r" (0));
ret = HYPERVISOR_set_segment_base(SEGBASE_FS, addr);
}
@@ -723,3 +737,10 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
boot_option_idle_override = 1;
return 1;
}
+
+unsigned long arch_align_stack(unsigned long sp) /* returns sp, optionally lowered by a random amount, with the low four bits cleared */
+{
+ if (randomize_va_space) /* the random offset is applied only when this flag is non-zero */
+ sp -= get_random_int() % 8192; /* subtract the random value reduced modulo 8192 */
+ return sp & ~0xf; /* round down to a multiple of 16 */
+}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
index 72d4cd59e4..080121e1ab 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
@@ -86,7 +86,6 @@ unsigned long xen_override_max_pfn;
struct cpuinfo_x86 boot_cpu_data;
unsigned long mmu_cr4_features;
-EXPORT_SYMBOL_GPL(mmu_cr4_features);
int acpi_disabled;
EXPORT_SYMBOL(acpi_disabled);
@@ -98,9 +97,6 @@ int __initdata acpi_force = 0;
int acpi_numa __initdata;
-/* For PCI or other memory-mapped resources */
-unsigned long pci_mem_start = 0x10000000;
-
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;
@@ -124,8 +120,6 @@ struct sys_desc_table_struct {
struct edid_info edid_info;
struct e820map e820;
-unsigned char aux_device_present;
-
extern int root_mountflags;
extern char _text, _etext, _edata, _end;
@@ -377,6 +371,10 @@ static __init void parse_cmdline_early (char ** cmdline_p)
else if (!memcmp(from, "acpi=strict", 11)) {
acpi_strict = 1;
}
+#ifdef CONFIG_X86_IO_APIC
+ else if (!memcmp(from, "acpi_skip_timer_override", 24))
+ acpi_skip_timer_override = 1;
+#endif
#endif
#if 0
@@ -603,7 +601,6 @@ static void __init print_memory_map(char *who)
void __init setup_arch(char **cmdline_p)
{
- unsigned long low_mem_size;
int i, j;
physdev_op_t op;
@@ -618,7 +615,6 @@ void __init setup_arch(char **cmdline_p)
screen_info = SCREEN_INFO;
#endif
edid_info = EDID_INFO;
- aux_device_present = AUX_DEVICE_INFO;
saved_video_mode = SAVED_VIDEO_MODE;
bootloader_type = LOADER_TYPE;
@@ -821,13 +817,7 @@ void __init setup_arch(char **cmdline_p)
request_resource(&ioport_resource, &standard_io_resources[i]);
}
- /* Will likely break when you have unassigned resources with more
- than 4GB memory and bridges that don't support more than 4GB.
- Doing it properly would require to use pci_alloc_consistent
- in this case. */
- low_mem_size = ((end_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
- if (low_mem_size > pci_mem_start)
- pci_mem_start = low_mem_size;
+ e820_setup_gap();
#ifdef CONFIG_GART_IOMMU
iommu_hole_init();
@@ -867,7 +857,7 @@ static int __init get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
- if (c->x86_cpuid_level < 0x80000004)
+ if (c->extended_cpuid_level < 0x80000004)
return 0;
v = (unsigned int *) c->x86_model_id;
@@ -883,7 +873,7 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, eax, ebx, ecx, edx;
- n = c->x86_cpuid_level;
+ n = c->extended_cpuid_level;
if (n >= 0x80000005) {
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
@@ -913,14 +903,50 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
}
}
+/*
+ * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
+ * Assumes number of cores is a power of two.
+ */
+static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ int cpu = smp_processor_id();
+ int node = 0; /* stays 0 in the printk below when CONFIG_NUMA is off */
+ unsigned bits;
+ if (c->x86_num_cores == 1) /* single core: leave phys_proc_id[] and cpu_core_id[] untouched */
+ return;
+
+ bits = 0;
+ while ((1 << bits) < c->x86_num_cores) /* smallest bits with (1 << bits) >= x86_num_cores */
+ bits++;
+
+ /* Low order bits define the core id (index of core in socket) */
+ cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
+ /* Convert the APIC ID into the socket ID */
+ phys_proc_id[cpu] >>= bits;
+
+#ifdef CONFIG_NUMA
+ /* When an ACPI SRAT table is available use the mappings from SRAT
+ instead. */
+ if (acpi_numa <= 0) {
+ node = phys_proc_id[cpu]; /* the socket id is used as the node number */
+ if (!node_online(node))
+ node = first_node(node_online_map); /* fall back to the first online node */
+ cpu_to_node[cpu] = node;
+ } else {
+ node = cpu_to_node[cpu]; /* existing mapping is kept; only read for the printk */
+ }
+#endif
+
+ printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
+ cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
+#endif
+}
static int __init init_amd(struct cpuinfo_x86 *c)
{
int r;
int level;
-#ifdef CONFIG_NUMA
- int cpu;
-#endif
/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
@@ -943,26 +969,12 @@ static int __init init_amd(struct cpuinfo_x86 *c)
}
display_cacheinfo(c);
- if (c->x86_cpuid_level >= 0x80000008) {
+ if (c->extended_cpuid_level >= 0x80000008) {
c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
if (c->x86_num_cores & (c->x86_num_cores - 1))
c->x86_num_cores = 1;
-#ifdef CONFIG_NUMA
- /* On a dual core setup the lower bits of apic id
- distingush the cores. Fix up the CPU<->node mappings
- here based on that.
- Assumes number of cores is a power of two.
- When using SRAT use mapping from SRAT. */
- cpu = c->x86_apicid;
- if (acpi_numa <= 0 && c->x86_num_cores > 1) {
- cpu_to_node[cpu] = cpu >> hweight32(c->x86_num_cores - 1);
- if (!node_online(cpu_to_node[cpu]))
- cpu_to_node[cpu] = first_node(node_online_map);
- }
- printk(KERN_INFO "CPU %d(%d) -> Node %d\n",
- cpu, c->x86_num_cores, cpu_to_node[cpu]);
-#endif
+ amd_detect_cmp(c);
}
return r;
@@ -972,10 +984,10 @@ static void __init detect_ht(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
u32 eax, ebx, ecx, edx;
- int index_lsb, index_msb, tmp;
+ int index_msb, tmp;
int cpu = smp_processor_id();
- if (!cpu_has(c, X86_FEATURE_HT))
+ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -984,7 +996,6 @@ static void __init detect_ht(struct cpuinfo_x86 *c)
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
} else if (smp_num_siblings > 1) {
- index_lsb = 0;
index_msb = 31;
/*
* At this point we only support two siblings per
@@ -996,45 +1007,65 @@ static void __init detect_ht(struct cpuinfo_x86 *c)
return;
}
tmp = smp_num_siblings;
- while ((tmp & 1) == 0) {
- tmp >>=1 ;
- index_lsb++;
- }
- tmp = smp_num_siblings;
while ((tmp & 0x80000000 ) == 0) {
tmp <<=1 ;
index_msb--;
}
- if (index_lsb != index_msb )
+ if (smp_num_siblings & (smp_num_siblings - 1))
index_msb++;
phys_proc_id[cpu] = phys_pkg_id(index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
phys_proc_id[cpu]);
+
+ smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+
+ tmp = smp_num_siblings;
+ index_msb = 31;
+ while ((tmp & 0x80000000) == 0) {
+ tmp <<=1 ;
+ index_msb--;
+ }
+ if (smp_num_siblings & (smp_num_siblings - 1))
+ index_msb++;
+
+ cpu_core_id[cpu] = phys_pkg_id(index_msb);
+
+ if (c->x86_num_cores > 1)
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ cpu_core_id[cpu]);
}
#endif
}
-static void __init sched_cmp_hack(struct cpuinfo_x86 *c)
+/*
+ * Find out the number of processor cores on the die via CPUID leaf 4, subleaf 0. Returns 1 if cpuid_level < 4 or EAX[4:0] is zero, else EAX[31:26] + 1.
+ */
+static int __init intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_SMP
- /* AMD dual core looks like HT but isn't really. Hide it from the
- scheduler. This works around problems with the domain scheduler.
- Also probably gives slightly better scheduling and disables
- SMT nice which is harmful on dual core.
- TBD tune the domain scheduler for dual core. */
- if (c->x86_vendor == X86_VENDOR_AMD && cpu_has(c, X86_FEATURE_CMP_LEGACY))
- smp_num_siblings = 1;
-#endif
+ unsigned int eax, ecx; /* ecx is never read; it lets the asm declare %ecx as written */
+
+ if (c->cpuid_level < 4)
+ return 1;
+
+ __asm__("cpuid" /* cpuid rewrites %ecx, hence the "=c" output tied to input "1" */
+ : "=a" (eax), "=c" (ecx)
+ : "0" (4), "1" (0)
+ : "bx", "dx");
+
+ if (eax & 0x1f)
+ return ((eax >> 26) + 1);
+ else
+ return 1;
}
-
+
static void __init init_intel(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned n;
init_intel_cacheinfo(c);
- n = c->x86_cpuid_level;
+ n = c->extended_cpuid_level;
if (n >= 0x80000008) {
unsigned eax = cpuid_eax(0x80000008);
c->x86_virt_bits = (eax >> 8) & 0xff;
@@ -1043,6 +1074,9 @@ static void __init init_intel(struct cpuinfo_x86 *c)
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
+ if (c->x86 >= 15)
+ set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+ c->x86_num_cores = intel_num_cpu_cores(c);
}
void __init get_cpu_vendor(struct cpuinfo_x86 *c)
@@ -1079,8 +1113,7 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_clflush_size = 64;
c->x86_cache_alignment = c->x86_clflush_size;
c->x86_num_cores = 1;
- c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
- c->x86_cpuid_level = 0;
+ c->extended_cpuid_level = 0;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
/* Get vendor name */
@@ -1108,11 +1141,14 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
if (c->x86_capability[0] & (1<<19))
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
- c->x86_apicid = misc >> 24;
} else {
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
}
+
+#ifdef CONFIG_SMP
+ phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
}
/*
@@ -1127,11 +1163,11 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
- c->x86_cpuid_level = xlvl;
+ c->extended_cpuid_level = xlvl;
if ((xlvl & 0xffff0000) == 0x80000000) {
if (xlvl >= 0x80000001) {
c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[5] = cpuid_ecx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
}
if (xlvl >= 0x80000004)
get_model_name(c); /* Default name */
@@ -1172,7 +1208,6 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
select_idle_routine(c);
detect_ht(c);
- sched_cmp_hack(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
@@ -1231,7 +1266,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
/* AMD-defined */
- "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
@@ -1243,7 +1278,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL,
+ "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
+ "constant_tsc", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1254,17 +1290,25 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ /* VIA/Cyrix/Centaur-defined */
+ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
/* AMD-defined (#2) */
"lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
static char *x86_power_flags[] = {
"ts", /* temperature sensor */
"fid", /* frequency id control */
"vid", /* voltage id control */
"ttp", /* thermal trip */
+ "tm",
+ "stc"
};
@@ -1299,8 +1343,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
#ifdef CONFIG_SMP
- seq_printf(m, "physical id\t: %d\n", phys_proc_id[c - cpu_data]);
- seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings);
+ if (smp_num_siblings * c->x86_num_cores > 1) {
+ int cpu = c - cpu_data;
+ seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
+ seq_printf(m, "siblings\t: %d\n",
+ c->x86_num_cores * smp_num_siblings);
+ seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
+ seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+ }
#endif
seq_printf(m,
@@ -1342,12 +1392,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, " [%d]", i);
}
}
- seq_printf(m, "\n");
-
- if (c->x86_num_cores > 1)
- seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
- seq_printf(m, "\n\n");
+ seq_printf(m, "\n\n");
return 0;
}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
index 03452e1bf3..07e0035617 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/bitops.h>
+#include <asm/bootsetup.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -32,7 +33,7 @@
#include <asm-xen/hypervisor.h>
-char x86_boot_params[2048] __initdata = {0,};
+char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c
index 2282257d98..5a2c6f179d 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c
@@ -28,6 +28,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/proto.h>
+#include <asm/ia32_unistd.h>
/* #define DEBUG_SIG 1 */
@@ -83,7 +84,7 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct rt_sigframe
{
- char *pretcode;
+ char __user *pretcode;
struct ucontext uc;
struct siginfo info;
};
@@ -121,7 +122,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
err |= __get_user(buf, &sc->fpstate);
if (buf) {
- if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+ if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
goto badframe;
err |= restore_i387(buf);
} else {
@@ -147,7 +148,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
unsigned long eax;
frame = (struct rt_sigframe __user *)(regs->rsp - 8);
- if (verify_area(VERIFY_READ, frame, sizeof(*frame))) {
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) {
goto badframe;
}
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) {
@@ -160,9 +161,8 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) {
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
goto badframe;
- }
#ifdef DEBUG_SIG
printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax);
@@ -186,7 +186,6 @@ static inline int
setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
{
int err = 0;
- unsigned long eflags;
err |= __put_user(0, &sc->gs);
err |= __put_user(0, &sc->fs);
@@ -210,11 +209,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
err |= __put_user(me->thread.trap_no, &sc->trapno);
err |= __put_user(me->thread.error_code, &sc->err);
err |= __put_user(regs->rip, &sc->rip);
- eflags = regs->eflags;
- if (current->ptrace & PT_PTRACED) {
- eflags &= ~TF_MASK;
- }
- err |= __put_user(eflags, &sc->eflags);
+ err |= __put_user(regs->eflags, &sc->eflags);
err |= __put_user(mask, &sc->oldmask);
err |= __put_user(me->thread.cr2, &sc->cr2);
@@ -253,28 +248,25 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (used_math()) {
fp = get_stack(ka, regs, sizeof(struct _fpstate));
- frame = (void __user *)round_down((unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
+ frame = (void __user *)round_down(
+ (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
- if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) {
- goto give_sigsegv;
- }
+ if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
+ goto give_sigsegv;
if (save_i387(fp) < 0)
err |= -1;
- } else {
+ } else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
- }
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) {
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
- }
if (ka->sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, info);
- if (err) {
+ if (err)
goto give_sigsegv;
}
- }
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
@@ -288,9 +280,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (sizeof(*set) == 16) {
__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
- } else {
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- }
+ } else
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -302,9 +293,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
}
- if (err) {
+ if (err)
goto give_sigsegv;
- }
#ifdef DEBUG_SIG
printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
@@ -329,14 +319,9 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->rsp = (unsigned long)frame;
set_fs(USER_DS);
- if (regs->eflags & TF_MASK) {
- if ((current->ptrace & (PT_PTRACED | PT_DTRACE)) == (PT_PTRACED | PT_DTRACE)) {
- ptrace_notify(SIGTRAP);
- } else {
- regs->eflags &= ~TF_MASK;
- }
- }
-
+ regs->eflags &= ~TF_MASK;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
#ifdef DEBUG_SIG
printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
current->comm, current->pid, frame, regs->rip, frame->pretcode);
@@ -357,7 +342,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs *regs)
{
#ifdef DEBUG_SIG
- printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig,
+ printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n",
+ current->pid, sig,
regs->rip, regs->rsp, regs);
#endif
@@ -375,10 +361,23 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
regs->rax = -EINTR;
break;
}
- /* fallthrough */
+ /* fallthrough */
case -ERESTARTNOINTR:
regs->rax = regs->orig_rax;
regs->rip -= 2;
+ break;
+ }
+ }
+
+ /*
+ * If TF is set due to a debugger (PT_DTRACE), clear the TF
+ * flag so that register information in the sigcontext is
+ * correct.
+ */
+ if (unlikely(regs->eflags & TF_MASK)) {
+ if (likely(current->ptrace & PT_DTRACE)) {
+ current->ptrace &= ~PT_DTRACE;
+ regs->eflags &= ~TF_MASK;
}
}
@@ -418,9 +417,8 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
* kernel mode. Just return without doing anything
* if so.
*/
- if ((regs->cs & 2) != 2) {
+ if ((regs->cs & 2) != 2)
return 1;
- }
if (try_to_freeze(0))
goto no_signal;
@@ -457,7 +455,9 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
regs->rip -= 2;
}
if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
- regs->rax = __NR_restart_syscall;
+ regs->rax = test_thread_flag(TIF_IA32) ?
+ __NR_ia32_restart_syscall :
+ __NR_restart_syscall;
regs->rip -= 2;
}
}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
index b183fafacd..1e379ed17b 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
@@ -25,7 +25,9 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mach_apic.h>
+#include <asm/mmu_context.h>
#include <asm/proto.h>
+#include <asm/apicdef.h>
/*
* Smarter SMP flushing macros.
@@ -41,7 +43,7 @@ static cpumask_t flush_cpumask;
static struct mm_struct * flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
-#define FLUSH_ALL 0xffffffff
+#define FLUSH_ALL -1ULL
/*
* We cannot call mmdrop() because we are in interrupt context,
@@ -52,7 +54,7 @@ static inline void leave_mm (unsigned long cpu)
if (read_pda(mmu_state) == TLBSTATE_OK)
BUG();
clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
- __flush_tlb();
+ load_cr3(swapper_pg_dir);
}
/*
@@ -372,7 +374,10 @@ void smp_send_stop(void)
__smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
if (!nolock)
spin_unlock(&call_lock);
- smp_stop_cpu();
+
+ local_irq_disable();
+ disable_local_APIC();
+ local_irq_enable();
}
/*
@@ -409,3 +414,27 @@ asmlinkage void smp_call_function_interrupt(void)
atomic_inc(&call_data->finished);
}
}
+
+int safe_smp_processor_id(void) /* maps the id from hard_smp_processor_id() to an index of x86_cpu_to_apicid[]; returns 0 when nothing matches */
+{
+ int apicid, i;
+
+ if (disable_apic)
+ return 0;
+
+ apicid = hard_smp_processor_id();
+ if (apicid >= 0 && apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid) /* range check: the id is not known to be a valid index of the NR_CPUS entries scanned below */
+ return apicid; /* fast path: cpu number equals APIC id */
+
+ for (i = 0; i < NR_CPUS; ++i) {
+ if (x86_cpu_to_apicid[i] == apicid)
+ return i;
+ }
+
+ /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
+ * or called too early. Either way, we must be CPU 0. */
+ if (x86_cpu_to_apicid[0] == BAD_APICID)
+ return 0;
+
+ return 0; /* Should not happen */
+}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
index a7e2c3e95e..f1ec0f3459 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
@@ -12,8 +12,7 @@
* Pentium Pro and Pentium-II/Xeon MP machines.
* Original development of Linux SMP code supported by Caldera.
*
- * This code is released under the GNU General Public License version 2 or
- * later.
+ * This code is released under the GNU General Public License version 2
*
* Fixes
* Felix Koop : NR_CPUS used properly
@@ -31,9 +30,13 @@
* Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
* Maciej W. Rozycki : Bits for genuine 82489DX APICs
* Andi Kleen : Changed for SMP boot into long mode.
- * Rusty Russell : Hacked into shape for new "hotplug" boot process.
+ * Rusty Russell : Hacked into shape for new "hotplug" boot process.
+ * Andi Kleen : Converted to new state machine.
+ * Various cleanups.
+ * Probably mostly hotplug CPU ready now.
*/
+
#include <linux/config.h>
#include <linux/init.h>
@@ -53,19 +56,35 @@
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
+#include <asm/nmi.h>
+
+/* Change for real CPU hotplug. Note other files need to be fixed
+ first too. */
+#define __cpuinit __init
+#define __cpuinitdata __initdata
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(phys_proc_id);
+EXPORT_SYMBOL(cpu_core_id);
/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
+
+/*
+ * Private maps to synchronize booting between AP and BP.
+ * Probably not needed anymore, but it makes for easier debugging. -AK
+ */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
-static cpumask_t smp_commenced_mask;
+
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
@@ -74,13 +93,15 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
int smp_threads_ready;
cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
/*
* Trampoline 80x86 program as an array.
*/
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
+extern unsigned char trampoline_data[];
+extern unsigned char trampoline_end[];
/*
* Currently trivial. Write the real->protected mode
@@ -88,11 +109,9 @@ extern unsigned char trampoline_end [];
* has made sure it's suitably aligned.
*/
-static unsigned long __init setup_trampoline(void)
+static unsigned long __cpuinit setup_trampoline(void)
{
void *tramp = __va(SMP_TRAMPOLINE_BASE);
- extern volatile __u32 tramp_gdt_ptr;
- tramp_gdt_ptr = __pa_symbol(&cpu_gdt_table);
memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(tramp);
}
@@ -102,154 +121,224 @@ static unsigned long __init setup_trampoline(void)
* a given CPU
*/
-static void __init smp_store_cpu_info(int id)
+static void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = cpu_data + id;
*c = boot_cpu_data;
identify_cpu(c);
+ print_cpu_info(c);
}
/*
- * TSC synchronization.
+ * New Funky TSC sync algorithm borrowed from IA64.
+ * Main advantage is that it doesn't reset the TSCs fully and
+ * in general looks more robust and it works better than my earlier
+ * attempts. I believe it was written by David Mosberger. Some minor
+ * adjustments for x86-64 by me -AK
+ *
+ * Original comment reproduced below.
+ *
+ * Synchronize TSC of the current (slave) CPU with the TSC of the
+ * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
+ * eliminate the possibility of unaccounted-for errors (such as
+ * getting a machine check in the middle of a calibration step). The
+ * basic idea is for the slave to ask the master what itc value it has
+ * and to read its own itc before and after the master responds. Each
+ * iteration gives us three timestamps:
*
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
+ * slave master
+ *
+ * t0 ---\
+ * ---\
+ * --->
+ * tm
+ * /---
+ * /---
+ * t1 <---
+ *
+ *
+ * The goal is to adjust the slave's TSC such that tm falls exactly
+ * half-way between t0 and t1. If we achieve this, the clocks are
+ * synchronized provided the interconnect between the slave and the
+ * master is symmetric. Even if the interconnect were asymmetric, we
+ * would still know that the synchronization error is smaller than the
+ * roundtrip latency (t1 - t0).
+ *
+ * When the interconnect is quiet and symmetric, this lets us
+ * synchronize the TSC to within one or two cycles. However, we can
+ * only *guarantee* that the synchronization is accurate to within a
+ * round-trip time, which is typically in the range of several hundred
+ * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
+ * are usually almost perfectly synchronized, but we shouldn't assume
+ * that the accuracy is much better than half a microsecond or so.
+ *
+ * [there are other errors like the latency of RDTSC and of the
+ * WRMSR. These can also amount to hundreds of cycles. So it's
+ * probably worse. It claims 153 cycles error on a dual Opteron,
+ * but I suspect the numbers are actually somewhat worse -AK]
*/
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
+#define MASTER 0
+#define SLAVE (SMP_CACHE_BYTES/8)
-extern unsigned int fast_gettimeoffset_quotient;
+/* Intentionally don't use cpu_relax() while TSC synchronization
+ because we don't want to go into funky power save modes or cause
+ hypervisors to schedule us away. Going to sleep would likely affect
+ latency and low latency is the primary objective here. -AK */
+#define no_cpu_relax() barrier()
-static void __init synchronize_tsc_bp (void)
-{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- long one_usec;
- int buggy = 0;
+static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
+static volatile __cpuinitdata unsigned long go[SLAVE + 1];
+static int notscsync __cpuinitdata;
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",num_booting_cpus());
+#undef DEBUG_TSC_SYNC
- one_usec = cpu_khz;
+#define NUM_ROUNDS 64 /* magic value */
+#define NUM_ITERS 5 /* likewise */
- atomic_set(&tsc_start_flag, 1);
- wmb();
+/* Callback on boot CPU */
+static __cpuinit void sync_master(void *arg)
+{
+ unsigned long flags, i;
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
+ if (smp_processor_id() != boot_cpu_id)
+ return;
- sync_core();
- rdtscll(tsc_values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ go[MASTER] = 0;
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
+ local_irq_save(flags);
+ {
+ for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
+ while (!go[MASTER])
+ no_cpu_relax();
+ go[MASTER] = 0;
+ rdtscll(go[SLAVE]);
+ }
}
+ local_irq_restore(flags);
+}
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc_values[i];
- sum += t0;
- }
- }
- avg = sum / num_booting_cpus();
+/*
+ * Return the number of cycles by which our tsc differs from the tsc
+ * on the master (time-keeper) CPU. A positive number indicates our
+ * tsc is ahead of the master, negative that it is behind.
+ */
+static inline long
+get_delta(long *rt, long *master)
+{
+ unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+ unsigned long tcenter, t0, t1, tm;
+ int i;
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
+ for (i = 0; i < NUM_ITERS; ++i) {
+ rdtscll(t0);
+ go[MASTER] = 1;
+ while (!(tm = go[SLAVE]))
+ no_cpu_relax();
+ go[SLAVE] = 0;
+ rdtscll(t1);
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta / one_usec;
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
+ if (t1 - t0 < best_t1 - best_t0)
+ best_t0 = t0, best_t1 = t1, best_tm = tm;
+ }
- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
- i, realdelta);
- }
+ *rt = best_t1 - best_t0;
+ *master = best_tm - best_t0;
- sum += delta;
- }
- if (!buggy)
- printk("passed.\n");
+ /* average best_t0 and best_t1 without overflow: */
+ tcenter = (best_t0/2 + best_t1/2);
+ if (best_t0 % 2 + best_t1 % 2 == 2)
+ ++tcenter;
+ return tcenter - best_tm;
}
-static void __init synchronize_tsc_ap (void)
+static __cpuinit void sync_tsc(void)
{
- int i;
+ int i, done = 0;
+ long delta, adj, adjust_latency = 0;
+ unsigned long flags, rt, master_time_stamp, bound;
+#if DEBUG_TSC_SYNC
+ static struct syncdebug {
+ long rt; /* roundtrip time */
+ long master; /* master's timestamp */
+ long diff; /* difference between midpoint and master's timestamp */
+ long lat; /* estimate of tsc adjustment latency */
+ } t[NUM_ROUNDS] __cpuinitdata;
+#endif
- /*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc_start_flag)) mb();
+ go[MASTER] = 1;
+
+ smp_call_function(sync_master, NULL, 1, 0);
+
+ while (go[MASTER]) /* wait for master to be ready */
+ no_cpu_relax();
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb();
+ spin_lock_irqsave(&tsc_sync_lock, flags);
+ {
+ for (i = 0; i < NUM_ROUNDS; ++i) {
+ delta = get_delta(&rt, &master_time_stamp);
+ if (delta == 0) {
+ done = 1; /* let's lock on to this... */
+ bound = rt;
+ }
- sync_core();
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ if (!done) {
+ unsigned long t;
+ if (i > 0) {
+ adjust_latency += -delta;
+ adj = -delta + adjust_latency/4;
+ } else
+ adj = -delta;
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ rdtscll(t);
+ wrmsrl(MSR_IA32_TSC, t + adj);
+ }
+#if DEBUG_TSC_SYNC
+ t[i].rt = rt;
+ t[i].master = master_time_stamp;
+ t[i].diff = delta;
+ t[i].lat = adjust_latency/4;
+#endif
+ }
}
+ spin_unlock_irqrestore(&tsc_sync_lock, flags);
+
+#if DEBUG_TSC_SYNC
+ for (i = 0; i < NUM_ROUNDS; ++i)
+ printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+ t[i].rt, t[i].master, t[i].diff, t[i].lat);
+#endif
+
+ printk(KERN_INFO
+ "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
+ "maxerr %lu cycles)\n",
+ smp_processor_id(), boot_cpu_id, delta, rt);
}
-#undef NR_LOOPS
-static atomic_t init_deasserted;
+static void __cpuinit tsc_sync_wait(void)
+{
+ if (notscsync || !cpu_has_tsc)
+ return;
+ printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
+ boot_cpu_id);
+ sync_tsc();
+}
-void __init smp_callin(void)
+static __init int notscsync_setup(char *s)
+{
+ notscsync = 1;
+ return 0;
+}
+__setup("notscsync", notscsync_setup);
+
+static atomic_t init_deasserted __cpuinitdata;
+
+/*
+ * Report back to the Boot Processor.
+ * Running on AP.
+ */
+void __cpuinit smp_callin(void)
{
int cpuid, phys_id;
unsigned long timeout;
@@ -260,7 +349,8 @@ void __init smp_callin(void)
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
*/
- while (!atomic_read(&init_deasserted));
+ while (!atomic_read(&init_deasserted))
+ cpu_relax();
/*
* (This works even if the APIC is not enabled.)
@@ -291,7 +381,7 @@ void __init smp_callin(void)
*/
if (cpu_isset(cpuid, cpu_callout_map))
break;
- rep_nop();
+ cpu_relax();
}
if (!time_before(jiffies, timeout)) {
@@ -309,8 +399,6 @@ void __init smp_callin(void)
Dprintk("CALLIN, before setup_local_APIC().\n");
setup_local_APIC();
- local_irq_enable();
-
/*
* Get our bogomips.
*/
@@ -324,26 +412,16 @@ void __init smp_callin(void)
*/
smp_store_cpu_info(cpuid);
- local_irq_disable();
-
/*
* Allow the master to continue.
*/
cpu_set(cpuid, cpu_callin_map);
-
- /*
- * Synchronize the TSC with the BP
- */
- if (cpu_has_tsc)
- synchronize_tsc_ap();
}
-int cpucount;
-
/*
- * Activate a secondary processor.
+ * Setup code on secondary processor (after coming out of the trampoline)
*/
-void __init start_secondary(void)
+void __cpuinit start_secondary(void)
{
/*
* Dont put anything before smp_callin(), SMP
@@ -356,14 +434,10 @@ void __init start_secondary(void)
/* otherwise gcc will move up the smp_processor_id before the cpu_init */
barrier();
- Dprintk("cpu %d: waiting for commence\n", smp_processor_id());
- while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
- rep_nop();
-
Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
setup_secondary_APIC_clock();
- Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
+ Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
@@ -371,27 +445,27 @@ void __init start_secondary(void)
enable_8259A_irq(0);
}
-
- enable_APIC_timer();
+ enable_APIC_timer();
/*
- * low-memory mappings have been cleared, flush them from
- * the local TLBs too.
+ * Allow the master to continue.
*/
- local_flush_tlb();
-
- Dprintk("cpu %d eSetting cpu_online_map\n", smp_processor_id());
cpu_set(smp_processor_id(), cpu_online_map);
- wmb();
-
+ mb();
+
+ /* Wait for TSC sync so that nothing gets scheduled before it is done.
+ We still process interrupts, which could unfortunately see an
+ inconsistent time in that window. */
+ tsc_sync_wait();
+
cpu_idle();
}
-extern volatile unsigned long init_rsp;
+extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);
#if APIC_DEBUG
-static inline void inquire_remote_apic(int apicid)
+static void inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
@@ -428,7 +502,10 @@ static inline void inquire_remote_apic(int apicid)
}
#endif
-static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
+/*
+ * Kick the secondary to wake up.
+ */
+static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
unsigned long send_status = 0, accept_status = 0;
int maxlvt, timeout, num_starts, j;
@@ -551,33 +628,35 @@ static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_
return (send_status | accept_status);
}
-static void __init do_boot_cpu (int apicid)
+/*
+ * Boot one CPU.
+ */
+static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
struct task_struct *idle;
unsigned long boot_error;
- int timeout, cpu;
+ int timeout;
unsigned long start_rip;
-
- cpu = ++cpucount;
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
*/
idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
- x86_cpu_to_apicid[cpu] = apicid;
+ if (IS_ERR(idle)) {
+ printk("failed fork for CPU %d\n", cpu);
+ return PTR_ERR(idle);
+ }
cpu_pda[cpu].pcurrent = idle;
start_rip = setup_trampoline();
- init_rsp = idle->thread.rsp;
+ init_rsp = idle->thread.rsp;
per_cpu(init_tss,cpu).rsp0 = init_rsp;
initial_code = start_secondary;
clear_ti_thread_flag(idle->thread_info, TIF_FORK);
- printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
+ printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
start_rip, init_rsp);
/*
@@ -614,7 +693,7 @@ static void __init do_boot_cpu (int apicid)
/*
* Starting actual IPI sequence...
*/
- boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
+ boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
if (!boot_error) {
/*
@@ -635,8 +714,6 @@ static void __init do_boot_cpu (int apicid)
if (cpu_isset(cpu, cpu_callin_map)) {
/* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("OK.\n");
- print_cpu_info(&cpu_data[cpu]);
Dprintk("CPU has booted.\n");
} else {
boot_error = 1;
@@ -655,76 +732,131 @@ static void __init do_boot_cpu (int apicid)
if (boot_error) {
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
- cpucount--;
+ cpu_clear(cpu, cpu_present_map);
+ cpu_clear(cpu, cpu_possible_map);
x86_cpu_to_apicid[cpu] = BAD_APICID;
x86_cpu_to_log_apicid[cpu] = BAD_APICID;
+ return -EIO;
}
+
+ return 0;
}
cycles_t cacheflush_time;
unsigned long cache_decay_ticks;
-static void smp_tune_scheduling (void)
+/*
+ * Construct cpu_sibling_map[], so that we can tell the sibling CPU
+ * on SMT systems efficiently.
+ */
+static __cpuinit void detect_siblings(void)
{
- int cachesize; /* kB */
- unsigned long bandwidth = 1000; /* MB/s */
- /*
- * Rough estimation for SMP scheduling, this is the number of
- * cycles it takes for a fully memory-limited process to flush
- * the SMP-local cache.
- *
- * (For a P5 this pretty much means we will choose another idle
- * CPU almost always at wakeup time (this is due to the small
- * L1 cache), on PIIs it's around 50-100 usecs, depending on
- * the cache size)
- */
-
- if (!cpu_khz) {
- /*
- * this basically disables processor-affinity
- * scheduling on SMP without a TSC.
- */
- cacheflush_time = 0;
- return;
- } else {
- cachesize = boot_cpu_data.x86_cache_size;
- if (cachesize == -1) {
- cachesize = 16; /* Pentiums, 2x8kB cache */
- bandwidth = 100;
- }
+ int cpu;
- cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ cpus_clear(cpu_sibling_map[cpu]);
+ cpus_clear(cpu_core_map[cpu]);
}
- cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
+ for_each_online_cpu (cpu) {
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+ int siblings = 0;
+ int i;
+ if (smp_num_siblings > 1) {
+ for_each_online_cpu (i) {
+ if (cpu_core_id[cpu] == cpu_core_id[i]) {
+ siblings++;
+ cpu_set(i, cpu_sibling_map[cpu]);
+ }
+ }
+ } else {
+ siblings++;
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
- printk(KERN_INFO "per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
- (long)cacheflush_time/(cpu_khz/1000),
- ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
- printk(KERN_INFO "task migration cache decay timeout: %ld msecs.\n",
- (cache_decay_ticks + 1) * 1000 / HZ);
+ if (siblings != smp_num_siblings) {
+ printk(KERN_WARNING
+ "WARNING: %d siblings found for CPU%d, should be %d\n",
+ siblings, cpu, smp_num_siblings);
+ smp_num_siblings = siblings;
+ }
+ if (c->x86_num_cores > 1) {
+ for_each_online_cpu(i) {
+ if (phys_proc_id[cpu] == phys_proc_id[i])
+ cpu_set(i, cpu_core_map[cpu]);
+ }
+ } else
+ cpu_core_map[cpu] = cpu_sibling_map[cpu];
+ }
}
/*
- * Cycle through the processors sending APIC IPIs to boot each.
+ * Cleanup possible dangling ends...
*/
-
-static void __init smp_boot_cpus(unsigned int max_cpus)
+static __cpuinit void smp_cleanup_boot(void)
{
- unsigned apicid, cpu, bit, kicked;
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
- nmi_watchdog_default();
+ /*
+ * Reset trampoline flag
+ */
+ *((volatile int *) phys_to_virt(0x467)) = 0;
+#ifndef CONFIG_HOTPLUG_CPU
/*
- * Setup boot CPU information
+ * Free pages reserved for SMP bootup.
+ * When you add hotplug CPU support later, remove this.
+ * Note there is more work to be done for later CPU bootup.
*/
- smp_store_cpu_info(0); /* Final full version of the data */
- printk(KERN_INFO "CPU%d: ", 0);
- print_cpu_info(&cpu_data[0]);
- current_thread_info()->cpu = 0;
- smp_tune_scheduling();
+ free_page((unsigned long) __va(PAGE_SIZE));
+ free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
+#endif
+}
+
+/*
+ * Fall back to non SMP mode after errors.
+ *
+ * RED-PEN audit/test this more. I bet there is more state messed up here.
+ */
+static __cpuinit void disable_smp(void)
+{
+ cpu_present_map = cpumask_of_cpu(0);
+ cpu_possible_map = cpumask_of_cpu(0);
+ if (smp_found_config)
+ phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
+ else
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ cpu_set(0, cpu_sibling_map[0]);
+ cpu_set(0, cpu_core_map[0]);
+}
+
+/*
+ * Handle user cpus=... parameter.
+ */
+static __cpuinit void enforce_max_cpus(unsigned max_cpus)
+{
+ int i, k;
+ k = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_possible(i))
+ continue;
+ if (++k > max_cpus) {
+ cpu_clear(i, cpu_possible_map);
+ cpu_clear(i, cpu_present_map);
+ }
+ }
+}
+/*
+ * Various sanity checks.
+ */
+static int __cpuinit smp_sanity_check(unsigned max_cpus)
+{
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
hard_smp_processor_id());
@@ -737,13 +869,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
*/
if (!smp_found_config) {
printk(KERN_NOTICE "SMP motherboard not detected.\n");
- io_apic_irqs = 0;
- cpu_online_map = cpumask_of_cpu(0);
- phys_cpu_present_map = physid_mask_of_physid(0);
+ disable_smp();
if (APIC_init_uniprocessor())
printk(KERN_NOTICE "Local APIC not detected."
" Using dummy APIC emulation.\n");
- goto smp_done;
+ return -1;
}
/*
@@ -763,196 +893,143 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_id);
printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- io_apic_irqs = 0;
- cpu_online_map = cpumask_of_cpu(0);
- phys_cpu_present_map = physid_mask_of_physid(0);
- disable_apic = 1;
- goto smp_done;
+ nr_ioapics = 0;
+ return -1;
}
- verify_local_APIC();
-
/*
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
- smp_found_config = 0;
printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- io_apic_irqs = 0;
- cpu_online_map = cpumask_of_cpu(0);
- phys_cpu_present_map = physid_mask_of_physid(0);
- disable_apic = 1;
- goto smp_done;
+ nr_ioapics = 0;
+ return -1;
}
- connect_bsp_APIC();
- setup_local_APIC();
-
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
- BUG();
-
- x86_cpu_to_apicid[0] = boot_cpu_id;
-
- /*
- * Now scan the CPU present map and fire up the other CPUs.
- */
- Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+ return 0;
+}
- kicked = 1;
- for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
- apicid = cpu_present_to_apicid(bit);
- /*
- * Don't even attempt to start the boot CPU!
- */
- if (apicid == boot_cpu_id || (apicid == BAD_APICID))
- continue;
+/*
+ * Prepare for SMP bootup. The MP table or ACPI has been read
+ * earlier. Just do some sanity checking here and enable APIC mode.
+ */
+void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
+{
+ int i;
- if (!physid_isset(apicid, phys_cpu_present_map))
- continue;
- if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
- continue;
+ nmi_watchdog_default();
+ current_cpu_data = boot_cpu_data;
+ current_thread_info()->cpu = 0; /* needed? */
- do_boot_cpu(apicid);
- ++kicked;
- }
+ enforce_max_cpus(max_cpus);
/*
- * Cleanup possible dangling ends...
+ * Fill in cpu_present_map and cpu_possible_map
*/
- {
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- *((volatile int *) phys_to_virt(0x467)) = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ int apicid = cpu_present_to_apicid(i);
+ if (physid_isset(apicid, phys_cpu_present_map)) {
+ cpu_set(i, cpu_present_map);
+ /* possible map would be different if we supported real
+ CPU hotplug. */
+ cpu_set(i, cpu_possible_map);
+ }
}
- /*
- * Allow the user to impress friends.
- */
-
- Dprintk("Before bogomips.\n");
- if (!cpucount) {
- printk(KERN_INFO "Only one processor found.\n");
- } else {
- unsigned long bogosum = 0;
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (cpu_isset(cpu, cpu_callout_map))
- bogosum += cpu_data[cpu].loops_per_jiffy;
- printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- cpucount+1,
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
- Dprintk("Before bogocount - setting activated=1.\n");
+ if (smp_sanity_check(max_cpus) < 0) {
+ printk(KERN_INFO "SMP disabled\n");
+ disable_smp();
+ return;
}
+
/*
- * Construct cpu_sibling_map[], so that we can tell the
- * sibling CPU efficiently.
+ * Switch from PIC to APIC mode.
*/
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- cpus_clear(cpu_sibling_map[cpu]);
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- int siblings = 0;
- int i;
- if (!cpu_isset(cpu, cpu_callout_map))
- continue;
-
- if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- siblings++;
- cpu_set(i, cpu_sibling_map[cpu]);
- }
- }
- } else {
- siblings++;
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
+ connect_bsp_APIC();
+ setup_local_APIC();
- if (siblings != smp_num_siblings) {
- printk(KERN_WARNING
- "WARNING: %d siblings found for CPU%d, should be %d\n",
- siblings, cpu, smp_num_siblings);
- smp_num_siblings = siblings;
- }
+ if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
+ panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+ GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
+ /* Or can we switch back to PIC here? */
}
- Dprintk("Boot done.\n");
-
/*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
+ * Now start the IO-APICs
*/
if (!skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
else
nr_ioapics = 0;
- setup_boot_APIC_clock();
-
/*
- * Synchronize the TSC with the AP
+ * Set up local APIC timer on boot CPU.
*/
- if (cpu_has_tsc && cpucount)
- synchronize_tsc_bp();
- smp_done:
- time_init_smp();
+ setup_boot_APIC_clock();
}
-/* These are wrappers to interface to the new boot process. Someone
- who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
+/*
+ * Early setup to make printk work.
+ */
+void __init smp_prepare_boot_cpu(void)
{
- smp_boot_cpus(max_cpus);
+ int me = smp_processor_id();
+ cpu_set(me, cpu_online_map);
+ cpu_set(me, cpu_callout_map);
}
-void __devinit smp_prepare_boot_cpu(void)
+/*
+ * Entry point to boot a CPU.
+ *
+ * This is all __cpuinit, not __devinit for now because we don't support
+ * CPU hotplug (yet).
+ */
+int __cpuinit __cpu_up(unsigned int cpu)
{
- cpu_set(smp_processor_id(), cpu_online_map);
- cpu_set(smp_processor_id(), cpu_callout_map);
-}
+ int err;
+ int apicid = cpu_present_to_apicid(cpu);
-int __devinit __cpu_up(unsigned int cpu)
-{
- /* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask)) {
- local_irq_enable();
- return -ENOSYS;
+ WARN_ON(irqs_disabled());
+
+ Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
+
+ if (apicid == BAD_APICID || apicid == boot_cpu_id ||
+ !physid_isset(apicid, phys_cpu_present_map)) {
+ printk("__cpu_up: bad cpu %d\n", cpu);
+ return -EINVAL;
}
- /* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map)) {
- local_irq_enable();
- return -EIO;
+ /* Boot it! */
+ err = do_boot_cpu(cpu, apicid);
+ if (err < 0) {
+ Dprintk("do_boot_cpu failed %d\n", err);
+ return err;
}
- local_irq_enable();
/* Unleash the CPU! */
Dprintk("waiting for cpu %d\n", cpu);
- cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
- mb();
+ cpu_relax();
return 0;
}
-void __init smp_cpus_done(unsigned int max_cpus)
+/*
+ * Finish the SMP boot.
+ */
+void __cpuinit smp_cpus_done(unsigned int max_cpus)
{
+ zap_low_mappings();
+ smp_cleanup_boot();
+
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
#endif
- zap_low_mappings();
-}
+ detect_siblings();
+ time_init_gtod();
+
+ check_nmi_watchdog();
+}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
index f13565ee9b..10ae319338 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
@@ -28,6 +28,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/nmi.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -119,95 +120,106 @@ int printk_address(unsigned long address)
}
#endif
-unsigned long *in_exception_stack(int cpu, unsigned long stack)
-{
- int k;
+static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+ unsigned *usedp, const char **idp)
+{
+ static const char ids[N_EXCEPTION_STACKS][8] = {
+ [DEBUG_STACK - 1] = "#DB",
+ [NMI_STACK - 1] = "NMI",
+ [DOUBLEFAULT_STACK - 1] = "#DF",
+ [STACKFAULT_STACK - 1] = "#SS",
+ [MCE_STACK - 1] = "#MC",
+ };
+ unsigned k;
+
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
- unsigned long end = tss->ist[k] + EXCEPTION_STKSZ;
+ unsigned long end;
- if (stack >= tss->ist[k] && stack <= end)
+ end = per_cpu(init_tss, cpu).ist[k];
+ if (stack >= end)
+ continue;
+ if (stack >= end - EXCEPTION_STKSZ) {
+ if (*usedp & (1U << k))
+ break;
+ *usedp |= 1U << k;
+ *idp = ids[k];
return (unsigned long *)end;
+ }
}
return NULL;
-}
+}
/*
* x86-64 can have upto three kernel stacks:
* process stack
* interrupt stack
- * severe exception (double fault, nmi, stack fault) hardware stack
- * Check and process them in order.
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
void show_trace(unsigned long *stack)
{
unsigned long addr;
- unsigned long *irqstack, *irqstack_end, *estack_end;
- const int cpu = safe_smp_processor_id();
+ const unsigned cpu = safe_smp_processor_id();
+ unsigned long *irqstack_end = (unsigned long *)cpu_pda[cpu].irqstackptr;
int i;
+ unsigned used = 0;
printk("\nCall Trace:");
- i = 0;
-
- estack_end = in_exception_stack(cpu, (unsigned long)stack);
- if (estack_end) {
- while (stack < estack_end) {
- addr = *stack++;
- if (__kernel_text_address(addr)) {
- i += printk_address(addr);
- i += printk(" ");
- if (i > 50) {
- printk("\n");
- i = 0;
- }
- }
+
+#define HANDLE_STACK(cond) \
+ do while (cond) { \
+ addr = *stack++; \
+ if (kernel_text_address(addr)) { \
+ /* \
+ * If the address is either in the text segment of the \
+ * kernel, or in the region which contains vmalloc'ed \
+ * memory, it *may* be the address of a calling \
+ * routine; if so, print it so that someone tracing \
+ * down the cause of the crash will be able to figure \
+ * out the call path that was taken. \
+ */ \
+ i += printk_address(addr); \
+ if (i > 50) { \
+ printk("\n "); \
+ i = 0; \
+ } \
+ else \
+ i += printk(" "); \
+ } \
+ } while (0)
+
+ for(i = 0; ; ) {
+ const char *id;
+ unsigned long *estack_end;
+ estack_end = in_exception_stack(cpu, (unsigned long)stack,
+ &used, &id);
+
+ if (estack_end) {
+ i += printk(" <%s> ", id);
+ HANDLE_STACK (stack < estack_end);
+ i += printk(" <EOE> ");
+ stack = (unsigned long *) estack_end[-2];
+ continue;
}
- i += printk(" <EOE> ");
- i += 7;
- stack = (unsigned long *) estack_end[-2];
- }
-
- irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
- irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
-
- if (stack >= irqstack && stack < irqstack_end) {
- printk("<IRQ> ");
- while (stack < irqstack_end) {
- addr = *stack++;
- /*
- * If the address is either in the text segment of the
- * kernel, or in the region which contains vmalloc'ed
- * memory, it *may* be the address of a calling
- * routine; if so, print it so that someone tracing
- * down the cause of the crash will be able to figure
- * out the call path that was taken.
- */
- if (__kernel_text_address(addr)) {
- i += printk_address(addr);
- i += printk(" ");
- if (i > 50) {
- printk("\n ");
- i = 0;
- }
+ if (irqstack_end) {
+ unsigned long *irqstack;
+ irqstack = irqstack_end -
+ (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+
+ if (stack >= irqstack && stack < irqstack_end) {
+ i += printk(" <IRQ> ");
+ HANDLE_STACK (stack < irqstack_end);
+ stack = (unsigned long *) (irqstack_end[-1]);
+ irqstack_end = NULL;
+ i += printk(" <EOI> ");
+ continue;
}
- }
- stack = (unsigned long *) (irqstack_end[-1]);
- printk(" <EOI> ");
- i += 7;
- }
-
- while (((long) stack & (THREAD_SIZE-1)) != 0) {
- addr = *stack++;
- if (__kernel_text_address(addr)) {
- i += printk_address(addr);
- i += printk(" ");
- if (i > 50) {
- printk("\n ");
- i = 0;
- }
}
+ break;
}
+
+ HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
+#undef HANDLE_STACK
printk("\n");
}
@@ -243,6 +255,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
if (i && ((i % 4) == 0))
printk("\n ");
printk("%016lx ", *stack++);
+ touch_nmi_watchdog();
}
show_trace((unsigned long *)rsp);
}
@@ -319,10 +332,12 @@ void handle_BUG(struct pt_regs *regs)
printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
}
+#ifdef CONFIG_BUG
void out_of_line_bug(void)
{
BUG();
}
+#endif
static DEFINE_SPINLOCK(die_lock);
static int die_owner = -1;
@@ -348,7 +363,6 @@ void oops_end(void)
die_owner = -1;
bust_spinlocks(0);
spin_unlock(&die_lock);
- local_irq_enable(); /* make sure back scroll still works */
if (panic_on_oops)
panic("Oops");
}
@@ -489,24 +503,8 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
DO_ERROR(18, SIGSEGV, "reserved", reserved)
-
-#define DO_ERROR_STACK(trapnr, signr, str, name) \
-asmlinkage void *do_##name(struct pt_regs * regs, long error_code) \
-{ \
- struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return regs; \
- if (regs->cs & 3) { \
- memcpy(pr, regs, sizeof(struct pt_regs)); \
- regs = pr; \
- } \
- do_trap(trapnr, signr, str, regs, error_code, NULL); \
- return regs; \
-}
-
-DO_ERROR_STACK(12, SIGBUS, "stack segment", stack_segment)
-DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault)
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
{
@@ -585,6 +583,8 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
printk("Do you have a strange power saving mode enabled?\n");
}
+/* Runs on IST stack. This code must keep interrupts off all the time.
+ Nested NMIs are prevented by the CPU. */
asmlinkage void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
@@ -619,15 +619,6 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
mem_parity_error(reason, regs);
if (reason & 0x40)
io_check_error(reason, regs);
-
- /*
- * Reassert NMI in case it became active meanwhile
- * as it's edge-triggered.
- */
- outb(0x8f, 0x70);
- inb(0x71); /* dummy */
- outb(0x0f, 0x70);
- inb(0x71); /* dummy */
}
asmlinkage void do_int3(struct pt_regs * regs, long error_code)
@@ -639,20 +630,34 @@ asmlinkage void do_int3(struct pt_regs * regs, long error_code)
return;
}
+/* Help handler running on IST stack to switch back to user stack
+ for scheduling or signal handling. The actual stack switch is done in
+ entry.S */
+asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs)
+{
+ struct pt_regs *regs = eregs;
+ /* Did already sync */
+ if (eregs == (struct pt_regs *)eregs->rsp)
+ ;
+ /* Exception from user space */
+ else if (eregs->cs & 3)
+ regs = ((struct pt_regs *)current->thread.rsp0) - 1;
+ /* Exception from kernel and interrupts are enabled. Move to
+ kernel process stack. */
+ else if (eregs->eflags & X86_EFLAGS_IF)
+ regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs));
+ if (eregs != regs)
+ *regs = *eregs;
+ return regs;
+}
+
/* runs on IST stack. */
-asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
+asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code)
{
- struct pt_regs *pr;
unsigned long condition;
struct task_struct *tsk = current;
siginfo_t info;
- pr = (struct pt_regs *)(current->thread.rsp0)-1;
- if (regs->cs & 3) {
- memcpy(pr, regs, sizeof(struct pt_regs));
- regs = pr;
- }
-
#ifdef CONFIG_CHECKING
{
/* RED-PEN interaction with debugger - could destroy gs */
@@ -669,9 +674,9 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
asm("movq %%db6,%0" : "=r" (condition));
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP) {
- return regs;
- }
+ SIGTRAP) == NOTIFY_STOP)
+ return;
+
conditional_sti(regs);
/* Mask out spurious debug traps due to lazy DR7 setting */
@@ -684,9 +689,7 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
tsk->thread.debugreg6 = condition;
/* Mask out spurious TF errors due to lazy TF clearing */
- if ((condition & DR_STEP) &&
- (notify_die(DIE_DEBUGSTEP, "debugstep", regs, condition,
- 1, SIGTRAP) != NOTIFY_STOP)) {
+ if (condition & DR_STEP) {
/*
* The TF error should be masked out only if the current
* process is not traced and if the TRAP flag has been set
@@ -698,8 +701,14 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
*/
if ((regs->cs & 3) == 0)
goto clear_TF_reenable;
- if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
- goto clear_TF;
+ /*
+ * Was the TF flag set by a debugger? If so, clear it now,
+ * so that register information is correct.
+ */
+ if (tsk->ptrace & PT_DTRACE) {
+ regs->eflags &= ~TF_MASK;
+ tsk->ptrace &= ~PT_DTRACE;
+ }
}
/* Ok, finally something we can handle */
@@ -715,18 +724,11 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
force_sig_info(SIGTRAP, &info, tsk);
clear_dr7:
asm volatile("movq %0,%%db7"::"r"(0UL));
- notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP);
- return regs;
+ return;
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-
-clear_TF:
- /* RED-PEN could cause spurious errors */
- if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP)
- != NOTIFY_STOP)
regs->eflags &= ~TF_MASK;
- return regs;
}
static int kernel_math_error(struct pt_regs *regs, char *str)
@@ -738,14 +740,8 @@ static int kernel_math_error(struct pt_regs *regs, char *str)
return 1;
}
notify_die(DIE_GPF, str, regs, 0, 16, SIGFPE);
-#if 0
- /* This should be a die, but warn only for now */
+ /* Illegal floating point operation in the kernel */
die(str, regs, 0);
-#else
- printk(KERN_DEBUG "%s: %s at ", current->comm, str);
- printk_address(regs->rip);
- printk("\n");
-#endif
return 0;
}
@@ -829,7 +825,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
conditional_sti(regs);
if ((regs->cs & 3) == 0 &&
- kernel_math_error(regs, "simd math error"))
+ kernel_math_error(regs, "kernel simd math error"))
return;
/*
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
index f980cdefff..737ffeb6d0 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
@@ -9,30 +9,14 @@
* a different vsyscall implementation for Linux/IA32 and for the name.
*
* vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
- * at virtual address -10Mbyte+1024bytes etc... There are at max 8192
+ * at virtual address -10Mbyte+1024bytes etc... There are at max 4
* vsyscalls. One vsyscall can reserve more than 1 slot to avoid
- * jumping out of line if necessary.
+ * jumping out of line if necessary. We cannot add more with this
+ * mechanism because older kernels won't return -ENOSYS.
+ * If we want more than four we need a vDSO.
*
- * Note: the concept clashes with user mode linux. If you use UML just
- * set the kernel.vsyscall sysctl to 0.
- */
-
-/*
- * TODO 2001-03-20:
- *
- * 1) make page fault handler detect faults on page1-page-last of the vsyscall
- * virtual space, and make it increase %rip and write -ENOSYS in %rax (so
- * we'll be able to upgrade to a new glibc without upgrading kernel after
- * we add more vsyscalls.
- * 2) Possibly we need a fixmap table for the vsyscalls too if we want
- * to avoid SIGSEGV and we want to return -EFAULT from the vsyscalls as well.
- * Can we segfault inside a "syscall"? We can fix this anytime and those fixes
- * won't be visible for userspace. Not fixing this is a noop for correct programs,
- * broken programs will segfault and there's no security risk until we choose to
- * fix it.
- *
- * These are not urgent things that we need to address only before shipping the first
- * production binary kernels.
+ * Note: the concept clashes with user mode linux. If you use UML and
+ * want per guest time just set the kernel.vsyscall64 sysctl to 0.
*/
#include <linux/time.h>
@@ -41,6 +25,7 @@
#include <linux/timer.h>
#include <linux/seqlock.h>
#include <linux/jiffies.h>
+#include <linux/sysctl.h>
#include <asm/vsyscall.h>
#include <asm/pgtable.h>
@@ -62,8 +47,7 @@ static force_inline void timeval_normalize(struct timeval * tv)
time_t __sec;
__sec = tv->tv_usec / 1000000;
- if (__sec)
- {
+ if (__sec) {
tv->tv_usec %= 1000000;
tv->tv_sec += __sec;
}
@@ -81,13 +65,14 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
usec = (__xtime.tv_nsec / 1000) +
(__jiffies - __wall_jiffies) * (1000000 / HZ);
- if (__vxtime.mode == VXTIME_TSC) {
+ if (__vxtime.mode != VXTIME_HPET) {
sync_core();
rdtscll(t);
- if (t < __vxtime.last_tsc) t = __vxtime.last_tsc;
+ if (t < __vxtime.last_tsc)
+ t = __vxtime.last_tsc;
usec += ((t - __vxtime.last_tsc) *
__vxtime.tsc_quot) >> 32;
- /* See comment in x86_64 do_gettimeofday. */
+ /* See comment in x86_64 do_gettimeofday. */
} else {
usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
__vxtime.last) * __vxtime.quot) >> 32;
@@ -101,14 +86,13 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
static force_inline void do_get_tz(struct timezone * tz)
{
- *tz = __sys_tz;
+ *tz = __sys_tz;
}
-
static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
int ret;
- asm volatile("syscall"
+ asm volatile("vsysc2: syscall"
: "=a" (ret)
: "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
return ret;
@@ -117,7 +101,7 @@ static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
static force_inline long time_syscall(long *t)
{
long secs;
- asm volatile("syscall"
+ asm volatile("vsysc1: syscall"
: "=a" (secs)
: "0" (__NR_time),"D" (t) : __syscall_clobber);
return secs;
@@ -126,7 +110,7 @@ static force_inline long time_syscall(long *t)
static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
if (unlikely(!__sysctl_vsyscall))
- return gettimeofday(tv,tz);
+ return gettimeofday(tv,tz);
if (tv)
do_vgettimeofday(tv);
if (tz)
@@ -153,9 +137,71 @@ static long __vsyscall(2) venosys_0(void)
static long __vsyscall(3) venosys_1(void)
{
return -ENOSYS;
+}
+
+#ifdef CONFIG_SYSCTL
+
+#define SYSCALL 0x050f
+#define NOP2 0x9090
+/*
+ * NOP out syscall in vsyscall page when not needed.
+ */
+static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ extern u16 vsysc1, vsysc2;
+ u16 *map1, *map2;
+ int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ if (!write)
+ return ret;
+ /* gcc has some trouble with __va(__pa()), so just do it this
+ way. */
+ map1 = ioremap(__pa_symbol(&vsysc1), 2);
+ if (!map1)
+ return -ENOMEM;
+ map2 = ioremap(__pa_symbol(&vsysc2), 2);
+ if (!map2) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!sysctl_vsyscall) {
+ *map1 = SYSCALL;
+ *map2 = SYSCALL;
+ } else {
+ *map1 = NOP2;
+ *map2 = NOP2;
+ }
+ iounmap(map2);
+out:
+ iounmap(map1);
+ return ret;
}
+static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen,
+ void **context)
+{
+ return -ENOSYS;
+}
+
+static ctl_table kernel_table2[] = {
+ { .ctl_name = 99, .procname = "vsyscall64",
+ .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
+ .strategy = vsyscall_sysctl_nostrat,
+ .proc_handler = vsyscall_sysctl_change },
+ { 0, }
+};
+
+static ctl_table kernel_root_table2[] = {
+ { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
+ .child = kernel_table2 },
+ { 0 },
+};
+
+#endif
+
static void __init map_vsyscall(void)
{
extern char __vsyscall_0;
@@ -176,14 +222,15 @@ static void __init map_vsyscall_user(void)
static int __init vsyscall_init(void)
{
- BUG_ON(((unsigned long) &vgettimeofday !=
- VSYSCALL_ADDR(__NR_vgettimeofday)));
+ BUG_ON(((unsigned long) &vgettimeofday !=
+ VSYSCALL_ADDR(__NR_vgettimeofday)));
BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
map_vsyscall();
map_vsyscall_user(); /* establish tranlation for user address space */
- sysctl_vsyscall = 0; /* TBD */
-
+#ifdef CONFIG_SYSCTL
+ register_sysctl_table(kernel_root_table2, 0);
+#endif
return 0;
}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
index b8df766c18..442cc247e9 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
@@ -105,7 +105,6 @@ EXPORT_SYMBOL(pci_free_consistent);
#endif
#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pcibios_penalize_isa_irq);
EXPORT_SYMBOL(pci_mem_start);
#endif
@@ -142,35 +141,23 @@ EXPORT_SYMBOL(rtc_lock);
#undef memmove
#undef memchr
#undef strlen
-#undef strcpy
#undef strncmp
#undef strncpy
#undef strchr
-#undef strcmp
-#undef strcpy
-#undef strcat
-#undef memcmp
extern void * memset(void *,int,__kernel_size_t);
extern size_t strlen(const char *);
extern void * memmove(void * dest,const void *src,size_t count);
-extern char * strcpy(char * dest,const char *src);
-extern int strcmp(const char * cs,const char * ct);
extern void *memchr(const void *s, int c, size_t n);
extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * __memcpy(void *,const void *,__kernel_size_t);
-extern char * strcat(char *, const char *);
-extern int memcmp(const void * cs,const void * ct,size_t count);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncmp);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strchr);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strcat);
EXPORT_SYMBOL(strncat);
EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(strrchr);
@@ -178,7 +165,6 @@ EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(memscan);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
-EXPORT_SYMBOL(memcmp);
#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
/* prototypes are wrong, these are assembly with custom calling functions */
@@ -209,8 +195,9 @@ EXPORT_SYMBOL(smp_num_siblings);
extern void do_softirq_thunk(void);
EXPORT_SYMBOL(do_softirq_thunk);
-void out_of_line_bug(void);
+#ifdef CONFIG_BUG
EXPORT_SYMBOL(out_of_line_bug);
+#endif
EXPORT_SYMBOL(init_level4_pgt);
@@ -219,7 +206,6 @@ EXPORT_SYMBOL(__supported_pte_mask);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(flush_tlb_page);
-EXPORT_SYMBOL_GPL(flush_tlb_all);
#endif
EXPORT_SYMBOL(cpu_khz);
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
index 8b42292232..874b3afede 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
@@ -65,21 +65,19 @@ void bust_spinlocks(int yes)
static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
unsigned long error_code)
{
- unsigned char *instr = (unsigned char *)(regs->rip);
+ unsigned char *instr;
int scan_more = 1;
int prefetch = 0;
- unsigned char *max_instr = instr + 15;
+ unsigned char *max_instr;
/* If it was a exec fault ignore */
if (error_code & (1<<4))
return 0;
- /* Code segments in LDT could have a non zero base. Don't check
- when that's possible */
- if (regs->cs & (1<<2))
- return 0;
+ instr = (unsigned char *)convert_rip_to_linear(current, regs);
+ max_instr = instr + 15;
- if ((regs->cs & 3) != 0 && regs->rip >= TASK_SIZE)
+ if ((regs->cs & 3) != 0 && instr >= (unsigned char *)TASK_SIZE)
return 0;
while (scan_more && instr < max_instr) {
@@ -238,6 +236,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
/*
* Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
*/
static int vmalloc_fault(unsigned long address)
{
@@ -276,7 +276,10 @@ static int vmalloc_fault(unsigned long address)
if (!pte_present(*pte_ref))
return -1;
pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref))
+ /* Don't use pte_page here, because the mappings can point
+ outside mem_map, and the NUMA hash lookup cannot handle
+ that. */
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
BUG();
__flush_tlb_all();
return 0;
@@ -361,7 +364,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
* protection error (error_code & 1) == 0.
*/
if (unlikely(address >= TASK_SIZE)) {
- if (!(error_code & 5)) {
+ if (!(error_code & 5) &&
+ ((address >= VMALLOC_START && address < VMALLOC_END) ||
+ (address >= MODULES_VADDR && address < MODULES_END))) {
if (vmalloc_fault(address) < 0)
goto bad_area_nosemaphore;
return;
@@ -471,17 +476,6 @@ bad_area:
up_read(&mm->mmap_sem);
bad_area_nosemaphore:
-
-#ifdef CONFIG_IA32_EMULATION
- /* 32bit vsyscall. map on demand. */
- if (test_thread_flag(TIF_IA32) &&
- address >= VSYSCALL32_BASE && address < VSYSCALL32_END) {
- if (map_syscall32(mm, address) < 0)
- goto out_of_memory2;
- return;
- }
-#endif
-
/* User mode accesses just cause a SIGSEGV */
if (error_code & 4) {
if (is_prefetch(regs, address, error_code))
@@ -563,7 +557,6 @@ no_context:
*/
out_of_memory:
up_read(&mm->mmap_sem);
-out_of_memory2:
if (current->pid == 1) {
yield();
goto again;
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
index 0e85f182b2..0efcf264fa 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
@@ -919,9 +919,9 @@ static __init int x8664_sysctl_init(void)
__initcall(x8664_sysctl_init);
#endif
-/* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two
- different ones: one for 32bit and one for 64bit. Use the appropiate
- for the target task. */
+/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
+ covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ not need special handling anymore. */
static struct vm_area_struct gate_vma = {
.vm_start = VSYSCALL_START,
@@ -929,22 +929,11 @@ static struct vm_area_struct gate_vma = {
.vm_page_prot = PAGE_READONLY
};
-static struct vm_area_struct gate32_vma = {
- .vm_start = VSYSCALL32_BASE,
- .vm_end = VSYSCALL32_END,
- .vm_page_prot = PAGE_READONLY
-};
-
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(tsk, TIF_IA32)) {
- /* lookup code assumes the pages are present. set them up
- now */
- if (__map_syscall32(tsk->mm, VSYSCALL32_BASE) < 0)
- return NULL;
- return &gate32_vma;
- }
+ if (test_tsk_thread_flag(tsk, TIF_IA32))
+ return NULL;
#endif
return &gate_vma;
}
@@ -952,6 +941,8 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
int in_gate_area(struct task_struct *task, unsigned long addr)
{
struct vm_area_struct *vma = get_gate_vma(task);
+ if (!vma)
+ return 0;
return (addr >= vma->vm_start) && (addr < vma->vm_end);
}
@@ -961,6 +952,5 @@ int in_gate_area(struct task_struct *task, unsigned long addr)
*/
int in_gate_area_no_task(unsigned long addr)
{
- return (((addr >= VSYSCALL_START) && (addr < VSYSCALL_END)) ||
- ((addr >= VSYSCALL32_BASE) && (addr < VSYSCALL32_END)));
+ return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}
diff --git a/linux-2.6-xen-sparse/drivers/Makefile b/linux-2.6-xen-sparse/drivers/Makefile
index 5fab89854a..786cbad645 100644
--- a/linux-2.6-xen-sparse/drivers/Makefile
+++ b/linux-2.6-xen-sparse/drivers/Makefile
@@ -48,8 +48,8 @@ obj-$(CONFIG_PARIDE) += block/paride/
obj-$(CONFIG_TC) += tc/
obj-$(CONFIG_USB) += usb/
obj-$(CONFIG_USB_GADGET) += usb/gadget/
-obj-$(CONFIG_INPUT) += input/
obj-$(CONFIG_GAMEPORT) += input/gameport/
+obj-$(CONFIG_INPUT) += input/
obj-$(CONFIG_I2O) += message/
obj-$(CONFIG_I2C) += i2c/
obj-$(CONFIG_W1) += w1/
@@ -62,5 +62,6 @@ obj-$(CONFIG_EISA) += eisa/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_MMC) += mmc/
obj-$(CONFIG_INFINIBAND) += infiniband/
+obj-$(CONFIG_BLK_DEV_SGIIOC4) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/
diff --git a/linux-2.6-xen-sparse/drivers/char/mem.c b/linux-2.6-xen-sparse/drivers/char/mem.c
index 8eae836f01..96726fad53 100644
--- a/linux-2.6-xen-sparse/drivers/char/mem.c
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c
@@ -23,6 +23,7 @@
#include <linux/devfs_fs_kernel.h>
#include <linux/ptrace.h>
#include <linux/device.h>
+#include <linux/backing-dev.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -76,14 +77,6 @@ static inline int uncached_access(struct file *file, unsigned long addr)
* On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
*/
return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-#elif defined(CONFIG_PPC64)
- /* On PPC64, we always do non-cacheable access to the IO hole and
- * cacheable elsewhere. Cache paradox can checkstop the CPU and
- * the high_memory heuristic below is wrong on machines with memory
- * above the IO hole... Ah, and of course, XFree86 doesn't pass
- * O_SYNC when mapping us to tap IO space. Surprised ?
- */
- return !page_is_ram(addr >> PAGE_SHIFT);
#else
/*
* Accessing memory above the top the kernel knows about or through a file pointer
@@ -111,38 +104,6 @@ static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
}
#endif
-static ssize_t do_write_mem(void *p, unsigned long realp,
- const char __user * buf, size_t count, loff_t *ppos)
-{
- ssize_t written;
- unsigned long copied;
-
- written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (realp < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-realp;
- if (sz > count) sz = count;
- /* Hmm. Do something? */
- buf+=sz;
- p+=sz;
- count-=sz;
- written+=sz;
- }
-#endif
- copied = copy_from_user(p, buf, count);
- if (copied) {
- ssize_t ret = written + (count - copied);
-
- if (ret)
- return ret;
- return -EFAULT;
- }
- written += count;
- *ppos += written;
- return written;
-}
-
#ifndef ARCH_HAS_DEV_MEM
/*
* This funcion reads the *physical* memory. The f_pos points directly to the
@@ -152,15 +113,16 @@ static ssize_t read_mem(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
+ ssize_t read, sz;
+ char *ptr;
if (!valid_phys_addr_range(p, &count))
return -EFAULT;
read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
if (p < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-p;
+ sz = PAGE_SIZE - p;
if (sz > count)
sz = count;
if (sz > 0) {
@@ -173,9 +135,33 @@ static ssize_t read_mem(struct file * file, char __user * buf,
}
}
#endif
- if (copy_to_user(buf, __va(p), count))
- return -EFAULT;
- read += count;
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_mem_ptr(p);
+
+ if (copy_to_user(buf, ptr, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+
*ppos += read;
return read;
}
@@ -184,16 +170,76 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
+ ssize_t written, sz;
+ unsigned long copied;
+ void *ptr;
if (!valid_phys_addr_range(p, &count))
return -EFAULT;
- return do_write_mem(__va(p), p, buf, count, ppos);
+
+ written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (p < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE - p;
+ if (sz > count)
+ sz = count;
+ /* Hmm. Do something? */
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+#endif
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_mem_ptr(p);
+
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
}
#endif
static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
{
-#ifdef pgprot_noncached
+#if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+ vma->vm_page_prot = phys_mem_access_prot(file, offset,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+#elif defined(pgprot_noncached)
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
int uncached;
@@ -212,6 +258,25 @@ static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
return 0;
}
+#if 0
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+ unsigned long long val;
+ /*
+ * RED-PEN: on some architectures there is more mapped memory
+ * than available in mem_map which pfn_valid checks
+ * for. Perhaps should add a new macro here.
+ *
+ * RED-PEN: vmalloc is not supported right now.
+ */
+ if (!pfn_valid(vma->vm_pgoff))
+ return -EIO;
+ val = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ vma->vm_pgoff = __pa(val) >> PAGE_SHIFT;
+ return mmap_mem(file, vma);
+}
+#endif
+
extern long vread(char *buf, char *addr, unsigned long count);
extern long vwrite(char *buf, char *addr, unsigned long count);
@@ -222,33 +287,55 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read = 0;
- ssize_t virtr = 0;
+ ssize_t low_count, read, sz;
char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
-
+
+ read = 0;
if (p < (unsigned long) high_memory) {
- read = count;
+ low_count = count;
if (count > (unsigned long) high_memory - p)
- read = (unsigned long) high_memory - p;
+ low_count = (unsigned long) high_memory - p;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
- if (p < PAGE_SIZE && read > 0) {
+ if (p < PAGE_SIZE && low_count > 0) {
size_t tmp = PAGE_SIZE - p;
- if (tmp > read) tmp = read;
+ if (tmp > low_count) tmp = low_count;
if (clear_user(buf, tmp))
return -EFAULT;
buf += tmp;
p += tmp;
- read -= tmp;
+ read += tmp;
+ low_count -= tmp;
count -= tmp;
}
#endif
- if (copy_to_user(buf, (char *)p, read))
- return -EFAULT;
- p += read;
- buf += read;
- count -= read;
+ while (low_count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, low_count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ kbuf = xlate_dev_kmem_ptr((char *)p);
+
+ if (copy_to_user(buf, kbuf, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ read += sz;
+ low_count -= sz;
+ count -= sz;
+ }
}
if (count > 0) {
@@ -269,15 +356,79 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
}
count -= len;
buf += len;
- virtr += len;
+ read += len;
p += len;
}
free_page((unsigned long)kbuf);
}
*ppos = p;
- return virtr + read;
+ return read;
}
+
+static inline ssize_t
+do_write_kmem(void *p, unsigned long realp, const char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t written, sz;
+ unsigned long copied;
+
+ written = 0;
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (realp < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE - realp;
+ if (sz > count)
+ sz = count;
+ /* Hmm. Do something? */
+ buf += sz;
+ p += sz;
+ realp += sz;
+ count -= sz;
+ written += sz;
+ }
+#endif
+
+ while (count > 0) {
+ char *ptr;
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-realp & (PAGE_SIZE - 1))
+ sz = -realp & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_kmem_ptr(p);
+
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ realp += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
+}
+
+
/*
* This function writes to the *virtual* memory as seen by the kernel.
*/
@@ -296,7 +447,7 @@ static ssize_t write_kmem(struct file * file, const char __user * buf,
if (count > (unsigned long) high_memory - p)
wrote = (unsigned long) high_memory - p;
- written = do_write_mem((void*)p, p, buf, wrote, ppos);
+ written = do_write_kmem((void*)p, p, buf, wrote, ppos);
if (written != wrote)
return written;
wrote = written;
@@ -344,7 +495,7 @@ static ssize_t read_port(struct file * file, char __user * buf,
unsigned long i = *ppos;
char __user *tmp = buf;
- if (verify_area(VERIFY_WRITE,buf,count))
+ if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
while (count-- > 0 && i < 65536) {
if (__put_user(inb(i),tmp) < 0)
@@ -362,7 +513,7 @@ static ssize_t write_port(struct file * file, const char __user * buf,
unsigned long i = *ppos;
const char __user * tmp = buf;
- if (verify_area(VERIFY_READ,buf,count))
+ if (!access_ok(VERIFY_READ,buf,count))
return -EFAULT;
while (count-- > 0 && i < 65536) {
char c;
@@ -568,7 +719,6 @@ static int open_port(struct inode * inode, struct file * filp)
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
-#define mmap_mem mmap_kmem
#define zero_lseek null_lseek
#define full_lseek null_lseek
#define write_zero write_null
@@ -581,7 +731,7 @@ static struct file_operations mem_fops = {
.llseek = memory_lseek,
.read = read_mem,
.write = write_mem,
- .mmap = mmap_mem,
+ .mmap = mmap_kmem,
.open = open_mem,
};
#else
@@ -618,6 +768,10 @@ static struct file_operations zero_fops = {
.mmap = mmap_zero,
};
+static struct backing_dev_info zero_bdi = {
+ .capabilities = BDI_CAP_MAP_COPY,
+};
+
static struct file_operations full_fops = {
.llseek = full_lseek,
.read = read_full,
@@ -664,6 +818,7 @@ static int memory_open(struct inode * inode, struct file * filp)
break;
#endif
case 5:
+ filp->f_mapping->backing_dev_info = &zero_bdi;
filp->f_op = &zero_fops;
break;
case 7:
diff --git a/linux-2.6-xen-sparse/drivers/char/tty_io.c b/linux-2.6-xen-sparse/drivers/char/tty_io.c
index a8d33b5288..69e42bdcb4 100644
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c
@@ -187,7 +187,7 @@ char *tty_name(struct tty_struct *tty, char *buf)
EXPORT_SYMBOL(tty_name);
-inline int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
+int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
const char *routine)
{
#ifdef TTY_PARANOIA_CHECK
@@ -1791,7 +1791,6 @@ retry_open:
}
#ifdef CONFIG_VT
if (console_use_vt && (device == MKDEV(TTY_MAJOR,0))) {
- extern int fg_console;
extern struct tty_driver *console_driver;
driver = console_driver;
index = fg_console;
@@ -2018,11 +2017,10 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
return 0;
#ifdef CONFIG_VT
if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
- unsigned int currcons = tty->index;
int rc;
acquire_console_sem();
- rc = vc_resize(currcons, tmp_ws.ws_col, tmp_ws.ws_row);
+ rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row);
release_console_sem();
if (rc)
return -ENXIO;
@@ -2634,6 +2632,7 @@ static void initialize_tty_struct(struct tty_struct *tty)
tty->magic = TTY_MAGIC;
tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
tty->pgrp = -1;
+ tty->overrun_time = jiffies;
tty->flip.char_buf_ptr = tty->flip.char_buf;
tty->flip.flag_buf_ptr = tty->flip.flag_buf;
INIT_WORK(&tty->flip.work, flush_to_ldisc, tty);
diff --git a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h
index 950f9466a6..e1a95778eb 100644
--- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h
@@ -16,7 +16,7 @@
#ifndef __HAVE_ARCH_SET_PTE_ATOMIC
#define ptep_establish(__vma, __address, __ptep, __entry) \
do { \
- set_pte(__ptep, __entry); \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
#else /* __HAVE_ARCH_SET_PTE_ATOMIC */
@@ -37,7 +37,7 @@ do { \
*/
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
do { \
- set_pte(__ptep, __entry); \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
#endif
@@ -53,20 +53,24 @@ do { \
#endif
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_young(pte))
- return 0;
- set_pte(ptep, pte_mkold(pte));
- return 1;
-}
+#define ptep_test_and_clear_young(__vma, __address, __ptep) \
+({ \
+ pte_t __pte = *(__ptep); \
+ int r = 1; \
+ if (!pte_young(__pte)) \
+ r = 0; \
+ else \
+ set_pte_at((__vma)->vm_mm, (__address), \
+ (__ptep), pte_mkold(__pte)); \
+ r; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(__vma, __address, __ptep) \
({ \
- int __young = ptep_test_and_clear_young(__ptep); \
+ int __young; \
+ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \
if (__young) \
flush_tlb_page(__vma, __address); \
__young; \
@@ -74,20 +78,24 @@ static inline int ptep_test_and_clear_young(pte_t *ptep)
#endif
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_dirty(pte))
- return 0;
- set_pte(ptep, pte_mkclean(pte));
- return 1;
-}
+#define ptep_test_and_clear_dirty(__vma, __address, __ptep) \
+({ \
+ pte_t __pte = *__ptep; \
+ int r = 1; \
+ if (!pte_dirty(__pte)) \
+ r = 0; \
+ else \
+ set_pte_at((__vma)->vm_mm, (__address), (__ptep), \
+ pte_mkclean(__pte)); \
+ r; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
#define ptep_clear_flush_dirty(__vma, __address, __ptep) \
({ \
- int __dirty = ptep_test_and_clear_dirty(__ptep); \
+ int __dirty; \
+ __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep); \
if (__dirty) \
flush_tlb_page(__vma, __address); \
__dirty; \
@@ -95,36 +103,29 @@ static inline int ptep_test_and_clear_dirty(pte_t *ptep)
#endif
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
- pte_t pte = *ptep;
- pte_clear(ptep);
- return pte;
-}
+#define ptep_get_and_clear(__mm, __address, __ptep) \
+({ \
+ pte_t __pte = *(__ptep); \
+ pte_clear((__mm), (__address), (__ptep)); \
+ __pte; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
#define ptep_clear_flush(__vma, __address, __ptep) \
({ \
- pte_t __pte = ptep_get_and_clear(__ptep); \
+ pte_t __pte; \
+ __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \
flush_tlb_page(__vma, __address); \
__pte; \
})
#endif
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pte_t old_pte = *ptep;
- set_pte(ptep, pte_wrprotect(old_pte));
-}
-#endif
-
-#ifndef __HAVE_ARCH_PTEP_MKDIRTY
-static inline void ptep_mkdirty(pte_t *ptep)
-{
- pte_t old_pte = *ptep;
- set_pte(ptep, pte_mkdirty(old_pte));
+ set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif
@@ -144,4 +145,77 @@ static inline void ptep_mkdirty(pte_t *ptep)
#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
#endif
+#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+#define lazy_mmu_prot_update(pte) do { } while (0)
+#endif
+
+/*
+ * When walking page tables, get the address of the next boundary,
+ * or the end address of the range if that comes earlier. Although no
+ * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#ifndef pud_addr_end
+#define pud_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef pmd_addr_end
+#define pmd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+ if (pgd_none(*pgd))
+ return 1;
+ if (unlikely(pgd_bad(*pgd))) {
+ pgd_clear_bad(pgd);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+ if (pud_none(*pud))
+ return 1;
+ if (unlikely(pud_bad(*pud))) {
+ pud_clear_bad(pud);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+ if (pmd_none(*pmd))
+ return 1;
+ if (unlikely(pmd_bad(*pmd))) {
+ pmd_clear_bad(pmd);
+ return 1;
+ }
+ return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
#endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
index 85f022109c..e2e13a9579 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
@@ -4,6 +4,8 @@
#include <asm/ldt.h>
#include <asm/segment.h>
+#define CPU_16BIT_STACK_SIZE 1024
+
#ifndef __ASSEMBLY__
#include <linux/preempt.h>
@@ -13,6 +15,8 @@
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+
struct Xgt_desc_struct {
unsigned short size;
unsigned long address __attribute__((packed));
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
index 43b4f5780b..41ac456d12 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
@@ -11,7 +11,7 @@
#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, int flag);
+ dma_addr_t *dma_handle, unsigned int __nocast flag);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle);
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h
index e3e4a531d2..ddf86feacf 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h
@@ -33,8 +33,6 @@ extern pte_t *kmap_pte;
extern pgprot_t kmap_prot;
extern pte_t *pkmap_page_table;
-extern void kmap_init(void);
-
/*
* Right now we initialize only a single pte table. It can be extended
* easily, subsequent pte tables have to be allocated in one physical
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h
index 2fa9f47ccc..f3e03cd0a9 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h
@@ -50,6 +50,17 @@
#include <linux/vmalloc.h>
#include <asm/fixmap.h>
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p) p
+
/**
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
index f46144e37f..f54bb7bdd7 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
@@ -34,10 +34,10 @@ static inline void __prepare_arch_switch(void)
* are always kernel segments while inside the kernel. Must
* happen before reload of cr3/ldt (i.e., not in __switch_to).
*/
- __asm__ __volatile__ ( "movl %%fs,%0 ; movl %%gs,%1"
+ __asm__ __volatile__ ( "mov %%fs,%0 ; mov %%gs,%1"
: "=m" (*(int *)&current->thread.fs),
"=m" (*(int *)&current->thread.gs));
- __asm__ __volatile__ ( "movl %0,%%fs ; movl %0,%%gs"
+ __asm__ __volatile__ ( "mov %0,%%fs ; mov %0,%%gs"
: : "r" (0) );
}
@@ -100,7 +100,7 @@ static inline void switch_mm(struct mm_struct *prev,
}
#define deactivate_mm(tsk, mm) \
- asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+ asm("mov %0,%%fs ; mov %0,%%gs": :"r" (0))
#define activate_mm(prev, next) \
switch_mm((prev),(next),NULL)
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
index 404da2640b..2c30b449f1 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
@@ -2,7 +2,6 @@
#define _I386_PGALLOC_H
#include <linux/config.h>
-#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
#include <linux/mm.h> /* for struct page */
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
index 9eddbd8012..91f1354048 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
@@ -14,6 +14,7 @@
* hook is made available.
*/
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
#ifndef CONFIG_XEN_SHADOW_MODE
@@ -22,7 +23,7 @@
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
#endif
-#define ptep_get_and_clear(xp) __pte_ma(xchg(&(xp)->pte_low, 0))
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
/*
* We detect special mappings in one of two ways:
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
index f611f04781..4890d7a479 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
@@ -61,7 +61,7 @@ void paging_init(void);
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
-#define FIRST_USER_PGD_NR 0
+#define FIRST_USER_ADDRESS 0
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
@@ -194,15 +194,15 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
/*
* Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are
- * done without a 'verify_area(VERIFY_WRITE,..)'
+ * done without a 'access_ok(VERIFY_WRITE,..)'
*/
-#undef TEST_VERIFY_AREA
+#undef TEST_ACCESS_OK
/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0)
+#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pmd_none(x) (!pmd_val(x))
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
@@ -246,32 +246,26 @@ static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return p
# include <asm/pgtable-2level.h>
#endif
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
if (!pte_dirty(*ptep))
return 0;
return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
}
-static inline int ptep_test_and_clear_young(pte_t *ptep)
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
if (!pte_young(*ptep))
return 0;
return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
}
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
if (pte_write(*ptep))
clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
}
-static inline void ptep_mkdirty(pte_t *ptep)
-{
- if (!pte_dirty(*ptep))
- set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
-}
-
/*
* Macro to mark a page protection value as "uncacheable". On processors which do not support
* it, this is a no-op.
@@ -483,11 +477,14 @@ direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
#define io_remap_pfn_range(vma,from,pfn,size,prot) \
direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
+#define MK_IOSPACE_PFN(space, pfn) (pfn)
+#define GET_IOSPACE(pfn) 0
+#define GET_PFN(pfn) (pfn)
+
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h
index fd54b409e2..604b6db1a2 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h
@@ -99,12 +99,12 @@ extern struct cpuinfo_x86 cpu_data[];
#endif
extern int phys_proc_id[NR_CPUS];
+extern int cpu_core_id[NR_CPUS];
extern char ignore_fpu_irq;
extern void identify_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void dodgy_tsc(void);
#ifdef CONFIG_X86_HT
extern void detect_ht(struct cpuinfo_x86 *c);
@@ -138,7 +138,7 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {}
* clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
* resulting in stale register contents being returned.
*/
-static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
{
__asm__("cpuid"
: "=a" (*eax),
@@ -148,6 +148,18 @@ static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
: "0" (op), "c"(0));
}
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+ int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op), "c" (count));
+}
+
/*
* CPUID functions returning a single datum
*/
@@ -501,6 +513,13 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa
regs->esp = new_esp; \
} while (0)
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+ HYPERVISOR_set_debugreg((register), \
+ ((thread)->debugreg[register]))
+
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h
index 5496d69023..95edad964c 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h
@@ -38,7 +38,7 @@
* 24 - APM BIOS support
* 25 - APM BIOS support
*
- * 26 - unused
+ * 26 - ESPFIX small SS
* 27 - unused
* 28 - unused
* 29 - unused
@@ -71,6 +71,9 @@
#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6)
#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11)
+#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
/*
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h
index 03a3a64183..0a0073ae71 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h
@@ -16,7 +16,7 @@
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
#define MAX_NONPAE_PFN (1 << 20)
-#define PARAM_SIZE 2048
+#define PARAM_SIZE 4096
#define COMMAND_LINE_SIZE 256
#define OLD_CL_MAGIC_ADDR 0x90020
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
index fcf9959526..5d8ab26fa4 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
@@ -84,7 +84,7 @@ static inline unsigned long _get_base(char * addr)
#define loadsegment(seg,value) \
asm volatile("\n" \
"1:\t" \
- "movl %0,%%" #seg "\n" \
+ "mov %0,%%" #seg "\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3:\t" \
@@ -96,13 +96,13 @@ static inline unsigned long _get_base(char * addr)
".align 4\n\t" \
".long 1b,3b\n" \
".previous" \
- : :"m" (*(unsigned int *)&(value)))
+ : :"m" (value))
/*
* Save a segment register away
*/
#define savesegment(seg, value) \
- asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
+ asm volatile("mov %%" #seg ",%0":"=m" (value))
/*
* Clear and set 'TS' bit respectively
@@ -574,4 +574,6 @@ void enable_hlt(void);
extern int es7000_plat;
void cpu_idle_wait(void);
+extern unsigned long arch_align_stack(unsigned long sp);
+
#endif
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
index 731d8678ab..e36521ab35 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
@@ -2,7 +2,8 @@
#ifndef _X86_64_BOOTSETUP_H
#define _X86_64_BOOTSETUP_H 1
-extern char x86_boot_params[2048];
+#define BOOT_PARAM_SIZE 4096
+extern char x86_boot_params[BOOT_PARAM_SIZE];
/*
* This is set up by the setup-routine at boot-time
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h
index 90466f397b..0d6c53a95c 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h
@@ -36,11 +36,7 @@
* - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*/
-#ifdef SLOW_IO_BY_JUMPING
-#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
-#else
#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-#endif
#ifdef REALLY_SLOW_IO
#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
@@ -360,6 +356,17 @@ out:
extern int iommu_bio_merge;
#define BIO_VMERGE_BOUNDARY iommu_bio_merge
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p) p
+
#endif /* __KERNEL__ */
#endif
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
index 8acd7990dc..f1a45ec1a9 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
@@ -145,10 +145,6 @@ static inline pgd_t __pgd(unsigned long x)
#define __pgprot(x) ((pgprot_t) { (x) } )
-extern unsigned long vm_stack_flags, vm_stack_flags32;
-extern unsigned long vm_data_default_flags, vm_data_default_flags32;
-extern unsigned long vm_force_exec32;
-
#define __START_KERNEL 0xffffffff80100000UL
#define __START_KERNEL_map 0xffffffff80000000UL
#define __PAGE_OFFSET 0xffff880000000000UL
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
index 325d700c3b..d6dad2dcce 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
@@ -1,7 +1,6 @@
#ifndef _X86_64_PGALLOC_H
#define _X86_64_PGALLOC_H
-#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/pda.h>
#include <linux/threads.h>
@@ -163,6 +162,8 @@ extern __inline__ void pte_free_kernel(pte_t *pte)
extern void pte_free(struct page *pte);
//#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+//#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
+//#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
#define __pte_free_tlb(tlb,x) pte_free((x))
#define __pmd_free_tlb(tlb,x) pmd_free((x))
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
index 9745edc23b..25a884c08a 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
@@ -132,7 +132,7 @@ extern inline void pgd_clear (pgd_t * pgd)
* each domain will have separate page tables, with their own versions of
* accessed & dirty state.
*/
-static inline pte_t ptep_get_and_clear(pte_t *xp)
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
{
pte_t pte = *xp;
if (pte.pte)
@@ -150,7 +150,7 @@ static inline pte_t ptep_get_and_clear(pte_t *xp)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
-#define FIRST_USER_PGD_NR 0
+#define FIRST_USER_ADDRESS 0
#ifndef __ASSEMBLY__
#define MAXMEM 0x3fffffffffffUL
@@ -262,10 +262,11 @@ static inline unsigned long pud_bad(pud_t pud)
val &= ~(_PAGE_USER | _PAGE_DIRTY);
return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
}
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
#define pte_none(x) (!(x).pte)
#define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0)
+#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
@@ -339,7 +340,9 @@ extern inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; retu
extern inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
extern inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+struct vm_area_struct;
+
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte = *ptep;
int ret = pte_dirty(pte);
@@ -348,7 +351,7 @@ static inline int ptep_test_and_clear_dirty(pte_t *ptep)
return ret;
}
-static inline int ptep_test_and_clear_young(pte_t *ptep)
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte = *ptep;
int ret = pte_young(pte);
@@ -357,18 +360,12 @@ static inline int ptep_test_and_clear_young(pte_t *ptep)
return ret;
}
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
pte_t pte = *ptep;
if (pte_write(pte))
set_pte(ptep, pte_wrprotect(pte));
}
-static inline void ptep_mkdirty(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_dirty(pte))
- xen_l1_entry_update(ptep, pte_mkdirty(pte).pte);
-}
/*
* Macro to mark a page protection value as "uncacheable".
@@ -517,6 +514,13 @@ int __direct_remap_area_pages(struct mm_struct *mm,
#define io_remap_page_range(vma, vaddr, paddr, size, prot) \
remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#define MK_IOSPACE_PFN(space, pfn) (pfn)
+#define GET_IOSPACE(pfn) 0
+#define GET_PFN(pfn) (pfn)
+
#define HAVE_ARCH_UNMAPPED_AREA
#define pgtable_cache_init() do { } while (0)
@@ -534,7 +538,6 @@ int __direct_remap_area_pages(struct mm_struct *mm,
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h
index e4a683206f..5c3e70a12a 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h
@@ -62,9 +62,8 @@ struct cpuinfo_x86 {
int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/
__u8 x86_virt_bits, x86_phys_bits;
__u8 x86_num_cores;
- __u8 x86_apicid;
__u32 x86_power;
- __u32 x86_cpuid_level; /* Max CPUID function supported */
+ __u32 extended_cpuid_level; /* Max extended CPUID function supported */
unsigned long loops_per_jiffy;
} ____cacheline_aligned;
@@ -92,7 +91,6 @@ extern char ignore_irq13;
extern void identify_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void dodgy_tsc(void);
/*
* EFLAGS bits
@@ -169,9 +167,9 @@ static inline void set_in_cr4 (unsigned long mask)
/*
- * User space process size. 47bits.
+ * User space process size. 47bits minus one guard page.
*/
-#define TASK_SIZE (0x800000000000UL)
+#define TASK_SIZE (0x800000000000UL - 4096)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h
index 2af8edd82f..e1c0ea6d0b 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h
@@ -91,6 +91,11 @@ extern unsigned long profile_pc(struct pt_regs *regs);
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+struct task_struct;
+
+extern unsigned long
+convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
+
enum {
EF_CF = 0x00000001,
EF_PF = 0x00000004,
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h
index db5926a999..e00247dc81 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h
@@ -24,10 +24,9 @@
#define GDT_ENTRY_TLS 1
#define GDT_ENTRY_TSS 8 /* needs two entries */
-#define GDT_ENTRY_LDT 10
-#define GDT_ENTRY_TLS_MIN 11
-#define GDT_ENTRY_TLS_MAX 13
-/* 14 free */
+#define GDT_ENTRY_LDT 10 /* needs two entries */
+#define GDT_ENTRY_TLS_MIN 12
+#define GDT_ENTRY_TLS_MAX 14
#define GDT_ENTRY_KERNELCS16 15
#define GDT_ENTRY_TLS_ENTRIES 3
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h
index 82b5cc2b9f..fd0e5ea9a2 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h
@@ -31,12 +31,16 @@ extern int disable_apic;
struct pt_regs;
+extern cpumask_t cpu_present_mask;
+extern cpumask_t cpu_possible_map;
+extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_callout_map;
+
/*
* Private routines/data
*/
extern void smp_alloc_memory(void);
-extern cpumask_t cpu_online_map;
extern volatile unsigned long smp_invalidate_needed;
extern int pic_mode;
extern int smp_num_siblings;
@@ -44,11 +48,12 @@ extern void smp_flush_tlb(void);
extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
extern void smp_send_reschedule(int cpu);
extern void smp_invalidate_rcv(void); /* Process an NMI */
-extern void (*mtrr_hook) (void);
extern void zap_low_mappings(void);
void smp_stop_cpu(void);
extern cpumask_t cpu_sibling_map[NR_CPUS];
+extern cpumask_t cpu_core_map[NR_CPUS];
extern u8 phys_proc_id[NR_CPUS];
+extern u8 cpu_core_id[NR_CPUS];
#define SMP_TRAMPOLINE_BASE 0x6000
@@ -58,10 +63,6 @@ extern u8 phys_proc_id[NR_CPUS];
* compresses data structures.
*/
-extern cpumask_t cpu_callout_map;
-extern cpumask_t cpu_callin_map;
-#define cpu_possible_map cpu_callout_map
-
static inline int num_booting_cpus(void)
{
return cpus_weight(cpu_callout_map);
@@ -77,7 +78,7 @@ extern __inline int hard_smp_processor_id(void)
}
#endif
-#define safe_smp_processor_id() (disable_apic ? 0 : x86_apicid_to_cpu(hard_smp_processor_id()))
+extern int safe_smp_processor_id(void);
#endif /* !ASSEMBLY */
@@ -99,22 +100,6 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
return cpus_addr(cpumask)[0];
}
-static inline int x86_apicid_to_cpu(u8 apicid)
-{
- int i;
-
- for (i = 0; i < NR_CPUS; ++i)
- if (x86_cpu_to_apicid[i] == apicid)
- return i;
-
- /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
- * or called too early. Either way, we must be CPU 0. */
- if (x86_cpu_to_apicid[0] == BAD_APICID)
- return 0;
-
- return -1;
-}
-
static inline int cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < NR_CPUS)
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h
index 17a8fec17c..b0ad9c2030 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h
@@ -400,4 +400,6 @@ void enable_hlt(void);
#define HAVE_EAT_KEY
void eat_key(void);
+extern unsigned long arch_align_stack(unsigned long sp);
+
#endif
diff --git a/linux-2.6-xen-sparse/include/linux/gfp.h b/linux-2.6-xen-sparse/include/linux/gfp.h
index 42fe579514..f9d3d5eb99 100644
--- a/linux-2.6-xen-sparse/include/linux/gfp.h
+++ b/linux-2.6-xen-sparse/include/linux/gfp.h
@@ -26,26 +26,28 @@ struct vm_area_struct;
*
* __GFP_NORETRY: The VM implementation must not retry indefinitely.
*/
-#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
-#define __GFP_HIGH 0x20 /* Should access emergency pools? */
-#define __GFP_IO 0x40 /* Can start physical IO? */
-#define __GFP_FS 0x80 /* Can call down to low-level FS? */
-#define __GFP_COLD 0x100 /* Cache-cold page required */
-#define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */
-#define __GFP_REPEAT 0x400 /* Retry the allocation. Might fail */
-#define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */
-#define __GFP_NORETRY 0x1000 /* Do not retry. Might fail */
-#define __GFP_NO_GROW 0x2000 /* Slab internal usage */
-#define __GFP_COMP 0x4000 /* Add compound page metadata */
-#define __GFP_ZERO 0x8000 /* Return zeroed page on success */
-
-#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */
+#define __GFP_WAIT 0x10u /* Can wait and reschedule? */
+#define __GFP_HIGH 0x20u /* Should access emergency pools? */
+#define __GFP_IO 0x40u /* Can start physical IO? */
+#define __GFP_FS 0x80u /* Can call down to low-level FS? */
+#define __GFP_COLD 0x100u /* Cache-cold page required */
+#define __GFP_NOWARN 0x200u /* Suppress page allocation failure warning */
+#define __GFP_REPEAT 0x400u /* Retry the allocation. Might fail */
+#define __GFP_NOFAIL 0x800u /* Retry for ever. Cannot fail */
+#define __GFP_NORETRY 0x1000u /* Do not retry. Might fail */
+#define __GFP_NO_GROW 0x2000u /* Slab internal usage */
+#define __GFP_COMP 0x4000u /* Add compound page metadata */
+#define __GFP_ZERO 0x8000u /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
+
+#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
/* if you forget to add the bitmask here kernel will crash, period */
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+ __GFP_NOMEMALLOC)
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
@@ -86,7 +88,7 @@ struct vm_area_struct;
extern struct page *
FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
-static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
+static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask,
unsigned int order)
{
if (unlikely(order >= MAX_ORDER))
@@ -97,17 +99,17 @@ static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
}
#ifdef CONFIG_NUMA
-extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
+extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order);
static inline struct page *
-alloc_pages(unsigned int gfp_mask, unsigned int order)
+alloc_pages(unsigned int __nocast gfp_mask, unsigned int order)
{
if (unlikely(order >= MAX_ORDER))
return NULL;
return alloc_pages_current(gfp_mask, order);
}
-extern struct page *alloc_page_vma(unsigned gfp_mask,
+extern struct page *alloc_page_vma(unsigned __nocast gfp_mask,
struct vm_area_struct *vma, unsigned long addr);
#else
#define alloc_pages(gfp_mask, order) \
@@ -116,8 +118,8 @@ extern struct page *alloc_page_vma(unsigned gfp_mask,
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
-extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask));
#define __get_free_page(gfp_mask) \
__get_free_pages((gfp_mask),0)
diff --git a/linux-2.6-xen-sparse/include/linux/mm.h b/linux-2.6-xen-sparse/include/linux/mm.h
index b797aaba2b..087fda00fe 100644
--- a/linux-2.6-xen-sparse/include/linux/mm.h
+++ b/linux-2.6-xen-sparse/include/linux/mm.h
@@ -37,10 +37,6 @@ extern int sysctl_legacy_va_layout;
#include <asm/processor.h>
#include <asm/atomic.h>
-#ifndef MM_VM_SIZE
-#define MM_VM_SIZE(mm) ((TASK_SIZE + PGDIR_SIZE - 1) & PGDIR_MASK)
-#endif
-
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
/*
@@ -164,7 +160,8 @@ extern unsigned int kobjsize(const void *objp);
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
-#define VM_FOREIGN 0x01000000 /* Has pages belonging to another VM */
+#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
+#define VM_FOREIGN 0x02000000 /* Has pages belonging to another VM */
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -582,17 +579,19 @@ struct zap_details {
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */
- unsigned long break_addr; /* Where unmap_vmas stopped */
unsigned long truncate_count; /* Compare vm_truncate_count */
};
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
+unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *);
-int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
-void clear_page_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end);
+void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+ unsigned long end, unsigned long floor, unsigned long ceiling);
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
+ unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma);
int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
@@ -639,9 +638,9 @@ extern unsigned long do_mremap(unsigned long addr,
* These functions are passed a count `nr_to_scan' and a gfpmask. They should
* scan `nr_to_scan' objects, attempting to free them.
*
- * The callback must the number of objects which remain in the cache.
+ * The callback must return the number of objects which remain in the cache.
*
- * The callback will be passes nr_to_scan == 0 when the VM is querying the
+ * The callback will be passed nr_to_scan == 0 when the VM is querying the
* cache size, so a fastpath for that case is appropriate.
*/
typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
@@ -728,6 +727,7 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
+extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -843,7 +843,7 @@ static inline void vm_stat_unaccount(struct vm_area_struct *vma)
}
/* update per process rss and vm hiwater data */
-extern void update_mem_hiwater(void);
+extern void update_mem_hiwater(struct task_struct *tsk);
#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
@@ -861,5 +861,8 @@ int in_gate_area_no_task(unsigned long addr);
#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
#endif /* __HAVE_ARCH_GATE_AREA */
+/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
+#define OOM_DISABLE -17
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/linux-2.6-xen-sparse/include/linux/skbuff.h b/linux-2.6-xen-sparse/include/linux/skbuff.h
index dad5d9bc1b..d10a3c5dc0 100644
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h
@@ -83,12 +83,6 @@
* Any questions? No questions, good. --ANK
*/
-#ifdef __i386__
-#define NET_CALLER(arg) (*(((void **)&arg) - 1))
-#else
-#define NET_CALLER(arg) __builtin_return_address(0)
-#endif
-
struct net_device;
#ifdef CONFIG_NETFILTER
@@ -146,6 +140,20 @@ struct skb_shared_info {
skb_frag_t frags[MAX_SKB_FRAGS];
};
+/* We divide dataref into two halves. The higher 16 bits hold references
+ * to the payload part of skb->data. The lower 16 bits hold references to
+ * the entire skb->data. It is up to the users of the skb to agree on
+ * where the payload starts.
+ *
+ * All users must obey the rule that the skb->data reference count must be
+ * greater than or equal to the payload reference count.
+ *
+ * Holding a reference to the payload part means that the user does not
+ * care about modifications to the header part of skb->data.
+ */
+#define SKB_DATAREF_SHIFT 16
+#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
+
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@@ -159,14 +167,16 @@ struct skb_shared_info {
* @h: Transport layer header
* @nh: Network layer header
* @mac: Link layer header
- * @dst: FIXME: Describe this field
+ * @dst: destination entry
+ * @sp: the security path, used for xfrm
* @cb: Control buffer. Free for use by every layer. Put private vars here
* @len: Length of actual data
* @data_len: Data length
* @mac_len: Length of link layer header
* @csum: Checksum
- * @__unused: Dead field, may be reused
+ * @local_df: allow local fragmentation
* @cloned: Head may be cloned (check refcnt to be sure)
+ * @nohdr: Payload reference only, must not modify header
* @proto_csum_valid: Protocol csum validated since arriving at localhost
* @proto_csum_blank: Protocol csum must be added before leaving localhost
* @pkt_type: Packet class
@@ -189,6 +199,8 @@ struct skb_shared_info {
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @private: Data which is private to the HIPPI implementation
* @tc_index: Traffic control index
+ * @tc_verd: traffic control verdict
+ * @tc_classid: traffic control classid
*/
struct sk_buff {
@@ -241,6 +253,7 @@ struct sk_buff {
csum;
unsigned char local_df,
cloned:1,
+ nohdr:1,
proto_csum_valid:1,
proto_csum_blank:1,
pkt_type,
@@ -374,7 +387,42 @@ static inline void kfree_skb(struct sk_buff *skb)
*/
static inline int skb_cloned(const struct sk_buff *skb)
{
- return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
+ return skb->cloned &&
+ (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
+}
+
+/**
+ * skb_header_cloned - is the header a clone
+ * @skb: buffer to check
+ *
+ * Returns true if modifying the header part of the buffer requires
+ * the data to be copied.
+ */
+static inline int skb_header_cloned(const struct sk_buff *skb)
+{
+ int dataref;
+
+ if (!skb->cloned)
+ return 0;
+
+ dataref = atomic_read(&skb_shinfo(skb)->dataref);
+ dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT);
+ return dataref != 1;
+}
+
+/**
+ * skb_header_release - release reference to header
+ * @skb: buffer to operate on
+ *
+ * Drop a reference to the header part of the buffer. This is done
+ * by acquiring a payload reference. You must not read from the header
+ * part of skb->data after this.
+ */
+static inline void skb_header_release(struct sk_buff *skb)
+{
+ BUG_ON(skb->nohdr);
+ skb->nohdr = 1;
+ atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref);
}
/**
@@ -925,6 +973,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
kfree_skb(skb);
}
+#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
/**
* __dev_alloc_skb - allocate an skbuff for sending
* @length: length to allocate
@@ -937,7 +986,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
*
* %NULL is returned in there is no free memory.
*/
-#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
int gfp_mask)
{
@@ -1058,6 +1106,42 @@ static inline int skb_linearize(struct sk_buff *skb, int gfp)
return __skb_linearize(skb, gfp);
}
+/**
+ * skb_postpull_rcsum - update checksum for received skb after pull
+ * @skb: buffer to update
+ * @start: start of data before pull
+ * @len: length of data pulled
+ *
+ * After doing a pull on a received packet, you need to call this to
+ * update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE
+ * so that it can be recomputed from scratch.
+ */
+
+static inline void skb_postpull_rcsum(struct sk_buff *skb,
+ const void *start, int len)
+{
+ if (skb->ip_summed == CHECKSUM_HW)
+ skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
+}
+
+/**
+ * pskb_trim_rcsum - trim received skb and update checksum
+ * @skb: buffer to trim
+ * @len: new length
+ *
+ * This is exactly the same as pskb_trim except that it ensures the
+ * checksum of received packets is still valid after the operation.
+ */
+
+static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+ if (len >= skb->len)
+ return 0;
+ if (skb->ip_summed == CHECKSUM_HW)
+ skb->ip_summed = CHECKSUM_NONE;
+ return __pskb_trim(skb, len);
+}
+
static inline void *kmap_skb_frag(const skb_frag_t *frag)
{
#ifdef CONFIG_HIGHMEM
@@ -1098,6 +1182,8 @@ extern unsigned int skb_checksum(const struct sk_buff *skb, int offset,
int len, unsigned int csum);
extern int skb_copy_bits(const struct sk_buff *skb, int offset,
void *to, int len);
+extern int skb_store_bits(const struct sk_buff *skb, int offset,
+ void *from, int len);
extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb,
int offset, u8 *to, int len,
unsigned int csum);
@@ -1122,22 +1208,6 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
extern void skb_init(void);
extern void skb_add_mtu(int mtu);
-struct skb_iter {
- /* Iteration functions set these */
- unsigned char *data;
- unsigned int len;
-
- /* Private to iteration */
- unsigned int nextfrag;
- struct sk_buff *fraglist;
-};
-
-/* Keep iterating until skb_iter_next returns false. */
-extern void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i);
-extern int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i);
-/* Call this if aborting loop before !skb_iter_next */
-extern void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i);
-
#ifdef CONFIG_NETFILTER
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
{
diff --git a/linux-2.6-xen-sparse/mm/highmem.c b/linux-2.6-xen-sparse/mm/highmem.c
index 846297fb25..b71abaf94b 100644
--- a/linux-2.6-xen-sparse/mm/highmem.c
+++ b/linux-2.6-xen-sparse/mm/highmem.c
@@ -30,9 +30,9 @@
static mempool_t *page_pool, *isa_page_pool;
-static void *page_pool_alloc(int gfp_mask, void *data)
+static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
{
- int gfp = gfp_mask | (int) (long) data;
+ unsigned int gfp = gfp_mask | (unsigned int) (long) data;
return alloc_page(gfp);
}
@@ -90,7 +90,8 @@ static void flush_all_zero_pkmaps(void)
* So no dangers, even with speculative execution.
*/
page = pte_page(pkmap_page_table[i]);
- pte_clear(&pkmap_page_table[i]);
+ pte_clear(&init_mm, (unsigned long)page_address(page),
+ &pkmap_page_table[i]);
set_page_address(page, NULL);
}
@@ -138,7 +139,8 @@ start:
}
}
vaddr = PKMAP_ADDR(last_pkmap_nr);
- set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
+ set_pte_at(&init_mm, vaddr,
+ &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
pkmap_count[last_pkmap_nr] = 1;
set_page_address(page, (void *)vaddr);
@@ -332,6 +334,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
continue;
mempool_free(bvec->bv_page, pool);
+ dec_page_state(nr_bounce);
}
bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -412,6 +415,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
+ inc_page_state(nr_bounce);
if (rw == WRITE) {
char *vto, *vfrom;
diff --git a/linux-2.6-xen-sparse/mm/memory.c b/linux-2.6-xen-sparse/mm/memory.c
index 7a051b1b41..bfd0814d37 100644
--- a/linux-2.6-xen-sparse/mm/memory.c
+++ b/linux-2.6-xen-sparse/mm/memory.c
@@ -46,7 +46,6 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
-#include <linux/acct.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -84,116 +83,205 @@ EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(vmalloc_earlyreserve);
/*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none. Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+ pud_ERROR(*pud);
+ pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+}
+
+/*
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
-static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
{
- struct page *page;
-
- if (pmd_none(*pmd))
- return;
- if (unlikely(pmd_bad(*pmd))) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
- if (!((start | end) & ~PMD_MASK)) {
- /* Only clear full, aligned ranges */
- page = pmd_page(*pmd);
- pmd_clear(pmd);
- dec_page_state(nr_page_table_pages);
- tlb->mm->nr_ptes--;
- pte_free_tlb(tlb, page);
- }
+ struct page *page = pmd_page(*pmd);
+ pmd_clear(pmd);
+ pte_free_tlb(tlb, page);
+ dec_page_state(nr_page_table_pages);
+ tlb->mm->nr_ptes--;
}
-static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
{
- unsigned long addr = start, next;
- pmd_t *pmd, *__pmd;
+ pmd_t *pmd;
+ unsigned long next;
+ unsigned long start;
- if (pud_none(*pud))
- return;
- if (unlikely(pud_bad(*pud))) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ start = addr;
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ free_pte_range(tlb, pmd);
+ } while (pmd++, addr = next, addr != end);
+
+ start &= PUD_MASK;
+ if (start < floor)
return;
+ if (ceiling) {
+ ceiling &= PUD_MASK;
+ if (!ceiling)
+ return;
}
+ if (end - 1 > ceiling - 1)
+ return;
- pmd = __pmd = pmd_offset(pud, start);
- do {
- next = (addr + PMD_SIZE) & PMD_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pmd_range(tlb, pmd, addr, next);
- pmd++;
- addr = next;
- } while (addr && (addr < end));
-
- if (!((start | end) & ~PUD_MASK)) {
- /* Only clear full, aligned ranges */
- pud_clear(pud);
- pmd_free_tlb(tlb, __pmd);
- }
+ pmd = pmd_offset(pud, start);
+ pud_clear(pud);
+ pmd_free_tlb(tlb, pmd);
}
-
-static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
{
- unsigned long addr = start, next;
- pud_t *pud, *__pud;
+ pud_t *pud;
+ unsigned long next;
+ unsigned long start;
- if (pgd_none(*pgd))
- return;
- if (unlikely(pgd_bad(*pgd))) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
+ start = addr;
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+ } while (pud++, addr = next, addr != end);
+
+ start &= PGDIR_MASK;
+ if (start < floor)
return;
+ if (ceiling) {
+ ceiling &= PGDIR_MASK;
+ if (!ceiling)
+ return;
}
+ if (end - 1 > ceiling - 1)
+ return;
- pud = __pud = pud_offset(pgd, start);
- do {
- next = (addr + PUD_SIZE) & PUD_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pud_range(tlb, pud, addr, next);
- pud++;
- addr = next;
- } while (addr && (addr < end));
-
- if (!((start | end) & ~PGDIR_MASK)) {
- /* Only clear full, aligned ranges */
- pgd_clear(pgd);
- pud_free_tlb(tlb, __pud);
- }
+ pud = pud_offset(pgd, start);
+ pgd_clear(pgd);
+ pud_free_tlb(tlb, pud);
}
/*
- * This function clears user-level page tables of a process.
+ * This function frees user-level page tables of a process.
*
* Must be called with pagetable lock held.
*/
-void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+void free_pgd_range(struct mmu_gather **tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
{
- unsigned long addr = start, next;
- pgd_t * pgd = pgd_offset(tlb->mm, start);
- unsigned long i;
-
- for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
- next = (addr + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pgd_range(tlb, pgd, addr, next);
- pgd++;
- addr = next;
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long start;
+
+ /*
+ * The next few lines have given us lots of grief...
+ *
+ * Why are we testing PMD* at this top level? Because often
+ * there will be no work to do at all, and we'd prefer not to
+ * go all the way down to the bottom just to discover that.
+ *
+ * Why all these "- 1"s? Because 0 represents both the bottom
+ * of the address space and the top of it (using -1 for the
+ * top wouldn't help much: the masks would do the wrong thing).
+ * The rule is that addr 0 and floor 0 refer to the bottom of
+ * the address space, but end 0 and ceiling 0 refer to the top.
+ * Comparisons need to use "end - 1" and "ceiling - 1" (though
+ * that end 0 case should be mythical).
+ *
+ * Wherever addr is brought up or ceiling brought down, we must
+ * be careful to reject "the opposite 0" before it confuses the
+ * subsequent tests. But what about where end is brought down
+ * by PMD_SIZE below? No, end can't go down to 0 there.
+ *
+ * Whereas we round start (addr) and ceiling down, by different
+ * masks at different levels, in order to test whether a table
+ * now has no other vmas using it, so can be freed, we don't
+ * bother to round floor or end up - the tests don't need that.
+ */
+
+ addr &= PMD_MASK;
+ if (addr < floor) {
+ addr += PMD_SIZE;
+ if (!addr)
+ return;
}
+ if (ceiling) {
+ ceiling &= PMD_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
+ end -= PMD_SIZE;
+ if (addr > end - 1)
+ return;
+
+ start = addr;
+ pgd = pgd_offset((*tlb)->mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+ } while (pgd++, addr = next, addr != end);
+
+ if (!tlb_is_full_mm(*tlb))
+ flush_tlb_pgtables((*tlb)->mm, start, end);
}
-pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+ unsigned long floor, unsigned long ceiling)
+{
+ while (vma) {
+ struct vm_area_struct *next = vma->vm_next;
+ unsigned long addr = vma->vm_start;
+
+ if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+ hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
+ floor, next? next->vm_start: ceiling);
+ } else {
+ /*
+ * Optimization: gather nearby vmas into one call down
+ */
+ while (next && next->vm_start <= vma->vm_end + PMD_SIZE
+ && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
+ HPAGE_SIZE)) {
+ vma = next;
+ next = vma->vm_next;
+ }
+ free_pgd_range(tlb, addr, vma->vm_end,
+ floor, next? next->vm_start: ceiling);
+ }
+ vma = next;
+ }
+}
+
+pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long address)
{
if (!pmd_present(*pmd)) {
struct page *new;
@@ -254,20 +342,7 @@ out:
*/
static inline void
-copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
-{
- if (pte_file(pte))
- return;
- swap_duplicate(pte_to_swp_entry(pte));
- if (list_empty(&dst_mm->mmlist)) {
- spin_lock(&mmlist_lock);
- list_add(&dst_mm->mmlist, &src_mm->mmlist);
- spin_unlock(&mmlist_lock);
- }
-}
-
-static inline void
-copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
unsigned long addr)
{
@@ -275,12 +350,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
struct page *page;
unsigned long pfn;
- /* pte contains position in swap, so copy. */
- if (!pte_present(pte)) {
- copy_swap_pte(dst_mm, src_mm, pte);
- set_pte(dst_pte, pte);
+ /* pte contains position in swap or file, so copy. */
+ if (unlikely(!pte_present(pte))) {
+ if (!pte_file(pte)) {
+ swap_duplicate(pte_to_swp_entry(pte));
+ /* make sure dst_mm is on swapoff's mmlist. */
+ if (unlikely(list_empty(&dst_mm->mmlist))) {
+ spin_lock(&mmlist_lock);
+ list_add(&dst_mm->mmlist, &src_mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ }
+ set_pte_at(dst_mm, addr, dst_pte, pte);
return;
}
+
pfn = pte_pfn(pte);
/* the pte points outside of valid memory, the
* mapping is assumed to be good, meaningful
@@ -292,7 +376,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
page = pfn_to_page(pfn);
if (!page || PageReserved(page)) {
- set_pte(dst_pte, pte);
+ set_pte_at(dst_mm, addr, dst_pte, pte);
return;
}
@@ -301,7 +385,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* in the parent and the child
*/
if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
- ptep_set_wrprotect(src_pte);
+ ptep_set_wrprotect(src_mm, addr, src_pte);
pte = *src_pte;
}
@@ -313,172 +397,137 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
get_page(page);
- dst_mm->rss++;
+ inc_mm_counter(dst_mm, rss);
if (PageAnon(page))
- dst_mm->anon_rss++;
- set_pte(dst_pte, pte);
+ inc_mm_counter(dst_mm, anon_rss);
+ set_pte_at(dst_mm, addr, dst_pte, pte);
page_dup_rmap(page);
}
-static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pte_t *src_pte, *dst_pte;
- pte_t *s, *d;
unsigned long vm_flags = vma->vm_flags;
+ int progress;
- d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+again:
+ dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
if (!dst_pte)
return -ENOMEM;
+ src_pte = pte_offset_map_nested(src_pmd, addr);
+ progress = 0;
spin_lock(&src_mm->page_table_lock);
- s = src_pte = pte_offset_map_nested(src_pmd, addr);
- for (; addr < end; addr += PAGE_SIZE, s++, d++) {
- if (pte_none(*s))
+ do {
+ /*
+ * We are holding two locks at this point - either of them
+ * could generate latencies in another task on another CPU.
+ */
+ if (progress >= 32 && (need_resched() ||
+ need_lockbreak(&src_mm->page_table_lock) ||
+ need_lockbreak(&dst_mm->page_table_lock)))
+ break;
+ if (pte_none(*src_pte)) {
+ progress++;
continue;
- copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
- }
- pte_unmap_nested(src_pte);
- pte_unmap(dst_pte);
+ }
+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+ progress += 8;
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
spin_unlock(&src_mm->page_table_lock);
+
+ pte_unmap_nested(src_pte - 1);
+ pte_unmap(dst_pte - 1);
cond_resched_lock(&dst_mm->page_table_lock);
+ if (addr != end)
+ goto again;
return 0;
}
-static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pmd_t *src_pmd, *dst_pmd;
- int err = 0;
unsigned long next;
- src_pmd = pmd_offset(src_pud, addr);
dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
if (!dst_pmd)
return -ENOMEM;
-
- for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
- next = (addr + PMD_SIZE) & PMD_MASK;
- if (next > end || next <= addr)
- next = end;
- if (pmd_none(*src_pmd))
- continue;
- if (pmd_bad(*src_pmd)) {
- pmd_ERROR(*src_pmd);
- pmd_clear(src_pmd);
+ src_pmd = pmd_offset(src_pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(src_pmd))
continue;
- }
- err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
- vma, addr, next);
- if (err)
- break;
- }
- return err;
+ if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ return 0;
}
-static int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pud_t *src_pud, *dst_pud;
- int err = 0;
unsigned long next;
- src_pud = pud_offset(src_pgd, addr);
dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
if (!dst_pud)
return -ENOMEM;
-
- for (; addr < end; addr = next, src_pud++, dst_pud++) {
- next = (addr + PUD_SIZE) & PUD_MASK;
- if (next > end || next <= addr)
- next = end;
- if (pud_none(*src_pud))
- continue;
- if (pud_bad(*src_pud)) {
- pud_ERROR(*src_pud);
- pud_clear(src_pud);
+ src_pud = pud_offset(src_pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(src_pud))
continue;
- }
- err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
- vma, addr, next);
- if (err)
- break;
- }
- return err;
+ if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+ return 0;
}
-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
struct vm_area_struct *vma)
{
pgd_t *src_pgd, *dst_pgd;
- unsigned long addr, start, end, next;
- int err = 0;
+ unsigned long next;
+ unsigned long addr = vma->vm_start;
+ unsigned long end = vma->vm_end;
if (is_vm_hugetlb_page(vma))
- return copy_hugetlb_page_range(dst, src, vma);
-
- start = vma->vm_start;
- src_pgd = pgd_offset(src, start);
- dst_pgd = pgd_offset(dst, start);
-
- end = vma->vm_end;
- addr = start;
- while (addr && (addr < end-1)) {
- next = (addr + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= addr)
- next = end;
- if (pgd_none(*src_pgd))
- goto next_pgd;
- if (pgd_bad(*src_pgd)) {
- pgd_ERROR(*src_pgd);
- pgd_clear(src_pgd);
- goto next_pgd;
- }
- err = copy_pud_range(dst, src, dst_pgd, src_pgd,
- vma, addr, next);
- if (err)
- break;
+ return copy_hugetlb_page_range(dst_mm, src_mm, vma);
-next_pgd:
- src_pgd++;
- dst_pgd++;
- addr = next;
- }
-
- return err;
+ dst_pgd = pgd_offset(dst_mm, addr);
+ src_pgd = pgd_offset(src_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(src_pgd))
+ continue;
+ if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ return 0;
}
-static void zap_pte_range(struct mmu_gather *tlb,
- pmd_t *pmd, unsigned long address,
- unsigned long size, struct zap_details *details)
+static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- unsigned long offset;
- pte_t *ptep;
+ pte_t *pte;
- if (pmd_none(*pmd))
- return;
- if (unlikely(pmd_bad(*pmd))) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
- ptep = pte_offset_map(pmd, address);
- offset = address & ~PMD_MASK;
- if (offset + size > PMD_SIZE)
- size = PMD_SIZE - offset;
- size &= PAGE_MASK;
- if (details && !details->check_mapping && !details->nonlinear_vma)
- details = NULL;
- for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
- pte_t pte = *ptep;
- if (pte_none(pte))
+ pte = pte_offset_map(pmd, addr);
+ do {
+ pte_t ptent = *pte;
+ if (pte_none(ptent))
continue;
- if (pte_present(pte)) {
+ if (pte_present(ptent)) {
struct page *page = NULL;
- unsigned long pfn = pte_pfn(pte);
+ unsigned long pfn = pte_pfn(ptent);
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
if (PageReserved(page))
@@ -502,19 +551,20 @@ static void zap_pte_range(struct mmu_gather *tlb,
page->index > details->last_index))
continue;
}
- pte = ptep_get_and_clear(ptep);
- tlb_remove_tlb_entry(tlb, ptep, address+offset);
+ ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+ tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
- address+offset) != page->index)
- set_pte(ptep, pgoff_to_pte(page->index));
- if (pte_dirty(pte))
+ addr) != page->index)
+ set_pte_at(tlb->mm, addr, pte,
+ pgoff_to_pte(page->index));
+ if (pte_dirty(ptent))
set_page_dirty(page);
if (PageAnon(page))
- tlb->mm->anon_rss--;
- else if (pte_young(pte))
+ dec_mm_counter(tlb->mm, anon_rss);
+ else if (pte_young(ptent))
mark_page_accessed(page);
tlb->freed++;
page_remove_rmap(page);
@@ -527,78 +577,64 @@ static void zap_pte_range(struct mmu_gather *tlb,
*/
if (unlikely(details))
continue;
- if (!pte_file(pte))
- free_swap_and_cache(pte_to_swp_entry(pte));
- pte_clear(ptep);
- }
- pte_unmap(ptep-1);
+ if (!pte_file(ptent))
+ free_swap_and_cache(pte_to_swp_entry(ptent));
+ pte_clear(tlb->mm, addr, pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
}
-static void zap_pmd_range(struct mmu_gather *tlb,
- pud_t *pud, unsigned long address,
- unsigned long size, struct zap_details *details)
+static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- pmd_t * pmd;
- unsigned long end;
+ pmd_t *pmd;
+ unsigned long next;
- if (pud_none(*pud))
- return;
- if (unlikely(pud_bad(*pud))) {
- pud_ERROR(*pud);
- pud_clear(pud);
- return;
- }
- pmd = pmd_offset(pud, address);
- end = address + size;
- if (end > ((address + PUD_SIZE) & PUD_MASK))
- end = ((address + PUD_SIZE) & PUD_MASK);
+ pmd = pmd_offset(pud, addr);
do {
- zap_pte_range(tlb, pmd, address, end - address, details);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ zap_pte_range(tlb, pmd, addr, next, details);
+ } while (pmd++, addr = next, addr != end);
}
-static void zap_pud_range(struct mmu_gather *tlb,
- pgd_t * pgd, unsigned long address,
- unsigned long end, struct zap_details *details)
+static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- pud_t * pud;
+ pud_t *pud;
+ unsigned long next;
- if (pgd_none(*pgd))
- return;
- if (unlikely(pgd_bad(*pgd))) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
- return;
- }
- pud = pud_offset(pgd, address);
+ pud = pud_offset(pgd, addr);
do {
- zap_pmd_range(tlb, pud, address, end - address, details);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ zap_pmd_range(tlb, pud, addr, next, details);
+ } while (pud++, addr = next, addr != end);
}
-static void unmap_page_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma, unsigned long address,
- unsigned long end, struct zap_details *details)
+static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- unsigned long next;
pgd_t *pgd;
- int i;
+ unsigned long next;
- BUG_ON(address >= end);
- pgd = pgd_offset(vma->vm_mm, address);
+ if (details && !details->check_mapping && !details->nonlinear_vma)
+ details = NULL;
+
+ BUG_ON(addr >= end);
tlb_start_vma(tlb, vma);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= address || next > end)
- next = end;
- zap_pud_range(tlb, pgd, address, next, details);
- address = next;
- pgd++;
- }
+ pgd = pgd_offset(vma->vm_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ zap_pud_range(tlb, pgd, addr, next, details);
+ } while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
}
@@ -619,7 +655,7 @@ static void unmap_page_range(struct mmu_gather *tlb,
* @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
* @details: details of nonlinear truncation or shared cache invalidation
*
- * Returns the number of vma's which were covered by the unmapping.
+ * Returns the end address of the unmapping (restart addr if interrupted).
*
* Unmap all pages in the vma list. Called under page_table_lock.
*
@@ -636,7 +672,7 @@ static void unmap_page_range(struct mmu_gather *tlb,
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
@@ -644,12 +680,11 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
unsigned long zap_bytes = ZAP_BLOCK_SIZE;
unsigned long tlb_start = 0; /* For tlb_finish_mmu */
int tlb_start_valid = 0;
- int ret = 0;
+ unsigned long start = start_addr;
spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
int fullmm = tlb_is_full_mm(*tlbp);
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
- unsigned long start;
unsigned long end;
start = max(vma->vm_start, start_addr);
@@ -662,7 +697,6 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
if (vma->vm_flags & VM_ACCOUNT)
*nr_accounted += (end - start) >> PAGE_SHIFT;
- ret++;
while (start != end) {
unsigned long block;
@@ -693,7 +727,6 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
if (i_mmap_lock) {
/* must reset count of rss freed */
*tlbp = tlb_gather_mmu(mm, fullmm);
- details->break_addr = start;
goto out;
}
spin_unlock(&mm->page_table_lock);
@@ -707,7 +740,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
}
}
out:
- return ret;
+ return start; /* which is now the end (or restart) address */
}
/**
@@ -717,7 +750,7 @@ out:
* @size: number of bytes to zap
* @details: details of nonlinear truncation or shared cache invalidation
*/
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
+unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details)
{
struct mm_struct *mm = vma->vm_mm;
@@ -727,16 +760,16 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
if (is_vm_hugetlb_page(vma)) {
zap_hugepage_range(vma, address, size);
- return;
+ return end;
}
lru_add_drain();
spin_lock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm, 0);
- unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
+ end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
tlb_finish_mmu(tlb, address, end);
- acct_update_integrals();
spin_unlock(&mm->page_table_lock);
+ return end;
}
/*
@@ -1005,111 +1038,78 @@ out:
EXPORT_SYMBOL(get_user_pages);
-static void zeromap_pte_range(pte_t * pte, unsigned long address,
- unsigned long size, pgprot_t prot)
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
{
- unsigned long end;
+ pte_t *pte;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+ pte = pte_alloc_map(mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
- pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+ pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
BUG_ON(!pte_none(*pte));
- set_pte(pte, zero_pte);
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
+ set_pte_at(mm, addr, pte, zero_pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
+ return 0;
}
-static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
- unsigned long address, unsigned long size, pgprot_t prot)
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, pgprot_t prot)
{
- unsigned long base, end;
+ pmd_t *pmd;
+ unsigned long next;
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_map(mm, pmd, base + address);
- if (!pte)
+ next = pmd_addr_end(addr, end);
+ if (zeromap_pte_range(mm, pmd, addr, next, prot))
return -ENOMEM;
- zeromap_pte_range(pte, base + address, end - address, prot);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
- unsigned long address,
- unsigned long size, pgprot_t prot)
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
{
- unsigned long base, end;
- int error = 0;
-
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
do {
- pmd_t * pmd = pmd_alloc(mm, pud, base + address);
- error = -ENOMEM;
- if (!pmd)
- break;
- error = zeromap_pmd_range(mm, pmd, base + address,
- end - address, prot);
- if (error)
- break;
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
+ next = pud_addr_end(addr, end);
+ if (zeromap_pmd_range(mm, pud, addr, next, prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
return 0;
}
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, pgprot_t prot)
+int zeromap_page_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long size, pgprot_t prot)
{
- int i;
- int error = 0;
- pgd_t * pgd;
- unsigned long beg = address;
- unsigned long end = address + size;
+ pgd_t *pgd;
unsigned long next;
+ unsigned long end = addr + size;
struct mm_struct *mm = vma->vm_mm;
+ int err;
- pgd = pgd_offset(mm, address);
- flush_cache_range(vma, beg, end);
- BUG_ON(address >= end);
- BUG_ON(end > vma->vm_end);
-
+ BUG_ON(addr >= end);
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
spin_lock(&mm->page_table_lock);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- pud_t *pud = pud_alloc(mm, pgd, address);
- error = -ENOMEM;
- if (!pud)
- break;
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= beg || next > end)
- next = end;
- error = zeromap_pud_range(mm, pud, address,
- next - address, prot);
- if (error)
+ do {
+ next = pgd_addr_end(addr, end);
+ err = zeromap_pud_range(mm, pgd, addr, next, prot);
+ if (err)
break;
- address = next;
- pgd++;
- }
- /*
- * Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
- */
- flush_tlb_range(vma, beg, end);
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&mm->page_table_lock);
- return error;
+ return err;
}
/*
@@ -1117,95 +1117,74 @@ int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
* mappings are removed. any references to nonexistent pages results
* in null mappings (currently treated as "copy-on-access")
*/
-static inline void
-remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
- unsigned long pfn, pgprot_t prot)
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
{
- unsigned long end;
+ pte_t *pte;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+ pte = pte_alloc_map(mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
- set_pte(pte, pfn_pte(pfn, prot));
- address += PAGE_SIZE;
+ set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
- pte++;
- } while (address && (address < end));
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
+ return 0;
}
-static inline int
-remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
- unsigned long size, unsigned long pfn, pgprot_t prot)
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
{
- unsigned long base, end;
-
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
- pfn -= (address >> PAGE_SHIFT);
+ pmd_t *pmd;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_map(mm, pmd, base + address);
- if (!pte)
+ next = pmd_addr_end(addr, end);
+ if (remap_pte_range(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
return -ENOMEM;
- remap_pte_range(pte, base + address, end - address,
- (address >> PAGE_SHIFT) + pfn, prot);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
- unsigned long address, unsigned long size,
- unsigned long pfn, pgprot_t prot)
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
{
- unsigned long base, end;
- int error;
-
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- pfn -= address >> PAGE_SHIFT;
+ pud_t *pud;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
do {
- pmd_t *pmd = pmd_alloc(mm, pud, base+address);
- error = -ENOMEM;
- if (!pmd)
- break;
- error = remap_pmd_range(mm, pmd, base + address, end - address,
- (address >> PAGE_SHIFT) + pfn, prot);
- if (error)
- break;
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
- return error;
+ next = pud_addr_end(addr, end);
+ if (remap_pmd_range(mm, pud, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
}
/* Note: this is only safe if the mm semaphore is held when called. */
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
- int error = 0;
pgd_t *pgd;
- unsigned long beg = from;
- unsigned long end = from + size;
unsigned long next;
+ unsigned long end = addr + size;
struct mm_struct *mm = vma->vm_mm;
- int i;
-
- pfn -= from >> PAGE_SHIFT;
- pgd = pgd_offset(mm, from);
- flush_cache_range(vma, beg, end);
- BUG_ON(from >= end);
+ int err;
/*
* Physically remapped pages are special. Tell the
@@ -1217,31 +1196,21 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
*/
vma->vm_flags |= VM_IO | VM_RESERVED;
+ BUG_ON(addr >= end);
+ pfn -= addr >> PAGE_SHIFT;
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
spin_lock(&mm->page_table_lock);
- for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
- pud_t *pud = pud_alloc(mm, pgd, from);
- error = -ENOMEM;
- if (!pud)
- break;
- next = (from + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= from)
- next = end;
- error = remap_pud_range(mm, pud, from, end - from,
- pfn + (from >> PAGE_SHIFT), prot);
- if (error)
+ do {
+ next = pgd_addr_end(addr, end);
+ err = remap_pud_range(mm, pgd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
break;
- from = next;
- pgd++;
- }
- /*
- * Why flush? remap_pte_range has a BUG_ON for !pte_none()
- */
- flush_tlb_range(vma, beg, end);
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&mm->page_table_lock);
-
- return error;
+ return err;
}
-
EXPORT_SYMBOL(remap_pfn_range);
/*
@@ -1265,11 +1234,11 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page
{
pte_t entry;
- flush_cache_page(vma, address);
entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
vma);
ptep_establish(vma, address, page_table, entry);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
}
/*
@@ -1317,11 +1286,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
int reuse = can_share_swap_page(old_page);
unlock_page(old_page);
if (reuse) {
- flush_cache_page(vma, address);
+ flush_cache_page(vma, address, pfn);
entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
vma);
ptep_set_access_flags(vma, address, page_table, entry, 1);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
@@ -1355,13 +1325,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
page_table = pte_offset_map(pmd, address);
if (likely(pte_same(*page_table, pte))) {
if (PageAnon(old_page))
- mm->anon_rss--;
- if (PageReserved(old_page)) {
- ++mm->rss;
- acct_update_integrals();
- update_mem_hiwater();
- } else
+ dec_mm_counter(mm, anon_rss);
+ if (PageReserved(old_page))
+ inc_mm_counter(mm, rss);
+ else
page_remove_rmap(old_page);
+ flush_cache_page(vma, address, pfn);
break_cow(vma, new_page, address, page_table);
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
@@ -1405,7 +1374,7 @@ no_new_page:
* i_mmap_lock.
*
* In order to make forward progress despite repeatedly restarting some
- * large vma, note the break_addr set by unmap_vmas when it breaks out:
+ * large vma, note the restart_addr from unmap_vmas when it breaks out:
* and restart from that address when we reach that vma again. It might
* have been split or merged, shrunk or extended, but never shifted: so
* restart_addr remains valid so long as it remains in the vma's range.
@@ -1443,8 +1412,8 @@ again:
}
}
- details->break_addr = end_addr;
- zap_page_range(vma, start_addr, end_addr - start_addr, details);
+ restart_addr = zap_page_range(vma, start_addr,
+ end_addr - start_addr, details);
/*
* We cannot rely on the break test in unmap_vmas:
@@ -1455,14 +1424,14 @@ again:
need_break = need_resched() ||
need_lockbreak(details->i_mmap_lock);
- if (details->break_addr >= end_addr) {
+ if (restart_addr >= end_addr) {
/* We have now completed this vma: mark it so */
vma->vm_truncate_count = details->truncate_count;
if (!need_break)
return 0;
} else {
/* Note restart_addr in vma's truncate_count field */
- vma->vm_truncate_count = details->break_addr;
+ vma->vm_truncate_count = restart_addr;
if (!need_break)
goto again;
}
@@ -1750,12 +1719,13 @@ static int do_swap_page(struct mm_struct * mm,
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (unlikely(!pte_same(*page_table, orig_pte))) {
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
- unlock_page(page);
- page_cache_release(page);
ret = VM_FAULT_MINOR;
- goto out;
+ goto out_nomap;
+ }
+
+ if (unlikely(!PageUptodate(page))) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_nomap;
}
/* The page isn't present yet, go ahead with the fault. */
@@ -1764,10 +1734,7 @@ static int do_swap_page(struct mm_struct * mm,
if (vm_swap_full())
remove_exclusive_swap_page(page);
- mm->rss++;
- acct_update_integrals();
- update_mem_hiwater();
-
+ inc_mm_counter(mm, rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1776,7 +1743,7 @@ static int do_swap_page(struct mm_struct * mm,
unlock_page(page);
flush_icache_page(vma, page);
- set_pte(page_table, pte);
+ set_pte_at(mm, address, page_table, pte);
page_add_anon_rmap(page, vma, address);
if (write_access) {
@@ -1788,10 +1755,17 @@ static int do_swap_page(struct mm_struct * mm,
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
+ lazy_mmu_prot_update(pte);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
out:
return ret;
+out_nomap:
+ pte_unmap(page_table);
+ spin_unlock(&mm->page_table_lock);
+ unlock_page(page);
+ page_cache_release(page);
+ goto out;
}
/*
@@ -1831,9 +1805,7 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
spin_unlock(&mm->page_table_lock);
goto out;
}
- mm->rss++;
- acct_update_integrals();
- update_mem_hiwater();
+ inc_mm_counter(mm, rss);
entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)),
vma);
@@ -1842,11 +1814,12 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_add_anon_rmap(page, vma, addr);
}
- ptep_establish_new(vma, addr, page_table, entry);
+ set_pte_at(vma, addr, page_table, entry);
pte_unmap(page_table);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
+ lazy_mmu_prot_update(entry);
spin_unlock(&mm->page_table_lock);
out:
return VM_FAULT_MINOR;
@@ -1949,15 +1922,13 @@ retry:
/* Only go through if we didn't race with anybody else... */
if (pte_none(*page_table)) {
if (!PageReserved(new_page))
- ++mm->rss;
- acct_update_integrals();
- update_mem_hiwater();
+ inc_mm_counter(mm, rss);
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- ptep_establish_new(vma, address, page_table, entry);
+ set_pte_at(vma, address, page_table, entry);
if (anon) {
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
@@ -1974,6 +1945,7 @@ retry:
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
spin_unlock(&mm->page_table_lock);
out:
return ret;
@@ -2001,7 +1973,7 @@ static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
*/
if (!vma->vm_ops || !vma->vm_ops->populate ||
(write_access && !(vma->vm_flags & VM_SHARED))) {
- pte_clear(pte);
+ pte_clear(mm, address, pte);
return do_no_page(mm, vma, address, write_access, pte, pmd);
}
@@ -2068,6 +2040,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
entry = pte_mkyoung(entry);
ptep_set_access_flags(vma, address, pte, entry, write_access);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
pte_unmap(pte);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
@@ -2117,15 +2090,12 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
return VM_FAULT_OOM;
}
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
*
* We've already handled the fast-path in-line, and we own the
* page table lock.
- *
- * On a two-level or three-level page table, this ends up actually being
- * entirely optimized away.
*/
pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
@@ -2149,15 +2119,14 @@ pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr
out:
return pud_offset(pgd, address);
}
+#endif /* __PAGETABLE_PUD_FOLDED */
+#ifndef __PAGETABLE_PMD_FOLDED
/*
* Allocate page middle directory.
*
* We've already handled the fast-path in-line, and we own the
* page table lock.
- *
- * On a two-level page table, this ends up actually being entirely
- * optimized away.
*/
pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
@@ -2173,38 +2142,24 @@ pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
+#ifndef __ARCH_HAS_4LEVEL_HACK
if (pud_present(*pud)) {
pmd_free(new);
goto out;
}
pud_populate(mm, pud, new);
- out:
- return pmd_offset(pud, address);
-}
#else
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
- pmd_t *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pmd_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
- return NULL;
-
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
if (pgd_present(*pud)) {
pmd_free(new);
goto out;
}
pgd_populate(mm, pud, new);
-out:
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+
+ out:
return pmd_offset(pud, address);
}
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
int make_pages_present(unsigned long addr, unsigned long end)
{
@@ -2271,13 +2226,13 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
* update_mem_hiwater
* - update per process rss and vm high water data
*/
-void update_mem_hiwater(void)
+void update_mem_hiwater(struct task_struct *tsk)
{
- struct task_struct *tsk = current;
-
if (tsk->mm) {
- if (tsk->mm->hiwater_rss < tsk->mm->rss)
- tsk->mm->hiwater_rss = tsk->mm->rss;
+ unsigned long rss = get_mm_counter(tsk->mm, rss);
+
+ if (tsk->mm->hiwater_rss < rss)
+ tsk->mm->hiwater_rss = rss;
if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
tsk->mm->hiwater_vm = tsk->mm->total_vm;
}
diff --git a/linux-2.6-xen-sparse/mm/mmap.c b/linux-2.6-xen-sparse/mm/mmap.c
index 848200e1b8..f2dd282348 100644
--- a/linux-2.6-xen-sparse/mm/mmap.c
+++ b/linux-2.6-xen-sparse/mm/mmap.c
@@ -21,7 +21,6 @@
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
-#include <linux/acct.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
@@ -30,6 +29,10 @@
#include <asm/cacheflush.h>
#include <asm/tlb.h>
+static void unmap_region(struct mm_struct *mm,
+ struct vm_area_struct *vma, struct vm_area_struct *prev,
+ unsigned long start, unsigned long end);
+
/*
* WARNING: the debugging will use recursive algorithms so never enable this
* unless you know what you are doing.
@@ -873,7 +876,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
int error;
struct rb_node ** rb_link, * rb_parent;
int accountable = 1;
- unsigned long charged = 0;
+ unsigned long charged = 0, reqprot = prot;
if (file) {
if (is_file_hugepages(file))
@@ -897,16 +900,16 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
prot |= PROT_EXEC;
if (!len)
- return addr;
+ return -EINVAL;
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE)
- return -EINVAL;
+ return -ENOMEM;
/* offset overflow? */
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
- return -EINVAL;
+ return -EOVERFLOW;
/* Too many mappings? */
if (mm->map_count > sysctl_max_map_count)
@@ -934,9 +937,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
- locked = mm->locked_vm << PAGE_SHIFT;
+ locked = len >> PAGE_SHIFT;
+ locked += mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
- locked += len;
+ lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
@@ -991,7 +995,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
}
}
- error = security_file_mmap(file, prot, flags);
+ error = security_file_mmap(file, reqprot, prot, flags);
if (error)
return error;
@@ -1006,8 +1010,7 @@ munmap_back:
}
/* Check against address space limit. */
- if ((mm->total_vm << PAGE_SHIFT) + len
- > current->signal->rlim[RLIMIT_AS].rlim_cur)
+ if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (accountable && (!(flags & MAP_NORESERVE) ||
@@ -1121,8 +1124,6 @@ out:
pgoff, flags & MAP_NONBLOCK);
down_write(&mm->mmap_sem);
}
- acct_update_integrals();
- update_mem_hiwater();
return addr;
unmap_and_free_vma:
@@ -1132,7 +1133,8 @@ unmap_and_free_vma:
fput(file);
/* Undo any partial mapping done by a device driver. */
- zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+ charged = 0;
free_vma:
kmem_cache_free(vm_area_cachep, vma);
unacct_error:
@@ -1221,19 +1223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
- struct vm_area_struct *vma, *prev_vma;
+ struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- unsigned long base = mm->mmap_base, addr = addr0;
- int first_time = 1;
+ unsigned long addr = addr0;
/* requested length too big for entire address space */
if (len > TASK_SIZE)
return -ENOMEM;
- /* dont allow allocations above current base */
- if (mm->free_area_cache > base)
- mm->free_area_cache = base;
-
/* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
@@ -1243,48 +1240,34 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
-try_again:
+ /* either no address requested or can't fit in requested address hole */
+ addr = mm->free_area_cache;
+
/* make sure it can fit in the remaining address space */
- if (mm->free_area_cache < len)
- goto fail;
+ if (addr > len) {
+ vma = find_vma(mm, addr-len);
+ if (!vma || addr <= vma->vm_start)
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr-len);
+ }
+
+ addr = mm->mmap_base-len;
- /* either no address requested or cant fit in requested address hole */
- addr = (mm->free_area_cache - len) & PAGE_MASK;
do {
/*
* Lookup failure means no vma is above this address,
- * i.e. return with success:
+ * else if new region fits below vma->vm_start,
+ * return with success:
*/
- if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
- return addr;
-
- /*
- * new region fits between prev_vma->vm_end and
- * vma->vm_start, use it:
- */
- if (addr+len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end)))
+ vma = find_vma(mm, addr);
+ if (!vma || addr+len <= vma->vm_start)
/* remember the address as a hint for next time */
return (mm->free_area_cache = addr);
- else
- /* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end)
- mm->free_area_cache = vma->vm_start;
/* try just below the current vma->vm_start */
addr = vma->vm_start-len;
- } while (len <= vma->vm_start);
+ } while (len < vma->vm_start);
-fail:
- /*
- * if hint left us with no space for the requested
- * mapping then try again:
- */
- if (first_time) {
- mm->free_area_cache = base;
- first_time = 0;
- goto try_again;
- }
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
@@ -1296,7 +1279,7 @@ fail:
/*
* Restore the topdown base:
*/
- mm->free_area_cache = base;
+ mm->free_area_cache = mm->mmap_base;
return addr;
}
@@ -1309,43 +1292,50 @@ void arch_unmap_area_topdown(struct vm_area_struct *area)
*/
if (area->vm_end > area->vm_mm->free_area_cache)
area->vm_mm->free_area_cache = area->vm_end;
+
+ /* dont allow allocations above current base */
+ if (area->vm_mm->free_area_cache > area->vm_mm->mmap_base)
+ area->vm_mm->free_area_cache = area->vm_mm->mmap_base;
}
unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- if (flags & MAP_FIXED) {
- unsigned long ret;
+ unsigned long ret;
- if (addr > TASK_SIZE - len)
- return -ENOMEM;
- if (addr & ~PAGE_MASK)
- return -EINVAL;
- if (file && is_file_hugepages(file)) {
- /*
- * Check if the given range is hugepage aligned, and
- * can be made suitable for hugepages.
- */
- ret = prepare_hugepage_range(addr, len);
- } else {
- /*
- * Ensure that a normal request is not falling in a
- * reserved hugepage range. For some archs like IA-64,
- * there is a separate region for hugepages.
- */
- ret = is_hugepage_only_range(addr, len);
- }
- if (ret)
- return -EINVAL;
- return addr;
- }
+ if (!(flags & MAP_FIXED)) {
+ unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
- if (file && file->f_op && file->f_op->get_unmapped_area)
- return file->f_op->get_unmapped_area(file, addr, len,
- pgoff, flags);
+ get_area = current->mm->get_unmapped_area;
+ if (file && file->f_op && file->f_op->get_unmapped_area)
+ get_area = file->f_op->get_unmapped_area;
+ addr = get_area(file, addr, len, pgoff, flags);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+ }
- return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+ if (addr > TASK_SIZE - len)
+ return -ENOMEM;
+ if (addr & ~PAGE_MASK)
+ return -EINVAL;
+ if (file && is_file_hugepages(file)) {
+ /*
+ * Check if the given range is hugepage aligned, and
+ * can be made suitable for hugepages.
+ */
+ ret = prepare_hugepage_range(addr, len);
+ } else {
+ /*
+ * Ensure that a normal request is not falling in a
+ * reserved hugepage range. For some archs like IA-64,
+ * there is a separate region for hugepages.
+ */
+ ret = is_hugepage_only_range(current->mm, addr, len);
+ }
+ if (ret)
+ return -EINVAL;
+ return addr;
}
EXPORT_SYMBOL(get_unmapped_area);
@@ -1434,7 +1424,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
struct rlimit *rlim = current->signal->rlim;
/* address space limit tests */
- if (mm->total_vm + grow > rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT)
+ if (!may_expand_vm(mm, grow))
return -ENOMEM;
/* Stack limit test */
@@ -1463,8 +1453,6 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += grow;
__vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
- acct_update_integrals();
- update_mem_hiwater();
return 0;
}
@@ -1592,66 +1580,6 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
}
#endif
-/*
- * Try to free as many page directory entries as we can,
- * without having to work very hard at actually scanning
- * the page tables themselves.
- *
- * Right now we try to free page tables if we have a nice
- * PGDIR-aligned area that got free'd up. We could be more
- * granular if we want to, but this is fast and simple,
- * and covers the bad cases.
- *
- * "prev", if it exists, points to a vma before the one
- * we just free'd - but there's no telling how much before.
- */
-static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
- unsigned long start, unsigned long end)
-{
- unsigned long first = start & PGDIR_MASK;
- unsigned long last = end + PGDIR_SIZE - 1;
- struct mm_struct *mm = tlb->mm;
-
- if (last > MM_VM_SIZE(mm) || last < end)
- last = MM_VM_SIZE(mm);
-
- if (!prev) {
- prev = mm->mmap;
- if (!prev)
- goto no_mmaps;
- if (prev->vm_end > start) {
- if (last > prev->vm_start)
- last = prev->vm_start;
- goto no_mmaps;
- }
- }
- for (;;) {
- struct vm_area_struct *next = prev->vm_next;
-
- if (next) {
- if (next->vm_start < start) {
- prev = next;
- continue;
- }
- if (last > next->vm_start)
- last = next->vm_start;
- }
- if (prev->vm_end > first)
- first = prev->vm_end;
- break;
- }
-no_mmaps:
- if (last < first) /* for arches with discontiguous pgd indices */
- return;
- if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
- first = FIRST_USER_PGD_NR * PGDIR_SIZE;
- /* No point trying to free anything if we're in the same pte page */
- if ((first & PMD_MASK) < (last & PMD_MASK)) {
- clear_page_range(tlb, first, last);
- flush_tlb_pgtables(mm, first, last);
- }
-}
-
/* Normal function to fix up a mapping
* This function is the default for when an area has no specific
* function. This may be used as part of a more specific routine.
@@ -1677,14 +1605,13 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and do the vma updates.
*/
-static void unmap_vma_list(struct mm_struct *mm,
- struct vm_area_struct *mpnt)
+static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
do {
- struct vm_area_struct *next = mpnt->vm_next;
- unmap_vma(mm, mpnt);
- mpnt = next;
- } while (mpnt != NULL);
+ struct vm_area_struct *next = vma->vm_next;
+ unmap_vma(mm, vma);
+ vma = next;
+ } while (vma);
validate_mm(mm);
}
@@ -1694,24 +1621,22 @@ static void unmap_vma_list(struct mm_struct *mm,
* Called with the page table lock held.
*/
static void unmap_region(struct mm_struct *mm,
- struct vm_area_struct *vma,
- struct vm_area_struct *prev,
- unsigned long start,
- unsigned long end)
+ struct vm_area_struct *vma, struct vm_area_struct *prev,
+ unsigned long start, unsigned long end)
{
+ struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
struct mmu_gather *tlb;
unsigned long nr_accounted = 0;
lru_add_drain();
+ spin_lock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm, 0);
unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
-
- if (is_hugepage_only_range(start, end - start))
- hugetlb_free_pgtables(tlb, prev, start, end);
- else
- free_pgtables(tlb, prev, start, end);
+ free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+ next? next->vm_start: 0);
tlb_finish_mmu(tlb, start, end);
+ spin_unlock(&mm->page_table_lock);
}
/*
@@ -1797,7 +1722,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
{
unsigned long end;
- struct vm_area_struct *mpnt, *prev, *last;
+ struct vm_area_struct *vma, *prev, *last;
if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
return -EINVAL;
@@ -1806,14 +1731,14 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
return -EINVAL;
/* Find the first overlapping VMA */
- mpnt = find_vma_prev(mm, start, &prev);
- if (!mpnt)
+ vma = find_vma_prev(mm, start, &prev);
+ if (!vma)
return 0;
- /* we have start < mpnt->vm_end */
+ /* we have start < vma->vm_end */
/* if it doesn't overlap, we have nothing.. */
end = start + len;
- if (mpnt->vm_start >= end)
+ if (vma->vm_start >= end)
return 0;
/*
@@ -1823,11 +1748,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
* unmapped vm_area_struct will remain in use: so lower split_vma
* places tmp vma above, and higher split_vma places tmp vma below.
*/
- if (start > mpnt->vm_start) {
- int error = split_vma(mm, mpnt, start, 0);
+ if (start > vma->vm_start) {
+ int error = split_vma(mm, vma, start, 0);
if (error)
return error;
- prev = mpnt;
+ prev = vma;
}
/* Does it split the last one? */
@@ -1837,18 +1762,16 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
if (error)
return error;
}
- mpnt = prev? prev->vm_next: mm->mmap;
+ vma = prev? prev->vm_next: mm->mmap;
/*
* Remove the vma's, and unmap the actual pages
*/
- detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
- spin_lock(&mm->page_table_lock);
- unmap_region(mm, mpnt, prev, start, end);
- spin_unlock(&mm->page_table_lock);
+ detach_vmas_to_be_unmapped(mm, vma, prev, end);
+ unmap_region(mm, vma, prev, start, end);
/* Fix up all other VM information */
- unmap_vma_list(mm, mpnt);
+ unmap_vma_list(mm, vma);
return 0;
}
@@ -1903,9 +1826,10 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
*/
if (mm->def_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
- locked = mm->locked_vm << PAGE_SHIFT;
+ locked = len >> PAGE_SHIFT;
+ locked += mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
- locked += len;
+ lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
@@ -1928,8 +1852,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
}
/* Check against address space limits *after* clearing old maps... */
- if ((mm->total_vm << PAGE_SHIFT) + len
- > current->signal->rlim[RLIMIT_AS].rlim_cur)
+ if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (mm->map_count > sysctl_max_map_count)
@@ -1968,8 +1891,6 @@ out:
mm->locked_vm += len >> PAGE_SHIFT;
make_pages_present(addr, addr + len);
}
- acct_update_integrals();
- update_mem_hiwater();
return addr;
}
@@ -1979,8 +1900,9 @@ EXPORT_SYMBOL(do_brk);
void exit_mmap(struct mm_struct *mm)
{
struct mmu_gather *tlb;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = mm->mmap;
unsigned long nr_accounted = 0;
+ unsigned long end;
#ifdef arch_exit_mmap
arch_exit_mmap(mm);
@@ -1990,21 +1912,17 @@ void exit_mmap(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
- tlb = tlb_gather_mmu(mm, 1);
flush_cache_mm(mm);
- /* Use ~0UL here to ensure all VMAs in the mm are unmapped */
- mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
- ~0UL, &nr_accounted, NULL);
+ tlb = tlb_gather_mmu(mm, 1);
+ /* Use -1 here to ensure all VMAs in the mm are unmapped */
+ end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- BUG_ON(mm->map_count); /* This is just debugging */
- clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE, MM_VM_SIZE(mm));
-
- tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
+ free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+ tlb_finish_mmu(tlb, 0, end);
- vma = mm->mmap;
mm->mmap = mm->mmap_cache = NULL;
mm->mm_rb = RB_ROOT;
- mm->rss = 0;
+ set_mm_counter(mm, rss, 0);
mm->total_vm = 0;
mm->locked_vm = 0;
@@ -2019,6 +1937,8 @@ void exit_mmap(struct mm_struct *mm)
remove_vm_struct(vma);
vma = next;
}
+
+ BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}
/* Insert vm structure into process list sorted by address
@@ -2106,3 +2026,19 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
}
return new_vma;
}
+
+/*
+ * Return true if the calling process may expand its vm space by the passed
+ * number of pages
+ */
+int may_expand_vm(struct mm_struct *mm, unsigned long npages)
+{
+ unsigned long cur = mm->total_vm; /* pages */
+ unsigned long lim;
+
+ lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+
+ if (cur + npages > lim)
+ return 0;
+ return 1;
+}
diff --git a/linux-2.6-xen-sparse/mm/page_alloc.c b/linux-2.6-xen-sparse/mm/page_alloc.c
index 4d55438fc3..cf6749cd25 100644
--- a/linux-2.6-xen-sparse/mm/page_alloc.c
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c
@@ -31,19 +31,26 @@
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
+#include <linux/cpuset.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
-/* MCD - HACK: Find somewhere to initialize this EARLY, or make this initializer cleaner */
+/*
+ * MCD - HACK: Find somewhere to initialize this EARLY, or make this
+ * initializer cleaner
+ */
nodemask_t node_online_map = { { [0] = 1UL } };
+EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map = NODE_MASK_ALL;
+EXPORT_SYMBOL(node_possible_map);
struct pglist_data *pgdat_list;
unsigned long totalram_pages;
unsigned long totalhigh_pages;
long nr_swap_pages;
+
/*
* results with 256, 32 in the lowmem_reserve sysctl:
* 1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
@@ -191,6 +198,37 @@ static inline void rmv_page_order(struct page *page)
}
/*
+ * Locate the struct page for both the matching buddy in our
+ * pair (buddy1) and the combined O(n+1) page they form (page).
+ *
+ * 1) Any buddy B1 will have an order O twin B2 which satisfies
+ * the following equation:
+ * B2 = B1 ^ (1 << O)
+ * For example, if the starting buddy (B1) is #8, its order
+ * 1 buddy (B2) is #10:
+ * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
+ *
+ * 2) Any buddy B will have an order O+1 parent P which
+ * satisfies the following equation:
+ * P = B & ~(1 << O)
+ *
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
+ */
+static inline struct page *
+__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
+{
+ unsigned long buddy_idx = page_idx ^ (1 << order);
+
+ return page + (buddy_idx - page_idx);
+}
+
+static inline unsigned long
+__find_combined_index(unsigned long page_idx, unsigned int order)
+{
+ return (page_idx & ~(1 << order));
+}
+
+/*
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (a) the buddy is free &&
@@ -233,50 +271,49 @@ static inline int page_is_buddy(struct page *page, int order)
* -- wli
*/
-static inline void __free_pages_bulk (struct page *page, struct page *base,
+static inline void __free_pages_bulk (struct page *page,
struct zone *zone, unsigned int order)
{
unsigned long page_idx;
- struct page *coalesced;
int order_size = 1 << order;
if (unlikely(order))
destroy_compound_page(page, order);
- page_idx = page - base;
+ page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
BUG_ON(page_idx & (order_size - 1));
BUG_ON(bad_range(zone, page));
zone->free_pages += order_size;
while (order < MAX_ORDER-1) {
+ unsigned long combined_idx;
struct free_area *area;
struct page *buddy;
- int buddy_idx;
- buddy_idx = (page_idx ^ (1 << order));
- buddy = base + buddy_idx;
+ combined_idx = __find_combined_index(page_idx, order);
+ buddy = __page_find_buddy(page, page_idx, order);
+
if (bad_range(zone, buddy))
break;
if (!page_is_buddy(buddy, order))
- break;
- /* Move the buddy up one level. */
+ break; /* Move the buddy up one level. */
list_del(&buddy->lru);
area = zone->free_area + order;
area->nr_free--;
rmv_page_order(buddy);
- page_idx &= buddy_idx;
+ page = page + (combined_idx - page_idx);
+ page_idx = combined_idx;
order++;
}
- coalesced = base + page_idx;
- set_page_order(coalesced, order);
- list_add(&coalesced->lru, &zone->free_area[order].free_list);
+ set_page_order(page, order);
+ list_add(&page->lru, &zone->free_area[order].free_list);
zone->free_area[order].nr_free++;
}
static inline void free_pages_check(const char *function, struct page *page)
{
- if ( page_mapped(page) ||
+ if ( page_mapcount(page) ||
page->mapping != NULL ||
page_count(page) != 0 ||
(page->flags & (
@@ -309,10 +346,9 @@ free_pages_bulk(struct zone *zone, int count,
struct list_head *list, unsigned int order)
{
unsigned long flags;
- struct page *base, *page = NULL;
+ struct page *page = NULL;
int ret = 0;
- base = zone->zone_mem_map;
spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
@@ -320,7 +356,7 @@ free_pages_bulk(struct zone *zone, int count,
page = list_entry(list->prev, struct page, lru);
/* have to delete it as __free_pages_bulk list manipulates */
list_del(&page->lru);
- __free_pages_bulk(page, base, zone, order);
+ __free_pages_bulk(page, zone, order);
ret++;
}
spin_unlock_irqrestore(&zone->lock, flags);
@@ -405,7 +441,7 @@ void set_page_refs(struct page *page, int order)
*/
static void prep_new_page(struct page *page, int order)
{
- if (page->mapping || page_mapped(page) ||
+ if (page->mapping || page_mapcount(page) ||
(page->flags & (
1 << PG_private |
1 << PG_locked |
@@ -601,7 +637,7 @@ void fastcall free_cold_page(struct page *page)
free_hot_cold_page(page, 1);
}
-static inline void prep_zero_page(struct page *page, int order, int gfp_flags)
+static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags)
{
int i;
@@ -616,7 +652,7 @@ static inline void prep_zero_page(struct page *page, int order, int gfp_flags)
* or two.
*/
static struct page *
-buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
+buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
{
unsigned long flags;
struct page *page = NULL;
@@ -694,7 +730,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
* This is the 'heart' of the zoned buddy allocator.
*/
struct page * fastcall
-__alloc_pages(unsigned int gfp_mask, unsigned int order,
+__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
const int wait = gfp_mask & __GFP_WAIT;
@@ -734,6 +770,9 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
classzone_idx, 0, 0))
continue;
+ if (!cpuset_zone_allowed(z))
+ continue;
+
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
@@ -745,6 +784,9 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
* coming from realtime tasks to go deeper into reserves
+ *
+ * This is the last chance, in general, before the goto nopage.
+ * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
*/
for (i = 0; (z = zones[i]) != NULL; i++) {
if (!zone_watermark_ok(z, order, z->pages_min,
@@ -752,18 +794,27 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
gfp_mask & __GFP_HIGH))
continue;
+ if (wait && !cpuset_zone_allowed(z))
+ continue;
+
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
}
/* This allocation should allow future memory freeing. */
- if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) {
- /* go through the zonelist yet again, ignoring mins */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
+
+ if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+ && !in_interrupt()) {
+ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+ /* go through the zonelist yet again, ignoring mins */
+ for (i = 0; (z = zones[i]) != NULL; i++) {
+ if (!cpuset_zone_allowed(z))
+ continue;
+ page = buffered_rmqueue(z, order, gfp_mask);
+ if (page)
+ goto got_pg;
+ }
}
goto nopage;
}
@@ -800,6 +851,9 @@ rebalance:
gfp_mask & __GFP_HIGH))
continue;
+ if (!cpuset_zone_allowed(z))
+ continue;
+
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
@@ -816,6 +870,9 @@ rebalance:
classzone_idx, 0, 0))
continue;
+ if (!cpuset_zone_allowed(z))
+ continue;
+
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
@@ -862,7 +919,7 @@ EXPORT_SYMBOL(__alloc_pages);
/*
* Common helper functions.
*/
-fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
+fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)
{
struct page * page;
page = alloc_pages(gfp_mask, order);
@@ -873,7 +930,7 @@ fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int orde
EXPORT_SYMBOL(__get_free_pages);
-fastcall unsigned long get_zeroed_page(unsigned int gfp_mask)
+fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask)
{
struct page * page;
@@ -1302,8 +1359,7 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
#define MAX_NODE_LOAD (num_online_nodes())
static int __initdata node_load[MAX_NUMNODES];
/**
- * find_next_best_node - find the next node that should appear in a given
- * node's fallback list
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
* @node: node whose fallback list we're appending
* @used_node_mask: nodemask_t of already used nodes
*
@@ -1372,7 +1428,6 @@ static void __init build_zonelists(pg_data_t *pgdat)
/* initialize zonelists */
for (i = 0; i < GFP_ZONETYPES; i++) {
zonelist = pgdat->node_zonelists + i;
- memset(zonelist, 0, sizeof(*zonelist));
zonelist->zones[0] = NULL;
}
@@ -1419,7 +1474,6 @@ static void __init build_zonelists(pg_data_t *pgdat)
struct zonelist *zonelist;
zonelist = pgdat->node_zonelists + i;
- memset(zonelist, 0, sizeof(*zonelist));
j = 0;
k = ZONE_NORMAL;
@@ -1461,6 +1515,7 @@ void __init build_all_zonelists(void)
for_each_online_node(i)
build_zonelists(NODE_DATA(i));
printk("Built %i zonelists\n", num_online_nodes());
+ cpuset_init_current_mems_allowed();
}
/*
@@ -1623,6 +1678,18 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
if (batch < 1)
batch = 1;
+ /*
+ * Clamp the batch to a 2^n - 1 value. Having a power
+ * of 2 value was found to be more likely to have
+ * suboptimal cache aliasing properties in some cases.
+ *
+ * For example if 2 tasks are alternately allocating
+ * batches of pages, one task can end up with a lot
+ * of pages of one half of the possible page colors
+ * and the other with pages of the other colors.
+ */
+ batch = (1 << fls(batch + batch/2)) - 1;
+
for (cpu = 0; cpu < NR_CPUS; cpu++) {
struct per_cpu_pages *pcp;
@@ -1681,14 +1748,25 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
}
}
-void __init node_alloc_mem_map(struct pglist_data *pgdat)
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
{
unsigned long size;
- size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
- pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+ /* Skip empty nodes */
+ if (!pgdat->node_spanned_pages)
+ return;
+
+ /* ia64 gets its own node_mem_map, before this, without bootmem */
+ if (!pgdat->node_mem_map) {
+ size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+ pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+ }
#ifndef CONFIG_DISCONTIGMEM
- mem_map = contig_page_data.node_mem_map;
+ /*
+ * With no DISCONTIG, the global mem_map is just set as node 0's
+ */
+ if (pgdat == NODE_DATA(0))
+ mem_map = NODE_DATA(0)->node_mem_map;
#endif
}
@@ -1700,8 +1778,7 @@ void __init free_area_init_node(int nid, struct pglist_data *pgdat,
pgdat->node_start_pfn = node_start_pfn;
calculate_zone_totalpages(pgdat, zones_size, zholes_size);
- if (!pfn_to_page(node_start_pfn))
- node_alloc_mem_map(pgdat);
+ alloc_node_mem_map(pgdat);
free_area_init_core(pgdat, zones_size, zholes_size);
}
@@ -1823,6 +1900,7 @@ static char *vmstat_text[] = {
"allocstall",
"pgrotated",
+ "nr_bounce",
};
static void *vmstat_start(struct seq_file *m, loff_t *pos)
@@ -1926,15 +2004,20 @@ static void setup_per_zone_lowmem_reserve(void)
for_each_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) {
- struct zone * zone = pgdat->node_zones + j;
+ struct zone *zone = pgdat->node_zones + j;
unsigned long present_pages = zone->present_pages;
zone->lowmem_reserve[j] = 0;
for (idx = j-1; idx >= 0; idx--) {
- struct zone * lower_zone = pgdat->node_zones + idx;
+ struct zone *lower_zone;
+
+ if (sysctl_lowmem_reserve_ratio[idx] < 1)
+ sysctl_lowmem_reserve_ratio[idx] = 1;
- lower_zone->lowmem_reserve[j] = present_pages / sysctl_lowmem_reserve_ratio[idx];
+ lower_zone = pgdat->node_zones + idx;
+ lower_zone->lowmem_reserve[j] = present_pages /
+ sysctl_lowmem_reserve_ratio[idx];
present_pages += lower_zone->present_pages;
}
}
@@ -2041,7 +2124,7 @@ module_init(init_per_zone_pages_min)
* changes.
*/
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, file, buffer, length, ppos);
setup_per_zone_pages_min();
@@ -2058,7 +2141,7 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
* if in function of the boot time zone sizes.
*/
int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec_minmax(table, write, file, buffer, length, ppos);
setup_per_zone_lowmem_reserve();
diff --git a/linux-2.6-xen-sparse/net/core/dev.c b/linux-2.6-xen-sparse/net/core/dev.c
index b5e12b06ec..8c73647ecb 100644
--- a/linux-2.6-xen-sparse/net/core/dev.c
+++ b/linux-2.6-xen-sparse/net/core/dev.c
@@ -7,7 +7,7 @@
* 2 of the License, or (at your option) any later version.
*
* Derived from the non IP parts of dev.c 1.0.19
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
*
@@ -766,6 +766,18 @@ int dev_change_name(struct net_device *dev, char *newname)
}
/**
+ * netdev_features_change - device changes features
+ * @dev: device to cause notification
+ *
+ * Called to indicate a device has changed features.
+ */
+void netdev_features_change(struct net_device *dev)
+{
+ notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL(netdev_features_change);
+
+/**
* netdev_state_change - device changes state
* @dev: device to cause notification
*
@@ -1219,6 +1231,19 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
* A negative errno code is returned on a failure. A success does not
* guarantee the frame will be transmitted as it may be dropped due
* to congestion or traffic shaping.
+ *
+ * -----------------------------------------------------------------------------------
+ * I notice this method can also return errors from the queue disciplines,
+ * including NET_XMIT_DROP, which is a positive value. So, errors can also
+ * be positive.
+ *
+ * Regardless of the return value, the skb is consumed, so it is currently
+ * difficult to retry a send to this method. (You can bump the ref count
+ * before sending to hold a reference for retry if you are careful.)
+ *
+ * When calling this method, interrupts MUST be enabled. This is because
+ * the BH enable code must have IRQs enabled so that it will not deadlock.
+ * --BLG
*/
int dev_queue_xmit(struct sk_buff *skb)
@@ -1456,13 +1481,10 @@ int netif_rx(struct sk_buff *skb)
struct softnet_data *queue;
unsigned long flags;
-#ifdef CONFIG_NETPOLL
- if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
- kfree_skb(skb);
+ /* if netpoll wants it, pretend we never saw it */
+ if (netpoll_rx(skb))
return NET_RX_DROP;
- }
-#endif
-
+
if (!skb->stamp.tv_sec)
net_timestamp(&skb->stamp);
@@ -1590,6 +1612,10 @@ static __inline__ int deliver_skb(struct sk_buff *skb,
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+struct net_bridge;
+struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
+ unsigned char *addr);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret)
@@ -1658,12 +1684,9 @@ int netif_receive_skb(struct sk_buff *skb)
int ret = NET_RX_DROP;
unsigned short type;
-#ifdef CONFIG_NETPOLL
- if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
- kfree_skb(skb);
+ /* if we've gotten here through NAPI, check netpoll */
+ if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
- }
-#endif
if (!skb->stamp.tv_sec)
net_timestamp(&skb->stamp);
@@ -1761,6 +1784,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
+ backlog_dev->weight = weight_p;
for (;;) {
struct sk_buff *skb;
struct net_device *dev;
@@ -1821,8 +1845,10 @@ static void net_rx_action(struct softirq_action *h)
dev = list_entry(queue->poll_list.next,
struct net_device, poll_list);
+ netpoll_poll_lock(dev);
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
+ netpoll_poll_unlock(dev);
local_irq_disable();
list_del(&dev->poll_list);
list_add_tail(&dev->poll_list, &queue->poll_list);
@@ -1831,6 +1857,7 @@ static void net_rx_action(struct softirq_action *h)
else
dev->quota = dev->weight;
} else {
+ netpoll_poll_unlock(dev);
dev_put(dev);
local_irq_disable();
}
@@ -2340,6 +2367,21 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
return err;
}
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+{
+ int err;
+
+ if (!dev->set_mac_address)
+ return -EOPNOTSUPP;
+ if (sa->sa_family != dev->type)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ err = dev->set_mac_address(dev, sa);
+ if (!err)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ return err;
+}
/*
* Perform the SIOCxIFxxx calls.
@@ -2386,17 +2428,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return 0;
case SIOCSIFHWADDR:
- if (!dev->set_mac_address)
- return -EOPNOTSUPP;
- if (ifr->ifr_hwaddr.sa_family != dev->type)
- return -EINVAL;
- if (!netif_device_present(dev))
- return -ENODEV;
- err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
- if (!err)
- notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEADDR, dev);
- return err;
+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
case SIOCSIFHWBROADCAST:
if (ifr->ifr_hwaddr.sa_family != dev->type)
@@ -3112,7 +3144,7 @@ void free_netdev(struct net_device *dev)
void synchronize_net(void)
{
might_sleep();
- synchronize_kernel();
+ synchronize_rcu();
}
/**
@@ -3362,6 +3394,7 @@ EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
+EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
@@ -3377,9 +3410,12 @@ EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
+EXPORT_SYMBOL(dev_get_flags);
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
+EXPORT_SYMBOL(br_fdb_get_hook);
+EXPORT_SYMBOL(br_fdb_put_hook);
#endif
#ifdef CONFIG_KMOD
diff --git a/linux-2.6-xen-sparse/net/core/skbuff.c b/linux-2.6-xen-sparse/net/core/skbuff.c
index be2801e883..9e144aa414 100644
--- a/linux-2.6-xen-sparse/net/core/skbuff.c
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c
@@ -86,8 +86,10 @@ static kmem_cache_t *skbuff_head_cache;
*/
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
- printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
- here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
+ "data:%p tail:%p end:%p dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
@@ -102,8 +104,10 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
- printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
- here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
+ "data:%p tail:%p end:%p dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
@@ -241,7 +245,8 @@ static void skb_clone_fraglist(struct sk_buff *skb)
void skb_release_data(struct sk_buff *skb)
{
if (!skb->cloned ||
- atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+ !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+ &skb_shinfo(skb)->dataref)) {
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
@@ -275,20 +280,14 @@ void kfree_skbmem(struct sk_buff *skb)
void __kfree_skb(struct sk_buff *skb)
{
- if (skb->list) {
- printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
- "on a list (from %p).\n", NET_CALLER(skb));
- BUG();
- }
+ BUG_ON(skb->list != NULL);
dst_release(skb->dst);
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
#endif
- if(skb->destructor) {
- if (in_irq())
- printk(KERN_WARNING "Warning: kfree_skb on "
- "hard IRQ %p\n", NET_CALLER(skb));
+ if (skb->destructor) {
+ WARN_ON(in_irq());
skb->destructor(skb);
}
#ifdef CONFIG_NETFILTER
@@ -353,6 +352,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
C(csum);
C(local_df);
n->cloned = 1;
+ n->nohdr = 0;
C(proto_csum_valid);
C(proto_csum_blank);
C(pkt_type);
@@ -606,6 +606,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
skb->h.raw += off;
skb->nh.raw += off;
skb->cloned = 0;
+ skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
return 0;
@@ -984,70 +985,94 @@ fault:
return -EFAULT;
}
-/* Keep iterating until skb_iter_next returns false. */
-void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i)
-{
- i->len = skb_headlen(skb);
- i->data = (unsigned char *)skb->data;
- i->nextfrag = 0;
- i->fraglist = NULL;
-}
+/**
+ * skb_store_bits - store bits from kernel buffer to skb
+ * @skb: destination buffer
+ * @offset: offset in destination
+ * @from: source buffer
+ * @len: number of bytes to copy
+ *
+ * Copy the specified number of bytes from the source buffer to the
+ * destination skb. This function handles all the messy bits of
+ * traversing fragment lists and such.
+ */
-int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i)
+int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
{
- /* Unmap previous, if not head fragment. */
- if (i->nextfrag)
- kunmap_skb_frag(i->data);
-
- if (i->fraglist) {
- fraglist:
- /* We're iterating through fraglist. */
- if (i->nextfrag < skb_shinfo(i->fraglist)->nr_frags) {
- i->data = kmap_skb_frag(&skb_shinfo(i->fraglist)
- ->frags[i->nextfrag]);
- i->len = skb_shinfo(i->fraglist)->frags[i->nextfrag]
- .size;
- i->nextfrag++;
- return 1;
- }
- /* Fragments with fragments? Too hard! */
- BUG_ON(skb_shinfo(i->fraglist)->frag_list);
- i->fraglist = i->fraglist->next;
- if (!i->fraglist)
- goto end;
-
- i->len = skb_headlen(i->fraglist);
- i->data = i->fraglist->data;
- i->nextfrag = 0;
- return 1;
+ int i, copy;
+ int start = skb_headlen(skb);
+
+ if (offset > (int)skb->len - len)
+ goto fault;
+
+ if ((copy = start - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ memcpy(skb->data + offset, from, copy);
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from += copy;
}
- if (i->nextfrag < skb_shinfo(skb)->nr_frags) {
- i->data = kmap_skb_frag(&skb_shinfo(skb)->frags[i->nextfrag]);
- i->len = skb_shinfo(skb)->frags[i->nextfrag].size;
- i->nextfrag++;
- return 1;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + frag->size;
+ if ((copy = end - offset) > 0) {
+ u8 *vaddr;
+
+ if (copy > len)
+ copy = len;
+
+ vaddr = kmap_skb_frag(frag);
+ memcpy(vaddr + frag->page_offset + offset - start,
+ from, copy);
+ kunmap_skb_frag(vaddr);
+
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from += copy;
+ }
+ start = end;
}
- i->fraglist = skb_shinfo(skb)->frag_list;
- if (i->fraglist)
- goto fraglist;
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
-end:
- /* Bug trap for callers */
- i->data = NULL;
- return 0;
-}
+ for (; list; list = list->next) {
+ int end;
-void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i)
-{
- /* Unmap previous, if not head fragment. */
- if (i->data && i->nextfrag)
- kunmap_skb_frag(i->data);
- /* Bug trap for callers */
- i->data = NULL;
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_store_bits(list, offset - start,
+ from, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from += copy;
+ }
+ start = end;
+ }
+ }
+ if (!len)
+ return 0;
+
+fault:
+ return -EFAULT;
}
+EXPORT_SYMBOL(skb_store_bits);
+
/* Checksum skb data. */
unsigned int skb_checksum(const struct sk_buff *skb, int offset,
@@ -1446,7 +1471,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
if (pos < len) {
/* Split frag.
- * We have to variants in this case:
+ * We have two variants in this case:
* 1. Move all the frag to the second
* part, if it is possible. F.e.
* this approach is mandatory for TUX,
@@ -1469,6 +1494,9 @@ static inline void skb_split_no_header(struct sk_buff *skb,
/**
* skb_split - Split fragmented skb to two parts at length len.
+ * @skb: the buffer to split
+ * @skb1: the buffer to receive the second part
+ * @len: new length for skb
*/
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
@@ -1518,6 +1546,3 @@ EXPORT_SYMBOL(skb_queue_tail);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);
-EXPORT_SYMBOL(skb_iter_first);
-EXPORT_SYMBOL(skb_iter_next);
-EXPORT_SYMBOL(skb_iter_abort);
diff --git a/patches/linux-2.6.11/agpgart.patch b/patches/linux-2.6.11/agpgart.patch
deleted file mode 100644
index 87dded22d4..0000000000
--- a/patches/linux-2.6.11/agpgart.patch
+++ /dev/null
@@ -1,437 +0,0 @@
---- linux-2.6.11/drivers/char/agp/agp.h 2005-03-02 07:38:07 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/agp.h 2005-03-22 11:14:02 +00:00
-@@ -272,6 +272,8 @@
- #define AGP_GENERIC_SIZES_ENTRIES 11
- extern struct aper_size_info_16 agp3_generic_sizes[];
-
-+#define virt_to_gart(x) (phys_to_gart(virt_to_phys(x)))
-+#define gart_to_virt(x) (phys_to_virt(gart_to_phys(x)))
-
- extern int agp_off;
- extern int agp_try_unsupported_boot;
---- linux-2.6.11/drivers/char/agp/ali-agp.c 2005-03-02 07:38:13 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/ali-agp.c 2005-03-22 11:14:56 +00:00
-@@ -150,7 +150,7 @@
- pci_read_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, &temp);
- pci_write_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL,
- (((temp & ALI_CACHE_FLUSH_ADDR_MASK) |
-- virt_to_phys(addr)) | ALI_CACHE_FLUSH_EN ));
-+ virt_to_gart(addr)) | ALI_CACHE_FLUSH_EN ));
- return addr;
- }
-
-@@ -174,7 +174,7 @@
- pci_read_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, &temp);
- pci_write_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL,
- (((temp & ALI_CACHE_FLUSH_ADDR_MASK) |
-- virt_to_phys(addr)) | ALI_CACHE_FLUSH_EN));
-+ virt_to_gart(addr)) | ALI_CACHE_FLUSH_EN));
- agp_generic_destroy_page(addr);
- }
-
---- linux-2.6.11/drivers/char/agp/amd-k7-agp.c 2005-03-02 07:38:33 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/amd-k7-agp.c 2005-03-22 11:14:56 +00:00
-@@ -43,7 +43,7 @@
-
- SetPageReserved(virt_to_page(page_map->real));
- global_cache_flush();
-- page_map->remapped = ioremap_nocache(virt_to_phys(page_map->real),
-+ page_map->remapped = ioremap_nocache(virt_to_gart(page_map->real),
- PAGE_SIZE);
- if (page_map->remapped == NULL) {
- ClearPageReserved(virt_to_page(page_map->real));
-@@ -154,7 +154,7 @@
-
- agp_bridge->gatt_table_real = (u32 *)page_dir.real;
- agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
-- agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
-+ agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
-
- /* Get the address for the gart region.
- * This is a bus address even on the alpha, b/c its
-@@ -167,7 +167,7 @@
-
- /* Calculate the agp offset */
- for (i = 0; i < value->num_entries / 1024; i++, addr += 0x00400000) {
-- writel(virt_to_phys(amd_irongate_private.gatt_pages[i]->real) | 1,
-+ writel(virt_to_gart(amd_irongate_private.gatt_pages[i]->real) | 1,
- page_dir.remapped+GET_PAGE_DIR_OFF(addr));
- readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr)); /* PCI Posting. */
- }
---- linux-2.6.11/drivers/char/agp/amd64-agp.c 2005-03-02 07:38:13 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/amd64-agp.c 2005-03-22 11:14:56 +00:00
-@@ -218,7 +218,7 @@
-
- static int amd_8151_configure(void)
- {
-- unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
-+ unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
-
- /* Configure AGP regs in each x86-64 host bridge. */
- for_each_nb() {
-@@ -590,7 +590,7 @@
- {
- struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
-
-- release_mem_region(virt_to_phys(bridge->gatt_table_real),
-+ release_mem_region(virt_to_gart(bridge->gatt_table_real),
- amd64_aperture_sizes[bridge->aperture_size_idx].size);
- agp_remove_bridge(bridge);
- agp_put_bridge(bridge);
---- linux-2.6.11/drivers/char/agp/ati-agp.c 2005-03-02 07:38:13 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/ati-agp.c 2005-03-22 11:14:56 +00:00
-@@ -61,7 +61,7 @@
-
- SetPageReserved(virt_to_page(page_map->real));
- err = map_page_into_agp(virt_to_page(page_map->real));
-- page_map->remapped = ioremap_nocache(virt_to_phys(page_map->real),
-+ page_map->remapped = ioremap_nocache(virt_to_gart(page_map->real),
- PAGE_SIZE);
- if (page_map->remapped == NULL || err) {
- ClearPageReserved(virt_to_page(page_map->real));
---- linux-2.6.11/drivers/char/agp/backend.c 2005-03-02 07:38:13 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/backend.c 2005-03-22 11:14:56 +00:00
-@@ -142,7 +142,7 @@
- return -ENOMEM;
- }
-
-- bridge->scratch_page_real = virt_to_phys(addr);
-+ bridge->scratch_page_real = virt_to_gart(addr);
- bridge->scratch_page =
- bridge->driver->mask_memory(bridge->scratch_page_real, 0);
- }
-@@ -186,7 +186,7 @@
- err_out:
- if (bridge->driver->needs_scratch_page)
- bridge->driver->agp_destroy_page(
-- phys_to_virt(bridge->scratch_page_real));
-+ gart_to_virt(bridge->scratch_page_real));
- if (got_gatt)
- bridge->driver->free_gatt_table();
- if (got_keylist) {
-@@ -211,7 +211,7 @@
- if (bridge->driver->agp_destroy_page &&
- bridge->driver->needs_scratch_page)
- bridge->driver->agp_destroy_page(
-- phys_to_virt(bridge->scratch_page_real));
-+ gart_to_virt(bridge->scratch_page_real));
- }
-
- /* XXX Kludge alert: agpgart isn't ready for multiple bridges yet */
---- linux-2.6.11/drivers/char/agp/efficeon-agp.c 2005-03-02 07:37:30 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/efficeon-agp.c 2005-03-22 11:15:17 +00:00
-@@ -219,7 +219,7 @@
-
- efficeon_private.l1_table[index] = page;
-
-- value = __pa(page) | pati | present | index;
-+ value = virt_to_gart(page) | pati | present | index;
-
- pci_write_config_dword(agp_bridge->dev,
- EFFICEON_ATTPAGE, value);
---- linux-2.6.11/drivers/char/agp/generic.c 2005-03-02 07:37:55 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/generic.c 2005-03-22 11:17:37 +00:00
-@@ -151,7 +151,7 @@
- }
- if (curr->page_count != 0) {
- for (i = 0; i < curr->page_count; i++) {
-- agp_bridge->driver->agp_destroy_page(phys_to_virt(curr->memory[i]));
-+ agp_bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[i]));
- }
- }
- agp_free_key(curr->key);
-@@ -204,7 +204,7 @@
- agp_free_memory(new);
- return NULL;
- }
-- new->memory[i] = virt_to_phys(addr);
-+ new->memory[i] = virt_to_gart(addr);
- new->page_count++;
- }
-
-@@ -697,8 +697,7 @@
- break;
- }
-
-- table = (char *) __get_free_pages(GFP_KERNEL,
-- page_order);
-+ table = alloc_gatt_pages(page_order);
-
- if (table == NULL) {
- i++;
-@@ -729,7 +728,7 @@
- size = ((struct aper_size_info_fixed *) temp)->size;
- page_order = ((struct aper_size_info_fixed *) temp)->page_order;
- num_entries = ((struct aper_size_info_fixed *) temp)->num_entries;
-- table = (char *) __get_free_pages(GFP_KERNEL, page_order);
-+ table = alloc_gatt_pages(page_order);
- }
-
- if (table == NULL)
-@@ -744,7 +743,7 @@
- agp_gatt_table = (void *)table;
-
- agp_bridge->driver->cache_flush();
-- agp_bridge->gatt_table = ioremap_nocache(virt_to_phys(table),
-+ agp_bridge->gatt_table = ioremap_nocache(virt_to_gart(table),
- (PAGE_SIZE * (1 << page_order)));
- agp_bridge->driver->cache_flush();
-
-@@ -752,11 +751,11 @@
- for (page = virt_to_page(table); page <= virt_to_page(table_end); page++)
- ClearPageReserved(page);
-
-- free_pages((unsigned long) table, page_order);
-+ free_gatt_pages(table, page_order);
-
- return -ENOMEM;
- }
-- agp_bridge->gatt_bus_addr = virt_to_phys(agp_bridge->gatt_table_real);
-+ agp_bridge->gatt_bus_addr = virt_to_gart(agp_bridge->gatt_table_real);
-
- /* AK: bogus, should encode addresses > 4GB */
- for (i = 0; i < num_entries; i++) {
-@@ -810,7 +809,7 @@
- for (page = virt_to_page(table); page <= virt_to_page(table_end); page++)
- ClearPageReserved(page);
-
-- free_pages((unsigned long) agp_bridge->gatt_table_real, page_order);
-+ free_gatt_pages(agp_bridge->gatt_table_real, page_order);
-
- agp_gatt_table = NULL;
- agp_bridge->gatt_table = NULL;
---- linux-2.6.11/drivers/char/agp/hp-agp.c 2005-03-02 07:38:19 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/hp-agp.c 2005-03-22 11:14:56 +00:00
-@@ -110,7 +110,7 @@
- hp->gart_size = HP_ZX1_GART_SIZE;
- hp->gatt_entries = hp->gart_size / hp->io_page_size;
-
-- hp->io_pdir = phys_to_virt(readq(hp->ioc_regs+HP_ZX1_PDIR_BASE));
-+ hp->io_pdir = gart_to_virt(readq(hp->ioc_regs+HP_ZX1_PDIR_BASE));
- hp->gatt = &hp->io_pdir[HP_ZX1_IOVA_TO_PDIR(hp->gart_base)];
-
- if (hp->gatt[0] != HP_ZX1_SBA_IOMMU_COOKIE) {
-@@ -248,7 +248,7 @@
- agp_bridge->mode = readl(hp->lba_regs+hp->lba_cap_offset+PCI_AGP_STATUS);
-
- if (hp->io_pdir_owner) {
-- writel(virt_to_phys(hp->io_pdir), hp->ioc_regs+HP_ZX1_PDIR_BASE);
-+ writel(virt_to_gart(hp->io_pdir), hp->ioc_regs+HP_ZX1_PDIR_BASE);
- readl(hp->ioc_regs+HP_ZX1_PDIR_BASE);
- writel(hp->io_tlb_ps, hp->ioc_regs+HP_ZX1_TCNFG);
- readl(hp->ioc_regs+HP_ZX1_TCNFG);
---- linux-2.6.11/drivers/char/agp/i460-agp.c 2005-03-02 07:38:10 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/i460-agp.c 2005-03-22 11:14:56 +00:00
-@@ -371,7 +371,7 @@
- }
- memset(lp->alloced_map, 0, map_size);
-
-- lp->paddr = virt_to_phys(lpage);
-+ lp->paddr = virt_to_gart(lpage);
- lp->refcount = 0;
- atomic_add(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp);
- return 0;
-@@ -382,7 +382,7 @@
- kfree(lp->alloced_map);
- lp->alloced_map = NULL;
-
-- free_pages((unsigned long) phys_to_virt(lp->paddr), I460_IO_PAGE_SHIFT - PAGE_SHIFT);
-+ free_pages((unsigned long) gart_to_virt(lp->paddr), I460_IO_PAGE_SHIFT - PAGE_SHIFT);
- atomic_sub(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp);
- }
-
---- linux-2.6.11/drivers/char/agp/intel-agp.c 2005-03-02 07:38:09 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/intel-agp.c 2005-03-22 11:14:56 +00:00
-@@ -285,7 +285,7 @@
- if (new == NULL)
- return NULL;
-
-- new->memory[0] = virt_to_phys(addr);
-+ new->memory[0] = virt_to_gart(addr);
- if (pg_count == 4) {
- /* kludge to get 4 physical pages for ARGB cursor */
- new->memory[1] = new->memory[0] + PAGE_SIZE;
-@@ -328,10 +328,10 @@
- agp_free_key(curr->key);
- if(curr->type == AGP_PHYS_MEMORY) {
- if (curr->page_count == 4)
-- i8xx_destroy_pages(phys_to_virt(curr->memory[0]));
-+ i8xx_destroy_pages(gart_to_virt(curr->memory[0]));
- else
- agp_bridge->driver->agp_destroy_page(
-- phys_to_virt(curr->memory[0]));
-+ gart_to_virt(curr->memory[0]));
- vfree(curr->memory);
- }
- kfree(curr);
---- linux-2.6.11/drivers/char/agp/intel-mch-agp.c 2005-03-02 07:37:48 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/intel-mch-agp.c 2005-03-22 11:14:56 +00:00
-@@ -51,7 +51,7 @@
- if (new == NULL)
- return NULL;
-
-- new->memory[0] = virt_to_phys(addr);
-+ new->memory[0] = virt_to_gart(addr);
- new->page_count = 1;
- new->num_scratch_pages = 1;
- new->type = AGP_PHYS_MEMORY;
-@@ -63,7 +63,7 @@
- {
- agp_free_key(curr->key);
- if(curr->type == AGP_PHYS_MEMORY) {
-- agp_bridge->driver->agp_destroy_page(phys_to_virt(curr->memory[0]));
-+ agp_bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[0]));
- vfree(curr->memory);
- }
- kfree(curr);
---- linux-2.6.11/drivers/char/agp/sworks-agp.c 2005-03-02 07:38:37 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/sworks-agp.c 2005-03-22 11:14:56 +00:00
-@@ -51,7 +51,7 @@
- }
- SetPageReserved(virt_to_page(page_map->real));
- global_cache_flush();
-- page_map->remapped = ioremap_nocache(virt_to_phys(page_map->real),
-+ page_map->remapped = ioremap_nocache(virt_to_gart(page_map->real),
- PAGE_SIZE);
- if (page_map->remapped == NULL) {
- ClearPageReserved(virt_to_page(page_map->real));
-@@ -162,7 +162,7 @@
- /* Create a fake scratch directory */
- for(i = 0; i < 1024; i++) {
- writel(agp_bridge->scratch_page, serverworks_private.scratch_dir.remapped+i);
-- writel(virt_to_phys(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
-+ writel(virt_to_gart(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
- }
-
- retval = serverworks_create_gatt_pages(value->num_entries / 1024);
-@@ -174,7 +174,7 @@
-
- agp_bridge->gatt_table_real = (u32 *)page_dir.real;
- agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
-- agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
-+ agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
-
- /* Get the address for the gart region.
- * This is a bus address even on the alpha, b/c its
-@@ -187,7 +187,7 @@
- /* Calculate the agp offset */
-
- for(i = 0; i < value->num_entries / 1024; i++)
-- writel(virt_to_phys(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
-+ writel(virt_to_gart(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
-
- return 0;
- }
---- linux-2.6.11/drivers/char/agp/uninorth-agp.c 2005-03-02 07:38:09 +00:00
-+++ linux-2.6.11-agp/drivers/char/agp/uninorth-agp.c 2005-03-22 11:14:56 +00:00
-@@ -200,7 +200,7 @@
-
- agp_bridge->gatt_table_real = (u32 *) table;
- agp_bridge->gatt_table = (u32 *)table;
-- agp_bridge->gatt_bus_addr = virt_to_phys(table);
-+ agp_bridge->gatt_bus_addr = virt_to_gart(table);
-
- for (i = 0; i < num_entries; i++) {
- agp_bridge->gatt_table[i] =
---- linux-2.6.11/include/asm-alpha/agp.h 2005-03-02 07:37:39 +00:00
-+++ linux-2.6.11-agp/include/asm-alpha/agp.h 2005-03-22 11:18:34 +00:00
-@@ -10,4 +10,14 @@
- #define flush_agp_mappings()
- #define flush_agp_cache() mb()
-
-+/* Convert a physical address to an address suitable for the GART. */
-+#define phys_to_gart(x) (x)
-+#define gart_to_phys(x) (x)
-+
-+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-+#define alloc_gatt_pages(order) \
-+ ((char *)__get_free_pages(GFP_KERNEL, (order)))
-+#define free_gatt_pages(table, order) \
-+ free_pages((unsigned long)(table), (order))
-+
- #endif
---- linux-2.6.11/include/asm-i386/agp.h 2005-03-02 07:37:31 +00:00
-+++ linux-2.6.11-agp/include/asm-i386/agp.h 2005-03-22 11:18:39 +00:00
-@@ -21,4 +21,14 @@
- worth it. Would need a page for it. */
- #define flush_agp_cache() asm volatile("wbinvd":::"memory")
-
-+/* Convert a physical address to an address suitable for the GART. */
-+#define phys_to_gart(x) (x)
-+#define gart_to_phys(x) (x)
-+
-+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-+#define alloc_gatt_pages(order) \
-+ ((char *)__get_free_pages(GFP_KERNEL, (order)))
-+#define free_gatt_pages(table, order) \
-+ free_pages((unsigned long)(table), (order))
-+
- #endif
---- linux-2.6.11/include/asm-ia64/agp.h 2005-03-02 07:38:09 +00:00
-+++ linux-2.6.11-agp/include/asm-ia64/agp.h 2005-03-22 11:18:45 +00:00
-@@ -18,4 +18,14 @@
- #define flush_agp_mappings() /* nothing */
- #define flush_agp_cache() mb()
-
-+/* Convert a physical address to an address suitable for the GART. */
-+#define phys_to_gart(x) (x)
-+#define gart_to_phys(x) (x)
-+
-+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-+#define alloc_gatt_pages(order) \
-+ ((char *)__get_free_pages(GFP_KERNEL, (order)))
-+#define free_gatt_pages(table, order) \
-+ free_pages((unsigned long)(table), (order))
-+
- #endif /* _ASM_IA64_AGP_H */
---- linux-2.6.11/include/asm-ppc/agp.h 2005-03-02 07:38:08 +00:00
-+++ linux-2.6.11-agp/include/asm-ppc/agp.h 2005-03-22 11:18:52 +00:00
-@@ -10,4 +10,14 @@
- #define flush_agp_mappings()
- #define flush_agp_cache() mb()
-
-+/* Convert a physical address to an address suitable for the GART. */
-+#define phys_to_gart(x) (x)
-+#define gart_to_phys(x) (x)
-+
-+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-+#define alloc_gatt_pages(order) \
-+ ((char *)__get_free_pages(GFP_KERNEL, (order)))
-+#define free_gatt_pages(table, order) \
-+ free_pages((unsigned long)(table), (order))
-+
- #endif
---- linux-2.6.11/include/asm-sparc64/agp.h 2005-03-02 07:37:48 +00:00
-+++ linux-2.6.11-agp/include/asm-sparc64/agp.h 2005-03-22 11:18:59 +00:00
-@@ -8,4 +8,14 @@
- #define flush_agp_mappings()
- #define flush_agp_cache() mb()
-
-+/* Convert a physical address to an address suitable for the GART. */
-+#define phys_to_gart(x) (x)
-+#define gart_to_phys(x) (x)
-+
-+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-+#define alloc_gatt_pages(order) \
-+ ((char *)__get_free_pages(GFP_KERNEL, (order)))
-+#define free_gatt_pages(table, order) \
-+ free_pages((unsigned long)(table), (order))
-+
- #endif
---- linux-2.6.11/include/asm-x86_64/agp.h 2005-03-02 07:37:30 +00:00
-+++ linux-2.6.11-agp/include/asm-x86_64/agp.h 2005-03-22 11:18:22 +00:00
-@@ -19,4 +19,14 @@
- worth it. Would need a page for it. */
- #define flush_agp_cache() asm volatile("wbinvd":::"memory")
-
-+/* Convert a physical address to an address suitable for the GART. */
-+#define phys_to_gart(x) (x)
-+#define gart_to_phys(x) (x)
-+
-+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-+#define alloc_gatt_pages(order) \
-+ ((char *)__get_free_pages(GFP_KERNEL, (order)))
-+#define free_gatt_pages(table, order) \
-+ free_pages((unsigned long)(table), (order))
-+
- #endif
diff --git a/patches/linux-2.6.11/iomap.patch b/patches/linux-2.6.11/iomap.patch
deleted file mode 100644
index 81b4f3f2ab..0000000000
--- a/patches/linux-2.6.11/iomap.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-diff -ur linux-2.6.11/drivers/char/agp/frontend.c linux-2.6.11-io/drivers/char/agp/frontend.c
---- linux-2.6.11/drivers/char/agp/frontend.c 2005-03-02 07:37:49.000000000 +0000
-+++ linux-2.6.11-io/drivers/char/agp/frontend.c 2005-03-15 17:38:30.000000000 +0000
-@@ -627,7 +627,7 @@
- DBG("client vm_ops=%p", kerninfo.vm_ops);
- if (kerninfo.vm_ops) {
- vma->vm_ops = kerninfo.vm_ops;
-- } else if (remap_pfn_range(vma, vma->vm_start,
-+ } else if (io_remap_pfn_range(vma, vma->vm_start,
- (kerninfo.aper_base + offset) >> PAGE_SHIFT,
- size, vma->vm_page_prot)) {
- goto out_again;
-@@ -643,7 +643,7 @@
- DBG("controller vm_ops=%p", kerninfo.vm_ops);
- if (kerninfo.vm_ops) {
- vma->vm_ops = kerninfo.vm_ops;
-- } else if (remap_pfn_range(vma, vma->vm_start,
-+ } else if (io_remap_pfn_range(vma, vma->vm_start,
- kerninfo.aper_base >> PAGE_SHIFT,
- size, vma->vm_page_prot)) {
- goto out_again;
-diff -ur linux-2.6.11/drivers/char/drm/drm_vm.c linux-2.6.11-io/drivers/char/drm/drm_vm.c
---- linux-2.6.11/drivers/char/drm/drm_vm.c 2005-03-02 07:38:33.000000000 +0000
-+++ linux-2.6.11-io/drivers/char/drm/drm_vm.c 2005-03-15 17:43:26.000000000 +0000
-@@ -630,7 +630,7 @@
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot, 0))
- #else
-- if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
-+ if (io_remap_pfn_range(vma, vma->vm_start,
- (VM_OFFSET(vma) + offset) >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot))
-diff -ur linux-2.6.11/drivers/char/drm/i810_dma.c linux-2.6.11-io/drivers/char/drm/i810_dma.c
---- linux-2.6.11/drivers/char/drm/i810_dma.c 2005-03-02 07:37:55.000000000 +0000
-+++ linux-2.6.11-io/drivers/char/drm/i810_dma.c 2005-03-15 17:53:36.000000000 +0000
-@@ -139,7 +139,7 @@
- buf_priv->currently_mapped = I810_BUF_MAPPED;
- unlock_kernel();
-
-- if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
-+ if (io_remap_pfn_range(vma, vma->vm_start,
- VM_OFFSET(vma) >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot)) return -EAGAIN;
-diff -ur linux-2.6.11/drivers/char/drm/i830_dma.c linux-2.6.11-io/drivers/char/drm/i830_dma.c
---- linux-2.6.11/drivers/char/drm/i830_dma.c 2005-03-02 07:37:48.000000000 +0000
-+++ linux-2.6.11-io/drivers/char/drm/i830_dma.c 2005-03-15 17:53:46.000000000 +0000
-@@ -157,7 +157,7 @@
- buf_priv->currently_mapped = I830_BUF_MAPPED;
- unlock_kernel();
-
-- if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
-+ if (io_remap_pfn_range(vma, vma->vm_start,
- VM_OFFSET(vma) >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot)) return -EAGAIN;
-diff -ur linux-2.6.11/drivers/char/hpet.c linux-2.6.11-io/drivers/char/hpet.c
---- linux-2.6.11/drivers/char/hpet.c 2005-03-02 07:38:10.000000000 +0000
-+++ linux-2.6.11-io/drivers/char/hpet.c 2005-03-15 17:37:22.000000000 +0000
-@@ -76,6 +76,7 @@
- struct hpets {
- struct hpets *hp_next;
- struct hpet __iomem *hp_hpet;
-+ unsigned long hp_hpet_phys;
- struct time_interpolator *hp_interpolator;
- unsigned long hp_period;
- unsigned long hp_delta;
-@@ -265,7 +266,7 @@
- return -EINVAL;
-
- devp = file->private_data;
-- addr = (unsigned long)devp->hd_hpet;
-+ addr = devp->hd_hpets->hp_hpet_phys;
-
- if (addr & (PAGE_SIZE - 1))
- return -ENOSYS;
-@@ -274,7 +275,7 @@
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- addr = __pa(addr);
-
-- if (remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
-+ if (io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot)) {
- printk(KERN_ERR "remap_pfn_range failed in hpet.c\n");
- return -EAGAIN;
-@@ -795,6 +796,7 @@
-
- hpetp->hp_which = hpet_nhpet++;
- hpetp->hp_hpet = hdp->hd_address;
-+ hpetp->hp_hpet_phys = hdp->hd_phys_address;
-
- hpetp->hp_ntimer = hdp->hd_nirqs;
-
-diff -ur linux-2.6.11/drivers/sbus/char/flash.c linux-2.6.11-io/drivers/sbus/char/flash.c
---- linux-2.6.11/drivers/sbus/char/flash.c 2005-03-02 07:38:10.000000000 +0000
-+++ linux-2.6.11-io/drivers/sbus/char/flash.c 2005-03-15 17:20:22.000000000 +0000
-@@ -75,7 +75,7 @@
- pgprot_val(vma->vm_page_prot) |= _PAGE_E;
- vma->vm_flags |= (VM_SHM | VM_LOCKED);
-
-- if (remap_pfn_range(vma, vma->vm_start, addr, size, vma->vm_page_prot))
-+ if (io_remap_pfn_range(vma, vma->vm_start, addr, size, vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-diff -ur linux-2.6.11/include/linux/mm.h linux-2.6.11-io/include/linux/mm.h
---- linux-2.6.11/include/linux/mm.h 2005-03-02 07:37:47.000000000 +0000
-+++ linux-2.6.11-io/include/linux/mm.h 2005-03-15 17:03:46.000000000 +0000
-@@ -815,6 +815,10 @@
- extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
- int remap_pfn_range(struct vm_area_struct *, unsigned long,
- unsigned long, unsigned long, pgprot_t);
-+/* Allow arch override for mapping of device and I/O (non-RAM) pages. */
-+#ifndef io_remap_pfn_range
-+#define io_remap_pfn_range remap_pfn_range
-+#endif
-
- #ifdef CONFIG_PROC_FS
- void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
diff --git a/patches/linux-2.6.11/linux-2.6.11.12.patch b/patches/linux-2.6.11/linux-2.6.11.12.patch
deleted file mode 100644
index 592ea13001..0000000000
--- a/patches/linux-2.6.11/linux-2.6.11.12.patch
+++ /dev/null
@@ -1,2579 +0,0 @@
-diff --git a/Documentation/SecurityBugs b/Documentation/SecurityBugs
-new file mode 100644
---- /dev/null
-+++ b/Documentation/SecurityBugs
-@@ -0,0 +1,38 @@
-+Linux kernel developers take security very seriously. As such, we'd
-+like to know when a security bug is found so that it can be fixed and
-+disclosed as quickly as possible. Please report security bugs to the
-+Linux kernel security team.
-+
-+1) Contact
-+
-+The Linux kernel security team can be contacted by email at
-+<security@kernel.org>. This is a private list of security officers
-+who will help verify the bug report and develop and release a fix.
-+It is possible that the security team will bring in extra help from
-+area maintainers to understand and fix the security vulnerability.
-+
-+As it is with any bug, the more information provided the easier it
-+will be to diagnose and fix. Please review the procedure outlined in
-+REPORTING-BUGS if you are unclear about what information is helpful.
-+Any exploit code is very helpful and will not be released without
-+consent from the reporter unless it has already been made public.
-+
-+2) Disclosure
-+
-+The goal of the Linux kernel security team is to work with the
-+bug submitter to bug resolution as well as disclosure. We prefer
-+to fully disclose the bug as soon as possible. It is reasonable to
-+delay disclosure when the bug or the fix is not yet fully understood,
-+the solution is not well-tested or for vendor coordination. However, we
-+expect these delays to be short, measurable in days, not weeks or months.
-+A disclosure date is negotiated by the security team working with the
-+bug submitter as well as vendors. However, the kernel security team
-+holds the final say when setting a disclosure date. The timeframe for
-+disclosure is from immediate (esp. if it's already publically known)
-+to a few weeks. As a basic default policy, we expect report date to
-+disclosure date to be on the order of 7 days.
-+
-+3) Non-disclosure agreements
-+
-+The Linux kernel security team is not a formal body and therefore unable
-+to enter any non-disclosure agreements.
-diff --git a/MAINTAINERS b/MAINTAINERS
---- a/MAINTAINERS
-+++ b/MAINTAINERS
-@@ -1966,6 +1966,11 @@ M: christer@weinigel.se
- W: http://www.weinigel.se
- S: Supported
-
-+SECURITY CONTACT
-+P: Security Officers
-+M: security@kernel.org
-+S: Supported
-+
- SELINUX SECURITY MODULE
- P: Stephen Smalley
- M: sds@epoch.ncsc.mil
-diff --git a/Makefile b/Makefile
---- a/Makefile
-+++ b/Makefile
-@@ -1,8 +1,8 @@
- VERSION = 2
- PATCHLEVEL = 6
- SUBLEVEL = 11
--EXTRAVERSION =
--NAME=Woozy Numbat
-+EXTRAVERSION = .12
-+NAME=Woozy Beaver
-
- # *DOCUMENTATION*
- # To see a list of typical targets execute "make help"
-diff --git a/REPORTING-BUGS b/REPORTING-BUGS
---- a/REPORTING-BUGS
-+++ b/REPORTING-BUGS
-@@ -16,6 +16,10 @@ code relevant to what you were doing. If
- describe how to recreate it. That is worth even more than the oops itself.
- The list of maintainers is in the MAINTAINERS file in this directory.
-
-+ If it is a security bug, please copy the Security Contact listed
-+in the MAINTAINERS file. They can help coordinate bugfix and disclosure.
-+See Documentation/SecurityBugs for more infomation.
-+
- If you are totally stumped as to whom to send the report, send it to
- linux-kernel@vger.kernel.org. (For more information on the linux-kernel
- mailing list see http://www.tux.org/lkml/).
-diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
---- a/arch/ia64/kernel/fsys.S
-+++ b/arch/ia64/kernel/fsys.S
-@@ -611,8 +611,10 @@ GLOBAL_ENTRY(fsys_bubble_down)
- movl r2=ia64_ret_from_syscall
- ;;
- mov rp=r2 // set the real return addr
-- tbit.z p8,p0=r3,TIF_SYSCALL_TRACE
-+ and r3=_TIF_SYSCALL_TRACEAUDIT,r3
- ;;
-+ cmp.eq p8,p0=r3,r0
-+
- (p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
- (p8) br.call.sptk.many b6=b6 // ignore this return addr
- br.cond.sptk ia64_trace_syscall
-diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
---- a/arch/ia64/kernel/signal.c
-+++ b/arch/ia64/kernel/signal.c
-@@ -224,7 +224,8 @@ ia64_rt_sigreturn (struct sigscratch *sc
- * could be corrupted.
- */
- retval = (long) &ia64_leave_kernel;
-- if (test_thread_flag(TIF_SYSCALL_TRACE))
-+ if (test_thread_flag(TIF_SYSCALL_TRACE)
-+ || test_thread_flag(TIF_SYSCALL_AUDIT))
- /*
- * strace expects to be notified after sigreturn returns even though the
- * context to which we return may not be in the middle of a syscall.
-diff --git a/arch/ppc/oprofile/op_model_fsl_booke.c b/arch/ppc/oprofile/op_model_fsl_booke.c
---- a/arch/ppc/oprofile/op_model_fsl_booke.c
-+++ b/arch/ppc/oprofile/op_model_fsl_booke.c
-@@ -150,7 +150,6 @@ static void fsl_booke_handle_interrupt(s
- int is_kernel;
- int val;
- int i;
-- unsigned int cpu = smp_processor_id();
-
- /* set the PMM bit (see comment below) */
- mtmsr(mfmsr() | MSR_PMM);
-@@ -162,7 +161,7 @@ static void fsl_booke_handle_interrupt(s
- val = ctr_read(i);
- if (val < 0) {
- if (oprofile_running && ctr[i].enabled) {
-- oprofile_add_sample(pc, is_kernel, i, cpu);
-+ oprofile_add_pc(pc, is_kernel, i);
- ctr_write(i, reset_value[i]);
- } else {
- ctr_write(i, 0);
-diff --git a/arch/ppc/platforms/4xx/ebony.h b/arch/ppc/platforms/4xx/ebony.h
---- a/arch/ppc/platforms/4xx/ebony.h
-+++ b/arch/ppc/platforms/4xx/ebony.h
-@@ -61,8 +61,8 @@
- */
-
- /* OpenBIOS defined UART mappings, used before early_serial_setup */
--#define UART0_IO_BASE (u8 *) 0xE0000200
--#define UART1_IO_BASE (u8 *) 0xE0000300
-+#define UART0_IO_BASE 0xE0000200
-+#define UART1_IO_BASE 0xE0000300
-
- /* external Epson SG-615P */
- #define BASE_BAUD 691200
-diff --git a/arch/ppc/platforms/4xx/luan.h b/arch/ppc/platforms/4xx/luan.h
---- a/arch/ppc/platforms/4xx/luan.h
-+++ b/arch/ppc/platforms/4xx/luan.h
-@@ -47,9 +47,9 @@
- #define RS_TABLE_SIZE 3
-
- /* PIBS defined UART mappings, used before early_serial_setup */
--#define UART0_IO_BASE (u8 *) 0xa0000200
--#define UART1_IO_BASE (u8 *) 0xa0000300
--#define UART2_IO_BASE (u8 *) 0xa0000600
-+#define UART0_IO_BASE 0xa0000200
-+#define UART1_IO_BASE 0xa0000300
-+#define UART2_IO_BASE 0xa0000600
-
- #define BASE_BAUD 11059200
- #define STD_UART_OP(num) \
-diff --git a/arch/ppc/platforms/4xx/ocotea.h b/arch/ppc/platforms/4xx/ocotea.h
---- a/arch/ppc/platforms/4xx/ocotea.h
-+++ b/arch/ppc/platforms/4xx/ocotea.h
-@@ -56,8 +56,8 @@
- #define RS_TABLE_SIZE 2
-
- /* OpenBIOS defined UART mappings, used before early_serial_setup */
--#define UART0_IO_BASE (u8 *) 0xE0000200
--#define UART1_IO_BASE (u8 *) 0xE0000300
-+#define UART0_IO_BASE 0xE0000200
-+#define UART1_IO_BASE 0xE0000300
-
- #define BASE_BAUD 11059200/16
- #define STD_UART_OP(num) \
-diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c
---- a/arch/ppc64/kernel/pSeries_iommu.c
-+++ b/arch/ppc64/kernel/pSeries_iommu.c
-@@ -401,6 +401,8 @@ static void iommu_bus_setup_pSeriesLP(st
- struct device_node *dn, *pdn;
- unsigned int *dma_window = NULL;
-
-+ DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self);
-+
- dn = pci_bus_to_OF_node(bus);
-
- /* Find nearest ibm,dma-window, walking up the device tree */
-@@ -455,6 +457,56 @@ static void iommu_dev_setup_pSeries(stru
- }
- }
-
-+static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev)
-+{
-+ struct device_node *pdn, *dn;
-+ struct iommu_table *tbl;
-+ int *dma_window = NULL;
-+
-+ DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, dev->pretty_name);
-+
-+ /* dev setup for LPAR is a little tricky, since the device tree might
-+ * contain the dma-window properties per-device and not neccesarily
-+ * for the bus. So we need to search upwards in the tree until we
-+ * either hit a dma-window property, OR find a parent with a table
-+ * already allocated.
-+ */
-+ dn = pci_device_to_OF_node(dev);
-+
-+ for (pdn = dn; pdn && !pdn->iommu_table; pdn = pdn->parent) {
-+ dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL);
-+ if (dma_window)
-+ break;
-+ }
-+
-+ /* Check for parent == NULL so we don't try to setup the empty EADS
-+ * slots on POWER4 machines.
-+ */
-+ if (dma_window == NULL || pdn->parent == NULL) {
-+ /* Fall back to regular (non-LPAR) dev setup */
-+ DBG("No dma window for device, falling back to regular setup\n");
-+ iommu_dev_setup_pSeries(dev);
-+ return;
-+ } else {
-+ DBG("Found DMA window, allocating table\n");
-+ }
-+
-+ if (!pdn->iommu_table) {
-+ /* iommu_table_setparms_lpar needs bussubno. */
-+ pdn->bussubno = pdn->phb->bus->number;
-+
-+ tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
-+ GFP_KERNEL);
-+
-+ iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window);
-+
-+ pdn->iommu_table = iommu_init_table(tbl);
-+ }
-+
-+ if (pdn != dn)
-+ dn->iommu_table = pdn->iommu_table;
-+}
-+
- static void iommu_bus_setup_null(struct pci_bus *b) { }
- static void iommu_dev_setup_null(struct pci_dev *d) { }
-
-@@ -479,13 +531,14 @@ void iommu_init_early_pSeries(void)
- ppc_md.tce_free = tce_free_pSeriesLP;
- }
- ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP;
-+ ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP;
- } else {
- ppc_md.tce_build = tce_build_pSeries;
- ppc_md.tce_free = tce_free_pSeries;
- ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries;
-+ ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
- }
-
-- ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
-
- pci_iommu_init();
- }
-diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c
---- a/arch/sparc/kernel/ptrace.c
-+++ b/arch/sparc/kernel/ptrace.c
-@@ -531,18 +531,6 @@ asmlinkage void do_ptrace(struct pt_regs
- pt_error_return(regs, EIO);
- goto out_tsk;
- }
-- if (addr != 1) {
-- if (addr & 3) {
-- pt_error_return(regs, EINVAL);
-- goto out_tsk;
-- }
--#ifdef DEBUG_PTRACE
-- printk ("Original: %08lx %08lx\n", child->thread.kregs->pc, child->thread.kregs->npc);
-- printk ("Continuing with %08lx %08lx\n", addr, addr+4);
--#endif
-- child->thread.kregs->pc = addr;
-- child->thread.kregs->npc = addr + 4;
-- }
-
- if (request == PTRACE_SYSCALL)
- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
---- a/arch/sparc64/kernel/ptrace.c
-+++ b/arch/sparc64/kernel/ptrace.c
-@@ -514,25 +514,6 @@ asmlinkage void do_ptrace(struct pt_regs
- pt_error_return(regs, EIO);
- goto out_tsk;
- }
-- if (addr != 1) {
-- unsigned long pc_mask = ~0UL;
--
-- if ((child->thread_info->flags & _TIF_32BIT) != 0)
-- pc_mask = 0xffffffff;
--
-- if (addr & 3) {
-- pt_error_return(regs, EINVAL);
-- goto out_tsk;
-- }
--#ifdef DEBUG_PTRACE
-- printk ("Original: %016lx %016lx\n",
-- child->thread_info->kregs->tpc,
-- child->thread_info->kregs->tnpc);
-- printk ("Continuing with %016lx %016lx\n", addr, addr+4);
--#endif
-- child->thread_info->kregs->tpc = (addr & pc_mask);
-- child->thread_info->kregs->tnpc = ((addr + 4) & pc_mask);
-- }
-
- if (request == PTRACE_SYSCALL) {
- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
---- a/arch/sparc64/kernel/signal32.c
-+++ b/arch/sparc64/kernel/signal32.c
-@@ -192,10 +192,13 @@ int copy_siginfo_to_user32(compat_siginf
- err |= __put_user(from->si_uid, &to->si_uid);
- break;
- case __SI_FAULT >> 16:
-- case __SI_POLL >> 16:
- err |= __put_user(from->si_trapno, &to->si_trapno);
- err |= __put_user((unsigned long)from->si_addr, &to->si_addr);
- break;
-+ case __SI_POLL >> 16:
-+ err |= __put_user(from->si_band, &to->si_band);
-+ err |= __put_user(from->si_fd, &to->si_fd);
-+ break;
- case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
- case __SI_MESGQ >> 16:
- err |= __put_user(from->si_pid, &to->si_pid);
-diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
---- a/arch/sparc64/kernel/systbls.S
-+++ b/arch/sparc64/kernel/systbls.S
-@@ -75,7 +75,7 @@ sys_call_table32:
- /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
- .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
- /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
-- .word sys_mq_timedsend, sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
-+ .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
- /*280*/ .word sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl
-
- #endif /* CONFIG_COMPAT */
-diff --git a/arch/um/include/sysdep-i386/syscalls.h b/arch/um/include/sysdep-i386/syscalls.h
---- a/arch/um/include/sysdep-i386/syscalls.h
-+++ b/arch/um/include/sysdep-i386/syscalls.h
-@@ -23,6 +23,9 @@ extern long sys_mmap2(unsigned long addr
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-
-+/* On i386 they choose a meaningless naming.*/
-+#define __NR_kexec_load __NR_sys_kexec_load
-+
- #define ARCH_SYSCALLS \
- [ __NR_waitpid ] = (syscall_handler_t *) sys_waitpid, \
- [ __NR_break ] = (syscall_handler_t *) sys_ni_syscall, \
-@@ -101,15 +104,12 @@ extern long sys_mmap2(unsigned long addr
- [ 223 ] = (syscall_handler_t *) sys_ni_syscall, \
- [ __NR_set_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \
- [ __NR_get_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \
-- [ __NR_fadvise64 ] = (syscall_handler_t *) sys_fadvise64, \
- [ 251 ] = (syscall_handler_t *) sys_ni_syscall, \
-- [ __NR_remap_file_pages ] = (syscall_handler_t *) sys_remap_file_pages, \
-- [ __NR_utimes ] = (syscall_handler_t *) sys_utimes, \
-- [ __NR_vserver ] = (syscall_handler_t *) sys_ni_syscall,
--
-+ [ 285 ] = (syscall_handler_t *) sys_ni_syscall,
-+
- /* 222 doesn't yet have a name in include/asm-i386/unistd.h */
-
--#define LAST_ARCH_SYSCALL __NR_vserver
-+#define LAST_ARCH_SYSCALL 285
-
- /*
- * Overrides for Emacs so that we follow Linus's tabbing style.
-diff --git a/arch/um/include/sysdep-x86_64/syscalls.h b/arch/um/include/sysdep-x86_64/syscalls.h
---- a/arch/um/include/sysdep-x86_64/syscalls.h
-+++ b/arch/um/include/sysdep-x86_64/syscalls.h
-@@ -71,12 +71,7 @@ extern syscall_handler_t sys_arch_prctl;
- [ __NR_iopl ] = (syscall_handler_t *) sys_ni_syscall, \
- [ __NR_set_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \
- [ __NR_get_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \
-- [ __NR_remap_file_pages ] = (syscall_handler_t *) sys_remap_file_pages, \
- [ __NR_semtimedop ] = (syscall_handler_t *) sys_semtimedop, \
-- [ __NR_fadvise64 ] = (syscall_handler_t *) sys_fadvise64, \
-- [ 223 ] = (syscall_handler_t *) sys_ni_syscall, \
-- [ __NR_utimes ] = (syscall_handler_t *) sys_utimes, \
-- [ __NR_vserver ] = (syscall_handler_t *) sys_ni_syscall, \
- [ 251 ] = (syscall_handler_t *) sys_ni_syscall,
-
- #define LAST_ARCH_SYSCALL 251
-diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
---- a/arch/um/kernel/skas/uaccess.c
-+++ b/arch/um/kernel/skas/uaccess.c
-@@ -61,7 +61,8 @@ static void do_buffer_op(void *jmpbuf, v
- void *arg;
- int *res;
-
-- va_copy(args, *(va_list *)arg_ptr);
-+ /* Some old gccs recognize __va_copy, but not va_copy */
-+ __va_copy(args, *(va_list *)arg_ptr);
- addr = va_arg(args, unsigned long);
- len = va_arg(args, int);
- is_write = va_arg(args, int);
-diff --git a/arch/um/kernel/sys_call_table.c b/arch/um/kernel/sys_call_table.c
---- a/arch/um/kernel/sys_call_table.c
-+++ b/arch/um/kernel/sys_call_table.c
-@@ -48,7 +48,6 @@ extern syscall_handler_t sys_vfork;
- extern syscall_handler_t old_select;
- extern syscall_handler_t sys_modify_ldt;
- extern syscall_handler_t sys_rt_sigsuspend;
--extern syscall_handler_t sys_vserver;
- extern syscall_handler_t sys_mbind;
- extern syscall_handler_t sys_get_mempolicy;
- extern syscall_handler_t sys_set_mempolicy;
-@@ -242,6 +241,7 @@ syscall_handler_t *sys_call_table[] = {
- [ __NR_epoll_create ] = (syscall_handler_t *) sys_epoll_create,
- [ __NR_epoll_ctl ] = (syscall_handler_t *) sys_epoll_ctl,
- [ __NR_epoll_wait ] = (syscall_handler_t *) sys_epoll_wait,
-+ [ __NR_remap_file_pages ] = (syscall_handler_t *) sys_remap_file_pages,
- [ __NR_set_tid_address ] = (syscall_handler_t *) sys_set_tid_address,
- [ __NR_timer_create ] = (syscall_handler_t *) sys_timer_create,
- [ __NR_timer_settime ] = (syscall_handler_t *) sys_timer_settime,
-@@ -252,12 +252,10 @@ syscall_handler_t *sys_call_table[] = {
- [ __NR_clock_gettime ] = (syscall_handler_t *) sys_clock_gettime,
- [ __NR_clock_getres ] = (syscall_handler_t *) sys_clock_getres,
- [ __NR_clock_nanosleep ] = (syscall_handler_t *) sys_clock_nanosleep,
-- [ __NR_statfs64 ] = (syscall_handler_t *) sys_statfs64,
-- [ __NR_fstatfs64 ] = (syscall_handler_t *) sys_fstatfs64,
- [ __NR_tgkill ] = (syscall_handler_t *) sys_tgkill,
- [ __NR_utimes ] = (syscall_handler_t *) sys_utimes,
-- [ __NR_fadvise64_64 ] = (syscall_handler_t *) sys_fadvise64_64,
-- [ __NR_vserver ] = (syscall_handler_t *) sys_vserver,
-+ [ __NR_fadvise64 ] = (syscall_handler_t *) sys_fadvise64,
-+ [ __NR_vserver ] = (syscall_handler_t *) sys_ni_syscall,
- [ __NR_mbind ] = (syscall_handler_t *) sys_mbind,
- [ __NR_get_mempolicy ] = (syscall_handler_t *) sys_get_mempolicy,
- [ __NR_set_mempolicy ] = (syscall_handler_t *) sys_set_mempolicy,
-@@ -267,9 +265,8 @@ syscall_handler_t *sys_call_table[] = {
- [ __NR_mq_timedreceive ] = (syscall_handler_t *) sys_mq_timedreceive,
- [ __NR_mq_notify ] = (syscall_handler_t *) sys_mq_notify,
- [ __NR_mq_getsetattr ] = (syscall_handler_t *) sys_mq_getsetattr,
-- [ __NR_sys_kexec_load ] = (syscall_handler_t *) sys_ni_syscall,
-+ [ __NR_kexec_load ] = (syscall_handler_t *) sys_ni_syscall,
- [ __NR_waitid ] = (syscall_handler_t *) sys_waitid,
-- [ 285 ] = (syscall_handler_t *) sys_ni_syscall,
- [ __NR_add_key ] = (syscall_handler_t *) sys_add_key,
- [ __NR_request_key ] = (syscall_handler_t *) sys_request_key,
- [ __NR_keyctl ] = (syscall_handler_t *) sys_keyctl,
-diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
---- a/arch/x86_64/kernel/apic.c
-+++ b/arch/x86_64/kernel/apic.c
-@@ -775,9 +775,7 @@ void __init setup_boot_APIC_clock (void)
-
- void __init setup_secondary_APIC_clock(void)
- {
-- local_irq_disable(); /* FIXME: Do we need this? --RR */
- setup_APIC_timer(calibration_result);
-- local_irq_enable();
- }
-
- void __init disable_APIC_timer(void)
-diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
---- a/arch/x86_64/kernel/ptrace.c
-+++ b/arch/x86_64/kernel/ptrace.c
-@@ -129,13 +129,13 @@ static int putreg(struct task_struct *ch
- value &= 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,fs_base):
-- if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
-- return -EIO;
-+ if (value >= TASK_SIZE)
-+ return -EIO;
- child->thread.fs = value;
- return 0;
- case offsetof(struct user_regs_struct,gs_base):
-- if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
-- return -EIO;
-+ if (value >= TASK_SIZE)
-+ return -EIO;
- child->thread.gs = value;
- return 0;
- case offsetof(struct user_regs_struct, eflags):
-@@ -149,6 +149,11 @@ static int putreg(struct task_struct *ch
- return -EIO;
- value &= 0xffff;
- break;
-+ case offsetof(struct user_regs_struct, rip):
-+ /* Check if the new RIP address is canonical */
-+ if (value >= TASK_SIZE)
-+ return -EIO;
-+ break;
- }
- put_stack_long(child, regno - sizeof(struct pt_regs), value);
- return 0;
-@@ -247,7 +252,7 @@ asmlinkage long sys_ptrace(long request,
- break;
-
- switch (addr) {
-- case 0 ... sizeof(struct user_regs_struct):
-+ case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
- tmp = getreg(child, addr);
- break;
- case offsetof(struct user, u_debugreg[0]):
-@@ -292,7 +297,7 @@ asmlinkage long sys_ptrace(long request,
- break;
-
- switch (addr) {
-- case 0 ... sizeof(struct user_regs_struct):
-+ case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
- ret = putreg(child, addr, data);
- break;
- /* Disallows to set a breakpoint into the vsyscall */
-diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
---- a/arch/x86_64/kernel/smpboot.c
-+++ b/arch/x86_64/kernel/smpboot.c
-@@ -309,8 +309,6 @@ void __init smp_callin(void)
- Dprintk("CALLIN, before setup_local_APIC().\n");
- setup_local_APIC();
-
-- local_irq_enable();
--
- /*
- * Get our bogomips.
- */
-@@ -324,8 +322,6 @@ void __init smp_callin(void)
- */
- smp_store_cpu_info(cpuid);
-
-- local_irq_disable();
--
- /*
- * Allow the master to continue.
- */
-diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
---- a/arch/x86_64/mm/fault.c
-+++ b/arch/x86_64/mm/fault.c
-@@ -236,6 +236,8 @@ static noinline void pgtable_bad(unsigne
-
- /*
- * Handle a fault on the vmalloc or module mapping area
-+ *
-+ * This assumes no large pages in there.
- */
- static int vmalloc_fault(unsigned long address)
- {
-@@ -274,7 +276,10 @@ static int vmalloc_fault(unsigned long a
- if (!pte_present(*pte_ref))
- return -1;
- pte = pte_offset_kernel(pmd, address);
-- if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref))
-+ /* Don't use pte_page here, because the mappings can point
-+ outside mem_map, and the NUMA hash lookup cannot handle
-+ that. */
-+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
- __flush_tlb_all();
- return 0;
-@@ -348,7 +353,9 @@ asmlinkage void do_page_fault(struct pt_
- * protection error (error_code & 1) == 0.
- */
- if (unlikely(address >= TASK_SIZE)) {
-- if (!(error_code & 5)) {
-+ if (!(error_code & 5) &&
-+ ((address >= VMALLOC_START && address < VMALLOC_END) ||
-+ (address >= MODULES_VADDR && address < MODULES_END))) {
- if (vmalloc_fault(address) < 0)
- goto bad_area_nosemaphore;
- return;
-diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
---- a/arch/x86_64/mm/ioremap.c
-+++ b/arch/x86_64/mm/ioremap.c
-@@ -266,7 +266,7 @@ void iounmap(volatile void __iomem *addr
- if ((p->flags >> 20) &&
- p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) {
- /* p->size includes the guard page, but cpa doesn't like that */
-- change_page_attr(virt_to_page(__va(p->phys_addr)),
-+ change_page_attr_addr((unsigned long)(__va(p->phys_addr)),
- (p->size - PAGE_SIZE) >> PAGE_SHIFT,
- PAGE_KERNEL);
- global_flush_tlb();
-diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
---- a/drivers/block/ioctl.c
-+++ b/drivers/block/ioctl.c
-@@ -237,3 +237,5 @@ long compat_blkdev_ioctl(struct file *fi
- }
- return ret;
- }
-+
-+EXPORT_SYMBOL_GPL(blkdev_ioctl);
-diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
---- a/drivers/block/pktcdvd.c
-+++ b/drivers/block/pktcdvd.c
-@@ -2400,7 +2400,7 @@ static int pkt_ioctl(struct inode *inode
- case CDROM_LAST_WRITTEN:
- case CDROM_SEND_PACKET:
- case SCSI_IOCTL_SEND_COMMAND:
-- return ioctl_by_bdev(pd->bdev, cmd, arg);
-+ return blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
-
- case CDROMEJECT:
- /*
-@@ -2408,7 +2408,7 @@ static int pkt_ioctl(struct inode *inode
- * have to unlock it or else the eject command fails.
- */
- pkt_lock_door(pd, 0);
-- return ioctl_by_bdev(pd->bdev, cmd, arg);
-+ return blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
-
- default:
- printk("pktcdvd: Unknown ioctl for %s (%x)\n", pd->name, cmd);
-diff --git a/drivers/char/drm/drm_ioctl.c b/drivers/char/drm/drm_ioctl.c
---- a/drivers/char/drm/drm_ioctl.c
-+++ b/drivers/char/drm/drm_ioctl.c
-@@ -326,6 +326,8 @@ int drm_setversion(DRM_IOCTL_ARGS)
-
- DRM_COPY_FROM_USER_IOCTL(sv, argp, sizeof(sv));
-
-+ memset(&version, 0, sizeof(version));
-+
- dev->driver->version(&version);
- retv.drm_di_major = DRM_IF_MAJOR;
- retv.drm_di_minor = DRM_IF_MINOR;
-diff --git a/drivers/char/raw.c b/drivers/char/raw.c
---- a/drivers/char/raw.c
-+++ b/drivers/char/raw.c
-@@ -122,7 +122,7 @@ raw_ioctl(struct inode *inode, struct fi
- {
- struct block_device *bdev = filp->private_data;
-
-- return ioctl_by_bdev(bdev, command, arg);
-+ return blkdev_ioctl(bdev->bd_inode, filp, command, arg);
- }
-
- static void bind_device(struct raw_config_request *rq)
-diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c
---- a/drivers/i2c/chips/eeprom.c
-+++ b/drivers/i2c/chips/eeprom.c
-@@ -130,7 +130,8 @@ static ssize_t eeprom_read(struct kobjec
-
- /* Hide Vaio security settings to regular users (16 first bytes) */
- if (data->nature == VAIO && off < 16 && !capable(CAP_SYS_ADMIN)) {
-- int in_row1 = 16 - off;
-+ size_t in_row1 = 16 - off;
-+ in_row1 = min(in_row1, count);
- memset(buf, 0, in_row1);
- if (count - in_row1 > 0)
- memcpy(buf + in_row1, &data->data[16], count - in_row1);
-diff --git a/drivers/i2c/chips/it87.c b/drivers/i2c/chips/it87.c
---- a/drivers/i2c/chips/it87.c
-+++ b/drivers/i2c/chips/it87.c
-@@ -631,7 +631,7 @@ static ssize_t show_alarms(struct device
- struct it87_data *data = it87_update_device(dev);
- return sprintf(buf,"%d\n", ALARMS_FROM_REG(data->alarms));
- }
--static DEVICE_ATTR(alarms, S_IRUGO | S_IWUSR, show_alarms, NULL);
-+static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL);
-
- static ssize_t
- show_vrm_reg(struct device *dev, char *buf)
-diff --git a/drivers/i2c/chips/via686a.c b/drivers/i2c/chips/via686a.c
---- a/drivers/i2c/chips/via686a.c
-+++ b/drivers/i2c/chips/via686a.c
-@@ -554,7 +554,7 @@ static ssize_t show_alarms(struct device
- struct via686a_data *data = via686a_update_device(dev);
- return sprintf(buf,"%d\n", ALARMS_FROM_REG(data->alarms));
- }
--static DEVICE_ATTR(alarms, S_IRUGO | S_IWUSR, show_alarms, NULL);
-+static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL);
-
- /* The driver. I choose to use type i2c_driver, as at is identical to both
- smbus_driver and isa_driver, and clients could be of either kind */
-diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
---- a/drivers/ide/ide-disk.c
-+++ b/drivers/ide/ide-disk.c
-@@ -133,6 +133,8 @@ static ide_startstop_t __ide_do_rw_disk(
- if (hwif->no_lba48_dma && lba48 && dma) {
- if (block + rq->nr_sectors > 1ULL << 28)
- dma = 0;
-+ else
-+ lba48 = 0;
- }
-
- if (!dma) {
-@@ -146,7 +148,7 @@ static ide_startstop_t __ide_do_rw_disk(
- /* FIXME: SELECT_MASK(drive, 0) ? */
-
- if (drive->select.b.lba) {
-- if (drive->addressing == 1) {
-+ if (lba48) {
- task_ioreg_t tasklets[10];
-
- pr_debug("%s: LBA=0x%012llx\n", drive->name, block);
-diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
---- a/drivers/input/serio/i8042-x86ia64io.h
-+++ b/drivers/input/serio/i8042-x86ia64io.h
-@@ -88,7 +88,7 @@ static struct dmi_system_id __initdata i
- };
- #endif
-
--#ifdef CONFIG_ACPI
-+#if defined(__ia64__) && defined(CONFIG_ACPI)
- #include <linux/acpi.h>
- #include <acpi/acpi_bus.h>
-
-@@ -281,7 +281,7 @@ static inline int i8042_platform_init(vo
- i8042_kbd_irq = I8042_MAP_IRQ(1);
- i8042_aux_irq = I8042_MAP_IRQ(12);
-
--#ifdef CONFIG_ACPI
-+#if defined(__ia64__) && defined(CONFIG_ACPI)
- if (i8042_acpi_init())
- return -1;
- #endif
-@@ -300,7 +300,7 @@ static inline int i8042_platform_init(vo
-
- static inline void i8042_platform_exit(void)
- {
--#ifdef CONFIG_ACPI
-+#if defined(__ia64__) && defined(CONFIG_ACPI)
- i8042_acpi_exit();
- #endif
- }
-diff --git a/drivers/md/raid6altivec.uc b/drivers/md/raid6altivec.uc
---- a/drivers/md/raid6altivec.uc
-+++ b/drivers/md/raid6altivec.uc
-@@ -108,7 +108,11 @@ int raid6_have_altivec(void);
- int raid6_have_altivec(void)
- {
- /* This assumes either all CPUs have Altivec or none does */
-+#ifdef CONFIG_PPC64
- return cur_cpu_spec->cpu_features & CPU_FTR_ALTIVEC;
-+#else
-+ return cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC;
-+#endif
- }
- #endif
-
-diff --git a/drivers/media/video/adv7170.c b/drivers/media/video/adv7170.c
---- a/drivers/media/video/adv7170.c
-+++ b/drivers/media/video/adv7170.c
-@@ -130,7 +130,7 @@ adv7170_write_block (struct i2c_client *
- u8 block_data[32];
-
- msg.addr = client->addr;
-- msg.flags = client->flags;
-+ msg.flags = 0;
- while (len >= 2) {
- msg.buf = (char *) block_data;
- msg.len = 0;
-diff --git a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c
---- a/drivers/media/video/adv7175.c
-+++ b/drivers/media/video/adv7175.c
-@@ -126,7 +126,7 @@ adv7175_write_block (struct i2c_client *
- u8 block_data[32];
-
- msg.addr = client->addr;
-- msg.flags = client->flags;
-+ msg.flags = 0;
- while (len >= 2) {
- msg.buf = (char *) block_data;
- msg.len = 0;
-diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c
---- a/drivers/media/video/bt819.c
-+++ b/drivers/media/video/bt819.c
-@@ -146,7 +146,7 @@ bt819_write_block (struct i2c_client *cl
- u8 block_data[32];
-
- msg.addr = client->addr;
-- msg.flags = client->flags;
-+ msg.flags = 0;
- while (len >= 2) {
- msg.buf = (char *) block_data;
- msg.len = 0;
-diff --git a/drivers/media/video/bttv-cards.c b/drivers/media/video/bttv-cards.c
---- a/drivers/media/video/bttv-cards.c
-+++ b/drivers/media/video/bttv-cards.c
-@@ -1939,7 +1939,6 @@ struct tvcard bttv_tvcards[] = {
- .no_tda9875 = 1,
- .no_tda7432 = 1,
- .tuner_type = TUNER_ABSENT,
-- .no_video = 1,
- .pll = PLL_28,
- },{
- .name = "Teppro TEV-560/InterVision IV-560",
-@@ -2718,8 +2717,6 @@ void __devinit bttv_init_card2(struct bt
- }
- btv->pll.pll_current = -1;
-
-- bttv_reset_audio(btv);
--
- /* tuner configuration (from card list / autodetect / insmod option) */
- if (UNSET != bttv_tvcards[btv->c.type].tuner_type)
- if(UNSET == btv->tuner_type)
-diff --git a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c
---- a/drivers/media/video/saa7110.c
-+++ b/drivers/media/video/saa7110.c
-@@ -60,8 +60,10 @@ MODULE_PARM_DESC(debug, "Debug level (0-
-
- #define I2C_SAA7110 0x9C /* or 0x9E */
-
-+#define SAA7110_NR_REG 0x35
-+
- struct saa7110 {
-- unsigned char reg[54];
-+ u8 reg[SAA7110_NR_REG];
-
- int norm;
- int input;
-@@ -95,31 +97,28 @@ saa7110_write_block (struct i2c_client *
- unsigned int len)
- {
- int ret = -1;
-- u8 reg = *data++;
-+ u8 reg = *data; /* first register to write to */
-
-- len--;
-+ /* Sanity check */
-+ if (reg + (len - 1) > SAA7110_NR_REG)
-+ return ret;
-
- /* the saa7110 has an autoincrement function, use it if
- * the adapter understands raw I2C */
- if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
- struct saa7110 *decoder = i2c_get_clientdata(client);
- struct i2c_msg msg;
-- u8 block_data[54];
-
-- msg.len = 0;
-- msg.buf = (char *) block_data;
-+ msg.len = len;
-+ msg.buf = (char *) data;
- msg.addr = client->addr;
-- msg.flags = client->flags;
-- while (len >= 1) {
-- msg.len = 0;
-- block_data[msg.len++] = reg;
-- while (len-- >= 1 && msg.len < 54)
-- block_data[msg.len++] =
-- decoder->reg[reg++] = *data++;
-- ret = i2c_transfer(client->adapter, &msg, 1);
-- }
-+ msg.flags = 0;
-+ ret = i2c_transfer(client->adapter, &msg, 1);
-+
-+ /* Cache the written data */
-+ memcpy(decoder->reg + reg, data + 1, len - 1);
- } else {
-- while (len-- >= 1) {
-+ for (++data, --len; len; len--) {
- if ((ret = saa7110_write(client, reg++,
- *data++)) < 0)
- break;
-@@ -192,7 +191,7 @@ saa7110_selmux (struct i2c_client *clien
- return 0;
- }
-
--static const unsigned char initseq[] = {
-+static const unsigned char initseq[1 + SAA7110_NR_REG] = {
- 0, 0x4C, 0x3C, 0x0D, 0xEF, 0xBD, 0xF2, 0x03, 0x00,
- /* 0x08 */ 0xF8, 0xF8, 0x60, 0x60, 0x00, 0x86, 0x18, 0x90,
- /* 0x10 */ 0x00, 0x59, 0x40, 0x46, 0x42, 0x1A, 0xFF, 0xDA,
-diff --git a/drivers/media/video/saa7114.c b/drivers/media/video/saa7114.c
---- a/drivers/media/video/saa7114.c
-+++ b/drivers/media/video/saa7114.c
-@@ -163,7 +163,7 @@ saa7114_write_block (struct i2c_client *
- u8 block_data[32];
-
- msg.addr = client->addr;
-- msg.flags = client->flags;
-+ msg.flags = 0;
- while (len >= 2) {
- msg.buf = (char *) block_data;
- msg.len = 0;
-diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c
---- a/drivers/media/video/saa7185.c
-+++ b/drivers/media/video/saa7185.c
-@@ -118,7 +118,7 @@ saa7185_write_block (struct i2c_client *
- u8 block_data[32];
-
- msg.addr = client->addr;
-- msg.flags = client->flags;
-+ msg.flags = 0;
- while (len >= 2) {
- msg.buf = (char *) block_data;
- msg.len = 0;
-diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
---- a/drivers/net/3c59x.c
-+++ b/drivers/net/3c59x.c
-@@ -1581,7 +1581,8 @@ vortex_up(struct net_device *dev)
-
- if (VORTEX_PCI(vp)) {
- pci_set_power_state(VORTEX_PCI(vp), PCI_D0); /* Go active */
-- pci_restore_state(VORTEX_PCI(vp));
-+ if (vp->pm_state_valid)
-+ pci_restore_state(VORTEX_PCI(vp));
- pci_enable_device(VORTEX_PCI(vp));
- }
-
-@@ -2741,6 +2742,7 @@ vortex_down(struct net_device *dev, int
- outl(0, ioaddr + DownListPtr);
-
- if (final_down && VORTEX_PCI(vp)) {
-+ vp->pm_state_valid = 1;
- pci_save_state(VORTEX_PCI(vp));
- acpi_set_WOL(dev);
- }
-@@ -3243,9 +3245,10 @@ static void acpi_set_WOL(struct net_devi
- outw(RxEnable, ioaddr + EL3_CMD);
-
- pci_enable_wake(VORTEX_PCI(vp), 0, 1);
-+
-+ /* Change the power state to D3; RxEnable doesn't take effect. */
-+ pci_set_power_state(VORTEX_PCI(vp), PCI_D3hot);
- }
-- /* Change the power state to D3; RxEnable doesn't take effect. */
-- pci_set_power_state(VORTEX_PCI(vp), PCI_D3hot);
- }
-
-
-diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
---- a/drivers/net/amd8111e.c
-+++ b/drivers/net/amd8111e.c
-@@ -1381,6 +1381,8 @@ static int amd8111e_open(struct net_devi
-
- if(amd8111e_restart(dev)){
- spin_unlock_irq(&lp->lock);
-+ if (dev->irq)
-+ free_irq(dev->irq, dev);
- return -ENOMEM;
- }
- /* Start ipg timer */
-diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
---- a/drivers/net/ppp_async.c
-+++ b/drivers/net/ppp_async.c
-@@ -1000,7 +1000,7 @@ static void async_lcp_peek(struct asyncp
- data += 4;
- dlen -= 4;
- /* data[0] is code, data[1] is length */
-- while (dlen >= 2 && dlen >= data[1]) {
-+ while (dlen >= 2 && dlen >= data[1] && data[1] >= 2) {
- switch (data[0]) {
- case LCP_MRU:
- val = (data[2] << 8) + data[3];
-diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
---- a/drivers/net/r8169.c
-+++ b/drivers/net/r8169.c
-@@ -1683,16 +1683,19 @@ static void rtl8169_free_rx_skb(struct r
- rtl8169_make_unusable_by_asic(desc);
- }
-
--static inline void rtl8169_return_to_asic(struct RxDesc *desc, int rx_buf_sz)
-+static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
- {
-- desc->opts1 |= cpu_to_le32(DescOwn + rx_buf_sz);
-+ u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
-+
-+ desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
- }
-
--static inline void rtl8169_give_to_asic(struct RxDesc *desc, dma_addr_t mapping,
-- int rx_buf_sz)
-+static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
-+ u32 rx_buf_sz)
- {
- desc->addr = cpu_to_le64(mapping);
-- desc->opts1 |= cpu_to_le32(DescOwn + rx_buf_sz);
-+ wmb();
-+ rtl8169_mark_to_asic(desc, rx_buf_sz);
- }
-
- static int rtl8169_alloc_rx_skb(struct pci_dev *pdev, struct sk_buff **sk_buff,
-@@ -1712,7 +1715,7 @@ static int rtl8169_alloc_rx_skb(struct p
- mapping = pci_map_single(pdev, skb->tail, rx_buf_sz,
- PCI_DMA_FROMDEVICE);
-
-- rtl8169_give_to_asic(desc, mapping, rx_buf_sz);
-+ rtl8169_map_to_asic(desc, mapping, rx_buf_sz);
-
- out:
- return ret;
-@@ -2150,7 +2153,7 @@ static inline int rtl8169_try_rx_copy(st
- skb_reserve(skb, NET_IP_ALIGN);
- eth_copy_and_sum(skb, sk_buff[0]->tail, pkt_size, 0);
- *sk_buff = skb;
-- rtl8169_return_to_asic(desc, rx_buf_sz);
-+ rtl8169_mark_to_asic(desc, rx_buf_sz);
- ret = 0;
- }
- }
-diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
---- a/drivers/net/sis900.c
-+++ b/drivers/net/sis900.c
-@@ -236,7 +236,7 @@ static int __devinit sis900_get_mac_addr
- signature = (u16) read_eeprom(ioaddr, EEPROMSignature);
- if (signature == 0xffff || signature == 0x0000) {
- printk (KERN_INFO "%s: Error EERPOM read %x\n",
-- net_dev->name, signature);
-+ pci_name(pci_dev), signature);
- return 0;
- }
-
-@@ -268,7 +268,7 @@ static int __devinit sis630e_get_mac_add
- if (!isa_bridge)
- isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0018, isa_bridge);
- if (!isa_bridge) {
-- printk("%s: Can not find ISA bridge\n", net_dev->name);
-+ printk("%s: Can not find ISA bridge\n", pci_name(pci_dev));
- return 0;
- }
- pci_read_config_byte(isa_bridge, 0x48, &reg);
-@@ -456,10 +456,6 @@ static int __devinit sis900_probe(struct
- net_dev->tx_timeout = sis900_tx_timeout;
- net_dev->watchdog_timeo = TX_TIMEOUT;
- net_dev->ethtool_ops = &sis900_ethtool_ops;
--
-- ret = register_netdev(net_dev);
-- if (ret)
-- goto err_unmap_rx;
-
- /* Get Mac address according to the chip revision */
- pci_read_config_byte(pci_dev, PCI_CLASS_REVISION, &revision);
-@@ -476,7 +472,7 @@ static int __devinit sis900_probe(struct
-
- if (ret == 0) {
- ret = -ENODEV;
-- goto err_out_unregister;
-+ goto err_unmap_rx;
- }
-
- /* 630ET : set the mii access mode as software-mode */
-@@ -486,7 +482,7 @@ static int __devinit sis900_probe(struct
- /* probe for mii transceiver */
- if (sis900_mii_probe(net_dev) == 0) {
- ret = -ENODEV;
-- goto err_out_unregister;
-+ goto err_unmap_rx;
- }
-
- /* save our host bridge revision */
-@@ -496,6 +492,10 @@ static int __devinit sis900_probe(struct
- pci_dev_put(dev);
- }
-
-+ ret = register_netdev(net_dev);
-+ if (ret)
-+ goto err_unmap_rx;
-+
- /* print some information about our NIC */
- printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ", net_dev->name,
- card_name, ioaddr, net_dev->irq);
-@@ -505,8 +505,6 @@ static int __devinit sis900_probe(struct
-
- return 0;
-
-- err_out_unregister:
-- unregister_netdev(net_dev);
- err_unmap_rx:
- pci_free_consistent(pci_dev, RX_TOTAL_SIZE, sis_priv->rx_ring,
- sis_priv->rx_ring_dma);
-@@ -533,6 +531,7 @@ static int __devinit sis900_probe(struct
- static int __init sis900_mii_probe(struct net_device * net_dev)
- {
- struct sis900_private * sis_priv = net_dev->priv;
-+ const char *dev_name = pci_name(sis_priv->pci_dev);
- u16 poll_bit = MII_STAT_LINK, status = 0;
- unsigned long timeout = jiffies + 5 * HZ;
- int phy_addr;
-@@ -582,21 +581,20 @@ static int __init sis900_mii_probe(struc
- mii_phy->phy_types =
- (mii_status & (MII_STAT_CAN_TX_FDX | MII_STAT_CAN_TX)) ? LAN : HOME;
- printk(KERN_INFO "%s: %s transceiver found at address %d.\n",
-- net_dev->name, mii_chip_table[i].name,
-+ dev_name, mii_chip_table[i].name,
- phy_addr);
- break;
- }
-
- if( !mii_chip_table[i].phy_id1 ) {
- printk(KERN_INFO "%s: Unknown PHY transceiver found at address %d.\n",
-- net_dev->name, phy_addr);
-+ dev_name, phy_addr);
- mii_phy->phy_types = UNKNOWN;
- }
- }
-
- if (sis_priv->mii == NULL) {
-- printk(KERN_INFO "%s: No MII transceivers found!\n",
-- net_dev->name);
-+ printk(KERN_INFO "%s: No MII transceivers found!\n", dev_name);
- return 0;
- }
-
-@@ -621,7 +619,7 @@ static int __init sis900_mii_probe(struc
- poll_bit ^= (mdio_read(net_dev, sis_priv->cur_phy, MII_STATUS) & poll_bit);
- if (time_after_eq(jiffies, timeout)) {
- printk(KERN_WARNING "%s: reset phy and link down now\n",
-- net_dev->name);
-+ dev_name);
- return -ETIME;
- }
- }
-@@ -691,7 +689,7 @@ static u16 sis900_default_phy(struct net
- sis_priv->mii = default_phy;
- sis_priv->cur_phy = default_phy->phy_addr;
- printk(KERN_INFO "%s: Using transceiver found at address %d as default\n",
-- net_dev->name,sis_priv->cur_phy);
-+ pci_name(sis_priv->pci_dev), sis_priv->cur_phy);
- }
-
- status = mdio_read(net_dev, sis_priv->cur_phy, MII_CONTROL);
-diff --git a/drivers/net/tun.c b/drivers/net/tun.c
---- a/drivers/net/tun.c
-+++ b/drivers/net/tun.c
-@@ -229,7 +229,7 @@ static __inline__ ssize_t tun_get_user(s
- size_t len = count;
-
- if (!(tun->flags & TUN_NO_PI)) {
-- if ((len -= sizeof(pi)) > len)
-+ if ((len -= sizeof(pi)) > count)
- return -EINVAL;
-
- if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
-diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
---- a/drivers/net/via-rhine.c
-+++ b/drivers/net/via-rhine.c
-@@ -1197,8 +1197,10 @@ static int rhine_open(struct net_device
- dev->name, rp->pdev->irq);
-
- rc = alloc_ring(dev);
-- if (rc)
-+ if (rc) {
-+ free_irq(rp->pdev->irq, dev);
- return rc;
-+ }
- alloc_rbufs(dev);
- alloc_tbufs(dev);
- rhine_chip_reset(dev);
-@@ -1899,6 +1901,9 @@ static void rhine_shutdown (struct devic
- struct rhine_private *rp = netdev_priv(dev);
- void __iomem *ioaddr = rp->base;
-
-+ if (!(rp->quirks & rqWOL))
-+ return; /* Nothing to do for non-WOL adapters */
-+
- rhine_power_init(dev);
-
- /* Make sure we use pattern 0, 1 and not 4, 5 */
-diff --git a/drivers/net/wan/hd6457x.c b/drivers/net/wan/hd6457x.c
---- a/drivers/net/wan/hd6457x.c
-+++ b/drivers/net/wan/hd6457x.c
-@@ -315,7 +315,7 @@ static inline void sca_rx(card_t *card,
- #endif
- stats->rx_packets++;
- stats->rx_bytes += skb->len;
-- skb->dev->last_rx = jiffies;
-+ dev->last_rx = jiffies;
- skb->protocol = hdlc_type_trans(skb, dev);
- netif_rx(skb);
- }
-diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
---- a/drivers/pci/hotplug/pciehp_ctrl.c
-+++ b/drivers/pci/hotplug/pciehp_ctrl.c
-@@ -1354,10 +1354,11 @@ static u32 remove_board(struct pci_func
- dbg("PCI Bridge Hot-Remove s:b:d:f(%02x:%02x:%02x:%02x)\n",
- ctrl->seg, func->bus, func->device, func->function);
- bridge_slot_remove(func);
-- } else
-+ } else {
- dbg("PCI Function Hot-Remove s:b:d:f(%02x:%02x:%02x:%02x)\n",
- ctrl->seg, func->bus, func->device, func->function);
- slot_remove(func);
-+ }
-
- func = pciehp_slot_find(ctrl->slot_bus, device, 0);
- }
-diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
---- a/drivers/usb/serial/visor.c
-+++ b/drivers/usb/serial/visor.c
-@@ -386,6 +386,7 @@ struct visor_private {
- int bytes_in;
- int bytes_out;
- int outstanding_urbs;
-+ int throttled;
- };
-
- /* number of outstanding urbs to prevent userspace DoS from happening */
-@@ -415,6 +416,7 @@ static int visor_open (struct usb_serial
- priv->bytes_in = 0;
- priv->bytes_out = 0;
- priv->outstanding_urbs = 0;
-+ priv->throttled = 0;
- spin_unlock_irqrestore(&priv->lock, flags);
-
- /*
-@@ -602,6 +604,7 @@ static void visor_read_bulk_callback (st
- struct tty_struct *tty;
- unsigned long flags;
- int i;
-+ int throttled;
- int result;
-
- dbg("%s - port %d", __FUNCTION__, port->number);
-@@ -627,18 +630,21 @@ static void visor_read_bulk_callback (st
- }
- spin_lock_irqsave(&priv->lock, flags);
- priv->bytes_in += urb->actual_length;
-+ throttled = priv->throttled;
- spin_unlock_irqrestore(&priv->lock, flags);
-
-- /* Continue trying to always read */
-- usb_fill_bulk_urb (port->read_urb, port->serial->dev,
-- usb_rcvbulkpipe(port->serial->dev,
-- port->bulk_in_endpointAddress),
-- port->read_urb->transfer_buffer,
-- port->read_urb->transfer_buffer_length,
-- visor_read_bulk_callback, port);
-- result = usb_submit_urb(port->read_urb, GFP_ATOMIC);
-- if (result)
-- dev_err(&port->dev, "%s - failed resubmitting read urb, error %d\n", __FUNCTION__, result);
-+ /* Continue trying to always read if we should */
-+ if (!throttled) {
-+ usb_fill_bulk_urb (port->read_urb, port->serial->dev,
-+ usb_rcvbulkpipe(port->serial->dev,
-+ port->bulk_in_endpointAddress),
-+ port->read_urb->transfer_buffer,
-+ port->read_urb->transfer_buffer_length,
-+ visor_read_bulk_callback, port);
-+ result = usb_submit_urb(port->read_urb, GFP_ATOMIC);
-+ if (result)
-+ dev_err(&port->dev, "%s - failed resubmitting read urb, error %d\n", __FUNCTION__, result);
-+ }
- return;
- }
-
-@@ -683,16 +689,26 @@ exit:
-
- static void visor_throttle (struct usb_serial_port *port)
- {
-+ struct visor_private *priv = usb_get_serial_port_data(port);
-+ unsigned long flags;
-+
- dbg("%s - port %d", __FUNCTION__, port->number);
-- usb_kill_urb(port->read_urb);
-+ spin_lock_irqsave(&priv->lock, flags);
-+ priv->throttled = 1;
-+ spin_unlock_irqrestore(&priv->lock, flags);
- }
-
-
- static void visor_unthrottle (struct usb_serial_port *port)
- {
-+ struct visor_private *priv = usb_get_serial_port_data(port);
-+ unsigned long flags;
- int result;
-
- dbg("%s - port %d", __FUNCTION__, port->number);
-+ spin_lock_irqsave(&priv->lock, flags);
-+ priv->throttled = 0;
-+ spin_unlock_irqrestore(&priv->lock, flags);
-
- port->read_urb->dev = port->serial->dev;
- result = usb_submit_urb(port->read_urb, GFP_ATOMIC);
-diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c
---- a/drivers/video/matrox/matroxfb_accel.c
-+++ b/drivers/video/matrox/matroxfb_accel.c
-@@ -438,13 +438,21 @@ static void matroxfb_1bpp_imageblit(WPMI
- } else if (step == 1) {
- /* Special case for 1..8bit widths */
- while (height--) {
-- mga_writel(mmio, 0, *chardata);
-+#if defined(__BIG_ENDIAN)
-+ fb_writel((*chardata) << 24, mmio.vaddr);
-+#else
-+ fb_writel(*chardata, mmio.vaddr);
-+#endif
- chardata++;
- }
- } else if (step == 2) {
- /* Special case for 9..15bit widths */
- while (height--) {
-- mga_writel(mmio, 0, *(u_int16_t*)chardata);
-+#if defined(__BIG_ENDIAN)
-+ fb_writel((*(u_int16_t*)chardata) << 16, mmio.vaddr);
-+#else
-+ fb_writel(*(u_int16_t*)chardata, mmio.vaddr);
-+#endif
- chardata += 2;
- }
- } else {
-@@ -454,7 +462,7 @@ static void matroxfb_1bpp_imageblit(WPMI
-
- for (i = 0; i < step; i += 4) {
- /* Hope that there are at least three readable bytes beyond the end of bitmap */
-- mga_writel(mmio, 0, get_unaligned((u_int32_t*)(chardata + i)));
-+ fb_writel(get_unaligned((u_int32_t*)(chardata + i)),mmio.vaddr);
- }
- chardata += step;
- }
-diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
---- a/drivers/video/matrox/matroxfb_base.h
-+++ b/drivers/video/matrox/matroxfb_base.h
-@@ -170,14 +170,14 @@ static inline void mga_memcpy_toio(vaddr
-
- if ((unsigned long)src & 3) {
- while (len >= 4) {
-- writel(get_unaligned((u32 *)src), addr);
-+ fb_writel(get_unaligned((u32 *)src), addr);
- addr++;
- len -= 4;
- src += 4;
- }
- } else {
- while (len >= 4) {
-- writel(*(u32 *)src, addr);
-+ fb_writel(*(u32 *)src, addr);
- addr++;
- len -= 4;
- src += 4;
-diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
---- a/fs/binfmt_elf.c
-+++ b/fs/binfmt_elf.c
-@@ -257,7 +257,7 @@ create_elf_tables(struct linux_binprm *b
- }
-
- /* Populate argv and envp */
-- p = current->mm->arg_start;
-+ p = current->mm->arg_end = current->mm->arg_start;
- while (argc-- > 0) {
- size_t len;
- __put_user((elf_addr_t)p, argv++);
-@@ -1008,6 +1008,7 @@ out_free_ph:
- static int load_elf_library(struct file *file)
- {
- struct elf_phdr *elf_phdata;
-+ struct elf_phdr *eppnt;
- unsigned long elf_bss, bss, len;
- int retval, error, i, j;
- struct elfhdr elf_ex;
-@@ -1031,44 +1032,47 @@ static int load_elf_library(struct file
- /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
-
- error = -ENOMEM;
-- elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL);
-+ elf_phdata = kmalloc(j, GFP_KERNEL);
- if (!elf_phdata)
- goto out;
-
-+ eppnt = elf_phdata;
- error = -ENOEXEC;
-- retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, j);
-+ retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
- if (retval != j)
- goto out_free_ph;
-
- for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
-- if ((elf_phdata + i)->p_type == PT_LOAD) j++;
-+ if ((eppnt + i)->p_type == PT_LOAD)
-+ j++;
- if (j != 1)
- goto out_free_ph;
-
-- while (elf_phdata->p_type != PT_LOAD) elf_phdata++;
-+ while (eppnt->p_type != PT_LOAD)
-+ eppnt++;
-
- /* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file,
-- ELF_PAGESTART(elf_phdata->p_vaddr),
-- (elf_phdata->p_filesz +
-- ELF_PAGEOFFSET(elf_phdata->p_vaddr)),
-+ ELF_PAGESTART(eppnt->p_vaddr),
-+ (eppnt->p_filesz +
-+ ELF_PAGEOFFSET(eppnt->p_vaddr)),
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
-- (elf_phdata->p_offset -
-- ELF_PAGEOFFSET(elf_phdata->p_vaddr)));
-+ (eppnt->p_offset -
-+ ELF_PAGEOFFSET(eppnt->p_vaddr)));
- up_write(&current->mm->mmap_sem);
-- if (error != ELF_PAGESTART(elf_phdata->p_vaddr))
-+ if (error != ELF_PAGESTART(eppnt->p_vaddr))
- goto out_free_ph;
-
-- elf_bss = elf_phdata->p_vaddr + elf_phdata->p_filesz;
-+ elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
- if (padzero(elf_bss)) {
- error = -EFAULT;
- goto out_free_ph;
- }
-
-- len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
-- bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
-+ len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
-+ bss = eppnt->p_memsz + eppnt->p_vaddr;
- if (bss > len) {
- down_write(&current->mm->mmap_sem);
- do_brk(len, bss - len);
-@@ -1275,7 +1279,7 @@ static void fill_prstatus(struct elf_prs
- static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
- struct mm_struct *mm)
- {
-- int i, len;
-+ unsigned int i, len;
-
- /* first copy the parameters from user space */
- memset(psinfo, 0, sizeof(struct elf_prpsinfo));
-diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
---- a/fs/cramfs/inode.c
-+++ b/fs/cramfs/inode.c
-@@ -70,6 +70,7 @@ static struct inode *get_cramfs_inode(st
- inode->i_data.a_ops = &cramfs_aops;
- } else {
- inode->i_size = 0;
-+ inode->i_blocks = 0;
- init_special_inode(inode, inode->i_mode,
- old_decode_dev(cramfs_inode->size));
- }
-diff --git a/fs/eventpoll.c b/fs/eventpoll.c
---- a/fs/eventpoll.c
-+++ b/fs/eventpoll.c
-@@ -619,6 +619,7 @@ eexit_1:
- return error;
- }
-
-+#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
-
- /*
- * Implement the event wait interface for the eventpoll file. It is the kernel
-@@ -635,7 +636,7 @@ asmlinkage long sys_epoll_wait(int epfd,
- current, epfd, events, maxevents, timeout));
-
- /* The maximum number of event must be greater than zero */
-- if (maxevents <= 0)
-+ if (maxevents <= 0 || maxevents > MAX_EVENTS)
- return -EINVAL;
-
- /* Verify that the area passed by the user is writeable */
-diff --git a/fs/exec.c b/fs/exec.c
---- a/fs/exec.c
-+++ b/fs/exec.c
-@@ -814,7 +814,7 @@ void get_task_comm(char *buf, struct tas
- {
- /* buf must be at least sizeof(tsk->comm) in size */
- task_lock(tsk);
-- memcpy(buf, tsk->comm, sizeof(tsk->comm));
-+ strncpy(buf, tsk->comm, sizeof(tsk->comm));
- task_unlock(tsk);
- }
-
-diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
---- a/fs/ext2/dir.c
-+++ b/fs/ext2/dir.c
-@@ -592,6 +592,7 @@ int ext2_make_empty(struct inode *inode,
- goto fail;
- }
- kaddr = kmap_atomic(page, KM_USER0);
-+ memset(kaddr, 0, chunk_size);
- de = (struct ext2_dir_entry_2 *)kaddr;
- de->name_len = 1;
- de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
-diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
---- a/fs/ext3/balloc.c
-+++ b/fs/ext3/balloc.c
-@@ -268,7 +268,8 @@ void ext3_discard_reservation(struct ino
-
- if (!rsv_is_empty(&rsv->rsv_window)) {
- spin_lock(rsv_lock);
-- rsv_window_remove(inode->i_sb, rsv);
-+ if (!rsv_is_empty(&rsv->rsv_window))
-+ rsv_window_remove(inode->i_sb, rsv);
- spin_unlock(rsv_lock);
- }
- }
-diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
---- a/fs/hfs/mdb.c
-+++ b/fs/hfs/mdb.c
-@@ -333,6 +333,8 @@ void hfs_mdb_close(struct super_block *s
- * Release the resources associated with the in-core MDB. */
- void hfs_mdb_put(struct super_block *sb)
- {
-+ if (!HFS_SB(sb))
-+ return;
- /* free the B-trees */
- hfs_btree_close(HFS_SB(sb)->ext_tree);
- hfs_btree_close(HFS_SB(sb)->cat_tree);
-@@ -340,4 +342,7 @@ void hfs_mdb_put(struct super_block *sb)
- /* free the buffers holding the primary and alternate MDBs */
- brelse(HFS_SB(sb)->mdb_bh);
- brelse(HFS_SB(sb)->alt_mdb_bh);
-+
-+ kfree(HFS_SB(sb));
-+ sb->s_fs_info = NULL;
- }
-diff --git a/fs/hfs/super.c b/fs/hfs/super.c
---- a/fs/hfs/super.c
-+++ b/fs/hfs/super.c
-@@ -263,7 +263,7 @@ static int hfs_fill_super(struct super_b
- res = -EINVAL;
- if (!parse_options((char *)data, sbi)) {
- hfs_warn("hfs_fs: unable to parse mount options.\n");
-- goto bail3;
-+ goto bail;
- }
-
- sb->s_op = &hfs_super_operations;
-@@ -276,7 +276,7 @@ static int hfs_fill_super(struct super_b
- hfs_warn("VFS: Can't find a HFS filesystem on dev %s.\n",
- hfs_mdb_name(sb));
- res = -EINVAL;
-- goto bail2;
-+ goto bail;
- }
-
- /* try to get the root inode */
-@@ -306,10 +306,8 @@ bail_iput:
- iput(root_inode);
- bail_no_root:
- hfs_warn("hfs_fs: get root inode failed.\n");
-+bail:
- hfs_mdb_put(sb);
--bail2:
--bail3:
-- kfree(sbi);
- return res;
- }
-
-diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
---- a/fs/hfsplus/super.c
-+++ b/fs/hfsplus/super.c
-@@ -207,7 +207,9 @@ static void hfsplus_write_super(struct s
- static void hfsplus_put_super(struct super_block *sb)
- {
- dprint(DBG_SUPER, "hfsplus_put_super\n");
-- if (!(sb->s_flags & MS_RDONLY)) {
-+ if (!sb->s_fs_info)
-+ return;
-+ if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
- struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
-
- vhdr->modify_date = hfsp_now2mt();
-@@ -223,6 +225,8 @@ static void hfsplus_put_super(struct sup
- iput(HFSPLUS_SB(sb).alloc_file);
- iput(HFSPLUS_SB(sb).hidden_dir);
- brelse(HFSPLUS_SB(sb).s_vhbh);
-+ kfree(sb->s_fs_info);
-+ sb->s_fs_info = NULL;
- }
-
- static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf)
-diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
---- a/fs/isofs/inode.c
-+++ b/fs/isofs/inode.c
-@@ -685,6 +685,8 @@ root_found:
- sbi->s_log_zone_size = isonum_723 (h_pri->logical_block_size);
- sbi->s_max_size = isonum_733(h_pri->volume_space_size);
- } else {
-+ if (!pri)
-+ goto out_freebh;
- rootp = (struct iso_directory_record *) pri->root_directory_record;
- sbi->s_nzones = isonum_733 (pri->volume_space_size);
- sbi->s_log_zone_size = isonum_723 (pri->logical_block_size);
-@@ -1395,6 +1397,9 @@ struct inode *isofs_iget(struct super_bl
- struct inode *inode;
- struct isofs_iget5_callback_data data;
-
-+ if (offset >= 1ul << sb->s_blocksize_bits)
-+ return NULL;
-+
- data.block = block;
- data.offset = offset;
-
-diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
---- a/fs/isofs/rock.c
-+++ b/fs/isofs/rock.c
-@@ -53,6 +53,7 @@
- if(LEN & 1) LEN++; \
- CHR = ((unsigned char *) DE) + LEN; \
- LEN = *((unsigned char *) DE) - LEN; \
-+ if (LEN<0) LEN=0; \
- if (ISOFS_SB(inode->i_sb)->s_rock_offset!=-1) \
- { \
- LEN-=ISOFS_SB(inode->i_sb)->s_rock_offset; \
-@@ -73,6 +74,10 @@
- offset1 = 0; \
- pbh = sb_bread(DEV->i_sb, block); \
- if(pbh){ \
-+ if (offset > pbh->b_size || offset + cont_size > pbh->b_size){ \
-+ brelse(pbh); \
-+ goto out; \
-+ } \
- memcpy(buffer + offset1, pbh->b_data + offset, cont_size - offset1); \
- brelse(pbh); \
- chr = (unsigned char *) buffer; \
-@@ -103,12 +108,13 @@ int get_rock_ridge_filename(struct iso_d
- struct rock_ridge * rr;
- int sig;
-
-- while (len > 1){ /* There may be one byte for padding somewhere */
-+ while (len > 2){ /* There may be one byte for padding somewhere */
- rr = (struct rock_ridge *) chr;
-- if (rr->len == 0) goto out; /* Something got screwed up here */
-+ if (rr->len < 3) goto out; /* Something got screwed up here */
- sig = isonum_721(chr);
- chr += rr->len;
- len -= rr->len;
-+ if (len < 0) goto out; /* corrupted isofs */
-
- switch(sig){
- case SIG('R','R'):
-@@ -122,6 +128,7 @@ int get_rock_ridge_filename(struct iso_d
- break;
- case SIG('N','M'):
- if (truncate) break;
-+ if (rr->len < 5) break;
- /*
- * If the flags are 2 or 4, this indicates '.' or '..'.
- * We don't want to do anything with this, because it
-@@ -186,12 +193,13 @@ parse_rock_ridge_inode_internal(struct i
- struct rock_ridge * rr;
- int rootflag;
-
-- while (len > 1){ /* There may be one byte for padding somewhere */
-+ while (len > 2){ /* There may be one byte for padding somewhere */
- rr = (struct rock_ridge *) chr;
-- if (rr->len == 0) goto out; /* Something got screwed up here */
-+ if (rr->len < 3) goto out; /* Something got screwed up here */
- sig = isonum_721(chr);
- chr += rr->len;
- len -= rr->len;
-+ if (len < 0) goto out; /* corrupted isofs */
-
- switch(sig){
- #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
-@@ -462,7 +470,7 @@ static int rock_ridge_symlink_readpage(s
- struct rock_ridge *rr;
-
- if (!ISOFS_SB(inode->i_sb)->s_rock)
-- panic ("Cannot have symlink with high sierra variant of iso filesystem\n");
-+ goto error;
-
- block = ei->i_iget5_block;
- lock_kernel();
-@@ -487,13 +495,15 @@ static int rock_ridge_symlink_readpage(s
- SETUP_ROCK_RIDGE(raw_inode, chr, len);
-
- repeat:
-- while (len > 1) { /* There may be one byte for padding somewhere */
-+ while (len > 2) { /* There may be one byte for padding somewhere */
- rr = (struct rock_ridge *) chr;
-- if (rr->len == 0)
-+ if (rr->len < 3)
- goto out; /* Something got screwed up here */
- sig = isonum_721(chr);
- chr += rr->len;
- len -= rr->len;
-+ if (len < 0)
-+ goto out; /* corrupted isofs */
-
- switch (sig) {
- case SIG('R', 'R'):
-@@ -543,6 +553,7 @@ static int rock_ridge_symlink_readpage(s
- fail:
- brelse(bh);
- unlock_kernel();
-+ error:
- SetPageError(page);
- kunmap(page);
- unlock_page(page);
-diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
---- a/fs/jbd/checkpoint.c
-+++ b/fs/jbd/checkpoint.c
-@@ -339,8 +339,10 @@ int log_do_checkpoint(journal_t *journal
- }
- } while (jh != last_jh && !retry);
-
-- if (batch_count)
-+ if (batch_count) {
- __flush_batch(journal, bhs, &batch_count);
-+ retry = 1;
-+ }
-
- /*
- * If someone cleaned up this transaction while we slept, we're
-diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
---- a/fs/jbd/transaction.c
-+++ b/fs/jbd/transaction.c
-@@ -1775,10 +1775,10 @@ static int journal_unmap_buffer(journal_
- JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
- ret = __dispose_buffer(jh,
- journal->j_running_transaction);
-+ journal_put_journal_head(jh);
- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
-- journal_put_journal_head(jh);
- return ret;
- } else {
- /* There is no currently-running transaction. So the
-@@ -1789,10 +1789,10 @@ static int journal_unmap_buffer(journal_
- JBUFFER_TRACE(jh, "give to committing trans");
- ret = __dispose_buffer(jh,
- journal->j_committing_transaction);
-+ journal_put_journal_head(jh);
- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
-- journal_put_journal_head(jh);
- return ret;
- } else {
- /* The orphan record's transaction has
-@@ -1813,10 +1813,10 @@ static int journal_unmap_buffer(journal_
- journal->j_running_transaction);
- jh->b_next_transaction = NULL;
- }
-+ journal_put_journal_head(jh);
- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
-- journal_put_journal_head(jh);
- return 0;
- } else {
- /* Good, the buffer belongs to the running transaction.
-diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
---- a/include/asm-x86_64/processor.h
-+++ b/include/asm-x86_64/processor.h
-@@ -160,9 +160,9 @@ static inline void clear_in_cr4 (unsigne
-
-
- /*
-- * User space process size. 47bits.
-+ * User space process size. 47bits minus one guard page.
- */
--#define TASK_SIZE (0x800000000000UL)
-+#define TASK_SIZE (0x800000000000UL - 4096)
-
- /* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
-diff --git a/include/linux/err.h b/include/linux/err.h
---- a/include/linux/err.h
-+++ b/include/linux/err.h
-@@ -13,6 +13,8 @@
- * This should be a per-architecture thing, to allow different
- * error and pointer decisions.
- */
-+#define IS_ERR_VALUE(x) unlikely((x) > (unsigned long)-1000L)
-+
- static inline void *ERR_PTR(long error)
- {
- return (void *) error;
-@@ -25,7 +27,7 @@ static inline long PTR_ERR(const void *p
-
- static inline long IS_ERR(const void *ptr)
- {
-- return unlikely((unsigned long)ptr > (unsigned long)-1000L);
-+ return IS_ERR_VALUE((unsigned long)ptr);
- }
-
- #endif /* _LINUX_ERR_H */
-diff --git a/kernel/exit.c b/kernel/exit.c
---- a/kernel/exit.c
-+++ b/kernel/exit.c
-@@ -516,8 +516,6 @@ static inline void choose_new_parent(tas
- */
- BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE);
- p->real_parent = reaper;
-- if (p->parent == p->real_parent)
-- BUG();
- }
-
- static inline void reparent_thread(task_t *p, task_t *father, int traced)
-diff --git a/kernel/signal.c b/kernel/signal.c
---- a/kernel/signal.c
-+++ b/kernel/signal.c
-@@ -1728,6 +1728,7 @@ do_signal_stop(int signr)
- * with another processor delivering a stop signal,
- * then the SIGCONT that wakes us up should clear it.
- */
-+ read_unlock(&tasklist_lock);
- return 0;
- }
-
-diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
---- a/lib/rwsem-spinlock.c
-+++ b/lib/rwsem-spinlock.c
-@@ -140,12 +140,12 @@ void fastcall __sched __down_read(struct
-
- rwsemtrace(sem, "Entering __down_read");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irq(&sem->wait_lock);
-
- if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
- /* granted */
- sem->activity++;
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irq(&sem->wait_lock);
- goto out;
- }
-
-@@ -160,7 +160,7 @@ void fastcall __sched __down_read(struct
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we don't need to touch the semaphore struct anymore */
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irq(&sem->wait_lock);
-
- /* wait to be given the lock */
- for (;;) {
-@@ -181,10 +181,12 @@ void fastcall __sched __down_read(struct
- */
- int fastcall __down_read_trylock(struct rw_semaphore *sem)
- {
-+ unsigned long flags;
- int ret = 0;
-+
- rwsemtrace(sem, "Entering __down_read_trylock");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
- /* granted */
-@@ -192,7 +194,7 @@ int fastcall __down_read_trylock(struct
- ret = 1;
- }
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- rwsemtrace(sem, "Leaving __down_read_trylock");
- return ret;
-@@ -209,12 +211,12 @@ void fastcall __sched __down_write(struc
-
- rwsemtrace(sem, "Entering __down_write");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irq(&sem->wait_lock);
-
- if (sem->activity == 0 && list_empty(&sem->wait_list)) {
- /* granted */
- sem->activity = -1;
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irq(&sem->wait_lock);
- goto out;
- }
-
-@@ -229,7 +231,7 @@ void fastcall __sched __down_write(struc
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we don't need to touch the semaphore struct anymore */
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irq(&sem->wait_lock);
-
- /* wait to be given the lock */
- for (;;) {
-@@ -250,10 +252,12 @@ void fastcall __sched __down_write(struc
- */
- int fastcall __down_write_trylock(struct rw_semaphore *sem)
- {
-+ unsigned long flags;
- int ret = 0;
-+
- rwsemtrace(sem, "Entering __down_write_trylock");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (sem->activity == 0 && list_empty(&sem->wait_list)) {
- /* granted */
-@@ -261,7 +265,7 @@ int fastcall __down_write_trylock(struct
- ret = 1;
- }
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- rwsemtrace(sem, "Leaving __down_write_trylock");
- return ret;
-@@ -272,14 +276,16 @@ int fastcall __down_write_trylock(struct
- */
- void fastcall __up_read(struct rw_semaphore *sem)
- {
-+ unsigned long flags;
-+
- rwsemtrace(sem, "Entering __up_read");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (--sem->activity == 0 && !list_empty(&sem->wait_list))
- sem = __rwsem_wake_one_writer(sem);
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- rwsemtrace(sem, "Leaving __up_read");
- }
-@@ -289,15 +295,17 @@ void fastcall __up_read(struct rw_semaph
- */
- void fastcall __up_write(struct rw_semaphore *sem)
- {
-+ unsigned long flags;
-+
- rwsemtrace(sem, "Entering __up_write");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irqsave(&sem->wait_lock, flags);
-
- sem->activity = 0;
- if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 1);
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- rwsemtrace(sem, "Leaving __up_write");
- }
-@@ -308,15 +316,17 @@ void fastcall __up_write(struct rw_semap
- */
- void fastcall __downgrade_write(struct rw_semaphore *sem)
- {
-+ unsigned long flags;
-+
- rwsemtrace(sem, "Entering __downgrade_write");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irqsave(&sem->wait_lock, flags);
-
- sem->activity = 1;
- if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 0);
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- rwsemtrace(sem, "Leaving __downgrade_write");
- }
-diff --git a/lib/rwsem.c b/lib/rwsem.c
---- a/lib/rwsem.c
-+++ b/lib/rwsem.c
-@@ -150,7 +150,7 @@ rwsem_down_failed_common(struct rw_semap
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
- /* set up my own style of waitqueue */
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irq(&sem->wait_lock);
- waiter->task = tsk;
- get_task_struct(tsk);
-
-@@ -163,7 +163,7 @@ rwsem_down_failed_common(struct rw_semap
- if (!(count & RWSEM_ACTIVE_MASK))
- sem = __rwsem_do_wake(sem, 0);
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irq(&sem->wait_lock);
-
- /* wait to be given the lock */
- for (;;) {
-@@ -219,15 +219,17 @@ rwsem_down_write_failed(struct rw_semaph
- */
- struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
- {
-+ unsigned long flags;
-+
- rwsemtrace(sem, "Entering rwsem_wake");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irqsave(&sem->wait_lock, flags);
-
- /* do nothing if list empty */
- if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 0);
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- rwsemtrace(sem, "Leaving rwsem_wake");
-
-@@ -241,15 +243,17 @@ struct rw_semaphore fastcall *rwsem_wake
- */
- struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
- {
-+ unsigned long flags;
-+
- rwsemtrace(sem, "Entering rwsem_downgrade_wake");
-
-- spin_lock(&sem->wait_lock);
-+ spin_lock_irqsave(&sem->wait_lock, flags);
-
- /* do nothing if list empty */
- if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 1);
-
-- spin_unlock(&sem->wait_lock);
-+ spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- rwsemtrace(sem, "Leaving rwsem_downgrade_wake");
- return sem;
-diff --git a/mm/mmap.c b/mm/mmap.c
---- a/mm/mmap.c
-+++ b/mm/mmap.c
-@@ -1315,37 +1315,40 @@ unsigned long
- get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
- {
-- if (flags & MAP_FIXED) {
-- unsigned long ret;
-+ unsigned long ret;
-
-- if (addr > TASK_SIZE - len)
-- return -ENOMEM;
-- if (addr & ~PAGE_MASK)
-- return -EINVAL;
-- if (file && is_file_hugepages(file)) {
-- /*
-- * Check if the given range is hugepage aligned, and
-- * can be made suitable for hugepages.
-- */
-- ret = prepare_hugepage_range(addr, len);
-- } else {
-- /*
-- * Ensure that a normal request is not falling in a
-- * reserved hugepage range. For some archs like IA-64,
-- * there is a separate region for hugepages.
-- */
-- ret = is_hugepage_only_range(addr, len);
-- }
-- if (ret)
-- return -EINVAL;
-- return addr;
-- }
-+ if (!(flags & MAP_FIXED)) {
-+ unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
-
-- if (file && file->f_op && file->f_op->get_unmapped_area)
-- return file->f_op->get_unmapped_area(file, addr, len,
-- pgoff, flags);
-+ get_area = current->mm->get_unmapped_area;
-+ if (file && file->f_op && file->f_op->get_unmapped_area)
-+ get_area = file->f_op->get_unmapped_area;
-+ addr = get_area(file, addr, len, pgoff, flags);
-+ if (IS_ERR_VALUE(addr))
-+ return addr;
-+ }
-
-- return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
-+ if (addr > TASK_SIZE - len)
-+ return -ENOMEM;
-+ if (addr & ~PAGE_MASK)
-+ return -EINVAL;
-+ if (file && is_file_hugepages(file)) {
-+ /*
-+ * Check if the given range is hugepage aligned, and
-+ * can be made suitable for hugepages.
-+ */
-+ ret = prepare_hugepage_range(addr, len);
-+ } else {
-+ /*
-+ * Ensure that a normal request is not falling in a
-+ * reserved hugepage range. For some archs like IA-64,
-+ * there is a separate region for hugepages.
-+ */
-+ ret = is_hugepage_only_range(addr, len);
-+ }
-+ if (ret)
-+ return -EINVAL;
-+ return addr;
- }
-
- EXPORT_SYMBOL(get_unmapped_area);
-diff --git a/mm/rmap.c b/mm/rmap.c
---- a/mm/rmap.c
-+++ b/mm/rmap.c
-@@ -641,7 +641,7 @@ static void try_to_unmap_cluster(unsigne
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-- pte_t *pte;
-+ pte_t *pte, *original_pte;
- pte_t pteval;
- struct page *page;
- unsigned long address;
-@@ -673,7 +673,7 @@ static void try_to_unmap_cluster(unsigne
- if (!pmd_present(*pmd))
- goto out_unlock;
-
-- for (pte = pte_offset_map(pmd, address);
-+ for (original_pte = pte = pte_offset_map(pmd, address);
- address < end; pte++, address += PAGE_SIZE) {
-
- if (!pte_present(*pte))
-@@ -710,7 +710,7 @@ static void try_to_unmap_cluster(unsigne
- (*mapcount)--;
- }
-
-- pte_unmap(pte);
-+ pte_unmap(original_pte);
-
- out_unlock:
- spin_unlock(&mm->page_table_lock);
-diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
---- a/net/bluetooth/af_bluetooth.c
-+++ b/net/bluetooth/af_bluetooth.c
-@@ -64,7 +64,7 @@ static kmem_cache_t *bt_sock_cache;
-
- int bt_sock_register(int proto, struct net_proto_family *ops)
- {
-- if (proto >= BT_MAX_PROTO)
-+ if (proto < 0 || proto >= BT_MAX_PROTO)
- return -EINVAL;
-
- if (bt_proto[proto])
-@@ -77,7 +77,7 @@ EXPORT_SYMBOL(bt_sock_register);
-
- int bt_sock_unregister(int proto)
- {
-- if (proto >= BT_MAX_PROTO)
-+ if (proto < 0 || proto >= BT_MAX_PROTO)
- return -EINVAL;
-
- if (!bt_proto[proto])
-@@ -92,7 +92,7 @@ static int bt_sock_create(struct socket
- {
- int err = 0;
-
-- if (proto >= BT_MAX_PROTO)
-+ if (proto < 0 || proto >= BT_MAX_PROTO)
- return -EINVAL;
-
- #if defined(CONFIG_KMOD)
-diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
---- a/net/bridge/br_input.c
-+++ b/net/bridge/br_input.c
-@@ -54,6 +54,9 @@ int br_handle_frame_finish(struct sk_buf
- struct net_bridge_fdb_entry *dst;
- int passedup = 0;
-
-+ /* insert into forwarding database after filtering to avoid spoofing */
-+ br_fdb_insert(p->br, p, eth_hdr(skb)->h_source, 0);
-+
- if (br->dev->flags & IFF_PROMISC) {
- struct sk_buff *skb2;
-
-@@ -108,8 +111,7 @@ int br_handle_frame(struct net_bridge_po
- if (eth_hdr(skb)->h_source[0] & 1)
- goto err;
-
-- if (p->state == BR_STATE_LEARNING ||
-- p->state == BR_STATE_FORWARDING)
-+ if (p->state == BR_STATE_LEARNING)
- br_fdb_insert(p->br, p, eth_hdr(skb)->h_source, 0);
-
- if (p->br->stp_enabled &&
-diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
---- a/net/bridge/br_stp_bpdu.c
-+++ b/net/bridge/br_stp_bpdu.c
-@@ -140,6 +140,9 @@ int br_stp_handle_bpdu(struct sk_buff *s
- struct net_bridge *br = p->br;
- unsigned char *buf;
-
-+ /* insert into forwarding database after filtering to avoid spoofing */
-+ br_fdb_insert(p->br, p, eth_hdr(skb)->h_source, 0);
-+
- /* need at least the 802 and STP headers */
- if (!pskb_may_pull(skb, sizeof(header)+1) ||
- memcmp(skb->data, header, sizeof(header)))
-diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
---- a/net/bridge/netfilter/ebtables.c
-+++ b/net/bridge/netfilter/ebtables.c
-@@ -179,9 +179,10 @@ unsigned int ebt_do_table (unsigned int
- struct ebt_chainstack *cs;
- struct ebt_entries *chaininfo;
- char *base;
-- struct ebt_table_info *private = table->private;
-+ struct ebt_table_info *private;
-
- read_lock_bh(&table->lock);
-+ private = table->private;
- cb_base = COUNTER_BASE(private->counters, private->nentries,
- smp_processor_id());
- if (private->chainstack)
-diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
---- a/net/ipv4/fib_hash.c
-+++ b/net/ipv4/fib_hash.c
-@@ -919,13 +919,23 @@ out:
- return fa;
- }
-
-+static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
-+{
-+ struct fib_alias *fa = fib_get_first(seq);
-+
-+ if (fa)
-+ while (pos && (fa = fib_get_next(seq)))
-+ --pos;
-+ return pos ? NULL : fa;
-+}
-+
- static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
- {
- void *v = NULL;
-
- read_lock(&fib_hash_lock);
- if (ip_fib_main_table)
-- v = *pos ? fib_get_next(seq) : SEQ_START_TOKEN;
-+ v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
- return v;
- }
-
-diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
---- a/net/ipv4/netfilter/ip_queue.c
-+++ b/net/ipv4/netfilter/ip_queue.c
-@@ -3,6 +3,7 @@
- * communicating with userspace via netlink.
- *
- * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
-+ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
-@@ -14,6 +15,7 @@
- * Zander).
- * 2000-08-01: Added Nick Williams' MAC support.
- * 2002-06-25: Code cleanup.
-+ * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
- *
- */
- #include <linux/module.h>
-@@ -66,7 +68,15 @@ static DECLARE_MUTEX(ipqnl_sem);
- static void
- ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
- {
-+ /* TCP input path (and probably other bits) assume to be called
-+ * from softirq context, not from syscall, like ipq_issue_verdict is
-+ * called. TCP input path deadlocks with locks taken from timer
-+ * softirq, e.g. We therefore emulate this by local_bh_disable() */
-+
-+ local_bh_disable();
- nf_reinject(entry->skb, entry->info, verdict);
-+ local_bh_enable();
-+
- kfree(entry);
- }
-
-diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -1653,7 +1653,10 @@ static void DBGUNDO(struct sock *sk, str
- static void tcp_undo_cwr(struct tcp_sock *tp, int undo)
- {
- if (tp->prior_ssthresh) {
-- tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
-+ if (tcp_is_bic(tp))
-+ tp->snd_cwnd = max(tp->snd_cwnd, tp->bictcp.last_max_cwnd);
-+ else
-+ tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
-
- if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
- tp->snd_ssthresh = tp->prior_ssthresh;
-diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
---- a/net/ipv4/tcp_timer.c
-+++ b/net/ipv4/tcp_timer.c
-@@ -38,6 +38,7 @@ static void tcp_keepalive_timer (unsigne
-
- #ifdef TCP_DEBUG
- const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
-+EXPORT_SYMBOL(tcp_timer_bug_msg);
- #endif
-
- /*
-diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
---- a/net/ipv4/xfrm4_output.c
-+++ b/net/ipv4/xfrm4_output.c
-@@ -103,17 +103,17 @@ int xfrm4_output(struct sk_buff *skb)
- goto error_nolock;
- }
-
-- spin_lock_bh(&x->lock);
-- err = xfrm_state_check(x, skb);
-- if (err)
-- goto error;
--
- if (x->props.mode) {
- err = xfrm4_tunnel_check_size(skb);
- if (err)
-- goto error;
-+ goto error_nolock;
- }
-
-+ spin_lock_bh(&x->lock);
-+ err = xfrm_state_check(x, skb);
-+ if (err)
-+ goto error;
-+
- xfrm4_encap(skb);
-
- err = x->type->output(skb);
-diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
---- a/net/ipv6/xfrm6_output.c
-+++ b/net/ipv6/xfrm6_output.c
-@@ -103,17 +103,17 @@ int xfrm6_output(struct sk_buff *skb)
- goto error_nolock;
- }
-
-- spin_lock_bh(&x->lock);
-- err = xfrm_state_check(x, skb);
-- if (err)
-- goto error;
--
- if (x->props.mode) {
- err = xfrm6_tunnel_check_size(skb);
- if (err)
-- goto error;
-+ goto error_nolock;
- }
-
-+ spin_lock_bh(&x->lock);
-+ err = xfrm_state_check(x, skb);
-+ if (err)
-+ goto error;
-+
- xfrm6_encap(skb);
-
- err = x->type->output(skb);
-diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
---- a/net/netrom/nr_in.c
-+++ b/net/netrom/nr_in.c
-@@ -74,7 +74,6 @@ static int nr_queue_rx_frame(struct sock
- static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
- int frametype)
- {
-- bh_lock_sock(sk);
- switch (frametype) {
- case NR_CONNACK: {
- nr_cb *nr = nr_sk(sk);
-@@ -103,8 +102,6 @@ static int nr_state1_machine(struct sock
- default:
- break;
- }
-- bh_unlock_sock(sk);
--
- return 0;
- }
-
-@@ -116,7 +113,6 @@ static int nr_state1_machine(struct sock
- static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
- int frametype)
- {
-- bh_lock_sock(sk);
- switch (frametype) {
- case NR_CONNACK | NR_CHOKE_FLAG:
- nr_disconnect(sk, ECONNRESET);
-@@ -132,8 +128,6 @@ static int nr_state2_machine(struct sock
- default:
- break;
- }
-- bh_unlock_sock(sk);
--
- return 0;
- }
-
-@@ -154,7 +148,6 @@ static int nr_state3_machine(struct sock
- nr = skb->data[18];
- ns = skb->data[17];
-
-- bh_lock_sock(sk);
- switch (frametype) {
- case NR_CONNREQ:
- nr_write_internal(sk, NR_CONNACK);
-@@ -265,8 +258,6 @@ static int nr_state3_machine(struct sock
- default:
- break;
- }
-- bh_unlock_sock(sk);
--
- return queued;
- }
-
-diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
---- a/net/rose/rose_route.c
-+++ b/net/rose/rose_route.c
-@@ -727,7 +727,8 @@ int rose_rt_ioctl(unsigned int cmd, void
- }
- if (rose_route.mask > 10) /* Mask can't be more than 10 digits */
- return -EINVAL;
--
-+ if (rose_route.ndigis > 8) /* No more than 8 digipeats */
-+ return -EINVAL;
- err = rose_add_node(&rose_route, dev);
- dev_put(dev);
- return err;
-diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
---- a/net/sched/sch_netem.c
-+++ b/net/sched/sch_netem.c
-@@ -184,10 +184,15 @@ static int netem_enqueue(struct sk_buff
- /* Random duplication */
- if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
--
-- pr_debug("netem_enqueue: dup %p\n", skb2);
-- if (skb2)
-- delay_skb(sch, skb2);
-+ if (skb2) {
-+ struct Qdisc *rootq = sch->dev->qdisc;
-+ u32 dupsave = q->duplicate;
-+
-+ /* prevent duplicating a dup... */
-+ q->duplicate = 0;
-+ rootq->enqueue(skb2, rootq);
-+ q->duplicate = dupsave;
-+ }
- }
-
- /* If doing simple delay then gap == 0 so all packets
-diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
---- a/net/xfrm/xfrm_state.c
-+++ b/net/xfrm/xfrm_state.c
-@@ -609,7 +609,7 @@ static struct xfrm_state *__xfrm_find_ac
-
- for (i = 0; i < XFRM_DST_HSIZE; i++) {
- list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-- if (x->km.seq == seq) {
-+ if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
- xfrm_state_hold(x);
- return x;
- }
-diff --git a/security/keys/key.c b/security/keys/key.c
---- a/security/keys/key.c
-+++ b/security/keys/key.c
-@@ -57,9 +57,10 @@ struct key_user *key_user_lookup(uid_t u
- {
- struct key_user *candidate = NULL, *user;
- struct rb_node *parent = NULL;
-- struct rb_node **p = &key_user_tree.rb_node;
-+ struct rb_node **p;
-
- try_again:
-+ p = &key_user_tree.rb_node;
- spin_lock(&key_user_lock);
-
- /* search the tree for a user record with a matching UID */
-diff --git a/sound/core/timer.c b/sound/core/timer.c
---- a/sound/core/timer.c
-+++ b/sound/core/timer.c
-@@ -1117,7 +1117,8 @@ static void snd_timer_user_append_to_tqu
- if (tu->qused >= tu->queue_size) {
- tu->overrun++;
- } else {
-- memcpy(&tu->queue[tu->qtail++], tread, sizeof(*tread));
-+ memcpy(&tu->tqueue[tu->qtail++], tread, sizeof(*tread));
-+ tu->qtail %= tu->queue_size;
- tu->qused++;
- }
- }
-@@ -1140,6 +1141,8 @@ static void snd_timer_user_ccallback(snd
- spin_lock(&tu->qlock);
- snd_timer_user_append_to_tqueue(tu, &r1);
- spin_unlock(&tu->qlock);
-+ kill_fasync(&tu->fasync, SIGIO, POLL_IN);
-+ wake_up(&tu->qchange_sleep);
- }
-
- static void snd_timer_user_tinterrupt(snd_timer_instance_t *timeri,
-diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
---- a/sound/pci/ac97/ac97_codec.c
-+++ b/sound/pci/ac97/ac97_codec.c
-@@ -1185,7 +1185,7 @@ snd_kcontrol_t *snd_ac97_cnew(const snd_
- /*
- * create mute switch(es) for normal stereo controls
- */
--static int snd_ac97_cmute_new(snd_card_t *card, char *name, int reg, ac97_t *ac97)
-+static int snd_ac97_cmute_new_stereo(snd_card_t *card, char *name, int reg, int check_stereo, ac97_t *ac97)
- {
- snd_kcontrol_t *kctl;
- int err;
-@@ -1196,7 +1196,7 @@ static int snd_ac97_cmute_new(snd_card_t
-
- mute_mask = 0x8000;
- val = snd_ac97_read(ac97, reg);
-- if (ac97->flags & AC97_STEREO_MUTES) {
-+ if (check_stereo || (ac97->flags & AC97_STEREO_MUTES)) {
- /* check whether both mute bits work */
- val1 = val | 0x8080;
- snd_ac97_write(ac97, reg, val1);
-@@ -1254,7 +1254,7 @@ static int snd_ac97_cvol_new(snd_card_t
- /*
- * create a mute-switch and a volume for normal stereo/mono controls
- */
--static int snd_ac97_cmix_new(snd_card_t *card, const char *pfx, int reg, ac97_t *ac97)
-+static int snd_ac97_cmix_new_stereo(snd_card_t *card, const char *pfx, int reg, int check_stereo, ac97_t *ac97)
- {
- int err;
- char name[44];
-@@ -1265,7 +1265,7 @@ static int snd_ac97_cmix_new(snd_card_t
-
- if (snd_ac97_try_bit(ac97, reg, 15)) {
- sprintf(name, "%s Switch", pfx);
-- if ((err = snd_ac97_cmute_new(card, name, reg, ac97)) < 0)
-+ if ((err = snd_ac97_cmute_new_stereo(card, name, reg, check_stereo, ac97)) < 0)
- return err;
- }
- check_volume_resolution(ac97, reg, &lo_max, &hi_max);
-@@ -1277,6 +1277,8 @@ static int snd_ac97_cmix_new(snd_card_t
- return 0;
- }
-
-+#define snd_ac97_cmix_new(card, pfx, reg, ac97) snd_ac97_cmix_new_stereo(card, pfx, reg, 0, ac97)
-+#define snd_ac97_cmute_new(card, name, reg, ac97) snd_ac97_cmute_new_stereo(card, name, reg, 0, ac97)
-
- static unsigned int snd_ac97_determine_spdif_rates(ac97_t *ac97);
-
-@@ -1327,7 +1329,8 @@ static int snd_ac97_mixer_build(ac97_t *
-
- /* build surround controls */
- if (snd_ac97_try_volume_mix(ac97, AC97_SURROUND_MASTER)) {
-- if ((err = snd_ac97_cmix_new(card, "Surround Playback", AC97_SURROUND_MASTER, ac97)) < 0)
-+ /* Surround Master (0x38) is with stereo mutes */
-+ if ((err = snd_ac97_cmix_new_stereo(card, "Surround Playback", AC97_SURROUND_MASTER, 1, ac97)) < 0)
- return err;
- }
-
-diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
---- a/sound/usb/usbaudio.c
-+++ b/sound/usb/usbaudio.c
-@@ -3276,7 +3276,7 @@ static void snd_usb_audio_disconnect(str
- }
- usb_chip[chip->index] = NULL;
- up(&register_mutex);
-- snd_card_free_in_thread(card);
-+ snd_card_free(card);
- } else {
- up(&register_mutex);
- }
-diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c
---- a/sound/usb/usx2y/usbusx2y.c
-+++ b/sound/usb/usx2y/usbusx2y.c
-@@ -1,6 +1,11 @@
- /*
- * usbusy2y.c - ALSA USB US-428 Driver
- *
-+2005-04-14 Karsten Wiese
-+ Version 0.8.7.2:
-+ Call snd_card_free() instead of snd_card_free_in_thread() to prevent oops with dead keyboard symptom.
-+ Tested ok with kernel 2.6.12-rc2.
-+
- 2004-12-14 Karsten Wiese
- Version 0.8.7.1:
- snd_pcm_open for rawusb pcm-devices now returns -EBUSY if called without rawusb's hwdep device being open.
-@@ -143,7 +148,7 @@
-
-
- MODULE_AUTHOR("Karsten Wiese <annabellesgarden@yahoo.de>");
--MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.8.7.1");
-+MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.8.7.2");
- MODULE_LICENSE("GPL");
- MODULE_SUPPORTED_DEVICE("{{TASCAM(0x1604), "NAME_ALLCAPS"(0x8001)(0x8005)(0x8007) }}");
-
-@@ -430,8 +435,6 @@ static void usX2Y_usb_disconnect(struct
- if (ptr) {
- usX2Ydev_t* usX2Y = usX2Y((snd_card_t*)ptr);
- struct list_head* p;
-- if (usX2Y->chip_status == USX2Y_STAT_CHIP_HUP) // on 2.6.1 kernel snd_usbmidi_disconnect()
-- return; // calls us back. better leave :-) .
- usX2Y->chip.shutdown = 1;
- usX2Y->chip_status = USX2Y_STAT_CHIP_HUP;
- usX2Y_unlinkSeq(&usX2Y->AS04);
-@@ -443,7 +446,7 @@ static void usX2Y_usb_disconnect(struct
- }
- if (usX2Y->us428ctls_sharedmem)
- wake_up(&usX2Y->us428ctls_wait_queue_head);
-- snd_card_free_in_thread((snd_card_t*)ptr);
-+ snd_card_free((snd_card_t*)ptr);
- }
- }
-
diff --git a/patches/linux-2.6.11/udp-frag.patch b/patches/linux-2.6.11/udp-frag.patch
deleted file mode 100644
index 9e8a26eb20..0000000000
--- a/patches/linux-2.6.11/udp-frag.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
---- a/net/ipv4/udp.c
-+++ b/net/ipv4/udp.c
-@@ -738,7 +738,7 @@ int udp_ioctl(struct sock *sk, int cmd,
- unsigned long amount;
-
- amount = 0;
-- spin_lock_irq(&sk->sk_receive_queue.lock);
-+ spin_lock_bh(&sk->sk_receive_queue.lock);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL) {
- /*
-@@ -748,7 +748,7 @@ int udp_ioctl(struct sock *sk, int cmd,
- */
- amount = skb->len - sizeof(struct udphdr);
- }
-- spin_unlock_irq(&sk->sk_receive_queue.lock);
-+ spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
- }
-
-@@ -848,12 +848,12 @@ csum_copy_err:
- /* Clear queue. */
- if (flags&MSG_PEEK) {
- int clear = 0;
-- spin_lock_irq(&sk->sk_receive_queue.lock);
-+ spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- clear = 1;
- }
-- spin_unlock_irq(&sk->sk_receive_queue.lock);
-+ spin_unlock_bh(&sk->sk_receive_queue.lock);
- if (clear)
- kfree_skb(skb);
- }
-@@ -1334,7 +1334,7 @@ unsigned int udp_poll(struct file *file,
- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
- struct sk_buff *skb;
-
-- spin_lock_irq(&rcvq->lock);
-+ spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL) {
- if (udp_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
-@@ -1345,7 +1345,7 @@ unsigned int udp_poll(struct file *file,
- break;
- }
- }
-- spin_unlock_irq(&rcvq->lock);
-+ spin_unlock_bh(&rcvq->lock);
-
- /* nothing to see, move along */
- if (skb == NULL)
-
diff --git a/patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch b/patches/linux-2.6.12/i386-cpu-hotplug-updated-for-mm.patch
index ec39143743..abd3c2af3c 100644
--- a/patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch
+++ b/patches/linux-2.6.12/i386-cpu-hotplug-updated-for-mm.patch
@@ -1,65 +1,7 @@
-
-From: Zwane Mwaikambo <zwane@linuxpower.ca>
-
-Find attached the i386 cpu hotplug patch updated for Ingo's latest round of
-goodies. In order to avoid dumping cpu hotplug code into kernel/irq/* i
-dropped the cpu_online check in do_IRQ() by modifying fixup_irqs(). The
-difference being that on cpu offline, fixup_irqs() is called before we
-clear the cpu from cpu_online_map and a long delay in order to ensure that
-we never have any queued external interrupts on the APICs. Due to my usual
-test victims being in boxes a continent away this hasn't been tested, but
-i'll cover bug reports (nudge, Nathan! ;)
-
-1) Add CONFIG_HOTPLUG_CPU
-2) disable local APIC timer on dead cpus.
-3) Disable preempt around irq balancing to prevent CPUs going down.
-4) Print irq stats for all possible cpus.
-5) Debugging check for interrupts on offline cpus.
-6) Hacky fixup_irqs() to redirect irqs when cpus go off/online.
-7) play_dead() for offline cpus to spin inside.
-8) Handle offline cpus set in flush_tlb_others().
-9) Grab lock earlier in smp_call_function() to prevent CPUs going down.
-10) Implement __cpu_disable() and __cpu_die().
-11) Enable local interrupts in cpu_enable() after fixup_irqs()
-12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus.
-13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline.
-
-Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
-DESC
-ppc64: fix hotplug cpu
-EDESC
-From: Zwane Mwaikambo <zwane@fsmlabs.com>
-
-I seem to have broken this when I moved the clearing of the dying cpu to
-arch specific code.
-
-Signed-off-by: Zwane Mwaikambo <zwane@fsmlabs.com>
-Signed-off-by: Andrew Morton <akpm@osdl.org>
----
-
- 25-akpm/arch/i386/Kconfig | 9 ++
- 25-akpm/arch/i386/kernel/apic.c | 3
- 25-akpm/arch/i386/kernel/io_apic.c | 2
- 25-akpm/arch/i386/kernel/irq.c | 66 +++++++++++++++++----
- 25-akpm/arch/i386/kernel/msr.c | 2
- 25-akpm/arch/i386/kernel/process.c | 35 +++++++++++
- 25-akpm/arch/i386/kernel/smp.c | 25 +++++---
- 25-akpm/arch/i386/kernel/smpboot.c | 98 ++++++++++++++++++++++++++++++--
- 25-akpm/arch/i386/kernel/traps.c | 8 ++
- 25-akpm/arch/ia64/kernel/smpboot.c | 3
- 25-akpm/arch/ppc64/kernel/pSeries_smp.c | 5 +
- 25-akpm/arch/s390/kernel/smp.c | 4 -
- 25-akpm/include/asm-i386/cpu.h | 2
- 25-akpm/include/asm-i386/irq.h | 4 +
- 25-akpm/include/asm-i386/smp.h | 3
- 25-akpm/kernel/cpu.c | 14 +---
- arch/ppc64/kernel/smp.c | 0
- 17 files changed, 242 insertions(+), 41 deletions(-)
-
-diff -puN arch/i386/Kconfig~i386-cpu-hotplug-updated-for-mm arch/i386/Kconfig
---- 25/arch/i386/Kconfig~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/Kconfig 2005-02-23 02:20:06.000000000 -0800
-@@ -1205,6 +1205,15 @@ config SCx200
+diff -Naur linux-2.6.12.orig/arch/i386/Kconfig linux-2.6.12/arch/i386/Kconfig
+--- linux-2.6.12.orig/arch/i386/Kconfig 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/Kconfig 2005-07-08 12:34:10.000000000 -0400
+@@ -1226,6 +1226,15 @@
This support is also available as a module. If compiled as a
module, it will be called scx200.
@@ -75,9 +17,9 @@ diff -puN arch/i386/Kconfig~i386-cpu-hotplug-updated-for-mm arch/i386/Kconfig
source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
-diff -puN arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/apic.c
---- 25/arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/apic.c 2005-02-23 02:20:06.000000000 -0800
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/apic.c linux-2.6.12/arch/i386/kernel/apic.c
+--- linux-2.6.12.orig/arch/i386/kernel/apic.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/apic.c 2005-07-08 12:34:10.000000000 -0400
@@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
@@ -86,7 +28,7 @@ diff -puN arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kern
#include <asm/atomic.h>
#include <asm/smp.h>
-@@ -1048,7 +1049,7 @@ void __init setup_secondary_APIC_clock(v
+@@ -1048,7 +1049,7 @@
setup_APIC_timer(calibration_result);
}
@@ -95,10 +37,10 @@ diff -puN arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kern
{
if (using_apic_timer) {
unsigned long v;
-diff -puN arch/i386/kernel/io_apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/io_apic.c
---- 25/arch/i386/kernel/io_apic.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/io_apic.c 2005-02-23 02:20:06.000000000 -0800
-@@ -576,9 +576,11 @@ static int balanced_irq(void *unused)
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/io_apic.c linux-2.6.12/arch/i386/kernel/io_apic.c
+--- linux-2.6.12.orig/arch/i386/kernel/io_apic.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/io_apic.c 2005-07-08 12:34:10.000000000 -0400
+@@ -576,9 +576,11 @@
try_to_freeze(PF_FREEZE);
if (time_after(jiffies,
prev_balance_time+balanced_irq_interval)) {
@@ -110,9 +52,9 @@ diff -puN arch/i386/kernel/io_apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
}
}
return 0;
-diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/irq.c
---- 25/arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/irq.c 2005-02-23 02:20:06.000000000 -0800
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/irq.c linux-2.6.12/arch/i386/kernel/irq.c
+--- linux-2.6.12.orig/arch/i386/kernel/irq.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/irq.c 2005-07-08 12:36:06.000000000 -0400
@@ -15,6 +15,9 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
@@ -121,9 +63,9 @@ diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
+#include <linux/cpu.h>
+#include <linux/delay.h>
- #ifndef CONFIG_X86_LOCAL_APIC
- /*
-@@ -209,9 +212,8 @@ int show_interrupts(struct seq_file *p,
+ DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
+ EXPORT_PER_CPU_SYMBOL(irq_stat);
+@@ -210,9 +213,8 @@
if (i == 0) {
seq_printf(p, " ");
@@ -135,7 +77,7 @@ diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
seq_putc(p, '\n');
}
-@@ -224,9 +226,8 @@ int show_interrupts(struct seq_file *p,
+@@ -225,9 +227,8 @@
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
@@ -147,7 +89,7 @@ diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
-@@ -239,16 +240,13 @@ skip:
+@@ -240,16 +241,13 @@
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
@@ -155,20 +97,20 @@ diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
- if (cpu_online(j))
- seq_printf(p, "%10u ", nmi_count(j));
+ for_each_cpu(j)
-+ seq_printf(p, "%10u ", nmi_count(j));
++ seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
-- irq_stat[j].apic_timer_irqs);
+- per_cpu(irq_stat,j).apic_timer_irqs);
+ for_each_cpu(j)
-+ seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
++ seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-@@ -258,3 +256,45 @@ skip:
+@@ -259,3 +257,45 @@
}
return 0;
}
@@ -214,10 +156,10 @@ diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
+}
+#endif
+
-diff -puN arch/i386/kernel/msr.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/msr.c
---- 25/arch/i386/kernel/msr.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/msr.c 2005-02-23 02:20:06.000000000 -0800
-@@ -260,7 +260,7 @@ static struct file_operations msr_fops =
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/msr.c linux-2.6.12/arch/i386/kernel/msr.c
+--- linux-2.6.12.orig/arch/i386/kernel/msr.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/msr.c 2005-07-08 12:34:10.000000000 -0400
+@@ -260,7 +260,7 @@
.open = msr_open,
};
@@ -226,9 +168,9 @@ diff -puN arch/i386/kernel/msr.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
{
int err = 0;
struct class_device *class_err;
-diff -puN arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/process.c
---- 25/arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/process.c 2005-02-23 02:20:06.000000000 -0800
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/process.c linux-2.6.12/arch/i386/kernel/process.c
+--- linux-2.6.12.orig/arch/i386/kernel/process.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/process.c 2005-07-08 12:36:43.000000000 -0400
@@ -13,6 +13,7 @@
#include <stdarg.h>
@@ -237,7 +179,7 @@ diff -puN arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
-@@ -55,6 +56,9 @@
+@@ -54,6 +55,9 @@
#include <linux/irq.h>
#include <linux/err.h>
@@ -246,8 +188,8 @@ diff -puN arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
- int hlt_counter;
-@@ -139,6 +143,34 @@ static void poll_idle (void)
+ static int hlt_counter;
+@@ -138,6 +142,34 @@
}
}
@@ -282,19 +224,19 @@ diff -puN arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
-@@ -162,6 +194,9 @@ void cpu_idle (void)
+@@ -160,6 +192,9 @@
if (!idle)
idle = default_idle;
+ if (cpu_is_offline(cpu))
+ play_dead();
+
- irq_stat[cpu].idle_timestamp = jiffies;
+ __get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
-diff -puN arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/smpboot.c
---- 25/arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/smpboot.c 2005-02-23 02:20:06.000000000 -0800
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/smpboot.c linux-2.6.12/arch/i386/kernel/smpboot.c
+--- linux-2.6.12.orig/arch/i386/kernel/smpboot.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/smpboot.c 2005-07-08 12:34:10.000000000 -0400
@@ -44,6 +44,9 @@
#include <linux/smp_lock.h>
#include <linux/irq.h>
@@ -305,9 +247,9 @@ diff -puN arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
-@@ -89,6 +92,9 @@ extern unsigned char trampoline_end [];
- static unsigned char *trampoline_base;
- static int trampoline_exec;
+@@ -90,6 +93,9 @@
+
+ static void map_cpu_to_logical_apicid(void);
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -315,7 +257,7 @@ diff -puN arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
/*
* Currently trivial. Write the real->protected mode
* bootstrap into the page concerned. The caller
-@@ -1095,6 +1101,9 @@ static void __init smp_boot_cpus(unsigne
+@@ -1107,6 +1113,9 @@
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
@@ -325,7 +267,7 @@ diff -puN arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
smp_boot_cpus(max_cpus);
}
-@@ -1104,20 +1113,99 @@ void __devinit smp_prepare_boot_cpu(void
+@@ -1116,20 +1125,99 @@
cpu_set(smp_processor_id(), cpu_callout_map);
}
@@ -430,9 +372,9 @@ diff -puN arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/i386/k
local_irq_enable();
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
-diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/smp.c
---- 25/arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/smp.c 2005-02-23 02:20:06.000000000 -0800
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/smp.c linux-2.6.12/arch/i386/kernel/smp.c
+--- linux-2.6.12.orig/arch/i386/kernel/smp.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/smp.c 2005-07-08 12:34:10.000000000 -0400
@@ -19,6 +19,7 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
@@ -441,7 +383,7 @@ diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
-@@ -163,7 +164,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
+@@ -163,7 +164,7 @@
unsigned long flags;
local_irq_save(flags);
@@ -450,7 +392,7 @@ diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
/*
* Wait for idle.
*/
-@@ -345,21 +346,21 @@ out:
+@@ -345,21 +346,21 @@
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
@@ -477,7 +419,7 @@ diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
-@@ -484,6 +485,7 @@ void smp_send_nmi_allbutself(void)
+@@ -474,6 +475,7 @@
*/
void smp_send_reschedule(int cpu)
{
@@ -485,7 +427,7 @@ diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
-@@ -524,10 +526,16 @@ int smp_call_function (void (*func) (voi
+@@ -514,10 +516,16 @@
*/
{
struct call_data_struct data;
@@ -504,7 +446,7 @@ diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
-@@ -539,7 +547,6 @@ int smp_call_function (void (*func) (voi
+@@ -529,7 +537,6 @@
if (wait)
atomic_set(&data.finished, 0);
@@ -512,10 +454,10 @@ diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kerne
call_data = &data;
mb();
-diff -puN arch/i386/kernel/traps.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/traps.c
---- 25/arch/i386/kernel/traps.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/i386/kernel/traps.c 2005-02-23 02:20:06.000000000 -0800
-@@ -669,6 +669,14 @@ fastcall void do_nmi(struct pt_regs * re
+diff -Naur linux-2.6.12.orig/arch/i386/kernel/traps.c linux-2.6.12/arch/i386/kernel/traps.c
+--- linux-2.6.12.orig/arch/i386/kernel/traps.c 2005-07-08 12:33:40.000000000 -0400
++++ linux-2.6.12/arch/i386/kernel/traps.c 2005-07-08 12:34:10.000000000 -0400
+@@ -624,6 +624,14 @@
nmi_enter();
cpu = smp_processor_id();
@@ -530,26 +472,28 @@ diff -puN arch/i386/kernel/traps.c~i386-cpu-hotplug-updated-for-mm arch/i386/ker
++nmi_count(cpu);
if (!nmi_callback(regs, cpu))
-diff -puN arch/ia64/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/ia64/kernel/smpboot.c
---- 25/arch/ia64/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/ia64/kernel/smpboot.c 2005-02-23 02:20:06.000000000 -0800
-@@ -590,9 +590,10 @@ int __cpu_disable(void)
- if (cpu == 0)
- return -EBUSY;
+diff -Naur linux-2.6.12.orig/arch/ppc64/kernel/pSeries_smp.c linux-2.6.12/arch/ppc64/kernel/pSeries_smp.c
+--- linux-2.6.12.orig/arch/ppc64/kernel/pSeries_smp.c 2005-07-08 12:33:42.000000000 -0400
++++ linux-2.6.12/arch/ppc64/kernel/pSeries_smp.c 2005-07-08 12:34:10.000000000 -0400
+@@ -92,10 +92,13 @@
+ int pSeries_cpu_disable(void)
+ {
++ int cpu = smp_processor_id();
++
+ cpu_clear(cpu, cpu_online_map);
- fixup_irqs();
- local_flush_tlb_all();
-- printk ("Disabled cpu %u\n", smp_processor_id());
-+ printk("Disabled cpu %u\n", cpu);
- return 0;
- }
+ systemcfg->processorCount--;
+
+ /*fix boot_cpuid here*/
+- if (smp_processor_id() == boot_cpuid)
++ if (cpu == boot_cpuid)
+ boot_cpuid = any_online_cpu(cpu_online_map);
-diff -puN arch/ppc64/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/ppc64/kernel/smp.c
-diff -puN arch/s390/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/s390/kernel/smp.c
---- 25/arch/s390/kernel/smp.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/arch/s390/kernel/smp.c 2005-02-23 02:20:06.000000000 -0800
-@@ -679,12 +679,14 @@ __cpu_disable(void)
+ /* FIXME: abstract this to not be platform specific later on */
+diff -Naur linux-2.6.12.orig/arch/s390/kernel/smp.c linux-2.6.12/arch/s390/kernel/smp.c
+--- linux-2.6.12.orig/arch/s390/kernel/smp.c 2005-07-08 12:33:42.000000000 -0400
++++ linux-2.6.12/arch/s390/kernel/smp.c 2005-07-08 12:34:10.000000000 -0400
+@@ -679,12 +679,14 @@
{
unsigned long flags;
ec_creg_mask_parms cr_parms;
@@ -565,9 +509,9 @@ diff -puN arch/s390/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/s390/kerne
#ifdef CONFIG_PFAULT
/* Disable pfault pseudo page faults on this cpu. */
-diff -puN include/asm-i386/cpu.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/cpu.h
---- 25/include/asm-i386/cpu.h~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/include/asm-i386/cpu.h 2005-02-23 02:20:06.000000000 -0800
+diff -Naur linux-2.6.12.orig/include/asm-i386/cpu.h linux-2.6.12/include/asm-i386/cpu.h
+--- linux-2.6.12.orig/include/asm-i386/cpu.h 2005-07-08 12:33:58.000000000 -0400
++++ linux-2.6.12/include/asm-i386/cpu.h 2005-07-08 12:34:10.000000000 -0400
@@ -5,6 +5,7 @@
#include <linux/cpu.h>
#include <linux/topology.h>
@@ -576,16 +520,16 @@ diff -puN include/asm-i386/cpu.h~i386-cpu-hotplug-updated-for-mm include/asm-i38
#include <asm/node.h>
-@@ -17,4 +18,5 @@ extern int arch_register_cpu(int num);
+@@ -16,4 +17,5 @@
extern void arch_unregister_cpu(int);
#endif
+DECLARE_PER_CPU(int, cpu_state);
#endif /* _ASM_I386_CPU_H_ */
-diff -puN include/asm-i386/irq.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/irq.h
---- 25/include/asm-i386/irq.h~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/include/asm-i386/irq.h 2005-02-23 02:20:06.000000000 -0800
-@@ -38,4 +38,8 @@ extern void release_vm86_irqs(struct tas
+diff -Naur linux-2.6.12.orig/include/asm-i386/irq.h linux-2.6.12/include/asm-i386/irq.h
+--- linux-2.6.12.orig/include/asm-i386/irq.h 2005-07-08 12:33:58.000000000 -0400
++++ linux-2.6.12/include/asm-i386/irq.h 2005-07-08 12:34:10.000000000 -0400
+@@ -38,4 +38,8 @@
extern int irqbalance_disable(char *str);
#endif
@@ -594,10 +538,10 @@ diff -puN include/asm-i386/irq.h~i386-cpu-hotplug-updated-for-mm include/asm-i38
+#endif
+
#endif /* _ASM_IRQ_H */
-diff -puN include/asm-i386/smp.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/smp.h
---- 25/include/asm-i386/smp.h~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/include/asm-i386/smp.h 2005-02-23 02:20:06.000000000 -0800
-@@ -85,6 +85,9 @@ static __inline int logical_smp_processo
+diff -Naur linux-2.6.12.orig/include/asm-i386/smp.h linux-2.6.12/include/asm-i386/smp.h
+--- linux-2.6.12.orig/include/asm-i386/smp.h 2005-07-08 12:33:58.000000000 -0400
++++ linux-2.6.12/include/asm-i386/smp.h 2005-07-08 12:34:10.000000000 -0400
+@@ -83,6 +83,9 @@
}
#endif
@@ -607,10 +551,10 @@ diff -puN include/asm-i386/smp.h~i386-cpu-hotplug-updated-for-mm include/asm-i38
#endif /* !__ASSEMBLY__ */
#define NO_PROC_ID 0xFF /* No processor magic marker */
-diff -puN kernel/cpu.c~i386-cpu-hotplug-updated-for-mm kernel/cpu.c
---- 25/kernel/cpu.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
-+++ 25-akpm/kernel/cpu.c 2005-02-23 02:20:06.000000000 -0800
-@@ -63,19 +63,15 @@ static int take_cpu_down(void *unused)
+diff -Naur linux-2.6.12.orig/kernel/cpu.c linux-2.6.12/kernel/cpu.c
+--- linux-2.6.12.orig/kernel/cpu.c 2005-07-08 12:33:26.000000000 -0400
++++ linux-2.6.12/kernel/cpu.c 2005-07-08 12:34:10.000000000 -0400
+@@ -63,19 +63,15 @@
{
int err;
@@ -635,22 +579,3 @@ diff -puN kernel/cpu.c~i386-cpu-hotplug-updated-for-mm kernel/cpu.c
}
int cpu_down(unsigned int cpu)
-diff -puN arch/ppc64/kernel/pSeries_smp.c~i386-cpu-hotplug-updated-for-mm arch/ppc64/kernel/pSeries_smp.c
---- 25/arch/ppc64/kernel/pSeries_smp.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:08.000000000 -0800
-+++ 25-akpm/arch/ppc64/kernel/pSeries_smp.c 2005-02-23 02:20:08.000000000 -0800
-@@ -86,10 +86,13 @@ static int query_cpu_stopped(unsigned in
-
- int pSeries_cpu_disable(void)
- {
-+ int cpu = smp_processor_id();
-+
-+ cpu_clear(cpu, cpu_online_map);
- systemcfg->processorCount--;
-
- /*fix boot_cpuid here*/
-- if (smp_processor_id() == boot_cpuid)
-+ if (cpu == boot_cpuid)
- boot_cpuid = any_online_cpu(cpu_online_map);
-
- /* FIXME: abstract this to not be platform specific later on */
-_
diff --git a/patches/linux-2.6.11/net-csum.patch b/patches/linux-2.6.12/net-csum.patch
index 115cc1ed13..37a1fbf9c1 100644
--- a/patches/linux-2.6.11/net-csum.patch
+++ b/patches/linux-2.6.12/net-csum.patch
@@ -1,14 +1,3 @@
-diff -ur linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
---- linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2005-05-27 11:47:48 +01:00
-+++ linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2005-05-27 11:48:07 +01:00
-@@ -803,6 +803,7 @@
- */
- /* FIXME: Source route IP option packets --RR */
- if (hooknum == NF_IP_PRE_ROUTING
-+ && skb->ip_summed != CHECKSUM_UNNECESSARY
- && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
- skb->ip_summed == CHECKSUM_HW ? skb->csum
- : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
diff -ur linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2005-05-27 11:47:48 +01:00
+++ linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2005-05-27 11:48:07 +01:00
diff --git a/patches/linux-2.6.11/rcu-nohz.patch b/patches/linux-2.6.12/rcu-nohz.patch
index d7bafb3a62..d7bafb3a62 100644
--- a/patches/linux-2.6.11/rcu-nohz.patch
+++ b/patches/linux-2.6.12/rcu-nohz.patch
diff --git a/patches/linux-2.6.11/smp-alts.patch b/patches/linux-2.6.12/smp-alts.patch
index 5d18c5e71a..5d18c5e71a 100644
--- a/patches/linux-2.6.11/smp-alts.patch
+++ b/patches/linux-2.6.12/smp-alts.patch
diff --git a/patches/linux-2.6.11/x86_64-linux.patch b/patches/linux-2.6.12/x86_64-linux.patch
index 57d4f07a06..57d4f07a06 100644
--- a/patches/linux-2.6.11/x86_64-linux.patch
+++ b/patches/linux-2.6.12/x86_64-linux.patch