From e7399d6ff4d5783837d1e83792cba7a2be41ece0 Mon Sep 17 00:00:00 2001
From: "sos22@douglas.cl.cam.ac.uk"
Date: Thu, 16 Jun 2005 11:44:16 +0000
Subject: bitkeeper revision 1.1713.1.13 (42b16610ojQcHPRC6Ao_1CKXClza9A)

Slightly disgusting hack to avoid using lots of lock instructions on a
uniprocessor machine just because we happened to compile with
CONFIG_SMP.  Essentially, we make a big table of all of the
instruction sequences which differ in ``easy'' ways between UP and SMP
kernels, and then select which one to use at run time.

Signed-off-by: Steven Smith
---
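A rough user-space model of the table-driven patching, for illustration
only: the record layout and the copy-and-nop-pad logic mirror the
patch's smpalts.c, but the target buffer, byte values, and helper names
here are invented for the example (0xf0 is the x86 lock prefix, 0x90 is
nop; the kernel stores the variant bytes in a trailing flexible array
rather than behind a pointer).

/* smp-alts in miniature: pick a variant, copy it over the patchable
 * site, and pad the remainder of the site with nops (0x90). */
#include <stdio.h>
#include <string.h>

struct smp_replacement_record {
        unsigned char targ_size;        /* bytes reserved at the site */
        unsigned char smp1_size;        /* generic SMP variant */
        unsigned char smp2_size;        /* feature-gated SMP variant */
        unsigned char up_size;          /* uniprocessor variant */
        unsigned char feature;          /* CPU feature for smp2, or -1 */
        const unsigned char *data;      /* smp1 bytes, then smp2, then up */
};

/* The one-byte site the LOCK macro emits: a nop, with a record saying
 * "smp1 = lock prefix, smp2 = empty, up = nop". */
static unsigned char site[1] = { 0x90 };
static const unsigned char lock_bytes[] = { 0xf0, 0x90 };
static const struct smp_replacement_record lock_record =
        { 1, 1, 0, 1, (unsigned char)-1, lock_bytes };

static int fake_boot_cpu_has(unsigned char feature)
{
        (void)feature;
        return 0;                       /* pretend no optional features */
}

static void patch_site(unsigned char *targ,
                       const struct smp_replacement_record *r, int smp)
{
        /* Default to the UP variant, which lives after smp1 and smp2. */
        const unsigned char *src = r->data + r->smp1_size + r->smp2_size;
        unsigned char size = r->up_size;

        if (smp) {
                if (r->feature != (unsigned char)-1 &&
                    fake_boot_cpu_has(r->feature)) {
                        src = r->data + r->smp1_size;   /* smp2 */
                        size = r->smp2_size;
                } else {
                        src = r->data;                  /* smp1 */
                        size = r->smp1_size;
                }
        }
        memcpy(targ, src, size);
        memset(targ + size, 0x90, r->targ_size - size); /* nop padding */
}

int main(void)
{
        patch_site(site, &lock_record, 1);
        printf("SMP: %#04x\n", site[0]);        /* 0xf0: lock prefix */
        patch_site(site, &lock_record, 0);
        printf("UP:  %#04x\n", site[0]);        /* 0x90: back to a nop */
        return 0;
}

In the real patch the records live in the __smp_replacements section,
the (site, record) address pairs in __smp_alternatives, and
prepare_for_smp()/unprepare_for_smp() walk that table doing exactly
this copy and pad.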
 patches/linux-2.6.11/smp-alts.patch | 554 ++++++++++++++++++++++++++++++++++++
 1 file changed, 554 insertions(+)
 create mode 100644 patches/linux-2.6.11/smp-alts.patch

diff --git a/patches/linux-2.6.11/smp-alts.patch b/patches/linux-2.6.11/smp-alts.patch
new file mode 100644
index 0000000000..db71ab1e56
--- /dev/null
+++ b/patches/linux-2.6.11/smp-alts.patch
@@ -0,0 +1,554 @@
+diff -Naur linux-2.6.11/arch/i386/Kconfig linux-2.6.11.post/arch/i386/Kconfig
+--- linux-2.6.11/arch/i386/Kconfig	2005-03-02 07:37:49.000000000 +0000
++++ linux-2.6.11.post/arch/i386/Kconfig	2005-06-10 13:42:35.000000000 +0100
+@@ -481,6 +481,19 @@
+ 
+ 	  If you don't know what to do here, say N.
+ 
++config SMP_ALTERNATIVES
++	bool "SMP alternatives support (EXPERIMENTAL)"
++	depends on SMP && EXPERIMENTAL
++	help
++	  Try to reduce the overhead of running an SMP kernel on a
++	  uniprocessor host slightly by replacing certain key instruction
++	  sequences according to whether we currently have more than one
++	  CPU available.  This should provide a noticeable boost to
++	  performance when running SMP kernels on UP machines, and have
++	  negligible impact when running on a true SMP host.
++
++	  If unsure, say N.
++
+ config NR_CPUS
+ 	int "Maximum number of CPUs (2-255)"
+ 	range 2 255
+diff -Naur linux-2.6.11/arch/i386/kernel/Makefile linux-2.6.11.post/arch/i386/kernel/Makefile
+--- linux-2.6.11/arch/i386/kernel/Makefile	2005-03-02 07:37:49.000000000 +0000
++++ linux-2.6.11.post/arch/i386/kernel/Makefile	2005-06-16 11:16:18.555332435 +0100
+@@ -32,6 +32,7 @@
+ obj-$(CONFIG_HPET_TIMER) 	+= time_hpet.o
+ obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
+ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
++obj-$(CONFIG_SMP_ALTERNATIVES)	+= smpalts.o
+ 
+ EXTRA_AFLAGS   := -traditional
+ 
+diff -Naur linux-2.6.11/arch/i386/kernel/smpalts.c linux-2.6.11.post/arch/i386/kernel/smpalts.c
+--- linux-2.6.11/arch/i386/kernel/smpalts.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.11.post/arch/i386/kernel/smpalts.c	2005-06-16 11:23:39.300902424 +0100
+@@ -0,0 +1,76 @@
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <asm/system.h>
++#include <asm/processor.h>
++#include <asm/smp_alt.h>
++
++struct smp_replacement_record {
++	unsigned char targ_size;
++	unsigned char smp1_size;
++	unsigned char smp2_size;
++	unsigned char up_size;
++	unsigned char feature;
++	unsigned char data[0];
++};
++
++struct smp_alternative_record {
++	void *targ_start;
++	struct smp_replacement_record *repl;
++};
++
++extern struct smp_alternative_record __start_smp_alternatives_table,
++  __stop_smp_alternatives_table;
++
++void prepare_for_smp(void)
++{
++	struct smp_alternative_record *r;
++	printk(KERN_INFO "Enabling SMP...\n");
++	for (r = &__start_smp_alternatives_table;
++	     r != &__stop_smp_alternatives_table;
++	     r++) {
++		BUG_ON(r->repl->targ_size < r->repl->smp1_size);
++		BUG_ON(r->repl->targ_size < r->repl->smp2_size);
++		BUG_ON(r->repl->targ_size < r->repl->up_size);
++		if (r->repl->feature != (unsigned char)-1 &&
++		    boot_cpu_has(r->repl->feature)) {
++			memcpy(r->targ_start,
++			       r->repl->data + r->repl->smp1_size,
++			       r->repl->smp2_size);
++			memset(r->targ_start + r->repl->smp2_size,
++			       0x90,
++			       r->repl->targ_size - r->repl->smp2_size);
++		} else {
++			memcpy(r->targ_start,
++			       r->repl->data,
++			       r->repl->smp1_size);
++			memset(r->targ_start + r->repl->smp1_size,
++			       0x90,
++			       r->repl->targ_size - r->repl->smp1_size);
++		}
++	}
++	/* Paranoia */
++	asm volatile ("jmp 1f\n1:");
++	mb();
++}
++
++void unprepare_for_smp(void)
++{
++	struct smp_alternative_record *r;
++	printk(KERN_INFO "Disabling SMP...\n");
++	for (r = &__start_smp_alternatives_table;
++	     r != &__stop_smp_alternatives_table;
++	     r++) {
++		BUG_ON(r->repl->targ_size < r->repl->smp1_size);
++		BUG_ON(r->repl->targ_size < r->repl->smp2_size);
++		BUG_ON(r->repl->targ_size < r->repl->up_size);
++		memcpy(r->targ_start,
++		       r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
++		       r->repl->up_size);
++		memset(r->targ_start + r->repl->up_size,
++		       0x90,
++		       r->repl->targ_size - r->repl->up_size);
++	}
++	/* Paranoia */
++	asm volatile ("jmp 1f\n1:");
++	mb();
++}
+diff -Naur linux-2.6.11/arch/i386/kernel/smpboot.c linux-2.6.11.post/arch/i386/kernel/smpboot.c
+--- linux-2.6.11/arch/i386/kernel/smpboot.c	2005-03-02 07:38:09.000000000 +0000
++++ linux-2.6.11.post/arch/i386/kernel/smpboot.c	2005-06-16 11:17:09.287064617 +0100
+@@ -1003,6 +1003,11 @@
+ 		if (max_cpus <= cpucount+1)
+ 			continue;
+ 
++#ifdef CONFIG_SMP_ALTERNATIVES
++		if (kicked == 1)
++			prepare_for_smp();
++#endif
++
+ 		if (do_boot_cpu(apicid))
+ 			printk("CPU #%d not responding - cannot use it.\n",
+ 								apicid);
+@@ -1118,6 +1123,11 @@
+ 		return -EIO;
+ 	}
+ 
++#ifdef CONFIG_SMP_ALTERNATIVES
++	if (num_online_cpus() == 1)
++		prepare_for_smp();
++#endif
++
+ 	local_irq_enable();
+ 	/* Unleash the CPU! */
+ 	cpu_set(cpu, smp_commenced_mask);
+diff -Naur linux-2.6.11/arch/i386/kernel/vmlinux.lds.S linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S
+--- linux-2.6.11/arch/i386/kernel/vmlinux.lds.S	2005-03-02 07:38:37.000000000 +0000
++++ linux-2.6.11.post/arch/i386/kernel/vmlinux.lds.S	2005-06-10 11:14:14.000000000 +0100
+@@ -30,6 +30,13 @@
+   __ex_table : { *(__ex_table) }
+   __stop___ex_table = .;
+ 
++  . = ALIGN(16);
++  __start_smp_alternatives_table = .;
++  __smp_alternatives : { *(__smp_alternatives) }
++  __stop_smp_alternatives_table = .;
++
++  __smp_replacements : { *(__smp_replacements) }
++
+   RODATA
+ 
+   /* writeable */
+diff -Naur linux-2.6.11/include/asm-i386/atomic.h linux-2.6.11.post/include/asm-i386/atomic.h
+--- linux-2.6.11/include/asm-i386/atomic.h	2005-03-02 07:37:51.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/atomic.h	2005-06-13 10:10:39.000000000 +0100
+@@ -4,18 +4,13 @@
+ #include <linux/config.h>
+ #include <linux/compiler.h>
+ #include <asm/processor.h>
++#include <asm/smp_alt.h>
+ 
+ /*
+  * Atomic operations that C can't guarantee us.  Useful for
+  * resource counting etc..
+  */
+ 
+-#ifdef CONFIG_SMP
+-#define LOCK "lock ; "
+-#else
+-#define LOCK ""
+-#endif
+-
+ /*
+  * Make sure gcc doesn't try to be clever and move things around
+  * on us. We need to use _exactly_ the address the user gave us,
+diff -Naur linux-2.6.11/include/asm-i386/bitops.h linux-2.6.11.post/include/asm-i386/bitops.h
+--- linux-2.6.11/include/asm-i386/bitops.h	2005-03-02 07:38:12.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/bitops.h	2005-06-13 10:11:54.000000000 +0100
+@@ -7,6 +7,7 @@
+ 
+ #include <linux/config.h>
+ #include <linux/compiler.h>
++#include <asm/smp_alt.h>
+ 
+ /*
+  * These have to be done with inline assembly: that way the bit-setting
+@@ -16,12 +17,6 @@
+  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+  */
+ 
+-#ifdef CONFIG_SMP
+-#define LOCK_PREFIX "lock ; "
+-#else
+-#define LOCK_PREFIX ""
+-#endif
+-
+ #define ADDR (*(volatile long *) addr)
+ 
+ /**
+@@ -41,7 +36,7 @@
+  */
+ static inline void set_bit(int nr, volatile unsigned long * addr)
+ {
+-	__asm__ __volatile__( LOCK_PREFIX
++	__asm__ __volatile__( LOCK
+ 		"btsl %1,%0"
+ 		:"=m" (ADDR)
+ 		:"Ir" (nr));
+@@ -76,7 +71,7 @@
+  */
+ static inline void clear_bit(int nr, volatile unsigned long * addr)
+ {
+-	__asm__ __volatile__( LOCK_PREFIX
++	__asm__ __volatile__( LOCK
+ 		"btrl %1,%0"
+ 		:"=m" (ADDR)
+ 		:"Ir" (nr));
+@@ -121,7 +116,7 @@
+  */
+ static inline void change_bit(int nr, volatile unsigned long * addr)
+ {
+-	__asm__ __volatile__( LOCK_PREFIX
++	__asm__ __volatile__( LOCK
+ 		"btcl %1,%0"
+ 		:"=m" (ADDR)
+ 		:"Ir" (nr));
+@@ -140,7 +135,7 @@
+ {
+ 	int oldbit;
+ 
+-	__asm__ __volatile__( LOCK_PREFIX
++	__asm__ __volatile__( LOCK
+ 		"btsl %2,%1\n\tsbbl %0,%0"
+ 		:"=r" (oldbit),"=m" (ADDR)
+ 		:"Ir" (nr) : "memory");
+@@ -180,7 +175,7 @@
+ {
+ 	int oldbit;
+ 
+-	__asm__ __volatile__( LOCK_PREFIX
++	__asm__ __volatile__( LOCK
+ 		"btrl %2,%1\n\tsbbl %0,%0"
+ 		:"=r" (oldbit),"=m" (ADDR)
+ 		:"Ir" (nr) : "memory");
+@@ -231,7 +226,7 @@
+ {
+ 	int oldbit;
+ 
+-	__asm__ __volatile__( LOCK_PREFIX
++	__asm__ __volatile__( LOCK
+ 		"btcl %2,%1\n\tsbbl %0,%0"
+ 		:"=r" (oldbit),"=m" (ADDR)
+ 		:"Ir" (nr) : "memory");
+diff -Naur linux-2.6.11/include/asm-i386/rwsem.h linux-2.6.11.post/include/asm-i386/rwsem.h
+--- linux-2.6.11/include/asm-i386/rwsem.h	2005-03-02 07:38:08.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/rwsem.h	2005-06-13 10:13:06.000000000 +0100
+@@ -40,6 +40,7 @@
+ 
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
++#include <asm/smp_alt.h>
+ 
+ struct rwsem_waiter;
+ 
+@@ -99,7 +100,7 @@
+ {
+ 	__asm__ __volatile__(
+ 		"# beginning down_read\n\t"
+-LOCK_PREFIX	"  incl      (%%eax)\n\t" /* adds 0x00000001, returns the old value */
++LOCK	        "  incl      (%%eax)\n\t" /* adds 0x00000001, returns the old value */
+ 	"  js        2f\n\t" /* jump if we weren't granted the lock */
+ 	"1:\n\t"
+ 	LOCK_SECTION_START("")
+@@ -130,7 +131,7 @@
+ 	"  movl	     %1,%2\n\t"
+ 	"  addl      %3,%2\n\t"
+ 	"  jle	     2f\n\t"
+-LOCK_PREFIX	"  cmpxchgl  %2,%0\n\t"
++LOCK	        "  cmpxchgl  %2,%0\n\t"
+ 	"  jnz	     1b\n\t"
+ 	"2:\n\t"
+ 	"# ending __down_read_trylock\n\t"
+@@ -150,7 +151,7 @@
+ 	tmp = RWSEM_ACTIVE_WRITE_BIAS;
+ 	__asm__ __volatile__(
+ 		"# beginning down_write\n\t"
+-LOCK_PREFIX	"  xadd      %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
++LOCK	        "  xadd      %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
+ 	"  testl     %%edx,%%edx\n\t" /* was the count 0 before? */
+ 	"  jnz       2f\n\t" /* jump if we weren't granted the lock */
+ 	"1:\n\t"
+@@ -188,7 +189,7 @@
+ 	__s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
+ 	__asm__ __volatile__(
+ 		"# beginning __up_read\n\t"
+-LOCK_PREFIX	"  xadd      %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
++LOCK	        "  xadd      %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
+ 	"  js        2f\n\t" /* jump if the lock is being waited upon */
+ 	"1:\n\t"
+ 	LOCK_SECTION_START("")
+@@ -214,7 +215,7 @@
+ 	__asm__ __volatile__(
+ 		"# beginning __up_write\n\t"
+ 		"  movl      %2,%%edx\n\t"
+-LOCK_PREFIX	"  xaddl     %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
++LOCK	        "  xaddl     %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
+ 	"  jnz       2f\n\t" /* jump if the lock is being waited upon */
+ 	"1:\n\t"
+ 	LOCK_SECTION_START("")
+@@ -239,7 +240,7 @@
+ {
+ 	__asm__ __volatile__(
+ 		"# beginning __downgrade_write\n\t"
+-LOCK_PREFIX	"  addl      %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
++LOCK	        "  addl      %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
+ 	"  js        2f\n\t" /* jump if the lock is being waited upon */
+ 	"1:\n\t"
+ 	LOCK_SECTION_START("")
+@@ -263,7 +264,7 @@
+ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+ {
+ 	__asm__ __volatile__(
+-LOCK_PREFIX	"addl %1,%0"
++LOCK	          "addl %1,%0"
+ 		: "=m"(sem->count)
+ 		: "ir"(delta), "m"(sem->count));
+ }
+@@ -276,7 +277,7 @@
+ 	int tmp = delta;
+ 
+ 	__asm__ __volatile__(
+-LOCK_PREFIX	"xadd %0,(%2)"
++LOCK  	          "xadd %0,(%2)"
+ 		: "+r"(tmp), "=m"(sem->count)
+ 		: "r"(sem), "m"(sem->count)
+ 		: "memory");
+diff -Naur linux-2.6.11/include/asm-i386/smp_alt.h linux-2.6.11.post/include/asm-i386/smp_alt.h
+--- linux-2.6.11/include/asm-i386/smp_alt.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.11.post/include/asm-i386/smp_alt.h	2005-06-16 11:16:50.109433206 +0100
+@@ -0,0 +1,32 @@
++#ifndef __ASM_SMP_ALT_H__
++#define __ASM_SMP_ALT_H__
++
++#include <linux/config.h>
++
++#ifdef CONFIG_SMP
++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
++#define LOCK \
++	"6677: nop\n" \
++	".section __smp_alternatives,\"a\"\n" \
++	".long 6677b\n" \
++	".long 6678f\n" \
++	".previous\n" \
++	".section __smp_replacements,\"a\"\n" \
++	"6678: .byte 1\n" \
++	".byte 1\n" \
++	".byte 0\n"  \
++	".byte 1\n" \
++	".byte -1\n" \
++	"lock\n" \
++	"nop\n" \
++	".previous\n"
++void prepare_for_smp(void);
++void unprepare_for_smp(void);
++#else
++#define LOCK "lock ; "
++#endif
++#else
++#define LOCK ""
++#endif
++
++#endif /* __ASM_SMP_ALT_H__ */
+diff -Naur linux-2.6.11/include/asm-i386/spinlock.h linux-2.6.11.post/include/asm-i386/spinlock.h
+--- linux-2.6.11/include/asm-i386/spinlock.h	2005-03-02 07:37:50.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/spinlock.h	2005-06-13 14:13:52.000000000 +0100
+@@ -6,6 +6,7 @@
+ #include <asm/page.h>
+ #include <linux/config.h>
+ #include <linux/compiler.h>
++#include <asm/smp_alt.h>
+ 
+ asmlinkage int printk(const char * fmt, ...)
+ 	__attribute__ ((format (printf, 1, 2)));
+@@ -47,8 +48,9 @@
+ 
+ #define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
+ 
+ #define spin_lock_string \
+-	"\n1:\t" \
+-	"lock ; decb %0\n\t" \
++	"1:\n" \
++	LOCK \
++	"decb %0\n\t" \
+ 	"jns 3f\n" \
+ 	"2:\t" \
+ 	"rep;nop\n\t" \
+@@ -58,8 +60,9 @@
+ 	"3:\n\t"
+ 
+ #define spin_lock_string_flags \
+-	"\n1:\t" \
+-	"lock ; decb %0\n\t" \
++	"1:\n" \
++	LOCK \
++	"decb %0\n\t" \
+ 	"jns 4f\n\t" \
+ 	"2:\t" \
+ 	"testl $0x200, %1\n\t" \
+@@ -121,10 +124,34 @@
+ static inline int _raw_spin_trylock(spinlock_t *lock)
+ {
+ 	char oldval;
++#ifdef CONFIG_SMP_ALTERNATIVES
+ 	__asm__ __volatile__(
+-		"xchgb %b0,%1"
++		"1:movb %1,%b0\n"
++		"movb $0,%1\n"
++		"2:"
++		".section __smp_alternatives,\"a\"\n"
++		".long 1b\n"
++		".long 3f\n"
++		".previous\n"
++		".section __smp_replacements,\"a\"\n"
++		"3: .byte 2b - 1b\n"
++		".byte 5f-4f\n"
++		".byte 0\n"
++		".byte 6f-5f\n"
++		".byte -1\n"
++		"4: xchgb %b0,%1\n"
++		"5: movb %1,%b0\n"
++		"movb $0,%1\n"
++		"6:\n"
++		".previous\n"
+ 		:"=q" (oldval), "=m" (lock->slock)
+ 		:"0" (0) : "memory");
++#else
++	__asm__ __volatile__(
++		"xchgb %b0,%1\n"
++		:"=q" (oldval), "=m" (lock->slock)
++		:"0" (0) : "memory");
++#endif
+ 	return oldval > 0;
+ }
+ 
+@@ -225,8 +252,8 @@
+ 	__build_write_lock(rw, "__write_lock_failed");
+ }
+ 
+-#define _raw_read_unlock(rw)	asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+-#define _raw_write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
++#define _raw_read_unlock(rw)	asm volatile(LOCK "incl %0" :"=m" ((rw)->lock) : : "memory")
++#define _raw_write_unlock(rw)	asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+ 
+ static inline int _raw_read_trylock(rwlock_t *lock)
+ {
+diff -Naur linux-2.6.11/include/asm-i386/system.h linux-2.6.11.post/include/asm-i386/system.h
+--- linux-2.6.11/include/asm-i386/system.h	2005-03-02 07:37:30.000000000 +0000
++++ linux-2.6.11.post/include/asm-i386/system.h	2005-06-15 13:21:40.000000000 +0100
+@@ -5,7 +5,7 @@
+ #include <linux/kernel.h>
+ #include <asm/segment.h>
+ #include <asm/cpufeature.h>
+-#include <linux/bitops.h> /* for LOCK_PREFIX */
++#include <asm/smp_alt.h>
+ 
+ #ifdef __KERNEL__
+ 
+@@ -249,19 +249,19 @@
+ 	unsigned long prev;
+ 	switch (size) {
+ 	case 1:
+-		__asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
++		__asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
+ 				     : "=a"(prev)
+ 				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ 				     : "memory");
+ 		return prev;
+ 	case 2:
+-		__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
++		__asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
+ 				     : "=a"(prev)
+ 				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ 				     : "memory");
+ 		return prev;
+ 	case 4:
+-		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
++		__asm__ __volatile__(LOCK "cmpxchgl %1,%2"
+ 				     : "=a"(prev)
+ 				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ 				     : "memory");
+@@ -425,11 +425,55 @@
+ #endif
+ 
+ #ifdef CONFIG_SMP
+-#define smp_mb()	mb()
+-#define smp_rmb()	rmb()
+ #define smp_wmb()	wmb()
+-#define smp_read_barrier_depends()	read_barrier_depends()
++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
++#define smp_alt_mb(instr)                                           \
++__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
++		     ".section __smp_alternatives,\"a\"\n"          \
++		     ".long 6667b\n"                                \
++		     ".long 6673f\n"                                \
++		     ".previous\n"                                  \
++		     ".section __smp_replacements,\"a\"\n"          \
++		     "6673:.byte 6668b-6667b\n"                     \
++		     ".byte 6670f-6669f\n"                          \
++		     ".byte 6671f-6670f\n"                          \
++		     ".byte 0\n"                                    \
++		     ".byte %c0\n"                                  \
++		     "6669:lock;addl $0,0(%%esp)\n"                 \
++		     "6670:" instr "\n"                             \
++		     "6671:\n"                                      \
++		     ".previous\n"                                  \
++		     :                                              \
++		     : "i" (X86_FEATURE_XMM2)                       \
++		     : "memory")
++#define smp_rmb() smp_alt_mb("lfence")
++#define smp_mb()  smp_alt_mb("mfence")
++#define set_mb(var, value) do {                                     \
++unsigned long __set_mb_temp;                                        \
++__asm__ __volatile__("6667:movl %1, %0\n6668:\n"                    \
++		     ".section __smp_alternatives,\"a\"\n"          \
++		     ".long 6667b\n"                                \
++		     ".long 6673f\n"                                \
++		     ".previous\n"                                  \
++		     ".section __smp_replacements,\"a\"\n"          \
++		     "6673: .byte 6668b-6667b\n"                    \
++		     ".byte 6670f-6669f\n"                          \
++		     ".byte 0\n"                                    \
++		     ".byte 6671f-6670f\n"                          \
++		     ".byte -1\n"                                   \
++		     "6669: xchg %1, %0\n"                          \
++		     "6670:movl %1, %0\n"                           \
++		     "6671:\n"                                      \
++		     ".previous\n"                                  \
++		     : "=m" (var), "=r" (__set_mb_temp)             \
++		     : "1" (value)                                  \
++		     : "memory"); } while (0)
++#else
++#define smp_rmb()	rmb()
++#define smp_mb()	mb()
+ #define set_mb(var, value) do { xchg(&var, value); } while (0)
++#endif
++#define smp_read_barrier_depends()	read_barrier_depends()
+ #else
+ #define smp_mb()	barrier()
+ #define smp_rmb()	barrier()
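A note on the _raw_spin_trylock() replacement above: the SMP variant
keeps the atomic xchgb, while the UP variant degrades it to a plain
load and store, which is sufficient when no other processor can touch
the lock word. A rough C rendering of the two variants, using a GCC
atomic builtin in place of the inline asm (illustrative only; the
function names are not the kernel's):

/* lock->slock is 1 when free; trylock swaps in 0 and succeeds if the
 * old value was positive (matches "return oldval > 0" in the patch). */
static int trylock_smp(volatile signed char *slock)
{
	/* The xchgb path: xchg with a memory operand is implicitly
	 * locked on x86. */
	signed char old = __atomic_exchange_n(slock, 0, __ATOMIC_ACQUIRE);
	return old > 0;
}

static int trylock_up(volatile signed char *slock)
{
	/* The replacement "movb %1,%b0; movb $0,%1": not atomic, but
	 * with only one CPU there is nothing to race against. */
	signed char old = *slock;
	*slock = 0;
	return old > 0;
}

The set_mb() alternative in system.h makes the same trade: an
implicitly locked xchg on SMP, a plain movl on UP.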
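The barrier alternatives work the same way but exercise the
feature-gated (smp2) path: the smp_mb()/smp_rmb() site is six nops
wide, and prepare_for_smp() installs "lock; addl $0,0(%esp)" (smp1)
unless the boot CPU has X86_FEATURE_XMM2, in which case it installs
mfence/lfence (smp2); unprepare_for_smp() restores the nops, since the
up variant is empty (the ".byte 0" up_size). As bytes, the three states
look roughly like this (standard x86 encodings, but treat them as
illustrative; the exact addl encoding can vary by assembler):

/* The three states of the six-byte smp_mb() patch site, following the
 * copy-and-pad rule sketched earlier (nop = 0x90). */
static const unsigned char mb_up[6]   = { 0x90, 0x90, 0x90, 0x90, 0x90, 0x90 }; /* six nops */
static const unsigned char mb_smp1[6] = { 0xf0, 0x83, 0x04, 0x24, 0x00, 0x90 }; /* lock; addl $0,(%esp) + pad */
static const unsigned char mb_smp2[6] = { 0x0f, 0xae, 0xf0, 0x90, 0x90, 0x90 }; /* mfence + pad (needs SSE2) */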