diff options
Diffstat (limited to 'toolchain/musl/patches/000-git-2015-01-09.patch')
-rw-r--r-- | toolchain/musl/patches/000-git-2015-01-09.patch | 3440 |
1 files changed, 3440 insertions, 0 deletions
diff --git a/toolchain/musl/patches/000-git-2015-01-09.patch b/toolchain/musl/patches/000-git-2015-01-09.patch new file mode 100644 index 0000000000..c710fe0ef2 --- /dev/null +++ b/toolchain/musl/patches/000-git-2015-01-09.patch @@ -0,0 +1,3440 @@ +--- a/arch/arm/atomic.h ++++ b/arch/arm/atomic.h +@@ -22,37 +22,150 @@ static inline int a_ctz_64(uint64_t x) + return a_ctz_l(y); + } + +-#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \ +- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 +- + #if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 +-#define MEM_BARRIER "dmb ish" +-#else +-#define MEM_BARRIER "mcr p15,0,r0,c7,c10,5" +-#endif + +-static inline int __k_cas(int t, int s, volatile int *p) ++static inline void a_barrier() + { +- int ret; +- __asm__( +- " " MEM_BARRIER "\n" ++ __asm__ __volatile__("dmb ish"); ++} ++ ++static inline int a_cas(volatile int *p, int t, int s) ++{ ++ int old; ++ __asm__ __volatile__( ++ " dmb ish\n" + "1: ldrex %0,%3\n" +- " subs %0,%0,%1\n" +-#ifdef __thumb__ +- " itt eq\n" +-#endif +- " strexeq %0,%2,%3\n" +- " teqeq %0,#1\n" +- " beq 1b\n" +- " " MEM_BARRIER "\n" +- : "=&r"(ret) ++ " cmp %0,%1\n" ++ " bne 1f\n" ++ " strex %0,%2,%3\n" ++ " cmp %0, #0\n" ++ " bne 1b\n" ++ " mov %0, %1\n" ++ "1: dmb ish\n" ++ : "=&r"(old) + : "r"(t), "r"(s), "Q"(*p) + : "memory", "cc" ); +- return ret; ++ return old; ++} ++ ++static inline int a_swap(volatile int *x, int v) ++{ ++ int old, tmp; ++ __asm__ __volatile__( ++ " dmb ish\n" ++ "1: ldrex %0,%3\n" ++ " strex %1,%2,%3\n" ++ " cmp %1, #0\n" ++ " bne 1b\n" ++ " dmb ish\n" ++ : "=&r"(old), "=&r"(tmp) ++ : "r"(v), "Q"(*x) ++ : "memory", "cc" ); ++ return old; ++} ++ ++static inline int a_fetch_add(volatile int *x, int v) ++{ ++ int old, tmp; ++ __asm__ __volatile__( ++ " dmb ish\n" ++ "1: ldrex %0,%3\n" ++ " add %0,%0,%2\n" ++ " strex %1,%0,%3\n" ++ " cmp %1, #0\n" ++ " bne 1b\n" ++ " dmb ish\n" ++ : "=&r"(old), "=&r"(tmp) ++ : "r"(v), "Q"(*x) ++ : 
"memory", "cc" ); ++ return old-v; ++} ++ ++static inline void a_inc(volatile int *x) ++{ ++ int tmp, tmp2; ++ __asm__ __volatile__( ++ " dmb ish\n" ++ "1: ldrex %0,%2\n" ++ " add %0,%0,#1\n" ++ " strex %1,%0,%2\n" ++ " cmp %1, #0\n" ++ " bne 1b\n" ++ " dmb ish\n" ++ : "=&r"(tmp), "=&r"(tmp2) ++ : "Q"(*x) ++ : "memory", "cc" ); ++} ++ ++static inline void a_dec(volatile int *x) ++{ ++ int tmp, tmp2; ++ __asm__ __volatile__( ++ " dmb ish\n" ++ "1: ldrex %0,%2\n" ++ " sub %0,%0,#1\n" ++ " strex %1,%0,%2\n" ++ " cmp %1, #0\n" ++ " bne 1b\n" ++ " dmb ish\n" ++ : "=&r"(tmp), "=&r"(tmp2) ++ : "Q"(*x) ++ : "memory", "cc" ); ++} ++ ++static inline void a_and(volatile int *x, int v) ++{ ++ int tmp, tmp2; ++ __asm__ __volatile__( ++ " dmb ish\n" ++ "1: ldrex %0,%3\n" ++ " and %0,%0,%2\n" ++ " strex %1,%0,%3\n" ++ " cmp %1, #0\n" ++ " bne 1b\n" ++ " dmb ish\n" ++ : "=&r"(tmp), "=&r"(tmp2) ++ : "r"(v), "Q"(*x) ++ : "memory", "cc" ); ++} ++ ++static inline void a_or(volatile int *x, int v) ++{ ++ int tmp, tmp2; ++ __asm__ __volatile__( ++ " dmb ish\n" ++ "1: ldrex %0,%3\n" ++ " orr %0,%0,%2\n" ++ " strex %1,%0,%3\n" ++ " cmp %1, #0\n" ++ " bne 1b\n" ++ " dmb ish\n" ++ : "=&r"(tmp), "=&r"(tmp2) ++ : "r"(v), "Q"(*x) ++ : "memory", "cc" ); ++} ++ ++static inline void a_store(volatile int *p, int x) ++{ ++ __asm__ __volatile__( ++ " dmb ish\n" ++ " str %1,%0\n" ++ " dmb ish\n" ++ : "=m"(*p) ++ : "r"(x) ++ : "memory", "cc" ); + } ++ + #else +-#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0) +-#endif ++ ++int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden"))); ++#define __k_cas __a_cas ++ ++static inline void a_barrier() ++{ ++ __asm__ __volatile__("bl __a_barrier" ++ : : : "memory", "cc", "ip", "lr" ); ++} + + static inline int a_cas(volatile int *p, int t, int s) + { +@@ -65,11 +178,6 @@ static inline int a_cas(volatile int *p, + } + } + +-static inline void *a_cas_p(volatile void *p, void *t, void *s) +-{ +- return (void *)a_cas(p, (int)t, 
(int)s); +-} +- + static inline int a_swap(volatile int *x, int v) + { + int old; +@@ -98,19 +206,9 @@ static inline void a_dec(volatile int *x + + static inline void a_store(volatile int *p, int x) + { +- while (__k_cas(*p, x, p)); +-} +- +-#define a_spin a_barrier +- +-static inline void a_barrier() +-{ +- __k_cas(0, 0, &(int){0}); +-} +- +-static inline void a_crash() +-{ +- *(volatile char *)0=0; ++ a_barrier(); ++ *p = x; ++ a_barrier(); + } + + static inline void a_and(volatile int *p, int v) +@@ -127,6 +225,20 @@ static inline void a_or(volatile int *p, + while (__k_cas(old, old|v, p)); + } + ++#endif ++ ++static inline void *a_cas_p(volatile void *p, void *t, void *s) ++{ ++ return (void *)a_cas(p, (int)t, (int)s); ++} ++ ++#define a_spin a_barrier ++ ++static inline void a_crash() ++{ ++ *(volatile char *)0=0; ++} ++ + static inline void a_or_l(volatile void *p, long v) + { + a_or(p, v); +--- a/arch/arm/bits/alltypes.h.in ++++ b/arch/arm/bits/alltypes.h.in +@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF unsigned wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + TYPEDEF float float_t; + TYPEDEF double double_t; +--- a/arch/arm/bits/syscall.h ++++ b/arch/arm/bits/syscall.h +@@ -339,7 +339,13 @@ + #define __NR_seccomp 383 + #define __NR_getrandom 384 + #define __NR_memfd_create 385 ++#define __NR_bpf 386 + ++#define __ARM_NR_breakpoint 0x0f0001 ++#define __ARM_NR_cacheflush 0x0f0002 ++#define __ARM_NR_usr26 0x0f0003 ++#define __ARM_NR_usr32 0x0f0004 ++#define __ARM_NR_set_tls 0x0f0005 + + /* Repeated with SYS_ prefix */ + +@@ -684,3 +690,4 @@ + #define SYS_seccomp 383 + #define SYS_getrandom 384 + #define SYS_memfd_create 385 ++#define SYS_bpf 386 +--- a/arch/arm/pthread_arch.h ++++ b/arch/arm/pthread_arch.h +@@ -10,9 +10,17 @@ static inline __attribute__((const)) pth + + #else + +-typedef char *(*__ptr_func_t)(void) __attribute__((const)); +-#define __pthread_self() \ +- 
((pthread_t)(((__ptr_func_t)0xffff0fe0)()+8-sizeof(struct pthread))) ++static inline __attribute__((const)) pthread_t __pthread_self() ++{ ++#ifdef __clang__ ++ char *p; ++ __asm__( "bl __a_gettp\n\tmov %0,r0" : "=r"(p) : : "cc", "r0", "lr" ); ++#else ++ register char *p __asm__("r0"); ++ __asm__( "bl __a_gettp" : "=r"(p) : : "cc", "lr" ); ++#endif ++ return (void *)(p+8-sizeof(struct pthread)); ++} + + #endif + +--- /dev/null ++++ b/arch/arm/src/__set_thread_area.c +@@ -0,0 +1,49 @@ ++#include <stdint.h> ++#include <elf.h> ++#include "pthread_impl.h" ++#include "libc.h" ++ ++#define HWCAP_TLS (1 << 15) ++ ++extern const unsigned char __attribute__((__visibility__("hidden"))) ++ __a_barrier_dummy[], __a_barrier_oldkuser[], ++ __a_barrier_v6[], __a_barrier_v7[], ++ __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[], ++ __a_gettp_dummy[]; ++ ++#define __a_barrier_kuser 0xffff0fa0 ++#define __a_cas_kuser 0xffff0fc0 ++#define __a_gettp_kuser 0xffff0fe0 ++ ++extern uintptr_t __attribute__((__visibility__("hidden"))) ++ __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr; ++ ++#define SET(op,ver) (__a_##op##_ptr = \ ++ (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy) ++ ++int __set_thread_area(void *p) ++{ ++#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 ++ if (__hwcap & HWCAP_TLS) { ++ size_t *aux; ++ SET(cas, v7); ++ SET(barrier, v7); ++ for (aux=libc.auxv; *aux; aux+=2) { ++ if (*aux != AT_PLATFORM) continue; ++ const char *s = (void *)aux[1]; ++ if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break; ++ SET(cas, v6); ++ SET(barrier, v6); ++ break; ++ } ++ } else { ++ int ver = *(int *)0xffff0ffc; ++ SET(gettp, kuser); ++ SET(cas, kuser); ++ SET(barrier, kuser); ++ if (ver < 2) a_crash(); ++ if (ver < 3) SET(barrier, oldkuser); ++ } ++#endif ++ return __syscall(0xf0005, p); ++} +--- /dev/null ++++ b/arch/arm/src/arm/atomics.s +@@ -0,0 +1,116 @@ ++.text ++ ++.global __a_barrier ++.hidden __a_barrier ++.type __a_barrier,%function ++__a_barrier: ++ ldr ip,1f ++ ldr 
ip,[pc,ip] ++ add pc,pc,ip ++1: .word __a_barrier_ptr-1b ++.global __a_barrier_dummy ++.hidden __a_barrier_dummy ++__a_barrier_dummy: ++ tst lr,#1 ++ moveq pc,lr ++ bx lr ++.global __a_barrier_oldkuser ++.hidden __a_barrier_oldkuser ++__a_barrier_oldkuser: ++ push {r0,r1,r2,r3,ip,lr} ++ mov r1,r0 ++ mov r2,sp ++ ldr ip,=0xffff0fc0 ++ mov lr,pc ++ mov pc,ip ++ pop {r0,r1,r2,r3,ip,lr} ++ tst lr,#1 ++ moveq pc,lr ++ bx lr ++.global __a_barrier_v6 ++.hidden __a_barrier_v6 ++__a_barrier_v6: ++ mcr p15,0,r0,c7,c10,5 ++ bx lr ++.global __a_barrier_v7 ++.hidden __a_barrier_v7 ++__a_barrier_v7: ++ .word 0xf57ff05b /* dmb ish */ ++ bx lr ++ ++.global __a_cas ++.hidden __a_cas ++.type __a_cas,%function ++__a_cas: ++ ldr ip,1f ++ ldr ip,[pc,ip] ++ add pc,pc,ip ++1: .word __a_cas_ptr-1b ++.global __a_cas_dummy ++.hidden __a_cas_dummy ++__a_cas_dummy: ++ mov r3,r0 ++ ldr r0,[r2] ++ subs r0,r3,r0 ++ streq r1,[r2] ++ tst lr,#1 ++ moveq pc,lr ++ bx lr ++.global __a_cas_v6 ++.hidden __a_cas_v6 ++__a_cas_v6: ++ mov r3,r0 ++ mcr p15,0,r0,c7,c10,5 ++1: .word 0xe1920f9f /* ldrex r0,[r2] */ ++ subs r0,r3,r0 ++ .word 0x01820f91 /* strexeq r0,r1,[r2] */ ++ teqeq r0,#1 ++ beq 1b ++ mcr p15,0,r0,c7,c10,5 ++ bx lr ++.global __a_cas_v7 ++.hidden __a_cas_v7 ++__a_cas_v7: ++ mov r3,r0 ++ .word 0xf57ff05b /* dmb ish */ ++1: .word 0xe1920f9f /* ldrex r0,[r2] */ ++ subs r0,r3,r0 ++ .word 0x01820f91 /* strexeq r0,r1,[r2] */ ++ teqeq r0,#1 ++ beq 1b ++ .word 0xf57ff05b /* dmb ish */ ++ bx lr ++ ++.global __aeabi_read_tp ++.type __aeabi_read_tp,%function ++__aeabi_read_tp: ++ ++.global __a_gettp ++.hidden __a_gettp ++.type __a_gettp,%function ++__a_gettp: ++ ldr r0,1f ++ ldr r0,[pc,r0] ++ add pc,pc,r0 ++1: .word __a_gettp_ptr-1b ++.global __a_gettp_dummy ++.hidden __a_gettp_dummy ++__a_gettp_dummy: ++ mrc p15,0,r0,c13,c0,3 ++ bx lr ++ ++.data ++.global __a_barrier_ptr ++.hidden __a_barrier_ptr ++__a_barrier_ptr: ++ .word 0 ++ ++.global __a_cas_ptr ++.hidden __a_cas_ptr ++__a_cas_ptr: ++ .word 0 ++ 
++.global __a_gettp_ptr ++.hidden __a_gettp_ptr ++__a_gettp_ptr: ++ .word 0 +--- a/arch/arm/syscall_arch.h ++++ b/arch/arm/syscall_arch.h +@@ -5,8 +5,6 @@ + + long (__syscall)(long, ...); + +-#ifndef __clang__ +- + #define __asm_syscall(...) do { \ + __asm__ __volatile__ ( "svc 0" \ + : "=r"(r0) : __VA_ARGS__ : "memory"); \ +@@ -54,41 +52,25 @@ static inline long __syscall4(long n, lo + __asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3)); + } + +-#else +- +-static inline long __syscall0(long n) +-{ +- return (__syscall)(n); +-} +- +-static inline long __syscall1(long n, long a) +-{ +- return (__syscall)(n, a); +-} +- +-static inline long __syscall2(long n, long a, long b) +-{ +- return (__syscall)(n, a, b); +-} +- +-static inline long __syscall3(long n, long a, long b, long c) +-{ +- return (__syscall)(n, a, b, c); +-} +- +-static inline long __syscall4(long n, long a, long b, long c, long d) +-{ +- return (__syscall)(n, a, b, c, d); +-} +- +-#endif +- + static inline long __syscall5(long n, long a, long b, long c, long d, long e) + { +- return (__syscall)(n, a, b, c, d, e); ++ register long r7 __asm__("r7") = n; ++ register long r0 __asm__("r0") = a; ++ register long r1 __asm__("r1") = b; ++ register long r2 __asm__("r2") = c; ++ register long r3 __asm__("r3") = d; ++ register long r4 __asm__("r4") = e; ++ __asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4)); + } + + static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) + { +- return (__syscall)(n, a, b, c, d, e, f); ++ register long r7 __asm__("r7") = n; ++ register long r0 __asm__("r0") = a; ++ register long r1 __asm__("r1") = b; ++ register long r2 __asm__("r2") = c; ++ register long r3 __asm__("r3") = d; ++ register long r4 __asm__("r4") = e; ++ register long r5 __asm__("r5") = f; ++ __asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5)); + } +--- a/arch/i386/bits/alltypes.h.in ++++ b/arch/i386/bits/alltypes.h.in +@@ -17,7 +17,6 @@ 
TYPEDEF __WCHAR_TYPE__ wchar_t; + TYPEDEF long wchar_t; + #endif + #endif +-TYPEDEF unsigned wint_t; + + #if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 0 + TYPEDEF float float_t; +--- a/arch/i386/bits/syscall.h ++++ b/arch/i386/bits/syscall.h +@@ -355,6 +355,7 @@ + #define __NR_seccomp 354 + #define __NR_getrandom 355 + #define __NR_memfd_create 356 ++#define __NR_bpf 357 + + + /* Repeated with SYS_ prefix */ +@@ -716,3 +717,4 @@ + #define SYS_seccomp 354 + #define SYS_getrandom 355 + #define SYS_memfd_create 356 ++#define SYS_bpf 357 +--- a/arch/microblaze/bits/alltypes.h.in ++++ b/arch/microblaze/bits/alltypes.h.in +@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF int wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + TYPEDEF float float_t; + TYPEDEF double double_t; +--- a/arch/microblaze/bits/syscall.h ++++ b/arch/microblaze/bits/syscall.h +@@ -381,6 +381,7 @@ + #define __NR_seccomp 384 + #define __NR_getrandom 385 + #define __NR_memfd_create 386 ++#define __NR_bpf 387 + + /* Repeated with SYS_ prefix */ + +@@ -768,3 +769,4 @@ + #define SYS_seccomp 384 + #define SYS_getrandom 385 + #define SYS_memfd_create 386 ++#define SYS_bpf 387 +--- a/arch/microblaze/syscall_arch.h ++++ b/arch/microblaze/syscall_arch.h +@@ -100,39 +100,7 @@ static inline long __syscall6(long n, lo + + #else + +-static inline long __syscall0(long n) +-{ +- return (__syscall)(n); +-} +- +-static inline long __syscall1(long n, long a) +-{ +- return (__syscall)(n, a); +-} +- +-static inline long __syscall2(long n, long a, long b) +-{ +- return (__syscall)(n, a, b); +-} +- +-static inline long __syscall3(long n, long a, long b, long c) +-{ +- return (__syscall)(n, a, b, c); +-} +- +-static inline long __syscall4(long n, long a, long b, long c, long d) +-{ +- return (__syscall)(n, a, b, c, d); +-} +- +-static inline long __syscall5(long n, long a, long b, long c, long d, long e) +-{ +- return (__syscall)(n, a, b, c, d, e); +-} +- +-static inline 
long __syscall6(long n, long a, long b, long c, long d, long e, long f) +-{ +- return (__syscall)(n, a, b, c, d, e, f); +-} ++#undef SYSCALL_NO_INLINE ++#define SYSCALL_NO_INLINE + + #endif +--- a/arch/mips/bits/alltypes.h.in ++++ b/arch/mips/bits/alltypes.h.in +@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF int wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + TYPEDEF float float_t; + TYPEDEF double double_t; +--- a/arch/mips/bits/syscall.h ++++ b/arch/mips/bits/syscall.h +@@ -352,6 +352,7 @@ + #define __NR_seccomp 4352 + #define __NR_getrandom 4353 + #define __NR_memfd_create 4354 ++#define __NR_bpf 4355 + + + /* Repeated with SYS_ prefix */ +@@ -709,3 +710,4 @@ + #define SYS_seccomp 4352 + #define SYS_getrandom 4353 + #define SYS_memfd_create 4354 ++#define SYS_bpf 4355 +--- a/arch/or1k/bits/alltypes.h.in ++++ b/arch/or1k/bits/alltypes.h.in +@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF unsigned wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + TYPEDEF float float_t; + TYPEDEF double double_t; +--- a/arch/or1k/bits/syscall.h ++++ b/arch/or1k/bits/syscall.h +@@ -263,6 +263,7 @@ + #define __NR_seccomp 277 + #define __NR_getrandom 278 + #define __NR_memfd_create 279 ++#define __NR_bpf 280 + + #define SYS_io_setup __NR_io_setup + #define SYS_io_destroy __NR_io_destroy +@@ -529,3 +530,4 @@ + #define SYS_seccomp __NR_seccomp + #define SYS_getrandom __NR_getrandom + #define SYS_memfd_create __NR_memfd_create ++#define SYS_bpf __NR_bpf +--- a/arch/or1k/syscall_arch.h ++++ b/arch/or1k/syscall_arch.h +@@ -1,7 +1,7 @@ + #define __SYSCALL_LL_E(x) \ + ((union { long long ll; long l[2]; }){ .ll = x }).l[0], \ + ((union { long long ll; long l[2]; }){ .ll = x }).l[1] +-#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x)) ++#define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x)) + + #define SYSCALL_MMAP2_UNIT 8192ULL + +@@ -118,39 +118,7 @@ static inline long __syscall6(long n, lo + + #else + +-static inline 
long __syscall0(long n) +-{ +- return (__syscall)(n); +-} +- +-static inline long __syscall1(long n, long a) +-{ +- return (__syscall)(n, a); +-} +- +-static inline long __syscall2(long n, long a, long b) +-{ +- return (__syscall)(n, a, b); +-} +- +-static inline long __syscall3(long n, long a, long b, long c) +-{ +- return (__syscall)(n, a, b, c); +-} +- +-static inline long __syscall4(long n, long a, long b, long c, long d) +-{ +- return (__syscall)(n, a, b, c, d); +-} +- +-static inline long __syscall5(long n, long a, long b, long c, long d, long e) +-{ +- return (__syscall)(n, a, b, c, d, e); +-} +- +-static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) +-{ +- return (__syscall)(n, a, b, c, d, e, f); +-} ++#undef SYSCALL_NO_INLINE ++#define SYSCALL_NO_INLINE + + #endif +--- a/arch/powerpc/bits/alltypes.h.in ++++ b/arch/powerpc/bits/alltypes.h.in +@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF long wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + TYPEDEF float float_t; + TYPEDEF double double_t; +--- a/arch/powerpc/bits/syscall.h ++++ b/arch/powerpc/bits/syscall.h +@@ -374,6 +374,7 @@ + #define __NR_seccomp 358 + #define __NR_getrandom 359 + #define __NR_memfd_create 360 ++#define __NR_bpf 361 + + /* + * repeated with SYS prefix +@@ -754,3 +755,4 @@ + #define SYS_seccomp 358 + #define SYS_getrandom 359 + #define SYS_memfd_create 360 ++#define SYS_bpf 361 +--- a/arch/powerpc/syscall_arch.h ++++ b/arch/powerpc/syscall_arch.h +@@ -3,39 +3,5 @@ + ((union { long long ll; long l[2]; }){ .ll = x }).l[1] + #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x)) + +-long (__syscall)(long, ...); +- +-static inline long __syscall0(long n) +-{ +- return (__syscall)(n, 0, 0, 0, 0, 0, 0); +-} +- +-static inline long __syscall1(long n, long a) +-{ +- return (__syscall)(n, a, 0, 0, 0, 0, 0); +-} +- +-static inline long __syscall2(long n, long a, long b) +-{ +- return (__syscall)(n, a, b, 0, 0, 0, 0); +-} +- 
+-static inline long __syscall3(long n, long a, long b, long c) +-{ +- return (__syscall)(n, a, b, c, 0, 0, 0); +-} +- +-static inline long __syscall4(long n, long a, long b, long c, long d) +-{ +- return (__syscall)(n, a, b, c, d, 0, 0); +-} +- +-static inline long __syscall5(long n, long a, long b, long c, long d, long e) +-{ +- return (__syscall)(n, a, b, c, d, e, 0); +-} +- +-static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) +-{ +- return (__syscall)(n, a, b, c, d, e, f); +-} ++#undef SYSCALL_NO_INLINE ++#define SYSCALL_NO_INLINE +--- a/arch/sh/bits/alltypes.h.in ++++ b/arch/sh/bits/alltypes.h.in +@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF long wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + TYPEDEF float float_t; + TYPEDEF double double_t; +--- a/arch/x32/bits/alltypes.h.in ++++ b/arch/x32/bits/alltypes.h.in +@@ -8,7 +8,6 @@ TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF long wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + #if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 2 + TYPEDEF long double float_t; +--- a/arch/x32/bits/syscall.h ++++ b/arch/x32/bits/syscall.h +@@ -277,6 +277,8 @@ + #define __NR_seccomp (__X32_SYSCALL_BIT + 317) + #define __NR_getrandom (__X32_SYSCALL_BIT + 318) + #define __NR_memfd_create (__X32_SYSCALL_BIT + 319) ++#define __NR_kexec_file_load (__X32_SYSCALL_BIT + 320) ++#define __NR_bpf (__X32_SYSCALL_BIT + 321) + + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) +@@ -604,6 +606,8 @@ + #define SYS_seccomp __NR_seccomp + #define SYS_getrandom __NR_getrandom + #define SYS_memfd_create __NR_memfd_create ++#define SYS_kexec_file_load __NR_kexec_file_load ++#define SYS_bpf __NR_bpf + + #define SYS_rt_sigaction __NR_rt_sigaction + #define SYS_rt_sigreturn __NR_rt_sigreturn +--- a/arch/x86_64/bits/alltypes.h.in ++++ b/arch/x86_64/bits/alltypes.h.in +@@ -8,7 +8,6 @@ 
TYPEDEF __builtin_va_list __isoc_va_list + #ifndef __cplusplus + TYPEDEF int wchar_t; + #endif +-TYPEDEF unsigned wint_t; + + #if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 2 + TYPEDEF long double float_t; +--- a/arch/x86_64/bits/syscall.h ++++ b/arch/x86_64/bits/syscall.h +@@ -318,6 +318,8 @@ + #define __NR_seccomp 317 + #define __NR_getrandom 318 + #define __NR_memfd_create 319 ++#define __NR_kexec_file_load 320 ++#define __NR_bpf 321 + + + #undef __NR_fstatat +@@ -654,6 +656,8 @@ + #define SYS_seccomp 317 + #define SYS_getrandom 318 + #define SYS_memfd_create 319 ++#define SYS_kexec_file_load 320 ++#define SYS_bpf 321 + + #undef SYS_fstatat + #undef SYS_pread +--- a/include/alltypes.h.in ++++ b/include/alltypes.h.in +@@ -28,6 +28,7 @@ TYPEDEF _Int64 blkcnt_t; + TYPEDEF unsigned _Int64 fsblkcnt_t; + TYPEDEF unsigned _Int64 fsfilcnt_t; + ++TYPEDEF unsigned wint_t; + TYPEDEF unsigned long wctype_t; + + TYPEDEF void * timer_t; +--- a/include/arpa/nameser.h ++++ b/include/arpa/nameser.h +@@ -1,6 +1,11 @@ + #ifndef _ARPA_NAMESER_H + #define _ARPA_NAMESER_H + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include <stddef.h> + #include <stdint.h> + + #define __NAMESER 19991006 +@@ -48,6 +53,8 @@ extern const struct _ns_flagdata _ns_fla + #define ns_msg_end(handle) ((handle)._eom + 0) + #define ns_msg_size(handle) ((handle)._eom - (handle)._msg) + #define ns_msg_count(handle, section) ((handle)._counts[section] + 0) ++#define ns_msg_getflag(handle, flag) \ ++ (((handle)._flags & _ns_flagdata[flag].mask) >> _ns_flagdata[flag].shift) + + typedef struct __ns_rr { + char name[NS_MAXDNAME]; +@@ -296,43 +303,20 @@ typedef enum __ns_cert_types { + #define NS_OPT_DNSSEC_OK 0x8000U + #define NS_OPT_NSID 3 + +-#define NS_GET16(s, cp) do { \ +- register const unsigned char *t_cp = (const unsigned char *)(cp); \ +- (s) = ((uint16_t)t_cp[0] << 8) \ +- | ((uint16_t)t_cp[1]) \ +- ; \ +- (cp) += NS_INT16SZ; \ +-} while (0) +- +-#define NS_GET32(l, cp) do { \ +- register 
const unsigned char *t_cp = (const unsigned char *)(cp); \ +- (l) = ((uint32_t)t_cp[0] << 24) \ +- | ((uint32_t)t_cp[1] << 16) \ +- | ((uint32_t)t_cp[2] << 8) \ +- | ((uint32_t)t_cp[3]) \ +- ; \ +- (cp) += NS_INT32SZ; \ +-} while (0) +- +-#define NS_PUT16(s, cp) do { \ +- register uint16_t t_s = (uint16_t)(s); \ +- register unsigned char *t_cp = (unsigned char *)(cp); \ +- *t_cp++ = t_s >> 8; \ +- *t_cp = t_s; \ +- (cp) += NS_INT16SZ; \ +-} while (0) +- +-#define NS_PUT32(l, cp) do { \ +- register uint32_t t_l = (uint32_t)(l); \ +- register unsigned char *t_cp = (unsigned char *)(cp); \ +- *t_cp++ = t_l >> 24; \ +- *t_cp++ = t_l >> 16; \ +- *t_cp++ = t_l >> 8; \ +- *t_cp = t_l; \ +- (cp) += NS_INT32SZ; \ +-} while (0) +- +- ++#define NS_GET16(s, cp) (void)((s) = ns_get16(((cp)+=2)-2)) ++#define NS_GET32(l, cp) (void)((l) = ns_get32(((cp)+=4)-4)) ++#define NS_PUT16(s, cp) ns_put16((s), ((cp)+=2)-2) ++#define NS_PUT32(l, cp) ns_put32((l), ((cp)+=4)-4) ++ ++unsigned ns_get16(const unsigned char *); ++unsigned long ns_get32(const unsigned char *); ++void ns_put16(unsigned, unsigned char *); ++void ns_put32(unsigned long, unsigned char *); ++ ++int ns_initparse(const unsigned char *, int, ns_msg *); ++int ns_parserr(ns_msg *, ns_sect, int, ns_rr *); ++int ns_skiprr(const unsigned char *, const unsigned char *, ns_sect, int); ++int ns_name_uncompress(const unsigned char *, const unsigned char *, const unsigned char *, char *, size_t); + + + #define __BIND 19950621 +@@ -464,4 +448,8 @@ typedef struct { + #define PUTSHORT NS_PUT16 + #define PUTLONG NS_PUT32 + ++#ifdef __cplusplus ++} ++#endif ++ + #endif +--- a/include/complex.h ++++ b/include/complex.h +@@ -7,9 +7,9 @@ extern "C" { + + #define complex _Complex + #ifdef __GNUC__ +-#define _Complex_I (__extension__ 1.0fi) ++#define _Complex_I (__extension__ (0.0f+1.0fi)) + #else +-#define _Complex_I 1.0fi ++#define _Complex_I (0.0f+1.0fi) + #endif + #define I _Complex_I + +@@ -101,8 +101,9 @@ double creal(double complex); + 
float crealf(float complex); + long double creall(long double complex); + ++#ifndef __cplusplus + #define __CIMAG(x, t) \ +- ((union { _Complex t __z; t __xy[2]; }){(_Complex t)(x)}.__xy[1]) ++ (+(union { _Complex t __z; t __xy[2]; }){(_Complex t)(x)}.__xy[1]) + + #define creal(x) ((double)(x)) + #define crealf(x) ((float)(x)) +@@ -111,13 +112,20 @@ long double creall(long double complex); + #define cimag(x) __CIMAG(x, double) + #define cimagf(x) __CIMAG(x, float) + #define cimagl(x) __CIMAG(x, long double) ++#endif + +-#define __CMPLX(x, y, t) \ +- ((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z) +- ++#if __STDC_VERSION__ >= 201112L ++#if defined(_Imaginary_I) ++#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y)) ++#elif defined(__clang__) ++#define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) }) ++#else ++#define __CMPLX(x, y, t) (__builtin_complex((t)(x), (t)(y))) ++#endif + #define CMPLX(x, y) __CMPLX(x, y, double) + #define CMPLXF(x, y) __CMPLX(x, y, float) + #define CMPLXL(x, y) __CMPLX(x, y, long double) ++#endif + + #ifdef __cplusplus + } +--- a/include/pthread.h ++++ b/include/pthread.h +@@ -84,7 +84,9 @@ __attribute__((const)) + pthread_t pthread_self(void); + + int pthread_equal(pthread_t, pthread_t); ++#ifndef __cplusplus + #define pthread_equal(x,y) ((x)==(y)) ++#endif + + int pthread_setcancelstate(int, int *); + int pthread_setcanceltype(int, int *); +--- a/include/stdint.h ++++ b/include/stdint.h +@@ -47,8 +47,8 @@ typedef uint64_t uint_least64_t; + + #define UINT8_MAX (0xff) + #define UINT16_MAX (0xffff) +-#define UINT32_MAX (0xffffffff) +-#define UINT64_MAX (0xffffffffffffffff) ++#define UINT32_MAX (0xffffffffu) ++#define UINT64_MAX (0xffffffffffffffffu) + + #define INT_FAST8_MIN INT8_MIN + #define INT_FAST64_MIN INT64_MIN +@@ -78,7 +78,7 @@ typedef uint64_t uint_least64_t; + #define INTMAX_MAX INT64_MAX + #define UINTMAX_MAX UINT64_MAX + +-#define WINT_MIN 0 ++#define WINT_MIN 0U + #define WINT_MAX UINT32_MAX + + #if 
L'\0'-1 > 0 +--- a/include/sys/prctl.h ++++ b/include/sys/prctl.h +@@ -5,6 +5,8 @@ + extern "C" { + #endif + ++#include <stdint.h> ++ + #define PR_SET_PDEATHSIG 1 + #define PR_GET_PDEATHSIG 2 + #define PR_GET_DUMPABLE 3 +@@ -80,6 +82,25 @@ extern "C" { + #define PR_SET_MM_ENV_END 11 + #define PR_SET_MM_AUXV 12 + #define PR_SET_MM_EXE_FILE 13 ++#define PR_SET_MM_MAP 14 ++#define PR_SET_MM_MAP_SIZE 15 ++ ++struct prctl_mm_map { ++ uint64_t start_code; ++ uint64_t end_code; ++ uint64_t start_data; ++ uint64_t end_data; ++ uint64_t start_brk; ++ uint64_t brk; ++ uint64_t start_stack; ++ uint64_t arg_start; ++ uint64_t arg_end; ++ uint64_t env_start; ++ uint64_t env_end; ++ uint64_t *auxv; ++ uint32_t auxv_size; ++ uint32_t exe_fd; ++}; + + #define PR_SET_PTRACER 0x59616d61 + #define PR_SET_PTRACER_ANY (-1UL) +--- a/include/threads.h ++++ b/include/threads.h +@@ -51,7 +51,9 @@ void thrd_yield(void); + + thrd_t thrd_current(void); + int thrd_equal(thrd_t, thrd_t); ++#ifndef __cplusplus + #define thrd_equal(A, B) ((A) == (B)) ++#endif + + void call_once(once_flag *, void (*)(void)); + +--- a/include/utmp.h ++++ b/include/utmp.h +@@ -35,6 +35,8 @@ void setutent(void); + + void updwtmp(const char *, const struct utmp *); + ++int login_tty(int); ++ + #define _PATH_UTMP "/dev/null/utmp" + #define _PATH_WTMP "/dev/null/wtmp" + +--- a/src/fcntl/open.c ++++ b/src/fcntl/open.c +@@ -7,7 +7,7 @@ int open(const char *filename, int flags + { + mode_t mode = 0; + +- if (flags & O_CREAT) { ++ if ((flags & O_CREAT) || (flags & O_TMPFILE) == O_TMPFILE) { + va_list ap; + va_start(ap, flags); + mode = va_arg(ap, mode_t); +--- a/src/fcntl/openat.c ++++ b/src/fcntl/openat.c +@@ -5,11 +5,15 @@ + + int openat(int fd, const char *filename, int flags, ...) 
+ { +- mode_t mode; +- va_list ap; +- va_start(ap, flags); +- mode = va_arg(ap, mode_t); +- va_end(ap); ++ mode_t mode = 0; ++ ++ if ((flags & O_CREAT) || (flags & O_TMPFILE) == O_TMPFILE) { ++ va_list ap; ++ va_start(ap, flags); ++ mode = va_arg(ap, mode_t); ++ va_end(ap); ++ } ++ + return syscall_cp(SYS_openat, fd, filename, flags|O_LARGEFILE, mode); + } + +--- a/src/internal/libm.h ++++ b/src/internal/libm.h +@@ -128,6 +128,18 @@ do { + (d) = __u.f; \ + } while (0) + ++#undef __CMPLX ++#undef CMPLX ++#undef CMPLXF ++#undef CMPLXL ++ ++#define __CMPLX(x, y, t) \ ++ ((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z) ++ ++#define CMPLX(x, y) __CMPLX(x, y, double) ++#define CMPLXF(x, y) __CMPLX(x, y, float) ++#define CMPLXL(x, y) __CMPLX(x, y, long double) ++ + /* fdlibm kernel functions */ + + int __rem_pio2_large(double*,double*,int,int,int); +--- a/src/internal/syscall.h ++++ b/src/internal/syscall.h +@@ -24,12 +24,22 @@ long __syscall_ret(unsigned long), __sys + __syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, + syscall_arg_t, syscall_arg_t, syscall_arg_t); + ++#ifdef SYSCALL_NO_INLINE ++#define __syscall0(n) (__syscall)(n) ++#define __syscall1(n,a) (__syscall)(n,__scc(a)) ++#define __syscall2(n,a,b) (__syscall)(n,__scc(a),__scc(b)) ++#define __syscall3(n,a,b,c) (__syscall)(n,__scc(a),__scc(b),__scc(c)) ++#define __syscall4(n,a,b,c,d) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d)) ++#define __syscall5(n,a,b,c,d,e) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e)) ++#define __syscall6(n,a,b,c,d,e,f) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) ++#else + #define __syscall1(n,a) __syscall1(n,__scc(a)) + #define __syscall2(n,a,b) __syscall2(n,__scc(a),__scc(b)) + #define __syscall3(n,a,b,c) __syscall3(n,__scc(a),__scc(b),__scc(c)) + #define __syscall4(n,a,b,c,d) __syscall4(n,__scc(a),__scc(b),__scc(c),__scc(d)) + #define __syscall5(n,a,b,c,d,e) 
__syscall5(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e)) + #define __syscall6(n,a,b,c,d,e,f) __syscall6(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) ++#endif + #define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g)) + + #define __SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n +--- a/src/ldso/dynlink.c ++++ b/src/ldso/dynlink.c +@@ -645,6 +645,8 @@ static void decode_dyn(struct dso *p) + p->hashtab = (void *)(p->base + dyn[DT_HASH]); + if (dyn[0]&(1<<DT_RPATH)) + p->rpath_orig = (void *)(p->strings + dyn[DT_RPATH]); ++ if (dyn[0]&(1<<DT_RUNPATH)) ++ p->rpath_orig = (void *)(p->strings + dyn[DT_RUNPATH]); + if (search_vec(p->dynv, dyn, DT_GNU_HASH)) + p->ghashtab = (void *)(p->base + *dyn); + if (search_vec(p->dynv, dyn, DT_VERSYM)) +@@ -1126,6 +1128,7 @@ void *__dynlink(int argc, char **argv) + libc.secure = 1; + } + libc.page_size = aux[AT_PAGESZ]; ++ libc.auxv = auxv; + + /* If the dynamic linker was invoked as a program itself, AT_BASE + * will not be set. 
In that case, we assume the base address is +--- a/src/math/__rem_pio2.c ++++ b/src/math/__rem_pio2.c +@@ -19,6 +19,12 @@ + + #include "libm.h" + ++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++ + /* + * invpio2: 53 bits of 2/pi + * pio2_1: first 33 bit of pi/2 +@@ -29,6 +35,7 @@ + * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) + */ + static const double ++toint = 1.5/EPS, + invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ + pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ + pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ +@@ -41,8 +48,8 @@ pio2_3t = 8.47842766036889956997e-32; /* + int __rem_pio2(double x, double *y) + { + union {double f; uint64_t i;} u = {x}; +- double_t z,w,t,r; +- double tx[3],ty[2],fn; ++ double_t z,w,t,r,fn; ++ double tx[3],ty[2]; + uint32_t ix; + int sign, n, ex, ey, i; + +@@ -111,8 +118,7 @@ int __rem_pio2(double x, double *y) + if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ + medium: + /* rint(x/(pi/2)), Assume round-to-nearest. 
*/ +- fn = x*invpio2 + 0x1.8p52; +- fn = fn - 0x1.8p52; ++ fn = x*invpio2 + toint - toint; + n = (int32_t)fn; + r = x - fn*pio2_1; + w = fn*pio2_1t; /* 1st round, good to 85 bits */ +--- a/src/math/__rem_pio2f.c ++++ b/src/math/__rem_pio2f.c +@@ -22,12 +22,19 @@ + + #include "libm.h" + ++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++ + /* + * invpio2: 53 bits of 2/pi + * pio2_1: first 25 bits of pi/2 + * pio2_1t: pi/2 - pio2_1 + */ + static const double ++toint = 1.5/EPS, + invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ + pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */ + pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ +@@ -35,7 +42,8 @@ pio2_1t = 1.58932547735281966916e-08; /* + int __rem_pio2f(float x, double *y) + { + union {float f; uint32_t i;} u = {x}; +- double tx[1],ty[1],fn; ++ double tx[1],ty[1]; ++ double_t fn; + uint32_t ix; + int n, sign, e0; + +@@ -43,8 +51,7 @@ int __rem_pio2f(float x, double *y) + /* 25+53 bit pi is good enough for medium size */ + if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. 
*/ +- fn = x*invpio2 + 0x1.8p52; +- fn = fn - 0x1.8p52; ++ fn = x*invpio2 + toint - toint; + n = (int32_t)fn; + *y = x - fn*pio2_1 - fn*pio2_1t; + return n; +--- a/src/math/__rem_pio2l.c ++++ b/src/math/__rem_pio2l.c +@@ -20,10 +20,11 @@ + * use __rem_pio2_large() for large x + */ + ++static const long double toint = 1.5/LDBL_EPSILON; ++ + #if LDBL_MANT_DIG == 64 + /* u ~< 0x1p25*pi/2 */ + #define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.m>>48) < ((0x3fff + 25)<<16 | 0x921f>>1 | 0x8000)) +-#define TOINT 0x1.8p63 + #define QUOBITS(x) ((uint32_t)(int32_t)x & 0x7fffffff) + #define ROUND1 22 + #define ROUND2 61 +@@ -50,7 +51,6 @@ pio2_3t = -2.75299651904407171810e-37L; + #elif LDBL_MANT_DIG == 113 + /* u ~< 0x1p45*pi/2 */ + #define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.top) < ((0x3fff + 45)<<16 | 0x921f)) +-#define TOINT 0x1.8p112 + #define QUOBITS(x) ((uint32_t)(int64_t)x & 0x7fffffff) + #define ROUND1 51 + #define ROUND2 119 +@@ -77,7 +77,7 @@ int __rem_pio2l(long double x, long doub + ex = u.i.se & 0x7fff; + if (SMALL(u)) { + /* rint(x/(pi/2)), Assume round-to-nearest. 
*/ +- fn = x*invpio2 + TOINT - TOINT; ++ fn = x*invpio2 + toint - toint; + n = QUOBITS(fn); + r = x-fn*pio2_1; + w = fn*pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128) */ +--- a/src/math/ceil.c ++++ b/src/math/ceil.c +@@ -1,5 +1,12 @@ + #include "libm.h" + ++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++static const double_t toint = 1/EPS; ++ + double ceil(double x) + { + union {double f; uint64_t i;} u = {x}; +@@ -10,9 +17,9 @@ double ceil(double x) + return x; + /* y = int(x) - x, where int(x) is an integer neighbor of x */ + if (u.i >> 63) +- y = (double)(x - 0x1p52) + 0x1p52 - x; ++ y = x - toint + toint - x; + else +- y = (double)(x + 0x1p52) - 0x1p52 - x; ++ y = x + toint - toint - x; + /* special case because of non-nearest rounding modes */ + if (e <= 0x3ff-1) { + FORCE_EVAL(y); +--- a/src/math/ceill.c ++++ b/src/math/ceill.c +@@ -6,11 +6,9 @@ long double ceill(long double x) + return ceil(x); + } + #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +-#if LDBL_MANT_DIG == 64 +-#define TOINT 0x1p63 +-#elif LDBL_MANT_DIG == 113 +-#define TOINT 0x1p112 +-#endif ++ ++static const long double toint = 1/LDBL_EPSILON; ++ + long double ceill(long double x) + { + union ldshape u = {x}; +@@ -21,9 +19,9 @@ long double ceill(long double x) + return x; + /* y = int(x) - x, where int(x) is an integer neighbor of x */ + if (u.i.se >> 15) +- y = x - TOINT + TOINT - x; ++ y = x - toint + toint - x; + else +- y = x + TOINT - TOINT - x; ++ y = x + toint - toint - x; + /* special case because of non-nearest rounding modes */ + if (e <= 0x3fff-1) { + FORCE_EVAL(y); +--- a/src/math/floor.c ++++ b/src/math/floor.c +@@ -1,5 +1,12 @@ + #include "libm.h" + ++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++static const double_t toint = 1/EPS; ++ + double floor(double x) + { + 
union {double f; uint64_t i;} u = {x}; +@@ -10,9 +17,9 @@ double floor(double x) + return x; + /* y = int(x) - x, where int(x) is an integer neighbor of x */ + if (u.i >> 63) +- y = (double)(x - 0x1p52) + 0x1p52 - x; ++ y = x - toint + toint - x; + else +- y = (double)(x + 0x1p52) - 0x1p52 - x; ++ y = x + toint - toint - x; + /* special case because of non-nearest rounding modes */ + if (e <= 0x3ff-1) { + FORCE_EVAL(y); +--- a/src/math/floorl.c ++++ b/src/math/floorl.c +@@ -6,11 +6,9 @@ long double floorl(long double x) + return floor(x); + } + #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +-#if LDBL_MANT_DIG == 64 +-#define TOINT 0x1p63 +-#elif LDBL_MANT_DIG == 113 +-#define TOINT 0x1p112 +-#endif ++ ++static const long double toint = 1/LDBL_EPSILON; ++ + long double floorl(long double x) + { + union ldshape u = {x}; +@@ -21,9 +19,9 @@ long double floorl(long double x) + return x; + /* y = int(x) - x, where int(x) is an integer neighbor of x */ + if (u.i.se >> 15) +- y = x - TOINT + TOINT - x; ++ y = x - toint + toint - x; + else +- y = x + TOINT - TOINT - x; ++ y = x + toint - toint - x; + /* special case because of non-nearest rounding modes */ + if (e <= 0x3fff-1) { + FORCE_EVAL(y); +--- a/src/math/i386/fmod.s ++++ b/src/math/i386/fmod.s +@@ -4,7 +4,7 @@ fmod: + fldl 12(%esp) + fldl 4(%esp) + 1: fprem +- fstsw %ax ++ fnstsw %ax + sahf + jp 1b + fstp %st(1) +--- a/src/math/i386/fmodf.s ++++ b/src/math/i386/fmodf.s +@@ -4,7 +4,7 @@ fmodf: + flds 8(%esp) + flds 4(%esp) + 1: fprem +- fstsw %ax ++ fnstsw %ax + sahf + jp 1b + fstp %st(1) +--- a/src/math/i386/fmodl.s ++++ b/src/math/i386/fmodl.s +@@ -4,7 +4,7 @@ fmodl: + fldt 16(%esp) + fldt 4(%esp) + 1: fprem +- fstsw %ax ++ fnstsw %ax + sahf + jp 1b + fstp %st(1) +--- a/src/math/i386/remainder.s ++++ b/src/math/i386/remainder.s +@@ -7,7 +7,7 @@ drem: + fldl 12(%esp) + fldl 4(%esp) + 1: fprem1 +- fstsw %ax ++ fnstsw %ax + sahf + jp 1b + fstp %st(1) +--- a/src/math/i386/remainderf.s ++++ 
b/src/math/i386/remainderf.s +@@ -7,7 +7,7 @@ dremf: + flds 8(%esp) + flds 4(%esp) + 1: fprem1 +- fstsw %ax ++ fnstsw %ax + sahf + jp 1b + fstp %st(1) +--- a/src/math/i386/remainderl.s ++++ b/src/math/i386/remainderl.s +@@ -4,7 +4,7 @@ remainderl: + fldt 16(%esp) + fldt 4(%esp) + 1: fprem1 +- fstsw %ax ++ fnstsw %ax + sahf + jp 1b + fstp %st(1) +--- a/src/math/i386/sqrt.s ++++ b/src/math/i386/sqrt.s +@@ -2,7 +2,7 @@ + .type sqrt,@function + sqrt: fldl 4(%esp) + fsqrt +- fstsw %ax ++ fnstsw %ax + sub $12,%esp + fld %st(0) + fstpt (%esp) +--- a/src/math/modfl.c ++++ b/src/math/modfl.c +@@ -11,11 +11,9 @@ long double modfl(long double x, long do + return r; + } + #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +-#if LDBL_MANT_DIG == 64 +-#define TOINT 0x1p63 +-#elif LDBL_MANT_DIG == 113 +-#define TOINT 0x1p112 +-#endif ++ ++static const long double toint = 1/LDBL_EPSILON; ++ + long double modfl(long double x, long double *iptr) + { + union ldshape u = {x}; +@@ -40,7 +38,7 @@ long double modfl(long double x, long do + + /* raises spurious inexact */ + absx = s ? -x : x; +- y = absx + TOINT - TOINT - absx; ++ y = absx + toint - toint - absx; + if (y == 0) { + *iptr = x; + return s ? -0.0 : 0.0; +--- a/src/math/rint.c ++++ b/src/math/rint.c +@@ -1,6 +1,14 @@ ++#include <float.h> + #include <math.h> + #include <stdint.h> + ++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++static const double_t toint = 1/EPS; ++ + double rint(double x) + { + union {double f; uint64_t i;} u = {x}; +@@ -11,9 +19,9 @@ double rint(double x) + if (e >= 0x3ff+52) + return x; + if (s) +- y = (double)(x - 0x1p52) + 0x1p52; ++ y = x - toint + toint; + else +- y = (double)(x + 0x1p52) - 0x1p52; ++ y = x + toint - toint; + if (y == 0) + return s ? 
-0.0 : 0; + return y; +--- a/src/math/rintf.c ++++ b/src/math/rintf.c +@@ -1,6 +1,16 @@ ++#include <float.h> + #include <math.h> + #include <stdint.h> + ++#if FLT_EVAL_METHOD==0 ++#define EPS FLT_EPSILON ++#elif FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++static const float_t toint = 1/EPS; ++ + float rintf(float x) + { + union {float f; uint32_t i;} u = {x}; +@@ -11,9 +21,9 @@ float rintf(float x) + if (e >= 0x7f+23) + return x; + if (s) +- y = (float)(x - 0x1p23f) + 0x1p23f; ++ y = x - toint + toint; + else +- y = (float)(x + 0x1p23f) - 0x1p23f; ++ y = x + toint - toint; + if (y == 0) + return s ? -0.0f : 0.0f; + return y; +--- a/src/math/rintl.c ++++ b/src/math/rintl.c +@@ -6,11 +6,9 @@ long double rintl(long double x) + return rint(x); + } + #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +-#if LDBL_MANT_DIG == 64 +-#define TOINT 0x1p63 +-#elif LDBL_MANT_DIG == 113 +-#define TOINT 0x1p112 +-#endif ++ ++static const long double toint = 1/LDBL_EPSILON; ++ + long double rintl(long double x) + { + union ldshape u = {x}; +@@ -21,9 +19,9 @@ long double rintl(long double x) + if (e >= 0x3fff+LDBL_MANT_DIG-1) + return x; + if (s) +- y = x - TOINT + TOINT; ++ y = x - toint + toint; + else +- y = x + TOINT - TOINT; ++ y = x + toint - toint; + if (y == 0) + return 0*x; + return y; +--- a/src/math/round.c ++++ b/src/math/round.c +@@ -1,5 +1,12 @@ + #include "libm.h" + ++#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++static const double_t toint = 1/EPS; ++ + double round(double x) + { + union {double f; uint64_t i;} u = {x}; +@@ -12,10 +19,10 @@ double round(double x) + x = -x; + if (e < 0x3ff-1) { + /* raise inexact if x!=0 */ +- FORCE_EVAL(x + 0x1p52); ++ FORCE_EVAL(x + toint); + return 0*u.f; + } +- y = (double)(x + 0x1p52) - 0x1p52 - x; ++ y = x + toint - toint - x; + if (y > 0.5) + y = y 
+ x - 1; + else if (y <= -0.5) +--- a/src/math/roundf.c ++++ b/src/math/roundf.c +@@ -1,5 +1,14 @@ + #include "libm.h" + ++#if FLT_EVAL_METHOD==0 ++#define EPS FLT_EPSILON ++#elif FLT_EVAL_METHOD==1 ++#define EPS DBL_EPSILON ++#elif FLT_EVAL_METHOD==2 ++#define EPS LDBL_EPSILON ++#endif ++static const float_t toint = 1/EPS; ++ + float roundf(float x) + { + union {float f; uint32_t i;} u = {x}; +@@ -11,10 +20,10 @@ float roundf(float x) + if (u.i >> 31) + x = -x; + if (e < 0x7f-1) { +- FORCE_EVAL(x + 0x1p23f); ++ FORCE_EVAL(x + toint); + return 0*u.f; + } +- y = (float)(x + 0x1p23f) - 0x1p23f - x; ++ y = x + toint - toint - x; + if (y > 0.5f) + y = y + x - 1; + else if (y <= -0.5f) +--- a/src/math/roundl.c ++++ b/src/math/roundl.c +@@ -6,11 +6,9 @@ long double roundl(long double x) + return round(x); + } + #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +-#if LDBL_MANT_DIG == 64 +-#define TOINT 0x1p63 +-#elif LDBL_MANT_DIG == 113 +-#define TOINT 0x1p112 +-#endif ++ ++static const long double toint = 1/LDBL_EPSILON; ++ + long double roundl(long double x) + { + union ldshape u = {x}; +@@ -22,10 +20,10 @@ long double roundl(long double x) + if (u.i.se >> 15) + x = -x; + if (e < 0x3fff-1) { +- FORCE_EVAL(x + TOINT); ++ FORCE_EVAL(x + toint); + return 0*u.f; + } +- y = x + TOINT - TOINT - x; ++ y = x + toint - toint - x; + if (y > 0.5) + y = y + x - 1; + else if (y <= -0.5) +--- a/src/math/truncl.c ++++ b/src/math/truncl.c +@@ -6,11 +6,9 @@ long double truncl(long double x) + return trunc(x); + } + #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +-#if LDBL_MANT_DIG == 64 +-#define TOINT 0x1p63 +-#elif LDBL_MANT_DIG == 113 +-#define TOINT 0x1p112 +-#endif ++ ++static const long double toint = 1/LDBL_EPSILON; ++ + long double truncl(long double x) + { + union ldshape u = {x}; +@@ -27,7 +25,7 @@ long double truncl(long double x) + /* y = int(|x|) - |x|, where int(|x|) is an integer neighbor of |x| */ + if (s) + x = 
-x; +- y = x + TOINT - TOINT - x; ++ y = x + toint - toint - x; + if (y > 0) + y -= 1; + x += y; +--- a/src/math/x32/exp2l.s ++++ b/src/math/x32/exp2l.s +@@ -6,9 +6,7 @@ expm1l: + fmulp + movl $0xc2820000,-4(%esp) + flds -4(%esp) +- fucomp %st(1) +- fnstsw %ax +- sahf ++ fucomip %st(1) + fld1 + jb 1f + # x*log2e <= -65, return -1 without underflow +@@ -17,11 +15,8 @@ expm1l: + ret + 1: fld %st(1) + fabs +- fucom %st(1) +- fnstsw %ax ++ fucomip %st(1) + fstp %st(0) +- fstp %st(0) +- sahf + ja 1f + f2xm1 + ret +@@ -53,9 +48,7 @@ exp2l: + fld %st(1) + fsub %st(1) + faddp +- fucomp %st(1) +- fnstsw +- sahf ++ fucomip %st(1) + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%esp) + flds (%esp) # 0x1p-149 +--- a/src/math/x32/fmodl.s ++++ b/src/math/x32/fmodl.s +@@ -4,8 +4,8 @@ fmodl: + fldt 24(%esp) + fldt 8(%esp) + 1: fprem +- fstsw %ax +- sahf +- jp 1b ++ fnstsw %ax ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +--- a/src/math/x32/remainderl.s ++++ b/src/math/x32/remainderl.s +@@ -4,8 +4,8 @@ remainderl: + fldt 24(%esp) + fldt 8(%esp) + 1: fprem1 +- fstsw %ax +- sahf +- jp 1b ++ fnstsw %ax ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +--- a/src/math/x86_64/exp2l.s ++++ b/src/math/x86_64/exp2l.s +@@ -6,9 +6,7 @@ expm1l: + fmulp + movl $0xc2820000,-4(%rsp) + flds -4(%rsp) +- fucomp %st(1) +- fnstsw %ax +- sahf ++ fucomip %st(1) + fld1 + jb 1f + # x*log2e <= -65, return -1 without underflow +@@ -17,11 +15,8 @@ expm1l: + ret + 1: fld %st(1) + fabs +- fucom %st(1) +- fnstsw %ax ++ fucomip %st(1) + fstp %st(0) +- fstp %st(0) +- sahf + ja 1f + f2xm1 + ret +@@ -53,9 +48,7 @@ exp2l: + fld %st(1) + fsub %st(1) + faddp +- fucomp %st(1) +- fnstsw +- sahf ++ fucomip %st(1) + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%rsp) + flds (%rsp) # 0x1p-149 +--- a/src/math/x86_64/fmodl.s ++++ b/src/math/x86_64/fmodl.s +@@ -4,8 +4,8 @@ fmodl: + fldt 24(%rsp) + fldt 8(%rsp) + 1: fprem +- fstsw %ax +- sahf +- jp 1b ++ fnstsw %ax ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +--- 
a/src/math/x86_64/remainderl.s ++++ b/src/math/x86_64/remainderl.s +@@ -4,8 +4,8 @@ remainderl: + fldt 24(%rsp) + fldt 8(%rsp) + 1: fprem1 +- fstsw %ax +- sahf +- jp 1b ++ fnstsw %ax ++ testb $4,%ah ++ jnz 1b + fstp %st(1) + ret +--- a/src/misc/forkpty.c ++++ b/src/misc/forkpty.c +@@ -1,38 +1,57 @@ + #include <pty.h> ++#include <utmp.h> + #include <unistd.h> +-#include <sys/ioctl.h> ++#include <errno.h> + #include <fcntl.h> ++#include <sys/wait.h> ++#include <pthread.h> + +-int forkpty(int *m, char *name, const struct termios *tio, const struct winsize *ws) ++int forkpty(int *pm, char *name, const struct termios *tio, const struct winsize *ws) + { +- int s, t, i, istmp[3]={0}; +- pid_t pid; ++ int m, s, ec=0, p[2], cs; ++ pid_t pid=-1; ++ sigset_t set, oldset; + +- if (openpty(m, &s, name, tio, ws) < 0) return -1; ++ if (openpty(&m, &s, name, tio, ws) < 0) return -1; + +- /* Ensure before forking that we don't exceed fd limit */ +- for (i=0; i<3; i++) { +- if (fcntl(i, F_GETFL) < 0) { +- t = fcntl(s, F_DUPFD, i); +- if (t<0) break; +- else if (t!=i) close(t); +- else istmp[i] = 1; +- } ++ sigfillset(&set); ++ pthread_sigmask(SIG_BLOCK, &set, &oldset); ++ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); ++ ++ if (pipe2(p, O_CLOEXEC)) { ++ close(s); ++ goto out; + } +- pid = i==3 ? 
fork() : -1; ++ ++ pid = fork(); + if (!pid) { +- close(*m); +- setsid(); +- ioctl(s, TIOCSCTTY, (char *)0); +- dup2(s, 0); +- dup2(s, 1); +- dup2(s, 2); +- if (s>2) close(s); ++ close(m); ++ close(p[0]); ++ if (login_tty(s)) { ++ write(p[1], &errno, sizeof errno); ++ _exit(127); ++ } ++ close(p[1]); ++ pthread_setcancelstate(cs, 0); ++ pthread_sigmask(SIG_SETMASK, &oldset, 0); + return 0; + } +- for (i=0; i<3; i++) +- if (istmp[i]) close(i); + close(s); +- if (pid < 0) close(*m); ++ close(p[1]); ++ if (read(p[0], &ec, sizeof ec) > 0) { ++ int status; ++ waitpid(pid, &status, 0); ++ pid = -1; ++ errno = ec; ++ } ++ close(p[0]); ++ ++out: ++ if (pid > 0) *pm = m; ++ else close(m); ++ ++ pthread_setcancelstate(cs, 0); ++ pthread_sigmask(SIG_SETMASK, &oldset, 0); ++ + return pid; + } +--- a/src/misc/getopt.c ++++ b/src/misc/getopt.c +@@ -4,6 +4,7 @@ + #include <limits.h> + #include <stdlib.h> + #include "libc.h" ++#include "locale_impl.h" + + char *optarg; + int optind=1, opterr=1, optopt, __optpos, __optreset=0; +@@ -11,6 +12,18 @@ int optind=1, opterr=1, optopt, __optpos + #define optpos __optpos + weak_alias(__optreset, optreset); + ++void __getopt_msg(const char *a, const char *b, const char *c, size_t l) ++{ ++ FILE *f = stderr; ++ b = __lctrans_cur(b); ++ flockfile(f); ++ fwrite(a, strlen(a), 1, f) ++ && fwrite(b, strlen(b), 1, f) ++ && fwrite(c, l, 1, f) ++ && putc('\n', f); ++ funlockfile(f); ++} ++ + int getopt(int argc, char * const argv[], const char *optstring) + { + int i; +@@ -24,8 +37,20 @@ int getopt(int argc, char * const argv[] + optind = 1; + } + +- if (optind >= argc || !argv[optind] || argv[optind][0] != '-' || !argv[optind][1]) ++ if (optind >= argc || !argv[optind]) ++ return -1; ++ ++ if (argv[optind][0] != '-') { ++ if (optstring[0] == '-') { ++ optarg = argv[optind++]; ++ return 1; ++ } ++ return -1; ++ } ++ ++ if (!argv[optind][1]) + return -1; ++ + if (argv[optind][1] == '-' && !argv[optind][2]) + return optind++, -1; + +@@ -43,30 +68,31 @@ 
int getopt(int argc, char * const argv[] + optpos = 0; + } + +- for (i=0; (l = mbtowc(&d, optstring+i, MB_LEN_MAX)) && d!=c; i+=l>0?l:1); ++ if (optstring[0] == '-' || optstring[0] == '+') ++ optstring++; ++ ++ i = 0; ++ d = 0; ++ do { ++ l = mbtowc(&d, optstring+i, MB_LEN_MAX); ++ if (l>0) i+=l; else i++; ++ } while (l && d != c); + + if (d != c) { +- if (optstring[0] != ':' && opterr) { +- write(2, argv[0], strlen(argv[0])); +- write(2, ": illegal option: ", 18); +- write(2, optchar, k); +- write(2, "\n", 1); +- } ++ if (optstring[0] != ':' && opterr) ++ __getopt_msg(argv[0], ": unrecognized option: ", optchar, k); + return '?'; + } +- if (optstring[i+1] == ':') { +- if (optind >= argc) { ++ if (optstring[i] == ':') { ++ if (optstring[i+1] == ':') optarg = 0; ++ else if (optind >= argc) { + if (optstring[0] == ':') return ':'; +- if (opterr) { +- write(2, argv[0], strlen(argv[0])); +- write(2, ": option requires an argument: ", 31); +- write(2, optchar, k); +- write(2, "\n", 1); +- } ++ if (opterr) __getopt_msg(argv[0], ++ ": option requires an argument: ", ++ optchar, k); + return '?'; + } +- if (optstring[i+2] == ':') optarg = 0; +- if (optstring[i+2] != ':' || optpos) { ++ if (optstring[i+1] != ':' || optpos) { + optarg = argv[optind++] + optpos; + optpos = 0; + } +--- a/src/misc/getopt_long.c ++++ b/src/misc/getopt_long.c +@@ -2,37 +2,106 @@ + #include <stddef.h> + #include <getopt.h> + #include <stdio.h> ++#include <string.h> + + extern int __optpos, __optreset; + ++static void permute(char *const *argv, int dest, int src) ++{ ++ char **av = (char **)argv; ++ char *tmp = av[src]; ++ int i; ++ for (i=src; i>dest; i--) ++ av[i] = av[i-1]; ++ av[dest] = tmp; ++} ++ ++void __getopt_msg(const char *, const char *, const char *, size_t); ++ ++static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly); ++ + static int __getopt_long(int argc, char *const *argv, const char *optstring, const 
struct option *longopts, int *idx, int longonly) + { ++ int ret, skipped, resumed; ++ const char *optstring2 = optstring + 1; + if (!optind || __optreset) { + __optreset = 0; + __optpos = 0; + optind = 1; + } +- if (optind >= argc || !argv[optind] || argv[optind][0] != '-') return -1; +- if ((longonly && argv[optind][1]) || +- (argv[optind][1] == '-' && argv[optind][2])) +- { ++ if (optind >= argc || !argv[optind]) return -1; ++ skipped = optind; ++ if (optstring[0] != '+' && optstring[0] != '-') { + int i; +- for (i=0; longopts[i].name; i++) { ++ for (i=optind; ; i++) { ++ if (i >= argc || !argv[i]) return -1; ++ if (argv[i][0] == '-' && argv[i][1]) break; ++ } ++ optind = i; ++ optstring2 = optstring; ++ } ++ resumed = optind; ++ ret = __getopt_long_core(argc, argv, optstring2, longopts, idx, longonly); ++ if (resumed > skipped) { ++ int i, cnt = optind-resumed; ++ for (i=0; i<cnt; i++) ++ permute(argv, skipped, optind-1); ++ optind = skipped + cnt; ++ } ++ return ret; ++} ++ ++static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly) ++{ ++ ++ if (longopts && argv[optind][0] == '-' && ++ ((longonly && argv[optind][1]) || ++ (argv[optind][1] == '-' && argv[optind][2]))) ++ { ++ int i, cnt, match; ++ char *opt; ++ for (cnt=i=0; longopts[i].name; i++) { + const char *name = longopts[i].name; +- char *opt = argv[optind]+1; ++ opt = argv[optind]+1; + if (*opt == '-') opt++; + for (; *name && *name == *opt; name++, opt++); +- if (*name || (*opt && *opt != '=')) continue; ++ if (*opt && *opt != '=') continue; ++ match = i; ++ if (!*name) { ++ cnt = 1; ++ break; ++ } ++ cnt++; ++ } ++ if (cnt==1) { ++ i = match; ++ optind++; ++ optopt = longopts[i].val; + if (*opt == '=') { +- if (!longopts[i].has_arg) continue; ++ if (!longopts[i].has_arg) { ++ if (optstring[0] == ':' || !opterr) ++ return '?'; ++ __getopt_msg(argv[0], ++ ": option does not take an argument: ", ++ longopts[i].name, ++ 
strlen(longopts[i].name)); ++ return '?'; ++ } + optarg = opt+1; + } else { + if (longopts[i].has_arg == required_argument) { +- if (!(optarg = argv[++optind])) +- return ':'; ++ if (!(optarg = argv[optind])) { ++ if (optstring[0] == ':' || !opterr) ++ return ':'; ++ __getopt_msg(argv[0], ++ ": option requires an argument: ", ++ longopts[i].name, ++ strlen(longopts[i].name)); ++ return '?'; ++ } ++ optind++; + } else optarg = NULL; + } +- optind++; + if (idx) *idx = i; + if (longopts[i].flag) { + *longopts[i].flag = longopts[i].val; +@@ -41,6 +110,12 @@ static int __getopt_long(int argc, char + return longopts[i].val; + } + if (argv[optind][1] == '-') { ++ if (optstring[0] != ':' && opterr) ++ __getopt_msg(argv[0], cnt ? ++ ": option is ambiguous: " : ++ ": unrecognized option: ", ++ argv[optind]+2, ++ strlen(argv[optind]+2)); + optind++; + return '?'; + } +--- /dev/null ++++ b/src/misc/login_tty.c +@@ -0,0 +1,14 @@ ++#include <utmp.h> ++#include <sys/ioctl.h> ++#include <unistd.h> ++ ++int login_tty(int fd) ++{ ++ setsid(); ++ if (ioctl(fd, TIOCSCTTY, (char *)0)) return -1; ++ dup2(fd, 0); ++ dup2(fd, 1); ++ dup2(fd, 2); ++ if (fd>2) close(fd); ++ return 0; ++} +--- a/src/misc/openpty.c ++++ b/src/misc/openpty.c +@@ -3,31 +3,38 @@ + #include <unistd.h> + #include <pty.h> + #include <stdio.h> ++#include <pthread.h> + + /* Nonstandard, but vastly superior to the standard functions */ + +-int openpty(int *m, int *s, char *name, const struct termios *tio, const struct winsize *ws) ++int openpty(int *pm, int *ps, char *name, const struct termios *tio, const struct winsize *ws) + { +- int n=0; ++ int m, s, n=0, cs; + char buf[20]; + +- *m = open("/dev/ptmx", O_RDWR|O_NOCTTY); +- if (*m < 0) return -1; ++ m = open("/dev/ptmx", O_RDWR|O_NOCTTY); ++ if (m < 0) return -1; + +- if (ioctl(*m, TIOCSPTLCK, &n) || ioctl (*m, TIOCGPTN, &n)) { +- close(*m); +- return -1; +- } ++ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); ++ ++ if (ioctl(m, TIOCSPTLCK, &n) || ioctl (m, 
TIOCGPTN, &n)) ++ goto fail; + + if (!name) name = buf; + snprintf(name, sizeof buf, "/dev/pts/%d", n); +- if ((*s = open(name, O_RDWR|O_NOCTTY)) < 0) { +- close(*m); +- return -1; +- } ++ if ((s = open(name, O_RDWR|O_NOCTTY)) < 0) ++ goto fail; ++ ++ if (tio) tcsetattr(s, TCSANOW, tio); ++ if (ws) ioctl(s, TIOCSWINSZ, ws); + +- if (tio) tcsetattr(*s, TCSANOW, tio); +- if (ws) ioctl(*s, TIOCSWINSZ, ws); ++ *pm = m; ++ *ps = s; + ++ pthread_setcancelstate(cs, 0); + return 0; ++fail: ++ close(m); ++ pthread_setcancelstate(cs, 0); ++ return -1; + } +--- a/src/misc/syslog.c ++++ b/src/misc/syslog.c +@@ -46,8 +46,12 @@ void closelog(void) + + static void __openlog() + { +- log_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); +- if (log_fd >= 0) connect(log_fd, (void *)&log_addr, sizeof log_addr); ++ int fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); ++ if (fd < 0) return; ++ if (connect(fd, (void *)&log_addr, sizeof log_addr) < 0) ++ close(fd); ++ else ++ log_fd = fd; + } + + void openlog(const char *ident, int opt, int facility) +--- a/src/multibyte/c16rtomb.c ++++ b/src/multibyte/c16rtomb.c +@@ -4,6 +4,8 @@ + + size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps) + { ++ static unsigned internal_state; ++ if (!ps) ps = (void *)&internal_state; + unsigned *x = (unsigned *)ps; + wchar_t wc; + +--- a/src/multibyte/mbrtoc16.c ++++ b/src/multibyte/mbrtoc16.c +@@ -3,6 +3,8 @@ + + size_t mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, mbstate_t *restrict ps) + { ++ static unsigned internal_state; ++ if (!ps) ps = (void *)&internal_state; + unsigned *pending = (unsigned *)ps; + + if (!s) return mbrtoc16(0, "", 1, ps); +--- a/src/multibyte/mbrtoc32.c ++++ b/src/multibyte/mbrtoc32.c +@@ -3,6 +3,8 @@ + + size_t mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, mbstate_t *restrict ps) + { ++ static unsigned internal_state; ++ if (!ps) ps = (void *)&internal_state; + if (!s) return mbrtoc32(0, "", 1, ps); + wchar_t wc; + 
size_t ret = mbrtowc(&wc, s, n, ps); +--- a/src/multibyte/wcsnrtombs.c ++++ b/src/multibyte/wcsnrtombs.c +@@ -40,7 +40,7 @@ size_t wcsnrtombs(char *restrict dst, co + ws++; wn--; + /* safe - this loop runs fewer than sizeof(buf) times */ + s+=l; n-=l; +- cnt++; ++ cnt += l; + } + if (dst) *wcs = ws; + return cnt; +--- /dev/null ++++ b/src/network/ns_parse.c +@@ -0,0 +1,171 @@ ++#define _BSD_SOURCE ++#include <errno.h> ++#include <stddef.h> ++#include <resolv.h> ++#include <arpa/nameser.h> ++ ++const struct _ns_flagdata _ns_flagdata[16] = { ++ { 0x8000, 15 }, ++ { 0x7800, 11 }, ++ { 0x0400, 10 }, ++ { 0x0200, 9 }, ++ { 0x0100, 8 }, ++ { 0x0080, 7 }, ++ { 0x0040, 6 }, ++ { 0x0020, 5 }, ++ { 0x0010, 4 }, ++ { 0x000f, 0 }, ++ { 0x0000, 0 }, ++ { 0x0000, 0 }, ++ { 0x0000, 0 }, ++ { 0x0000, 0 }, ++ { 0x0000, 0 }, ++ { 0x0000, 0 }, ++}; ++ ++unsigned ns_get16(const unsigned char *cp) ++{ ++ return cp[0]<<8 | cp[1]; ++} ++ ++unsigned long ns_get32(const unsigned char *cp) ++{ ++ return (unsigned)cp[0]<<24 | cp[1]<<16 | cp[2]<<8 | cp[3]; ++} ++ ++void ns_put16(unsigned s, unsigned char *cp) ++{ ++ *cp++ = s>>8; ++ *cp++ = s; ++} ++ ++void ns_put32(unsigned long l, unsigned char *cp) ++{ ++ *cp++ = l>>24; ++ *cp++ = l>>16; ++ *cp++ = l>>8; ++ *cp++ = l; ++} ++ ++int ns_initparse(const unsigned char *msg, int msglen, ns_msg *handle) ++{ ++ int i, r; ++ ++ handle->_msg = msg; ++ handle->_eom = msg + msglen; ++ if (msglen < (2 + ns_s_max) * NS_INT16SZ) goto bad; ++ NS_GET16(handle->_id, msg); ++ NS_GET16(handle->_flags, msg); ++ for (i = 0; i < ns_s_max; i++) NS_GET16(handle->_counts[i], msg); ++ for (i = 0; i < ns_s_max; i++) { ++ if (handle->_counts[i]) { ++ handle->_sections[i] = msg; ++ r = ns_skiprr(msg, handle->_eom, i, handle->_counts[i]); ++ if (r < 0) return -1; ++ msg += r; ++ } else { ++ handle->_sections[i] = NULL; ++ } ++ } ++ if (msg != handle->_eom) goto bad; ++ handle->_sect = ns_s_max; ++ handle->_rrnum = -1; ++ handle->_msg_ptr = NULL; ++ return 0; ++bad: ++ 
errno = EMSGSIZE; ++ return -1; ++} ++ ++int ns_skiprr(const unsigned char *ptr, const unsigned char *eom, ns_sect section, int count) ++{ ++ const unsigned char *p = ptr; ++ int r; ++ ++ while (count--) { ++ r = dn_skipname(p, eom); ++ if (r < 0) goto bad; ++ if (r + 2 * NS_INT16SZ > eom - p) goto bad; ++ p += r + 2 * NS_INT16SZ; ++ if (section != ns_s_qd) { ++ if (NS_INT32SZ + NS_INT16SZ > eom - p) goto bad; ++ p += NS_INT32SZ; ++ NS_GET16(r, p); ++ if (r > eom - p) goto bad; ++ p += r; ++ } ++ } ++ return ptr - p; ++bad: ++ errno = EMSGSIZE; ++ return -1; ++} ++ ++int ns_parserr(ns_msg *handle, ns_sect section, int rrnum, ns_rr *rr) ++{ ++ int r; ++ ++ if (section < 0 || section >= ns_s_max) goto bad; ++ if (section != handle->_sect) { ++ handle->_sect = section; ++ handle->_rrnum = 0; ++ handle->_msg_ptr = handle->_sections[section]; ++ } ++ if (rrnum == -1) rrnum = handle->_rrnum; ++ if (rrnum < 0 || rrnum >= handle->_counts[section]) goto bad; ++ if (rrnum < handle->_rrnum) { ++ handle->_rrnum = 0; ++ handle->_msg_ptr = handle->_sections[section]; ++ } ++ if (rrnum > handle->_rrnum) { ++ r = ns_skiprr(handle->_msg_ptr, handle->_eom, section, rrnum - handle->_rrnum); ++ if (r < 0) return -1; ++ handle->_msg_ptr += r; ++ handle->_rrnum = rrnum; ++ } ++ r = ns_name_uncompress(handle->_msg, handle->_eom, handle->_msg_ptr, rr->name, NS_MAXDNAME); ++ if (r < 0) return -1; ++ handle->_msg_ptr += r; ++ if (2 * NS_INT16SZ > handle->_eom - handle->_msg_ptr) goto size; ++ NS_GET16(rr->type, handle->_msg_ptr); ++ NS_GET16(rr->rr_class, handle->_msg_ptr); ++ if (section != ns_s_qd) { ++ if (NS_INT32SZ + NS_INT16SZ > handle->_eom - handle->_msg_ptr) goto size; ++ NS_GET32(rr->ttl, handle->_msg_ptr); ++ NS_GET16(rr->rdlength, handle->_msg_ptr); ++ if (rr->rdlength > handle->_eom - handle->_msg_ptr) goto size; ++ rr->rdata = handle->_msg_ptr; ++ handle->_msg_ptr += rr->rdlength; ++ } else { ++ rr->ttl = 0; ++ rr->rdlength = 0; ++ rr->rdata = NULL; ++ } ++ handle->_rrnum++; 
++ if (handle->_rrnum > handle->_counts[section]) { ++ handle->_sect = section + 1; ++ if (handle->_sect == ns_s_max) { ++ handle->_rrnum = -1; ++ handle->_msg_ptr = NULL; ++ } else { ++ handle->_rrnum = 0; ++ } ++ } ++ return 0; ++bad: ++ errno = ENODEV; ++ return -1; ++size: ++ errno = EMSGSIZE; ++ return -1; ++} ++ ++int ns_name_uncompress(const unsigned char *msg, const unsigned char *eom, ++ const unsigned char *src, char *dst, size_t dstsiz) ++{ ++ int r; ++ r = dn_expand(msg, eom, src, dst, dstsiz); ++ if (r < 0) errno = EMSGSIZE; ++ return r; ++} ++ +--- a/src/process/posix_spawn.c ++++ b/src/process/posix_spawn.c +@@ -102,8 +102,7 @@ static int child(void *args_vp) + } + switch(op->cmd) { + case FDOP_CLOSE: +- if ((ret=__syscall(SYS_close, op->fd))) +- goto fail; ++ __syscall(SYS_close, op->fd); + break; + case FDOP_DUP2: + if ((ret=__sys_dup2(op->srcfd, op->fd))<0) +@@ -137,7 +136,7 @@ static int child(void *args_vp) + fail: + /* Since sizeof errno < PIPE_BUF, the write is atomic. */ + ret = -ret; +- if (ret) while (write(p, &ret, sizeof ret) < 0); ++ if (ret) while (__syscall(SYS_write, p, &ret, sizeof ret) < 0); + _exit(127); + } + +--- a/src/regex/fnmatch.c ++++ b/src/regex/fnmatch.c +@@ -97,7 +97,13 @@ escaped: + return pat[0]; + } + +-static int match_bracket(const char *p, int k) ++static int casefold(int k) ++{ ++ int c = towupper(k); ++ return c == k ? 
towlower(k) : c; ++} ++ ++static int match_bracket(const char *p, int k, int kfold) + { + wchar_t wc; + int inv = 0; +@@ -119,7 +125,10 @@ static int match_bracket(const char *p, + wchar_t wc2; + int l = mbtowc(&wc2, p+1, 4); + if (l < 0) return 0; +- if (wc<=wc2 && (unsigned)k-wc <= wc2-wc) return !inv; ++ if (wc <= wc2) ++ if ((unsigned)k-wc <= wc2-wc || ++ (unsigned)kfold-wc <= wc2-wc) ++ return !inv; + p += l-1; + continue; + } +@@ -132,7 +141,9 @@ static int match_bracket(const char *p, + char buf[16]; + memcpy(buf, p0, p-1-p0); + buf[p-1-p0] = 0; +- if (iswctype(k, wctype(buf))) return !inv; ++ if (iswctype(k, wctype(buf)) || ++ iswctype(kfold, wctype(buf))) ++ return !inv; + } + continue; + } +@@ -143,7 +154,7 @@ static int match_bracket(const char *p, + if (l < 0) return 0; + p += l-1; + } +- if (wc==k) return !inv; ++ if (wc==k || wc==kfold) return !inv; + } + return inv; + } +@@ -153,7 +164,7 @@ static int fnmatch_internal(const char * + const char *p, *ptail, *endpat; + const char *s, *stail, *endstr; + size_t pinc, sinc, tailcnt=0; +- int c, k; ++ int c, k, kfold; + + if (flags & FNM_PERIOD) { + if (*str == '.' && *pat != '.') +@@ -173,10 +184,11 @@ static int fnmatch_internal(const char * + return (c==END) ? 0 : FNM_NOMATCH; + str += sinc; + n -= sinc; ++ kfold = flags & FNM_CASEFOLD ? casefold(k) : k; + if (c == BRACKET) { +- if (!match_bracket(pat, k)) ++ if (!match_bracket(pat, k, kfold)) + return FNM_NOMATCH; +- } else if (c != QUESTION && k != c) { ++ } else if (c != QUESTION && k != c && kfold != c) { + return FNM_NOMATCH; + } + pat+=pinc; +@@ -233,10 +245,11 @@ static int fnmatch_internal(const char * + break; + } + s += sinc; ++ kfold = flags & FNM_CASEFOLD ? 
casefold(k) : k; + if (c == BRACKET) { +- if (!match_bracket(p-pinc, k)) ++ if (!match_bracket(p-pinc, k, kfold)) + return FNM_NOMATCH; +- } else if (c != QUESTION && k != c) { ++ } else if (c != QUESTION && k != c && kfold != c) { + return FNM_NOMATCH; + } + } +@@ -261,10 +274,11 @@ static int fnmatch_internal(const char * + k = str_next(s, endstr-s, &sinc); + if (!k) + return FNM_NOMATCH; ++ kfold = flags & FNM_CASEFOLD ? casefold(k) : k; + if (c == BRACKET) { +- if (!match_bracket(p-pinc, k)) ++ if (!match_bracket(p-pinc, k, kfold)) + break; +- } else if (c != QUESTION && k != c) { ++ } else if (c != QUESTION && k != c && kfold != c) { + break; + } + s += sinc; +--- a/src/sched/affinity.c ++++ b/src/sched/affinity.c +@@ -1,5 +1,6 @@ + #define _GNU_SOURCE + #include <sched.h> ++#include <string.h> + #include "pthread_impl.h" + #include "syscall.h" + +@@ -10,17 +11,23 @@ int sched_setaffinity(pid_t tid, size_t + + int pthread_setaffinity_np(pthread_t td, size_t size, const cpu_set_t *set) + { +- return syscall(SYS_sched_setaffinity, td->tid, size, set); ++ return -__syscall(SYS_sched_setaffinity, td->tid, size, set); + } + +-int sched_getaffinity(pid_t tid, size_t size, cpu_set_t *set) ++static int do_getaffinity(pid_t tid, size_t size, cpu_set_t *set) + { + long ret = __syscall(SYS_sched_getaffinity, tid, size, set); +- if (ret > 0) ret = 0; +- return __syscall_ret(ret); ++ if (ret < 0) return ret; ++ if (ret < size) memset((char *)set+ret, 0, size-ret); ++ return 0; ++} ++ ++int sched_getaffinity(pid_t tid, size_t size, cpu_set_t *set) ++{ ++ return __syscall_ret(do_getaffinity(tid, size, set)); + } + + int pthread_getaffinity_np(pthread_t td, size_t size, cpu_set_t *set) + { +- return sched_getaffinity(td->tid, size, set); ++ return -do_getaffinity(td->tid, size, set); + } +--- a/src/setjmp/arm/longjmp.s ++++ b/src/setjmp/arm/longjmp.s +@@ -20,7 +20,7 @@ longjmp: + ldc p2, cr4, [ip], #48 + 2: tst r1,#0x40 + beq 2f +- ldc p11, cr8, [ip], #64 ++ .word 0xecbc8b10 
/* vldmia ip!, {d8-d15} */ + 2: tst r1,#0x200 + beq 3f + ldcl p1, cr10, [ip], #8 +--- a/src/setjmp/arm/setjmp.s ++++ b/src/setjmp/arm/setjmp.s +@@ -22,7 +22,7 @@ setjmp: + stc p2, cr4, [ip], #48 + 2: tst r1,#0x40 + beq 2f +- stc p11, cr8, [ip], #64 ++ .word 0xecac8b10 /* vstmia ip!, {d8-d15} */ + 2: tst r1,#0x200 + beq 3f + stcl p1, cr10, [ip], #8 +--- a/src/signal/raise.c ++++ b/src/signal/raise.c +@@ -5,12 +5,11 @@ + + int raise(int sig) + { +- int pid, tid, ret; ++ int tid, ret; + sigset_t set; + __block_app_sigs(&set); + tid = __syscall(SYS_gettid); +- pid = __syscall(SYS_getpid); +- ret = syscall(SYS_tgkill, pid, tid, sig); ++ ret = syscall(SYS_tkill, tid, sig); + __restore_sigs(&set); + return ret; + } +--- a/src/stdio/vfprintf.c ++++ b/src/stdio/vfprintf.c +@@ -158,7 +158,7 @@ static void pop_arg(union arg *arg, int + + static void out(FILE *f, const char *s, size_t l) + { +- __fwritex((void *)s, l, f); ++ if (!(f->flags & F_ERR)) __fwritex((void *)s, l, f); + } + + static void pad(FILE *f, char c, int w, int l, int fl) +@@ -225,7 +225,7 @@ static int fmt_fp(FILE *f, long double y + + if (!isfinite(y)) { + char *s = (t&32)?"inf":"INF"; +- if (y!=y) s=(t&32)?"nan":"NAN", pl=0; ++ if (y!=y) s=(t&32)?"nan":"NAN"; + pad(f, ' ', w, 3+pl, fl&~ZERO_PAD); + out(f, prefix, pl); + out(f, s, 3); +@@ -570,7 +570,7 @@ static int printf_core(FILE *f, const ch + if (0) { + case 'o': + a = fmt_o(arg.i, z); +- if ((fl&ALT_FORM) && arg.i) prefix+=5, pl=1; ++ if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1; + } if (0) { + case 'd': case 'i': + pl=1; +@@ -656,6 +656,7 @@ int vfprintf(FILE *restrict f, const cha + int nl_type[NL_ARGMAX+1] = {0}; + union arg nl_arg[NL_ARGMAX+1]; + unsigned char internal_buf[80], *saved_buf = 0; ++ int olderr; + int ret; + + /* the copy allows passing va_list* even if va_list is an array */ +@@ -666,6 +667,8 @@ int vfprintf(FILE *restrict f, const cha + } + + FLOCK(f); ++ olderr = f->flags & F_ERR; ++ if (f->mode < 1) f->flags &= ~F_ERR; + if (!f->buf_size) 
{ + saved_buf = f->buf; + f->wpos = f->wbase = f->buf = internal_buf; +@@ -680,6 +683,8 @@ int vfprintf(FILE *restrict f, const cha + f->buf_size = 0; + f->wpos = f->wbase = f->wend = 0; + } ++ if (f->flags & F_ERR) ret = -1; ++ f->flags |= olderr; + FUNLOCK(f); + va_end(ap2); + return ret; +--- a/src/stdio/vfwprintf.c ++++ b/src/stdio/vfwprintf.c +@@ -149,7 +149,7 @@ static void pop_arg(union arg *arg, int + + static void out(FILE *f, const wchar_t *s, size_t l) + { +- while (l--) fputwc(*s++, f); ++ while (l-- && !(f->flags & F_ERR)) fputwc(*s++, f); + } + + static int getint(wchar_t **s) { +@@ -345,6 +345,7 @@ int vfwprintf(FILE *restrict f, const wc + va_list ap2; + int nl_type[NL_ARGMAX] = {0}; + union arg nl_arg[NL_ARGMAX]; ++ int olderr; + int ret; + + /* the copy allows passing va_list* even if va_list is an array */ +@@ -356,7 +357,11 @@ int vfwprintf(FILE *restrict f, const wc + + FLOCK(f); + f->mode |= f->mode+1; ++ olderr = f->flags & F_ERR; ++ f->flags &= ~F_ERR; + ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type); ++ if (f->flags & F_ERR) ret = -1; ++ f->flags |= olderr; + FUNLOCK(f); + va_end(ap2); + return ret; +--- a/src/string/armel/memcpy.s ++++ b/src/string/armel/memcpy.s +@@ -49,113 +49,113 @@ memcpy: + * ARM ABI. Since we have to save R0, we might as well save R4 + * which we can use for better pipelining of the reads below + */ +- .fnstart +- .save {r0, r4, lr} +- stmfd sp!, {r0, r4, lr} +- /* Making room for r5-r11 which will be spilled later */ +- .pad #28 +- sub sp, sp, #28 +- +- /* it simplifies things to take care of len<4 early */ +- cmp r2, #4 +- blo copy_last_3_and_return +- +- /* compute the offset to align the source +- * offset = (4-(src&3))&3 = -src & 3 +- */ +- rsb r3, r1, #0 +- ands r3, r3, #3 +- beq src_aligned +- +- /* align source to 32 bits. We need to insert 2 instructions between +- * a ldr[b|h] and str[b|h] because byte and half-word instructions +- * stall 2 cycles. 
+- */ +- movs r12, r3, lsl #31 +- sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ +- ldrmib r3, [r1], #1 +- ldrcsb r4, [r1], #1 +- ldrcsb r12,[r1], #1 +- strmib r3, [r0], #1 +- strcsb r4, [r0], #1 +- strcsb r12,[r0], #1 ++ .fnstart ++ .save {r0, r4, lr} ++ stmfd sp!, {r0, r4, lr} ++ /* Making room for r5-r11 which will be spilled later */ ++ .pad #28 ++ sub sp, sp, #28 ++ ++ /* it simplifies things to take care of len<4 early */ ++ cmp r2, #4 ++ blo copy_last_3_and_return ++ ++ /* compute the offset to align the source ++ * offset = (4-(src&3))&3 = -src & 3 ++ */ ++ rsb r3, r1, #0 ++ ands r3, r3, #3 ++ beq src_aligned ++ ++ /* align source to 32 bits. We need to insert 2 instructions between ++ * a ldr[b|h] and str[b|h] because byte and half-word instructions ++ * stall 2 cycles. ++ */ ++ movs r12, r3, lsl #31 ++ sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ ++ .word 0x44d13001 /* ldrbmi r3, [r1], #1 */ ++ .word 0x24d14001 /* ldrbcs r4, [r1], #1 */ ++ .word 0x24d1c001 /* ldrbcs r12,[r1], #1 */ ++ .word 0x44c03001 /* strbmi r3, [r0], #1 */ ++ .word 0x24c04001 /* strbcs r4, [r0], #1 */ ++ .word 0x24c0c001 /* strbcs r12,[r0], #1 */ + + src_aligned: + + /* see if src and dst are aligned together (congruent) */ +- eor r12, r0, r1 +- tst r12, #3 +- bne non_congruent +- +- /* Use post-incriment mode for stm to spill r5-r11 to reserved stack +- * frame. Don't update sp. 
+- */ +- stmea sp, {r5-r11} +- +- /* align the destination to a cache-line */ +- rsb r3, r0, #0 +- ands r3, r3, #0x1C +- beq congruent_aligned32 +- cmp r3, r2 +- andhi r3, r2, #0x1C +- +- /* conditionnaly copies 0 to 7 words (length in r3) */ +- movs r12, r3, lsl #28 +- ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ +- ldmmiia r1!, {r8, r9} /* 8 bytes */ +- stmcsia r0!, {r4, r5, r6, r7} +- stmmiia r0!, {r8, r9} +- tst r3, #0x4 +- ldrne r10,[r1], #4 /* 4 bytes */ +- strne r10,[r0], #4 +- sub r2, r2, r3 ++ eor r12, r0, r1 ++ tst r12, #3 ++ bne non_congruent ++ ++ /* Use post-incriment mode for stm to spill r5-r11 to reserved stack ++ * frame. Don't update sp. ++ */ ++ stmea sp, {r5-r11} ++ ++ /* align the destination to a cache-line */ ++ rsb r3, r0, #0 ++ ands r3, r3, #0x1C ++ beq congruent_aligned32 ++ cmp r3, r2 ++ andhi r3, r2, #0x1C ++ ++ /* conditionnaly copies 0 to 7 words (length in r3) */ ++ movs r12, r3, lsl #28 ++ ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ ++ ldmmi r1!, {r8, r9} /* 8 bytes */ ++ stmcs r0!, {r4, r5, r6, r7} ++ stmmi r0!, {r8, r9} ++ tst r3, #0x4 ++ ldrne r10,[r1], #4 /* 4 bytes */ ++ strne r10,[r0], #4 ++ sub r2, r2, r3 + + congruent_aligned32: + /* +- * here source is aligned to 32 bytes. +- */ ++ * here source is aligned to 32 bytes. ++ */ + + cached_aligned32: +- subs r2, r2, #32 +- blo less_than_32_left ++ subs r2, r2, #32 ++ blo less_than_32_left + +- /* +- * We preload a cache-line up to 64 bytes ahead. On the 926, this will +- * stall only until the requested world is fetched, but the linefill +- * continues in the the background. +- * While the linefill is going, we write our previous cache-line +- * into the write-buffer (which should have some free space). +- * When the linefill is done, the writebuffer will +- * start dumping its content into memory +- * +- * While all this is going, we then load a full cache line into +- * 8 registers, this cache line should be in the cache by now +- * (or partly in the cache). 
+- * +- * This code should work well regardless of the source/dest alignment. +- * +- */ +- +- /* Align the preload register to a cache-line because the cpu does +- * "critical word first" (the first word requested is loaded first). +- */ +- @ bic r12, r1, #0x1F +- @ add r12, r12, #64 +- +-1: ldmia r1!, { r4-r11 } +- subs r2, r2, #32 +- +- /* +- * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi +- * for ARM9 preload will not be safely guarded by the preceding subs. +- * When it is safely guarded the only possibility to have SIGSEGV here +- * is because the caller overstates the length. +- */ +- @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */ +- stmia r0!, { r4-r11 } +- bhs 1b ++ /* ++ * We preload a cache-line up to 64 bytes ahead. On the 926, this will ++ * stall only until the requested world is fetched, but the linefill ++ * continues in the the background. ++ * While the linefill is going, we write our previous cache-line ++ * into the write-buffer (which should have some free space). ++ * When the linefill is done, the writebuffer will ++ * start dumping its content into memory ++ * ++ * While all this is going, we then load a full cache line into ++ * 8 registers, this cache line should be in the cache by now ++ * (or partly in the cache). ++ * ++ * This code should work well regardless of the source/dest alignment. ++ * ++ */ + +- add r2, r2, #32 ++ /* Align the preload register to a cache-line because the cpu does ++ * "critical word first" (the first word requested is loaded first). ++ */ ++ @ bic r12, r1, #0x1F ++ @ add r12, r12, #64 ++ ++1: ldmia r1!, { r4-r11 } ++ subs r2, r2, #32 ++ ++ /* ++ * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi ++ * for ARM9 preload will not be safely guarded by the preceding subs. ++ * When it is safely guarded the only possibility to have SIGSEGV here ++ * is because the caller overstates the length. 
++ */ ++ @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */ ++ stmia r0!, { r4-r11 } ++ bhs 1b ++ ++ add r2, r2, #32 + + less_than_32_left: + /* +@@ -166,30 +166,30 @@ less_than_32_left: + * be a common case (if not executed the code below takes + * about 16 cycles) + */ +- tst r2, #0x1F +- beq 1f ++ tst r2, #0x1F ++ beq 1f + +- /* conditionnaly copies 0 to 31 bytes */ +- movs r12, r2, lsl #28 +- ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ +- ldmmiia r1!, {r8, r9} /* 8 bytes */ +- stmcsia r0!, {r4, r5, r6, r7} +- stmmiia r0!, {r8, r9} +- movs r12, r2, lsl #30 +- ldrcs r3, [r1], #4 /* 4 bytes */ +- ldrmih r4, [r1], #2 /* 2 bytes */ +- strcs r3, [r0], #4 +- strmih r4, [r0], #2 +- tst r2, #0x1 +- ldrneb r3, [r1] /* last byte */ +- strneb r3, [r0] +- +- /* we're done! restore everything and return */ +-1: ldmfd sp!, {r5-r11} +- ldmfd sp!, {r0, r4, lr} +- tst lr, #1 +- moveq pc, lr +- bx lr ++ /* conditionnaly copies 0 to 31 bytes */ ++ movs r12, r2, lsl #28 ++ ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ ++ ldmmi r1!, {r8, r9} /* 8 bytes */ ++ stmcs r0!, {r4, r5, r6, r7} ++ stmmi r0!, {r8, r9} ++ movs r12, r2, lsl #30 ++ ldrcs r3, [r1], #4 /* 4 bytes */ ++ .word 0x40d140b2 /* ldrhmi r4, [r1], #2 */ /* 2 bytes */ ++ strcs r3, [r0], #4 ++ .word 0x40c040b2 /* strhmi r4, [r0], #2 */ ++ tst r2, #0x1 ++ .word 0x15d13000 /* ldrbne r3, [r1] */ /* last byte */ ++ .word 0x15c03000 /* strbne r3, [r0] */ ++ ++ /* we're done! restore everything and return */ ++1: ldmfd sp!, {r5-r11} ++ ldmfd sp!, {r0, r4, lr} ++ tst lr, #1 ++ moveq pc, lr ++ bx lr + + /********************************************************************/ + +@@ -202,180 +202,180 @@ non_congruent: + * (the number of bytes written is always smaller, because we have + * partial words in the shift queue) + */ +- cmp r2, #4 +- blo copy_last_3_and_return ++ cmp r2, #4 ++ blo copy_last_3_and_return + +- /* Use post-incriment mode for stm to spill r5-r11 to reserved stack +- * frame. Don't update sp. 
+- */ +- stmea sp, {r5-r11} +- +- /* compute shifts needed to align src to dest */ +- rsb r5, r0, #0 +- and r5, r5, #3 /* r5 = # bytes in partial words */ +- mov r12, r5, lsl #3 /* r12 = right */ +- rsb lr, r12, #32 /* lr = left */ +- +- /* read the first word */ +- ldr r3, [r1], #4 +- sub r2, r2, #4 +- +- /* write a partial word (0 to 3 bytes), such that destination +- * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) +- */ +- movs r5, r5, lsl #31 +- strmib r3, [r0], #1 +- movmi r3, r3, lsr #8 +- strcsb r3, [r0], #1 +- movcs r3, r3, lsr #8 +- strcsb r3, [r0], #1 +- movcs r3, r3, lsr #8 +- +- cmp r2, #4 +- blo partial_word_tail +- +- /* Align destination to 32 bytes (cache line boundary) */ +-1: tst r0, #0x1c +- beq 2f +- ldr r5, [r1], #4 +- sub r2, r2, #4 +- orr r4, r3, r5, lsl lr +- mov r3, r5, lsr r12 +- str r4, [r0], #4 +- cmp r2, #4 +- bhs 1b +- blo partial_word_tail ++ /* Use post-incriment mode for stm to spill r5-r11 to reserved stack ++ * frame. Don't update sp. 
++ */ ++ stmea sp, {r5-r11} ++ ++ /* compute shifts needed to align src to dest */ ++ rsb r5, r0, #0 ++ and r5, r5, #3 /* r5 = # bytes in partial words */ ++ mov r12, r5, lsl #3 /* r12 = right */ ++ rsb lr, r12, #32 /* lr = left */ ++ ++ /* read the first word */ ++ ldr r3, [r1], #4 ++ sub r2, r2, #4 ++ ++ /* write a partial word (0 to 3 bytes), such that destination ++ * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) ++ */ ++ movs r5, r5, lsl #31 ++ .word 0x44c03001 /* strbmi r3, [r0], #1 */ ++ movmi r3, r3, lsr #8 ++ .word 0x24c03001 /* strbcs r3, [r0], #1 */ ++ movcs r3, r3, lsr #8 ++ .word 0x24c03001 /* strbcs r3, [r0], #1 */ ++ movcs r3, r3, lsr #8 ++ ++ cmp r2, #4 ++ blo partial_word_tail ++ ++ /* Align destination to 32 bytes (cache line boundary) */ ++1: tst r0, #0x1c ++ beq 2f ++ ldr r5, [r1], #4 ++ sub r2, r2, #4 ++ orr r4, r3, r5, lsl lr ++ mov r3, r5, lsr r12 ++ str r4, [r0], #4 ++ cmp r2, #4 ++ bhs 1b ++ blo partial_word_tail + + /* copy 32 bytes at a time */ +-2: subs r2, r2, #32 +- blo less_than_thirtytwo ++2: subs r2, r2, #32 ++ blo less_than_thirtytwo ++ ++ /* Use immediate mode for the shifts, because there is an extra cycle ++ * for register shifts, which could account for up to 50% of ++ * performance hit. ++ */ + +- /* Use immediate mode for the shifts, because there is an extra cycle +- * for register shifts, which could account for up to 50% of +- * performance hit. 
+- */ +- +- cmp r12, #24 +- beq loop24 +- cmp r12, #8 +- beq loop8 ++ cmp r12, #24 ++ beq loop24 ++ cmp r12, #8 ++ beq loop8 + + loop16: +- ldr r12, [r1], #4 +-1: mov r4, r12 +- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} +- subs r2, r2, #32 +- ldrhs r12, [r1], #4 +- orr r3, r3, r4, lsl #16 +- mov r4, r4, lsr #16 +- orr r4, r4, r5, lsl #16 +- mov r5, r5, lsr #16 +- orr r5, r5, r6, lsl #16 +- mov r6, r6, lsr #16 +- orr r6, r6, r7, lsl #16 +- mov r7, r7, lsr #16 +- orr r7, r7, r8, lsl #16 +- mov r8, r8, lsr #16 +- orr r8, r8, r9, lsl #16 +- mov r9, r9, lsr #16 +- orr r9, r9, r10, lsl #16 +- mov r10, r10, lsr #16 +- orr r10, r10, r11, lsl #16 +- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} +- mov r3, r11, lsr #16 +- bhs 1b +- b less_than_thirtytwo ++ ldr r12, [r1], #4 ++1: mov r4, r12 ++ ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} ++ subs r2, r2, #32 ++ ldrhs r12, [r1], #4 ++ orr r3, r3, r4, lsl #16 ++ mov r4, r4, lsr #16 ++ orr r4, r4, r5, lsl #16 ++ mov r5, r5, lsr #16 ++ orr r5, r5, r6, lsl #16 ++ mov r6, r6, lsr #16 ++ orr r6, r6, r7, lsl #16 ++ mov r7, r7, lsr #16 ++ orr r7, r7, r8, lsl #16 ++ mov r8, r8, lsr #16 ++ orr r8, r8, r9, lsl #16 ++ mov r9, r9, lsr #16 ++ orr r9, r9, r10, lsl #16 ++ mov r10, r10, lsr #16 ++ orr r10, r10, r11, lsl #16 ++ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} ++ mov r3, r11, lsr #16 ++ bhs 1b ++ b less_than_thirtytwo + + loop8: +- ldr r12, [r1], #4 +-1: mov r4, r12 +- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} +- subs r2, r2, #32 +- ldrhs r12, [r1], #4 +- orr r3, r3, r4, lsl #24 +- mov r4, r4, lsr #8 +- orr r4, r4, r5, lsl #24 +- mov r5, r5, lsr #8 +- orr r5, r5, r6, lsl #24 +- mov r6, r6, lsr #8 +- orr r6, r6, r7, lsl #24 +- mov r7, r7, lsr #8 +- orr r7, r7, r8, lsl #24 +- mov r8, r8, lsr #8 +- orr r8, r8, r9, lsl #24 +- mov r9, r9, lsr #8 +- orr r9, r9, r10, lsl #24 +- mov r10, r10, lsr #8 +- orr r10, r10, r11, lsl #24 +- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} +- mov r3, r11, lsr #8 +- bhs 1b +- b less_than_thirtytwo ++ ldr r12, [r1], #4 ++1: mov r4, r12 ++ 
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} ++ subs r2, r2, #32 ++ ldrhs r12, [r1], #4 ++ orr r3, r3, r4, lsl #24 ++ mov r4, r4, lsr #8 ++ orr r4, r4, r5, lsl #24 ++ mov r5, r5, lsr #8 ++ orr r5, r5, r6, lsl #24 ++ mov r6, r6, lsr #8 ++ orr r6, r6, r7, lsl #24 ++ mov r7, r7, lsr #8 ++ orr r7, r7, r8, lsl #24 ++ mov r8, r8, lsr #8 ++ orr r8, r8, r9, lsl #24 ++ mov r9, r9, lsr #8 ++ orr r9, r9, r10, lsl #24 ++ mov r10, r10, lsr #8 ++ orr r10, r10, r11, lsl #24 ++ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} ++ mov r3, r11, lsr #8 ++ bhs 1b ++ b less_than_thirtytwo + + loop24: +- ldr r12, [r1], #4 +-1: mov r4, r12 +- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} +- subs r2, r2, #32 +- ldrhs r12, [r1], #4 +- orr r3, r3, r4, lsl #8 +- mov r4, r4, lsr #24 +- orr r4, r4, r5, lsl #8 +- mov r5, r5, lsr #24 +- orr r5, r5, r6, lsl #8 +- mov r6, r6, lsr #24 +- orr r6, r6, r7, lsl #8 +- mov r7, r7, lsr #24 +- orr r7, r7, r8, lsl #8 +- mov r8, r8, lsr #24 +- orr r8, r8, r9, lsl #8 +- mov r9, r9, lsr #24 +- orr r9, r9, r10, lsl #8 +- mov r10, r10, lsr #24 +- orr r10, r10, r11, lsl #8 +- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} +- mov r3, r11, lsr #24 +- bhs 1b ++ ldr r12, [r1], #4 ++1: mov r4, r12 ++ ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} ++ subs r2, r2, #32 ++ ldrhs r12, [r1], #4 ++ orr r3, r3, r4, lsl #8 ++ mov r4, r4, lsr #24 ++ orr r4, r4, r5, lsl #8 ++ mov r5, r5, lsr #24 ++ orr r5, r5, r6, lsl #8 ++ mov r6, r6, lsr #24 ++ orr r6, r6, r7, lsl #8 ++ mov r7, r7, lsr #24 ++ orr r7, r7, r8, lsl #8 ++ mov r8, r8, lsr #24 ++ orr r8, r8, r9, lsl #8 ++ mov r9, r9, lsr #24 ++ orr r9, r9, r10, lsl #8 ++ mov r10, r10, lsr #24 ++ orr r10, r10, r11, lsl #8 ++ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} ++ mov r3, r11, lsr #24 ++ bhs 1b + + less_than_thirtytwo: + /* copy the last 0 to 31 bytes of the source */ +- rsb r12, lr, #32 /* we corrupted r12, recompute it */ +- add r2, r2, #32 +- cmp r2, #4 +- blo partial_word_tail +- +-1: ldr r5, [r1], #4 +- sub r2, r2, #4 +- orr r4, r3, r5, lsl lr +- mov r3, r5, lsr r12 +- 
str r4, [r0], #4 +- cmp r2, #4 +- bhs 1b ++ rsb r12, lr, #32 /* we corrupted r12, recompute it */ ++ add r2, r2, #32 ++ cmp r2, #4 ++ blo partial_word_tail ++ ++1: ldr r5, [r1], #4 ++ sub r2, r2, #4 ++ orr r4, r3, r5, lsl lr ++ mov r3, r5, lsr r12 ++ str r4, [r0], #4 ++ cmp r2, #4 ++ bhs 1b + + partial_word_tail: + /* we have a partial word in the input buffer */ +- movs r5, lr, lsl #(31-3) +- strmib r3, [r0], #1 +- movmi r3, r3, lsr #8 +- strcsb r3, [r0], #1 +- movcs r3, r3, lsr #8 +- strcsb r3, [r0], #1 ++ movs r5, lr, lsl #(31-3) ++ .word 0x44c03001 /* strbmi r3, [r0], #1 */ ++ movmi r3, r3, lsr #8 ++ .word 0x24c03001 /* strbcs r3, [r0], #1 */ ++ movcs r3, r3, lsr #8 ++ .word 0x24c03001 /* strbcs r3, [r0], #1 */ + +- /* Refill spilled registers from the stack. Don't update sp. */ +- ldmfd sp, {r5-r11} ++ /* Refill spilled registers from the stack. Don't update sp. */ ++ ldmfd sp, {r5-r11} + + copy_last_3_and_return: +- movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ +- ldrmib r2, [r1], #1 +- ldrcsb r3, [r1], #1 +- ldrcsb r12,[r1] +- strmib r2, [r0], #1 +- strcsb r3, [r0], #1 +- strcsb r12,[r0] +- +- /* we're done! restore sp and spilled registers and return */ +- add sp, sp, #28 +- ldmfd sp!, {r0, r4, lr} +- tst lr, #1 +- moveq pc, lr +- bx lr ++ movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ ++ .word 0x44d12001 /* ldrbmi r2, [r1], #1 */ ++ .word 0x24d13001 /* ldrbcs r3, [r1], #1 */ ++ .word 0x25d1c000 /* ldrbcs r12,[r1] */ ++ .word 0x44c02001 /* strbmi r2, [r0], #1 */ ++ .word 0x24c03001 /* strbcs r3, [r0], #1 */ ++ .word 0x25c0c000 /* strbcs r12,[r0] */ ++ ++ /* we're done! 
restore sp and spilled registers and return */ ++ add sp, sp, #28 ++ ldmfd sp!, {r0, r4, lr} ++ tst lr, #1 ++ moveq pc, lr ++ bx lr +--- a/src/thread/arm/__set_thread_area.s ++++ b/src/thread/arm/__set_thread_area.s +@@ -1,12 +1 @@ +-.text +-.global __set_thread_area +-.type __set_thread_area,%function +-__set_thread_area: +- mov r1,r7 +- mov r7,#0x0f0000 +- add r7,r7,#5 +- svc 0 +- mov r7,r1 +- tst lr,#1 +- moveq pc,lr +- bx lr ++/* Replaced by C code in arch/arm/src */ +--- a/src/thread/arm/tls.s ++++ /dev/null +@@ -1,4 +0,0 @@ +-.global __aeabi_read_tp +-.type __aeabi_read_tp,%function +-__aeabi_read_tp: +- ldr pc,=0xffff0fe0 +--- a/src/thread/pthread_once.c ++++ b/src/thread/pthread_once.c +@@ -8,15 +8,8 @@ static void undo(void *control) + __wake(control, -1, 1); + } + +-int __pthread_once(pthread_once_t *control, void (*init)(void)) ++int __pthread_once_full(pthread_once_t *control, void (*init)(void)) + { +- /* Return immediately if init finished before, but ensure that +- * effects of the init routine are visible to the caller. */ +- if (*control == 2) { +- a_barrier(); +- return 0; +- } +- + /* Try to enter initializing state. Four possibilities: + * 0 - we're the first or the other cancelled; run init + * 1 - another thread is running init; wait +@@ -43,4 +36,15 @@ int __pthread_once(pthread_once_t *contr + } + } + ++int __pthread_once(pthread_once_t *control, void (*init)(void)) ++{ ++ /* Return immediately if init finished before, but ensure that ++ * effects of the init routine are visible to the caller. */ ++ if (*control == 2) { ++ a_barrier(); ++ return 0; ++ } ++ return __pthread_once_full(control, init); ++} ++ + weak_alias(__pthread_once, pthread_once); |