aboutsummaryrefslogtreecommitdiffstats
path: root/toolchain/musl/patches/001-git-2015-06-25.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain/musl/patches/001-git-2015-06-25.patch')
-rw-r--r-- toolchain/musl/patches/001-git-2015-06-25.patch 2523
1 files changed, 2523 insertions, 0 deletions
diff --git a/toolchain/musl/patches/001-git-2015-06-25.patch b/toolchain/musl/patches/001-git-2015-06-25.patch
new file mode 100644
index 0000000000..abb4a9d135
--- /dev/null
+++ b/toolchain/musl/patches/001-git-2015-06-25.patch
@@ -0,0 +1,2523 @@
+commit 6ba5517a460c6c438f64d69464fdfc3269a4c91a
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Thu Jun 25 22:22:00 2015 +0000
+
+ fix local-dynamic model TLS on mips and powerpc
+
+ the TLS ABI spec for mips, powerpc, and some other (presently
+ unsupported) RISC archs has the return value of __tls_get_addr offset
+ by +0x8000 and the result of DTPOFF relocations offset by -0x8000. I
+ had previously assumed this part of the ABI was actually just an
+ implementation detail, since the adjustments cancel out. however, when
+ the local dynamic model is used for accessing TLS that's known to be
+ in the same DSO, either of the following may happen:
+
+ 1. the -0x8000 offset may already be applied to the argument structure
+ passed to __tls_get_addr at ld time, without any opportunity for
+ runtime relocations.
+
+ 2. __tls_get_addr may be used with a zero offset argument to obtain a
+ base address for the module's TLS, to which the caller then applies
+ immediate offsets for individual objects accessed using the local
+ dynamic model. since the immediate offsets have the -0x8000 adjustment
+ applied to them, the base address they use needs to include the
+ +0x8000 offset.
+
+ it would be possible, but more complex, to store the pointers in the
+ dtv[] array with the +0x8000 offset pre-applied, to avoid the runtime
+ cost of adding 0x8000 on each call to __tls_get_addr. this change
+ could be made later if measurements show that it would help.
+
+commit ce337daa00e42d4f2d9a4d9ae0ed51b20249d924
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 23 04:03:42 2015 +0000
+
+ make dynamic linker work around MAP_FAILED mmap failure on nommu kernels
+
+ previously, loading of additional libraries beyond libc/ldso did not
+ work on nommu kernels, nor did loading programs via invocation of the
+ dynamic linker as a command.
+
+commit a59341420fdedb288d9ff80e73609ae44e9cf258
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 23 00:12:25 2015 +0000
+
+ reimplement strverscmp to fix corner cases
+
+ this interface is non-standardized and is a GNU invention, and as
+ such, our implementation should match the behavior of the GNU
+ function. one peculiarity the old implementation got wrong was the
+ handling of all-zero digit sequences: they are supposed to compare
+ greater than digit sequences of which they are a proper prefix, as in
+ 009 < 00.
+
+ in addition, high bytes were treated with char signedness rather than
+ as unsigned. this was wrong regardless of what the GNU function does
+ since the resulting order relation varied by arch.
+
+ the new strverscmp implementation makes explicit the cases where the
+ order differs from what strcmp would produce, of which there are only
+ two.
+
+commit 153e952e1a688859d7095345b17e6c1df74a295c
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Mon Jun 22 20:33:28 2015 +0000
+
+ fix regression/typo that disabled __simple_malloc when calloc is used
+
+ commit ba819787ee93ceae94efd274f7849e317c1bff58 introduced this
+ regression. since the __malloc0 weak alias was not properly provided
+ by __simple_malloc, use of calloc forced the full malloc to be linked.
+
+commit ba819787ee93ceae94efd274f7849e317c1bff58
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Mon Jun 22 18:50:09 2015 +0000
+
+ fix calloc when __simple_malloc implementation is used
+
+ previously, calloc's implementation encoded assumptions about the
+ implementation of malloc, accessing a size_t word just prior to the
+ allocated memory to determine if it was obtained by mmap to optimize
+ out the zero-filling. when __simple_malloc is used (static linking a
+ program with no realloc/free), it doesn't matter if the result of this
+ check is wrong, since all allocations are zero-initialized anyway. but
+ the access could be invalid if it crosses a page boundary or if the
+ pointer is not sufficiently aligned, which can happen for very small
+ allocations.
+
+ this patch fixes the issue by moving the zero-fill logic into malloc.c
+ with the full malloc, as a new function named __malloc0, which is
+ provided by a weak alias to __simple_malloc (which always gives
+ zero-filled memory) when the full malloc is not in use.
+
+commit 55d061f031085f24d138664c897791aebe9a2fab
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 20 03:01:07 2015 +0000
+
+ provide __stack_chk_fail_local in libc.a
+
+ this symbol is needed only on archs where the PLT call ABI is klunky,
+ and only for position-independent code compiled with stack protector.
+ thus references usually only appear in shared libraries or PIE
+ executables, but they can also appear when linking statically if some
+ of the object files being linked were built as PIC/PIE.
+
+ normally libssp_nonshared.a from the compiler toolchain should provide
+ __stack_chk_fail_local, but reportedly it appears prior to -lc in the
+ link order, thus failing to satisfy references from libc itself (which
+ arise only if libc.a was built as PIC/PIE with stack protector
+ enabled).
+
+commit ce3688eca920aa77549323f84e21f33522397115
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 20 02:54:30 2015 +0000
+
+ work around mips detached thread exit breakage due to kernel regression
+
+ linux kernel commit 46e12c07b3b9603c60fc1d421ff18618241cb081 caused
+ the mips syscall mechanism to fail with EFAULT when the userspace
+ stack pointer is invalid, breaking __unmapself used for detached
+ thread exit. the workaround is to set $sp to a known-valid, readable
+ address, and the simplest one to obtain is the address of the current
+ function, which is available (per o32 calling convention) in $25.
+
+commit 75eceb3ae824d54e865686c0c538551aeebf3372
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Wed Jun 17 17:21:46 2015 +0000
+
+ ignore ENOSYS error from mprotect in pthread_create and dynamic linker
+
+ this error simply indicated a system without memory protection (NOMMU)
+ and should not cause failure in the caller.
+
+commit 10d0268ccfab9152250eeeed3952ce3fed44131a
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 15:25:02 2015 +0000
+
+ switch to using trap number 31 for syscalls on sh
+
+ nominally the low bits of the trap number on sh are the number of
+ syscall arguments, but they have never been used by the kernel, and
+ some code making syscalls does not even know the number of arguments
+ and needs to pass an arbitrary high number anyway.
+
+ sh3/sh4 traditionally used the trap range 16-31 for syscalls, but part
+ of this range overlapped with hardware exceptions/interrupts on sh2
+ hardware, so an incompatible range 32-47 was chosen for sh2.
+
+ using trap number 31 everywhere, since it's in the existing sh3/sh4
+ range and does not conflict with sh2 hardware, is a proposed
+ unification of the kernel syscall convention that will allow binaries
+ to be shared between sh2 and sh3/sh4. if this is not accepted into the
+ kernel, we can refit the sh2 target with runtime selection mechanisms
+ for the trap number, but doing so would be invasive and would entail
+ non-trivial overhead.
+
+commit 3366a99b17847b58f2d8cc52cbb5d65deb824f8a
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 14:55:06 2015 +0000
+
+ switch sh port's __unmapself to generic version when running on sh2/nommu
+
+ due to the way the interrupt and syscall trap mechanism works,
+ userspace on sh2 must never set the stack pointer to an invalid value.
+ thus, the approach used on most archs, where __unmapself executes with
+ no stack for the interval between SYS_munmap and SYS_exit, is not
+ viable on sh2.
+
+ in order not to pessimize sh3/sh4, the sh asm version of __unmapself
+ is not removed. instead it's renamed and redirected through code that
+ calls either the generic (safe) __unmapself or the sh3/sh4 asm,
+ depending on compile-time and run-time conditions.
+
+commit f9d84554bae0fa17c9a1d724549c4408022228a5
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 14:28:30 2015 +0000
+
+ add support for sh2 interrupt-masking-based atomics to sh port
+
+ the sh2 target is being considered an ISA subset of sh3/sh4, in the
+ sense that binaries built for sh2 are intended to be usable on later
+ cpu models/kernels with mmu support. so rather than hard-coding
+ sh2-specific atomics, the runtime atomic selection mechanism that was
+ already in place has been extended to add sh2 atomics.
+
+ at this time, the sh2 atomics are not SMP-compatible; since the ISA
+ lacks actual atomic operations, the new code instead masks interrupts
+ for the duration of the atomic operation, producing an atomic result
+ on single-core. this is only possible because the kernel/hardware does
+ not impose protections against userspace doing so. additional changes
+ will be needed to support future SMP systems.
+
+ care has been taken to avoid producing significant additional code
+ size in the case where it's known at compile-time that the target is
+ not sh2 and does not need sh2-specific code.
+
+commit 1b0cdc8700d29ef018bf226d74b2b58b23bce91c
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 07:11:19 2015 +0000
+
+ refactor stdio open file list handling, move it out of global libc struct
+
+ functions which open in-memory FILE stream variants all shared a tail
+ with __fdopen, adding the FILE structure to stdio's open file list.
+ replacing this common tail with a function call reduces code size and
+ duplication of logic. the list is also partially encapsulated now.
+
+ function signatures were chosen to facilitate tail call optimization
+ and reduce the need for additional accessor functions.
+
+ with these changes, static linked programs that do not use stdio no
+ longer have an open file list at all.
+
+commit f22a9edaf8a6f2ca1d314d18b3785558279a5c03
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 06:18:00 2015 +0000
+
+ byte-based C locale, phase 3: make MB_CUR_MAX variable to activate code
+
+ this patch activates the new byte-based C locale (high bytes treated
+ as abstract code unit "characters" rather than decoded as multibyte
+ characters) by making the value of MB_CUR_MAX depend on the active
+ locale. for the C locale, the LC_CTYPE category pointer is null,
+ yielding a value of 1. all other locales yield a value of 4.
+
+commit 16f18d036d9a7bf590ee6eb86785c0a9658220b6
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 05:35:31 2015 +0000
+
+ byte-based C locale, phase 2: stdio and iconv (multibyte callers)
+
+ this patch adjusts libc components which use the multibyte functions
+ internally, and which depend on them operating in a particular
+ encoding, to make the appropriate locale changes before calling them
+ and restore the calling thread's locale afterwards. activating the
+ byte-based C locale without these changes would cause regressions in
+ stdio and iconv.
+
+ in the case of iconv, the current implementation was simply using the
+ multibyte functions as UTF-8 conversions. setting a multibyte UTF-8
+ locale for the duration of the iconv operation allows the code to
+ continue working.
+
+ in the case of stdio, POSIX requires that FILE streams have an
+ encoding rule bound at the time of setting wide orientation. as long
+ as all locales, including the C locale, used the same encoding,
+ treating high bytes as UTF-8, there was no need to store an encoding
+ rule as part of the stream's state.
+
+ a new locale field in the FILE structure points to the locale that
+ should be made active during fgetwc/fputwc/ungetwc on the stream. it
+ cannot point to the locale active at the time the stream becomes
+ oriented, because this locale could be mutable (the global locale) or
+ could be destroyed (locale_t objects produced by newlocale) before the
+ stream is closed. instead, a pointer to the static C or C.UTF-8 locale
+ object added in commit aeeac9ca5490d7d90fe061ab72da446c01ddf746
+ is used. this is valid since categories other than LC_CTYPE will not
+ affect these functions.
+
+commit 1507ebf837334e9e07cfab1ca1c2e88449069a80
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 04:44:17 2015 +0000
+
+ byte-based C locale, phase 1: multibyte character handling functions
+
+ this patch makes the functions which work directly on multibyte
+ characters treat the high bytes as individual abstract code units
+ rather than as multibyte sequences when MB_CUR_MAX is 1. since
+ MB_CUR_MAX is presently defined as a constant 4, all of the new code
+ added is dead code, and optimizing compilers' code generation should
+ not be affected at all. a future commit will activate the new code.
+
+ as abstract code units, bytes 0x80 to 0xff are represented by wchar_t
+ values 0xdf80 to 0xdfff, at the end of the surrogates range. this
+ ensures that they will never be misinterpreted as Unicode characters,
+ and that all wctype functions return false for these "characters"
+ without needing locale-specific logic. a high range outside of Unicode
+ such as 0x7fffff80 to 0x7fffffff was also considered, but since C11's
+ char16_t also needs to be able to represent conversions of these
+ bytes, the surrogate range was the natural choice.
+
+commit 38e2f727237230300fea6aff68802db04625fd23
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 04:21:38 2015 +0000
+
+ fix btowc corner case
+
+ btowc is required to interpret its argument by conversion to unsigned
+ char, unless the argument is equal to EOF. since the conversion of EOF
+ produces a non-character value anyway, we can just unconditionally
+ convert, for now.
+
+commit ee59c296d56bf26f49f354d6eb32b4b6d4190188
+Author: Szabolcs Nagy <nsz@port70.net>
+Date: Wed Jun 3 10:32:14 2015 +0100
+
+ arm: add vdso support
+
+ vdso will be available on arm in linux v4.2, the user-space code
+ for it is in kernel commit 8512287a8165592466cb9cb347ba94892e9c56a5
+
+commit e3bc22f1eff87b8f029a6ab31f1a269d69e4b053
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 14 01:59:02 2015 +0000
+
+ refactor malloc's expand_heap to share with __simple_malloc
+
+ this extends the brk/stack collision protection added to full malloc
+ in commit 276904c2f6bde3a31a24ebfa201482601d18b4f9 to also protect the
+ __simple_malloc function used in static-linked programs that don't
+ reference the free function.
+
+ it also extends support for using mmap when brk fails, which full
+ malloc got in commit 5446303328adf4b4e36d9fba21848e6feb55fab4, to
+ __simple_malloc.
+
+ since __simple_malloc may expand the heap by arbitrarily large
+ increments, the stack collision detection is enhanced to detect
+ interval overlap rather than just proximity of a single address to the
+ stack. code size is increased a bit, but this is partly offset by the
+ sharing of code between the two malloc implementations, which due to
+ linking semantics, both get linked in a program that needs the full
+ malloc with realloc/free support.
+
+commit 4ef9b828c1f39553a69e0635ac91f0fcadd6e8c6
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 20:53:02 2015 +0000
+
+ remove cancellation points in stdio
+
+ commit 58165923890865a6ac042fafce13f440ee986fd9 added these optional
+ cancellation points on the basis that cancellable stdio could be
+ useful, to unblock threads stuck on stdio operations that will never
+ complete. however, the only way to ensure that cancellation can
+ achieve this is to violate the rules for side effects when
+ cancellation is acted upon, discarding knowledge of any partial data
+ transfer already completed. our implementation exhibited this behavior
+ and was thus non-conforming.
+
+ in addition to improving correctness, removing these cancellation
+ points moderately reduces code size, and should significantly improve
+ performance on i386, where sysenter/syscall instructions can be used
+ instead of "int $128" for non-cancellable syscalls.
+
+commit 536c6d5a4205e2a3f161f2983ce1e0ac3082187d
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 05:17:16 2015 +0000
+
+ fix idiom for setting stdio stream orientation to wide
+
+ the old idiom, f->mode |= f->mode+1, was adapted from the idiom for
+ setting byte orientation, f->mode |= f->mode-1, but the adaptation was
+ incorrect. unless the stream was already set byte-oriented, this code
+ incremented f->mode each time it was executed, which would eventually
+ lead to overflow. it could be fixed by changing it to f->mode |= 1,
+ but upcoming changes will require slightly more work at the time of
+ wide orientation, so it makes sense to just call fwide. as an
+ optimization in the single-character functions, fwide is only called
+ if the stream is not already wide-oriented.
+
+commit f8f565df467c13248104223f99abf7f37cef7584
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 04:42:38 2015 +0000
+
+ add printing of null %s arguments as "(null)" in wide printf
+
+ this is undefined, but supported in our implementation of the normal
+ printf, so for consistency the wide variant should support it too.
+
+commit f9e25d813860d53cd1e9b6145cc63375d2fe2529
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 04:37:27 2015 +0000
+
+ add %m support to wide printf
+
+commit ec634aad91f57479ef17525e33ed446c780a61f4
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Thu Jun 11 05:01:04 2015 +0000
+
+ add sh asm for vfork
+
+commit c30cbcb0a646b1f13a22c645616dce624465b883
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Wed Jun 10 02:27:40 2015 +0000
+
+ implement arch-generic version of __unmapself
+
+ this can be used to put off writing an asm version of __unmapself for
+ new archs, or as a permanent solution on archs where it's not
+ practical or even possible to run momentarily with no stack.
+
+ the concept here is simple: the caller takes a lock on a global shared
+ stack and uses it to make the munmap and exit syscalls. the only trick
+ is unlocking, which must be done after the thread exits, and this is
+ achieved by using the set_tid_address syscall to have the kernel zero
+ and futex-wake the lock word as part of the exit syscall.
+
+commit 276904c2f6bde3a31a24ebfa201482601d18b4f9
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 9 20:30:35 2015 +0000
+
+ in malloc, refuse to use brk if it grows into stack
+
+ the linux/nommu fdpic ELF loader sets up the brk range to overlap
+ entirely with the main thread's stack (but growing from opposite
+ ends), so that the resulting failure mode for malloc is not to return
+ a null pointer but to start returning pointers to memory that overlaps
+ with the caller's stack. needless to say this is extremely dangerous and
+ makes brk unusable.
+
+ since it's non-trivial to detect execution environments that might be
+ affected by this kernel bug, and since the severity of the bug makes
+ any sort of detection that might yield false-negatives unsafe, we
+ instead check the proximity of the brk to the stack pointer each time
+ the brk is to be expanded. both the main thread's stack (where the
+ real known risk lies) and the calling thread's stack are checked. an
+ arbitrary gap distance of 8 MB is imposed, chosen to be larger than
+ linux default main-thread stack reservation sizes and larger than any
+ reasonable stack configuration on nommu.
+
+ the effectiveness of this patch relies on an assumption that the amount
+ by which the brk is being grown is smaller than the gap limit, which
+ is always true for malloc's use of brk. reliance on this assumption is
+ why the check is being done in malloc-specific code and not in __brk.
+
+commit bd1eaceaa3975bd2a2a34e211cff896affaecadf
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 9 20:09:27 2015 +0000
+
+ fix spurious errors from pwd/grp functions when nscd backend is absent
+
+ for several pwd/grp functions, the only way the caller can distinguish
+ between a successful negative result ("no such user/group") and an
+ internal error is by clearing errno before the call and checking errno
+ afterwards. the nscd backend support code correctly simulated a
+ not-found response on systems where such a backend is not running, but
+ failed to restore errno.
+
+ this commit also fixed an outdated/incorrect comment.
+
+commit 75ce4503950621b11fcc7f1fd1187dbcf3cde312
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 7 20:55:23 2015 +0000
+
+ fix regression in pre-v7 arm on kernels with kuser helper removed
+
+ the arm atomics/TLS runtime selection code is called from
+ __set_thread_area and depends on having libc.auxv and __hwcap
+ available. commit 71f099cb7db821c51d8f39dfac622c61e54d794c moved the
+ first call to __set_thread_area to the top of dynamic linking stage 3,
+ before this data is made available, causing the runtime detection code
+ to always see __hwcap as zero and thereby select the atomics/TLS
+ implementations based on kuser helper.
+
+ upcoming work on superh will use similar runtime detection.
+
+ ideally this early-init code should be cleanly refactored and shared
+ between the dynamic linker and static-linked startup.
+
+commit 32f3c4f70633488550c29a2444f819aafdf345ff
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 7 03:09:16 2015 +0000
+
+ add multiple inclusion guard to locale_impl.h
+
+commit 04b8360adbb6487f61aa0c00e53ec3a90a5a0d29
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 7 02:59:49 2015 +0000
+
+ remove redefinition of MB_CUR_MAX in locale_impl.h
+
+ unless/until the byte-based C locale is implemented, defining
+ MB_CUR_MAX to 1 in the C locale is wrong. no internal code currently
+ uses the MB_CUR_MAX macro, but having it defined inconsistently is
+ error-prone. applications get the value from stdlib.h and were
+ unaffected.
+
+commit 16bf466532d7328e971012b0731ad493b017ad29
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:53:02 2015 +0000
+
+ make static C and C.UTF-8 locales available outside of newlocale
+
+commit 312eea2ea4f4363fb01b73660c08bfcf43dd3bb4
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:20:30 2015 +0000
+
+ remove another invalid skip of locking in ungetwc
+
+commit 3d7e32d28dc9962e9efc1c317c5b44b5b2df3008
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:16:22 2015 +0000
+
+ add macro version of ctype.h isascii function
+
+ presumably internal code (ungetwc and fputwc) was written assuming a
+ macro implementation existed; otherwise use of isascii is just a
+ pessimization.
+
+commit 7e816a6487932cbb3cb71d94b609e50e81f4e5bf
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:11:17 2015 +0000
+
+ remove invalid skip of locking in ungetwc
+
+ aside from being invalid, the early check only optimized the error
+ case, and likely pessimized the common case by separating the
+ two branches on isascii(c) at opposite ends of the function.
+
+commit 63f4b9f18f3674124d8bcb119739fec85e6da005
+Author: Timo Teräs <timo.teras@iki.fi>
+Date: Fri Jun 5 10:39:42 2015 +0300
+
+ fix uselocale((locale_t)0) not to modify locale
+
+ commit 68630b55c0c7219fe9df70dc28ffbf9efc8021d8 caused the new locale
+ to be assigned unconditionally, resulting in crashes later on.
+
+diff --git a/arch/arm/syscall_arch.h b/arch/arm/syscall_arch.h
+index 199ad2a..64461ec 100644
+--- a/arch/arm/syscall_arch.h
++++ b/arch/arm/syscall_arch.h
+@@ -72,3 +72,7 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
+ register long r5 __asm__("r5") = f;
+ __asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
+ }
++
++#define VDSO_USEFUL
++#define VDSO_CGT_SYM "__vdso_clock_gettime"
++#define VDSO_CGT_VER "LINUX_2.6"
+diff --git a/arch/mips/pthread_arch.h b/arch/mips/pthread_arch.h
+index f8e35ae..904a248 100644
+--- a/arch/mips/pthread_arch.h
++++ b/arch/mips/pthread_arch.h
+@@ -13,4 +13,6 @@ static inline struct pthread *__pthread_self()
+ #define TLS_ABOVE_TP
+ #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
+
++#define DTP_OFFSET 0x8000
++
+ #define CANCEL_REG_IP (3-(union {int __i; char __b;}){1}.__b)
+diff --git a/arch/powerpc/pthread_arch.h b/arch/powerpc/pthread_arch.h
+index 4115ec8..1cbfc22 100644
+--- a/arch/powerpc/pthread_arch.h
++++ b/arch/powerpc/pthread_arch.h
+@@ -12,6 +12,8 @@ static inline struct pthread *__pthread_self()
+ #define TLS_ABOVE_TP
+ #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
+
++#define DTP_OFFSET 0x8000
++
+ // offset of the PC register in mcontext_t, divided by the system wordsize
+ // the kernel calls the ip "nip", it's the first saved value after the 32
+ // GPRs.
+diff --git a/arch/sh/src/__set_thread_area.c b/arch/sh/src/__set_thread_area.c
+new file mode 100644
+index 0000000..1d3e022
+--- /dev/null
++++ b/arch/sh/src/__set_thread_area.c
+@@ -0,0 +1,34 @@
++#include "pthread_impl.h"
++#include "libc.h"
++#include "sh_atomic.h"
++#include <elf.h>
++
++/* Also perform sh-specific init */
++
++#define CPU_HAS_LLSC 0x0040
++
++__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu;
++
++int __set_thread_area(void *p)
++{
++ size_t *aux;
++ __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
++#ifndef __SH4A__
++ if (__hwcap & CPU_HAS_LLSC) {
++ __sh_atomic_model = SH_A_LLSC;
++ return 0;
++ }
++#if !defined(__SH3__) && !defined(__SH4__)
++ for (aux=libc.auxv; *aux; aux+=2) {
++ if (*aux != AT_PLATFORM) continue;
++ const char *s = (void *)aux[1];
++ if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
++ __sh_atomic_model = SH_A_IMASK;
++ __sh_nommu = 1;
++ return 0;
++ }
++#endif
++ /* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */
++#endif
++ return 0;
++}
+diff --git a/arch/sh/src/__unmapself.c b/arch/sh/src/__unmapself.c
+new file mode 100644
+index 0000000..4df9e7b
+--- /dev/null
++++ b/arch/sh/src/__unmapself.c
+@@ -0,0 +1,19 @@
++#include "pthread_impl.h"
++
++void __unmapself_sh_mmu(void *, size_t);
++void __unmapself_sh_nommu(void *, size_t);
++
++#if !defined(__SH3__) && !defined(__SH4__)
++#define __unmapself __unmapself_sh_nommu
++#include "../../../src/thread/__unmapself.c"
++#undef __unmapself
++extern __attribute__((__visibility__("hidden"))) unsigned __sh_nommu;
++#else
++#define __sh_nommu 0
++#endif
++
++void __unmapself(void *base, size_t size)
++{
++ if (__sh_nommu) __unmapself_sh_nommu(base, size);
++ else __unmapself_sh_mmu(base, size);
++}
+diff --git a/arch/sh/src/atomic.c b/arch/sh/src/atomic.c
+index f8c615f..7fd7307 100644
+--- a/arch/sh/src/atomic.c
++++ b/arch/sh/src/atomic.c
+@@ -1,8 +1,26 @@
+ #ifndef __SH4A__
+
++#include "sh_atomic.h"
+ #include "atomic.h"
+ #include "libc.h"
+
++static inline unsigned mask()
++{
++ unsigned sr;
++ __asm__ __volatile__ ( "\n"
++ " stc sr,r0 \n"
++ " mov r0,%0 \n"
++ " or #0xf0,r0 \n"
++ " ldc r0,sr \n"
++ : "=&r"(sr) : : "memory", "r0" );
++ return sr;
++}
++
++static inline void unmask(unsigned sr)
++{
++ __asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" );
++}
++
+ /* gusa is a hack in the kernel which lets you create a sequence of instructions
+ * which will be restarted if the process is preempted in the middle of the
+ * sequence. It will do for implementing atomics on non-smp systems. ABI is:
+@@ -25,11 +43,17 @@
+ " mov.l " new ", @" mem "\n" \
+ "1: mov r1, r15\n"
+
+-#define CPU_HAS_LLSC 0x0040
+-
+ int __sh_cas(volatile int *p, int t, int s)
+ {
+- if (__hwcap & CPU_HAS_LLSC) return __sh_cas_llsc(p, t, s);
++ if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s);
++
++ if (__sh_atomic_model == SH_A_IMASK) {
++ unsigned sr = mask();
++ int old = *p;
++ if (old==t) *p = s;
++ unmask(sr);
++ return old;
++ }
+
+ int old;
+ __asm__ __volatile__(
+@@ -43,7 +67,15 @@ int __sh_cas(volatile int *p, int t, int s)
+
+ int __sh_swap(volatile int *x, int v)
+ {
+- if (__hwcap & CPU_HAS_LLSC) return __sh_swap_llsc(x, v);
++ if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v);
++
++ if (__sh_atomic_model == SH_A_IMASK) {
++ unsigned sr = mask();
++ int old = *x;
++ *x = v;
++ unmask(sr);
++ return old;
++ }
+
+ int old;
+ __asm__ __volatile__(
+@@ -55,7 +87,15 @@ int __sh_swap(volatile int *x, int v)
+
+ int __sh_fetch_add(volatile int *x, int v)
+ {
+- if (__hwcap & CPU_HAS_LLSC) return __sh_fetch_add_llsc(x, v);
++ if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v);
++
++ if (__sh_atomic_model == SH_A_IMASK) {
++ unsigned sr = mask();
++ int old = *x;
++ *x = old + v;
++ unmask(sr);
++ return old;
++ }
+
+ int old, dummy;
+ __asm__ __volatile__(
+@@ -69,7 +109,7 @@ int __sh_fetch_add(volatile int *x, int v)
+
+ void __sh_store(volatile int *p, int x)
+ {
+- if (__hwcap & CPU_HAS_LLSC) return __sh_store_llsc(p, x);
++ if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x);
+ __asm__ __volatile__(
+ " mov.l %1, @%0\n"
+ : : "r"(p), "r"(x) : "memory");
+@@ -77,7 +117,15 @@ void __sh_store(volatile int *p, int x)
+
+ void __sh_and(volatile int *x, int v)
+ {
+- if (__hwcap & CPU_HAS_LLSC) return __sh_and_llsc(x, v);
++ if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v);
++
++ if (__sh_atomic_model == SH_A_IMASK) {
++ unsigned sr = mask();
++ int old = *x;
++ *x = old & v;
++ unmask(sr);
++ return;
++ }
+
+ int dummy;
+ __asm__ __volatile__(
+@@ -89,7 +137,15 @@ void __sh_and(volatile int *x, int v)
+
+ void __sh_or(volatile int *x, int v)
+ {
+- if (__hwcap & CPU_HAS_LLSC) return __sh_or_llsc(x, v);
++ if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v);
++
++ if (__sh_atomic_model == SH_A_IMASK) {
++ unsigned sr = mask();
++ int old = *x;
++ *x = old | v;
++ unmask(sr);
++ return;
++ }
+
+ int dummy;
+ __asm__ __volatile__(
+diff --git a/arch/sh/src/sh_atomic.h b/arch/sh/src/sh_atomic.h
+new file mode 100644
+index 0000000..054c2a3
+--- /dev/null
++++ b/arch/sh/src/sh_atomic.h
+@@ -0,0 +1,15 @@
++#ifndef _SH_ATOMIC_H
++#define _SH_ATOMIC_H
++
++#define SH_A_GUSA 0
++#define SH_A_LLSC 1
++#define SH_A_CAS 2
++#if !defined(__SH3__) && !defined(__SH4__)
++#define SH_A_IMASK 3
++#else
++#define SH_A_IMASK -1LL /* unmatchable by unsigned int */
++#endif
++
++extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model;
++
++#endif
+diff --git a/arch/sh/syscall_arch.h b/arch/sh/syscall_arch.h
+index 7ee21a5..f63675a 100644
+--- a/arch/sh/syscall_arch.h
++++ b/arch/sh/syscall_arch.h
+@@ -8,7 +8,7 @@
+ */
+ #define __asm_syscall(trapno, ...) do { \
+ __asm__ __volatile__ ( \
+- "trapa #" #trapno "\n" \
++ "trapa #31\n" \
+ "or r0, r0\n" \
+ "or r0, r0\n" \
+ "or r0, r0\n" \
+diff --git a/include/ctype.h b/include/ctype.h
+index cd2e016..7936536 100644
+--- a/include/ctype.h
++++ b/include/ctype.h
+@@ -64,6 +64,7 @@ int isascii(int);
+ int toascii(int);
+ #define _tolower(a) ((a)|0x20)
+ #define _toupper(a) ((a)&0x5f)
++#define isascii(a) (0 ? isascii(a) : (unsigned)(a) < 128)
+
+ #endif
+
+diff --git a/include/stdlib.h b/include/stdlib.h
+index 97ce5a7..d2c911f 100644
+--- a/include/stdlib.h
++++ b/include/stdlib.h
+@@ -76,7 +76,8 @@ size_t wcstombs (char *__restrict, const wchar_t *__restrict, size_t);
+ #define EXIT_FAILURE 1
+ #define EXIT_SUCCESS 0
+
+-#define MB_CUR_MAX ((size_t)+4)
++size_t __ctype_get_mb_cur_max(void);
++#define MB_CUR_MAX (__ctype_get_mb_cur_max())
+
+ #define RAND_MAX (0x7fffffff)
+
+diff --git a/src/ctype/__ctype_get_mb_cur_max.c b/src/ctype/__ctype_get_mb_cur_max.c
+index d235f4d..8e946fc 100644
+--- a/src/ctype/__ctype_get_mb_cur_max.c
++++ b/src/ctype/__ctype_get_mb_cur_max.c
+@@ -1,6 +1,7 @@
+-#include <stddef.h>
++#include <stdlib.h>
++#include "locale_impl.h"
+
+ size_t __ctype_get_mb_cur_max()
+ {
+- return 4;
++ return MB_CUR_MAX;
+ }
+diff --git a/src/ctype/isascii.c b/src/ctype/isascii.c
+index 3af0a10..54ad3bf 100644
+--- a/src/ctype/isascii.c
++++ b/src/ctype/isascii.c
+@@ -1,4 +1,5 @@
+ #include <ctype.h>
++#undef isascii
+
+ int isascii(int c)
+ {
+diff --git a/src/env/__stack_chk_fail.c b/src/env/__stack_chk_fail.c
+index 47784c6..be0c184 100644
+--- a/src/env/__stack_chk_fail.c
++++ b/src/env/__stack_chk_fail.c
+@@ -25,4 +25,8 @@ void __stack_chk_fail_local(void)
+ a_crash();
+ }
+
++#else
++
++weak_alias(__stack_chk_fail, __stack_chk_fail_local);
++
+ #endif
+diff --git a/src/internal/libc.h b/src/internal/libc.h
+index 6810cd8..98c7535 100644
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -17,8 +17,6 @@ struct __libc {
+ int secure;
+ volatile int threads_minus_1;
+ size_t *auxv;
+- FILE *ofl_head;
+- volatile int ofl_lock[2];
+ size_t tls_size;
+ size_t page_size;
+ struct __locale_struct global_locale;
+diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h
+index 9b8385e..f5e4d9b 100644
+--- a/src/internal/locale_impl.h
++++ b/src/internal/locale_impl.h
+@@ -1,3 +1,6 @@
++#ifndef _LOCALE_IMPL_H
++#define _LOCALE_IMPL_H
++
+ #include <locale.h>
+ #include <stdlib.h>
+ #include "libc.h"
+@@ -12,6 +15,10 @@ struct __locale_map {
+ const struct __locale_map *next;
+ };
+
++extern const struct __locale_map __c_dot_utf8;
++extern const struct __locale_struct __c_locale;
++extern const struct __locale_struct __c_dot_utf8_locale;
++
+ const struct __locale_map *__get_locale(int, const char *);
+ const char *__mo_lookup(const void *, size_t, const char *);
+ const char *__lctrans(const char *, const struct __locale_map *);
+@@ -20,9 +27,14 @@ const char *__lctrans_cur(const char *);
+ #define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)])
+ #define LCTRANS_CUR(msg) __lctrans_cur(msg)
+
++#define C_LOCALE ((locale_t)&__c_locale)
++#define UTF8_LOCALE ((locale_t)&__c_dot_utf8_locale)
++
+ #define CURRENT_LOCALE (__pthread_self()->locale)
+
+ #define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE])
+
+ #undef MB_CUR_MAX
+ #define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
++
++#endif
+diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
+index e29f9c8..3890bb5 100644
+--- a/src/internal/pthread_impl.h
++++ b/src/internal/pthread_impl.h
+@@ -94,6 +94,10 @@ struct __timer {
+ #define CANARY canary
+ #endif
+
++#ifndef DTP_OFFSET
++#define DTP_OFFSET 0
++#endif
++
+ #define SIGTIMER 32
+ #define SIGCANCEL 33
+ #define SIGSYNCCALL 34
+diff --git a/src/internal/sh/syscall.s b/src/internal/sh/syscall.s
+index d00712a..331918a 100644
+--- a/src/internal/sh/syscall.s
++++ b/src/internal/sh/syscall.s
+@@ -13,7 +13,7 @@ __syscall:
+ mov.l @r15, r7
+ mov.l @(4,r15), r0
+ mov.l @(8,r15), r1
+- trapa #22
++ trapa #31
+ or r0, r0
+ or r0, r0
+ or r0, r0
+diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h
+index e1325fe..0dd7fb5 100644
+--- a/src/internal/stdio_impl.h
++++ b/src/internal/stdio_impl.h
+@@ -47,6 +47,7 @@ struct _IO_FILE {
+ unsigned char *shend;
+ off_t shlim, shcnt;
+ FILE *prev_locked, *next_locked;
++ struct __locale_struct *locale;
+ };
+
+ size_t __stdio_read(FILE *, unsigned char *, size_t);
+@@ -75,8 +76,9 @@ int __putc_unlocked(int, FILE *);
+ FILE *__fdopen(int, const char *);
+ int __fmodeflags(const char *);
+
+-#define OFLLOCK() LOCK(libc.ofl_lock)
+-#define OFLUNLOCK() UNLOCK(libc.ofl_lock)
++FILE *__ofl_add(FILE *f);
++FILE **__ofl_lock(void);
++void __ofl_unlock(void);
+
+ #define feof(f) ((f)->flags & F_EOF)
+ #define ferror(f) ((f)->flags & F_ERR)
+diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
+index 42b056d..d2a7249 100644
+--- a/src/ldso/dynlink.c
++++ b/src/ldso/dynlink.c
+@@ -337,7 +337,7 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
+ *reloc_addr = def.dso->tls_id;
+ break;
+ case REL_DTPOFF:
+- *reloc_addr = tls_val + addend;
++ *reloc_addr = tls_val + addend - DTP_OFFSET;
+ break;
+ #ifdef TLS_ABOVE_TP
+ case REL_TPOFF:
+@@ -423,6 +423,28 @@ static void reclaim_gaps(struct dso *dso)
+ }
+ }
+
++/* mmap() wrapper: on EINVAL, emulate the mapping by zero-filling or read()ing into p. */
++static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
++{
++	char *q = mmap(p, n, prot, flags, fd, off);
++	if (q != MAP_FAILED || errno != EINVAL) return q;
++	/* Fallbacks for MAP_FIXED failure on NOMMU kernels. */
++	if (flags & MAP_ANONYMOUS) return memset(p, 0, n);
++	ssize_t r;
++	if (lseek(fd, off, SEEK_SET) < 0) return MAP_FAILED;
++	for (q=p; n; q+=r, off+=r, n-=r) {
++		r = read(fd, q, n);
++		if (r < 0 && errno != EINTR) return MAP_FAILED;
++		/* EINTR transferred nothing: retry without moving q/off/n. */
++		if (r < 0) r = 0;
++		else if (!r) {
++			memset(q, 0, n); /* EOF: zero-fill the remainder */
++			break;
++		}
++	}
++	return p;
++}
++
+ static void *map_library(int fd, struct dso *dso)
+ {
+ Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
+@@ -524,19 +546,20 @@ static void *map_library(int fd, struct dso *dso)
+ prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
+ ((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
+ ((ph->p_flags&PF_X) ? PROT_EXEC : 0));
+- if (mmap(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED)
++ if (mmap_fixed(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED)
+ goto error;
+ if (ph->p_memsz > ph->p_filesz) {
+ size_t brk = (size_t)base+ph->p_vaddr+ph->p_filesz;
+ size_t pgbrk = brk+PAGE_SIZE-1 & -PAGE_SIZE;
+ memset((void *)brk, 0, pgbrk-brk & PAGE_SIZE-1);
+- if (pgbrk-(size_t)base < this_max && mmap((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
++ if (pgbrk-(size_t)base < this_max && mmap_fixed((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
+ goto error;
+ }
+ }
+ for (i=0; ((size_t *)(base+dyn))[i]; i+=2)
+ if (((size_t *)(base+dyn))[i]==DT_TEXTREL) {
+- if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC) < 0)
++ if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC)
++ && errno != ENOSYS)
+ goto error;
+ break;
+ }
+@@ -927,7 +950,8 @@ static void reloc_all(struct dso *p)
+ do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
+
+ if (head != &ldso && p->relro_start != p->relro_end &&
+- mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ) < 0) {
++ mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ)
++ && errno != ENOSYS) {
+ error("Error relocating %s: RELRO protection failed: %m",
+ p->name);
+ if (runtime) longjmp(*rtld_fail, 1);
+@@ -1078,7 +1102,7 @@ void *__tls_get_new(size_t *v)
+ __block_all_sigs(&set);
+ if (v[0]<=(size_t)self->dtv[0]) {
+ __restore_sigs(&set);
+- return (char *)self->dtv[v[0]]+v[1];
++ return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
+ }
+
+ /* This is safe without any locks held because, if the caller
+@@ -1111,7 +1135,7 @@ void *__tls_get_new(size_t *v)
+ if (p->tls_id == v[0]) break;
+ }
+ __restore_sigs(&set);
+- return mem + v[1];
++ return mem + v[1] + DTP_OFFSET;
+ }
+
+ static void update_tls_size()
+@@ -1192,6 +1216,17 @@ _Noreturn void __dls3(size_t *sp)
+ char **argv_orig = argv;
+ char **envp = argv+argc+1;
+
++ /* Find aux vector just past environ[] and use it to initialize
++ * global data that may be needed before we can make syscalls. */
++ __environ = envp;
++ for (i=argc+1; argv[i]; i++);
++ libc.auxv = auxv = (void *)(argv+i+1);
++ decode_vec(auxv, aux, AUX_CNT);
++ __hwcap = aux[AT_HWCAP];
++ libc.page_size = aux[AT_PAGESZ];
++ libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
++ || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
++
+ /* Setup early thread pointer in builtin_tls for ldso/libc itself to
+ * use during dynamic linking. If possible it will also serve as the
+ * thread pointer at runtime. */
+@@ -1200,25 +1235,11 @@ _Noreturn void __dls3(size_t *sp)
+ a_crash();
+ }
+
+- /* Find aux vector just past environ[] */
+- for (i=argc+1; argv[i]; i++)
+- if (!memcmp(argv[i], "LD_LIBRARY_PATH=", 16))
+- env_path = argv[i]+16;
+- else if (!memcmp(argv[i], "LD_PRELOAD=", 11))
+- env_preload = argv[i]+11;
+- auxv = (void *)(argv+i+1);
+-
+- decode_vec(auxv, aux, AUX_CNT);
+-
+ /* Only trust user/env if kernel says we're not suid/sgid */
+- if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
+- || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
+- env_path = 0;
+- env_preload = 0;
+- libc.secure = 1;
++ if (!libc.secure) {
++ env_path = getenv("LD_LIBRARY_PATH");
++ env_preload = getenv("LD_PRELOAD");
+ }
+- libc.page_size = aux[AT_PAGESZ];
+- libc.auxv = auxv;
+
+ /* If the main program was already loaded by the kernel,
+ * AT_PHDR will point to some location other than the dynamic
+diff --git a/src/locale/c_locale.c b/src/locale/c_locale.c
+new file mode 100644
+index 0000000..77ccf58
+--- /dev/null
++++ b/src/locale/c_locale.c
+@@ -0,0 +1,15 @@
++#include "locale_impl.h"
++#include <stdint.h>
++
++static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
++
++const struct __locale_map __c_dot_utf8 = {
++ .map = empty_mo,
++ .map_size = sizeof empty_mo,
++ .name = "C.UTF-8"
++};
++
++const struct __locale_struct __c_locale = { 0 };
++const struct __locale_struct __c_dot_utf8_locale = {
++ .cat[LC_CTYPE] = &__c_dot_utf8
++};
+diff --git a/src/locale/iconv.c b/src/locale/iconv.c
+index e6121ae..1eeea94 100644
+--- a/src/locale/iconv.c
++++ b/src/locale/iconv.c
+@@ -5,6 +5,7 @@
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <stdint.h>
++#include "locale_impl.h"
+
+ #define UTF_32BE 0300
+ #define UTF_16LE 0301
+@@ -165,9 +166,12 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
+ int err;
+ unsigned char type = map[-1];
+ unsigned char totype = tomap[-1];
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+ if (!in || !*in || !*inb) return 0;
+
++ *ploc = UTF8_LOCALE;
++
+ for (; *inb; *in+=l, *inb-=l) {
+ c = *(unsigned char *)*in;
+ l = 1;
+@@ -431,6 +435,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
+ break;
+ }
+ }
++ *ploc = loc;
+ return x;
+ ilseq:
+ err = EILSEQ;
+@@ -445,5 +450,6 @@ starved:
+ x = -1;
+ end:
+ errno = err;
++ *ploc = loc;
+ return x;
+ }
+diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c
+index a1ada24..776b447 100644
+--- a/src/locale/langinfo.c
++++ b/src/locale/langinfo.c
+@@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, locale_t loc)
+ int idx = item & 65535;
+ const char *str;
+
+- if (item == CODESET) return "UTF-8";
++ if (item == CODESET)
++ return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8";
+
+ switch (cat) {
+ case LC_NUMERIC:
+diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c
+index 4346bb0..c3e5917 100644
+--- a/src/locale/locale_map.c
++++ b/src/locale/locale_map.c
+@@ -24,14 +24,6 @@ static const char envvars[][12] = {
+ "LC_MESSAGES",
+ };
+
+-static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
+-
+-const struct __locale_map __c_dot_utf8 = {
+- .map = empty_mo,
+- .map_size = sizeof empty_mo,
+- .name = "C.UTF-8"
+-};
+-
+ const struct __locale_map *__get_locale(int cat, const char *val)
+ {
+ static int lock[2];
+@@ -107,8 +99,8 @@ const struct __locale_map *__get_locale(int cat, const char *val)
+ * sake of being able to do message translations at the
+ * application level. */
+ if (!new && (new = malloc(sizeof *new))) {
+- new->map = empty_mo;
+- new->map_size = sizeof empty_mo;
++ new->map = __c_dot_utf8.map;
++ new->map_size = __c_dot_utf8.map_size;
+ memcpy(new->name, val, n);
+ new->name[n] = 0;
+ new->next = loc_head;
+diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c
+index 89d36b1..f50bbe9 100644
+--- a/src/locale/newlocale.c
++++ b/src/locale/newlocale.c
+@@ -3,16 +3,9 @@
+ #include "locale_impl.h"
+ #include "libc.h"
+
+-extern const struct __locale_map __c_dot_utf8;
+-
+-static const struct __locale_struct c_locale = { 0 };
+-static const struct __locale_struct c_dot_utf8_locale = {
+- .cat[LC_CTYPE] = &__c_dot_utf8
+-};
+-
+ int __loc_is_allocated(locale_t loc)
+ {
+- return loc && loc != &c_locale && loc != &c_dot_utf8_locale;
++ return loc && loc != C_LOCALE && loc != UTF8_LOCALE;
+ }
+
+ locale_t __newlocale(int mask, const char *name, locale_t loc)
+@@ -44,9 +37,9 @@ locale_t __newlocale(int mask, const char *name, locale_t loc)
+ }
+
+ if (!j)
+- return (locale_t)&c_locale;
+- if (j==1 && tmp.cat[LC_CTYPE]==c_dot_utf8_locale.cat[LC_CTYPE])
+- return (locale_t)&c_dot_utf8_locale;
++ return C_LOCALE;
++ if (j==1 && tmp.cat[LC_CTYPE]==&__c_dot_utf8)
++ return UTF8_LOCALE;
+
+ if ((loc = malloc(sizeof *loc))) *loc = tmp;
+
+diff --git a/src/locale/uselocale.c b/src/locale/uselocale.c
+index b70a0c1..0fc5ecb 100644
+--- a/src/locale/uselocale.c
++++ b/src/locale/uselocale.c
+@@ -8,9 +8,7 @@ locale_t __uselocale(locale_t new)
+ locale_t old = self->locale;
+ locale_t global = &libc.global_locale;
+
+- if (new == LC_GLOBAL_LOCALE) new = global;
+-
+- self->locale = new;
++ if (new) self->locale = new == LC_GLOBAL_LOCALE ? global : new;
+
+ return old == global ? LC_GLOBAL_LOCALE : old;
+ }
+diff --git a/src/malloc/calloc.c b/src/malloc/calloc.c
+index c3dfb47..436c0b0 100644
+--- a/src/malloc/calloc.c
++++ b/src/malloc/calloc.c
+@@ -1,22 +1,13 @@
+ #include <stdlib.h>
+ #include <errno.h>
+
++void *__malloc0(size_t);
++
+ void *calloc(size_t m, size_t n)
+ {
+- void *p;
+- size_t *z;
+ if (n && m > (size_t)-1/n) {
+ errno = ENOMEM;
+ return 0;
+ }
+- n *= m;
+- p = malloc(n);
+- if (!p) return 0;
+- /* Only do this for non-mmapped chunks */
+- if (((size_t *)p)[-1] & 7) {
+- /* Only write words that are not already zero */
+- m = (n + sizeof *z - 1)/sizeof *z;
+- for (z=p; m; m--, z++) if (*z) *z=0;
+- }
+- return p;
++ return __malloc0(n * m);
+ }
+diff --git a/src/malloc/expand_heap.c b/src/malloc/expand_heap.c
+new file mode 100644
+index 0000000..d8c0be7
+--- /dev/null
++++ b/src/malloc/expand_heap.c
+@@ -0,0 +1,72 @@
++#include <limits.h>
++#include <stdint.h>
++#include <errno.h>
++#include <sys/mman.h>
++#include "libc.h"
++#include "syscall.h"
++
++/* This function returns true if the interval [old,new]
++ * intersects the 'len'-sized interval below &libc.auxv
++ * (interpreted as the main-thread stack) or below &b
++ * (the current stack). It is used to defend against
++ * buggy brk implementations that can cross the stack. */
++
++static int traverses_stack_p(uintptr_t old, uintptr_t new)
++{
++ const uintptr_t len = 8<<20;
++ uintptr_t a, b;
++
++ b = (uintptr_t)libc.auxv;
++ a = b > len ? b-len : 0;
++ if (new>a && old<b) return 1;
++
++ b = (uintptr_t)&b;
++ a = b > len ? b-len : 0;
++ if (new>a && old<b) return 1;
++
++ return 0;
++}
++
++void *__mmap(void *, size_t, int, int, int, off_t);
++
++/* Expand the heap in-place if brk can be used, or otherwise via mmap,
++ * using an exponential lower bound on growth by mmap to make
++ * fragmentation asymptotically irrelevant. The size argument is both
++ * an input and an output, since the caller needs to know the size
++ * allocated, which will be larger than requested due to page alignment
++ * and mmap minimum size rules. The caller is responsible for locking
++ * to prevent concurrent calls. */
++
++void *__expand_heap(size_t *pn)
++{
++ static uintptr_t brk;
++ static unsigned mmap_step;
++ size_t n = *pn;
++
++ if (n > SIZE_MAX/2 - PAGE_SIZE) {
++ errno = ENOMEM;
++ return 0;
++ }
++ n += -n & PAGE_SIZE-1;
++
++ if (!brk) {
++ brk = __syscall(SYS_brk, 0);
++ brk += -brk & PAGE_SIZE-1;
++ }
++
++ if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
++ && __syscall(SYS_brk, brk+n)==brk+n) {
++ *pn = n;
++ brk += n;
++ return (void *)(brk-n);
++ }
++
++ size_t min = (size_t)PAGE_SIZE << mmap_step/2;
++ if (n < min) n = min;
++ void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
++ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
++ if (area == MAP_FAILED) return 0;
++ *pn = n;
++ mmap_step++;
++ return area;
++}
+diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c
+index 7643fc2..09ac575 100644
+--- a/src/malloc/lite_malloc.c
++++ b/src/malloc/lite_malloc.c
+@@ -4,43 +4,47 @@
+ #include <errno.h>
+ #include "libc.h"
+
+-uintptr_t __brk(uintptr_t);
+-
+ #define ALIGN 16
+
++void *__expand_heap(size_t *);
++
+ void *__simple_malloc(size_t n)
+ {
+- static uintptr_t cur, brk;
+- uintptr_t base, new;
++ static char *cur, *end;
+ static volatile int lock[2];
+- size_t align=1;
++ size_t align=1, pad;
++ void *p;
+
+ if (!n) n++;
+- if (n > SIZE_MAX/2) goto toobig;
+-
+ while (align<n && align<ALIGN)
+ align += align;
+- n = n + align - 1 & -align;
+
+ LOCK(lock);
+- if (!cur) cur = brk = __brk(0)+16;
+- base = cur + align-1 & -align;
+- if (n > SIZE_MAX - PAGE_SIZE - base) goto fail;
+- if (base+n > brk) {
+- new = base+n + PAGE_SIZE-1 & -PAGE_SIZE;
+- if (__brk(new) != new) goto fail;
+- brk = new;
+- }
+- cur = base+n;
+- UNLOCK(lock);
+
+- return (void *)base;
++ pad = -(uintptr_t)cur & align-1;
++
++ if (n <= SIZE_MAX/2 + ALIGN) n += pad;
++
++ if (n > end-cur) {
++ size_t m = n;
++ char *new = __expand_heap(&m);
++ if (!new) {
++ UNLOCK(lock);
++ return 0;
++ }
++ if (new != end) {
++ cur = new;
++ n -= pad;
++ pad = 0;
++ }
++ end = new + m;
++ }
+
+-fail:
++ p = cur + pad;
++ cur += n;
+ UNLOCK(lock);
+-toobig:
+- errno = ENOMEM;
+- return 0;
++ return p;
+ }
+
+ weak_alias(__simple_malloc, malloc);
++weak_alias(__simple_malloc, __malloc0);
+diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
+index d4de2dc..eb68d55 100644
+--- a/src/malloc/malloc.c
++++ b/src/malloc/malloc.c
+@@ -13,7 +13,6 @@
+ #define inline inline __attribute__((always_inline))
+ #endif
+
+-uintptr_t __brk(uintptr_t);
+ void *__mmap(void *, size_t, int, int, int, off_t);
+ int __munmap(void *, size_t);
+ void *__mremap(void *, size_t, size_t, int, ...);
+@@ -31,13 +30,9 @@ struct bin {
+ };
+
+ static struct {
+- uintptr_t brk;
+- size_t *heap;
+ volatile uint64_t binmap;
+ struct bin bins[64];
+- volatile int brk_lock[2];
+ volatile int free_lock[2];
+- unsigned mmap_step;
+ } mal;
+
+
+@@ -152,69 +147,52 @@ void __dump_heap(int x)
+ }
+ #endif
+
++void *__expand_heap(size_t *);
++
+ static struct chunk *expand_heap(size_t n)
+ {
+- static int init;
++ static int heap_lock[2];
++ static void *end;
++ void *p;
+ struct chunk *w;
+- uintptr_t new;
+-
+- lock(mal.brk_lock);
+
+- if (!init) {
+- mal.brk = __brk(0);
+-#ifdef SHARED
+- mal.brk = mal.brk + PAGE_SIZE-1 & -PAGE_SIZE;
+-#endif
+- mal.brk = mal.brk + 2*SIZE_ALIGN-1 & -SIZE_ALIGN;
+- mal.heap = (void *)mal.brk;
+- init = 1;
+- }
++ /* The argument n already accounts for the caller's chunk
++ * overhead needs, but if the heap can't be extended in-place,
++ * we need room for an extra zero-sized sentinel chunk. */
++ n += SIZE_ALIGN;
+
+- if (n > SIZE_MAX - mal.brk - 2*PAGE_SIZE) goto fail;
+- new = mal.brk + n + SIZE_ALIGN + PAGE_SIZE - 1 & -PAGE_SIZE;
+- n = new - mal.brk;
++ lock(heap_lock);
+
+- if (__brk(new) != new) {
+- size_t min = (size_t)PAGE_SIZE << mal.mmap_step/2;
+- n += -n & PAGE_SIZE-1;
+- if (n < min) n = min;
+- void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
+- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+- if (area == MAP_FAILED) goto fail;
++ p = __expand_heap(&n);
++ if (!p) {
++ unlock(heap_lock);
++ return 0;
++ }
+
+- mal.mmap_step++;
+- area = (char *)area + SIZE_ALIGN - OVERHEAD;
+- w = area;
++ /* If not just expanding existing space, we need to make a
++ * new sentinel chunk below the allocated space. */
++ if (p != end) {
++ /* Valid/safe because of the prologue increment. */
+ n -= SIZE_ALIGN;
++ p = (char *)p + SIZE_ALIGN;
++ w = MEM_TO_CHUNK(p);
+ w->psize = 0 | C_INUSE;
+- w->csize = n | C_INUSE;
+- w = NEXT_CHUNK(w);
+- w->psize = n | C_INUSE;
+- w->csize = 0 | C_INUSE;
+-
+- unlock(mal.brk_lock);
+-
+- return area;
+ }
+
+- w = MEM_TO_CHUNK(mal.heap);
+- w->psize = 0 | C_INUSE;
+-
+- w = MEM_TO_CHUNK(new);
++ /* Record new heap end and fill in footer. */
++ end = (char *)p + n;
++ w = MEM_TO_CHUNK(end);
+ w->psize = n | C_INUSE;
+ w->csize = 0 | C_INUSE;
+
+- w = MEM_TO_CHUNK(mal.brk);
++ /* Fill in header, which may be new or may be replacing a
++ * zero-size sentinel header at the old end-of-heap. */
++ w = MEM_TO_CHUNK(p);
+ w->csize = n | C_INUSE;
+- mal.brk = new;
+-
+- unlock(mal.brk_lock);
++
++ unlock(heap_lock);
+
+ return w;
+-fail:
+- unlock(mal.brk_lock);
+- errno = ENOMEM;
+- return 0;
+ }
+
+ static int adjust_size(size_t *n)
+@@ -378,6 +356,17 @@ void *malloc(size_t n)
+ return CHUNK_TO_MEM(c);
+ }
+
++void *__malloc0(size_t n)
++{
++ void *p = malloc(n);
++ if (p && !IS_MMAPPED(MEM_TO_CHUNK(p))) {
++ size_t *z;
++ n = (n + sizeof *z - 1)/sizeof *z;
++ for (z=p; n; n--, z++) if (*z) *z=0;
++ }
++ return p;
++}
++
+ void *realloc(void *p, size_t n)
+ {
+ struct chunk *self, *next;
+diff --git a/src/multibyte/btowc.c b/src/multibyte/btowc.c
+index 9d2c3b1..8acd0a2 100644
+--- a/src/multibyte/btowc.c
++++ b/src/multibyte/btowc.c
+@@ -1,7 +1,10 @@
+ #include <stdio.h>
+ #include <wchar.h>
++#include <stdlib.h>
++#include "internal.h"
+
+ wint_t btowc(int c)
+ {
+- return c<128U ? c : EOF;
++ int b = (unsigned char)c;
++ return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF;
+ }
+diff --git a/src/multibyte/internal.h b/src/multibyte/internal.h
+index cc017fa..53d62ed 100644
+--- a/src/multibyte/internal.h
++++ b/src/multibyte/internal.h
+@@ -23,3 +23,10 @@ extern const uint32_t bittab[];
+
+ #define SA 0xc2u
+ #define SB 0xf4u
++
++/* Arbitrary encoding for representing code units instead of characters. */
++#define CODEUNIT(c) (0xdfff & (signed char)(c))
++#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80)
++
++/* Get inline definition of MB_CUR_MAX. */
++#include "locale_impl.h"
+diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c
+index e7b3654..ca7da70 100644
+--- a/src/multibyte/mbrtowc.c
++++ b/src/multibyte/mbrtowc.c
+@@ -4,6 +4,7 @@
+ * unnecessary.
+ */
+
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
+ #include "internal.h"
+@@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate
+ if (!n) return -2;
+ if (!c) {
+ if (*s < 0x80) return !!(*wc = *s);
++ if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
+ if (*s-SA > SB-SA) goto ilseq;
+ c = bittab[*s++-SA]; n--;
+ }
+diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c
+index 3c1343a..e23083d 100644
+--- a/src/multibyte/mbsrtowcs.c
++++ b/src/multibyte/mbsrtowcs.c
+@@ -7,6 +7,8 @@
+ #include <stdint.h>
+ #include <wchar.h>
+ #include <errno.h>
++#include <string.h>
++#include <stdlib.h>
+ #include "internal.h"
+
+ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
+@@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs
+ }
+ }
+
++ if (MB_CUR_MAX==1) {
++ if (!ws) return strlen((const char *)s);
++ for (;;) {
++ if (!wn) {
++ *src = (const void *)s;
++ return wn0;
++ }
++ if (!*s) break;
++ c = *s++;
++ *ws++ = CODEUNIT(c);
++ wn--;
++ }
++ *ws = 0;
++ *src = 0;
++ return wn0-wn;
++ }
++
+ if (!ws) for (;;) {
+ if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
+ while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c
+index 803d221..71a9506 100644
+--- a/src/multibyte/mbtowc.c
++++ b/src/multibyte/mbtowc.c
+@@ -4,6 +4,7 @@
+ * unnecessary.
+ */
+
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
+ #include "internal.h"
+@@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n)
+ if (!wc) wc = &dummy;
+
+ if (*s < 0x80) return !!(*wc = *s);
++ if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
+ if (*s-SA > SB-SA) goto ilseq;
+ c = bittab[*s++-SA];
+
+diff --git a/src/multibyte/wcrtomb.c b/src/multibyte/wcrtomb.c
+index 59f733d..ddc37a5 100644
+--- a/src/multibyte/wcrtomb.c
++++ b/src/multibyte/wcrtomb.c
+@@ -4,8 +4,10 @@
+ * unnecessary.
+ */
+
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
++#include "internal.h"
+
+ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
+ {
+@@ -13,6 +15,13 @@ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
+ if ((unsigned)wc < 0x80) {
+ *s = wc;
+ return 1;
++ } else if (MB_CUR_MAX == 1) {
++ if (!IS_CODEUNIT(wc)) {
++ errno = EILSEQ;
++ return -1;
++ }
++ *s = wc;
++ return 1;
+ } else if ((unsigned)wc < 0x800) {
+ *s++ = 0xc0 | (wc>>6);
+ *s = 0x80 | (wc&0x3f);
+diff --git a/src/multibyte/wctob.c b/src/multibyte/wctob.c
+index d6353ee..4aeda6a 100644
+--- a/src/multibyte/wctob.c
++++ b/src/multibyte/wctob.c
+@@ -1,8 +1,10 @@
+-#include <stdio.h>
+ #include <wchar.h>
++#include <stdlib.h>
++#include "internal.h"
+
+ int wctob(wint_t c)
+ {
+ if (c < 128U) return c;
++ if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c;
+ return EOF;
+ }
+diff --git a/src/passwd/nscd_query.c b/src/passwd/nscd_query.c
+index 69a7815..d38e371 100644
+--- a/src/passwd/nscd_query.c
++++ b/src/passwd/nscd_query.c
+@@ -32,6 +32,7 @@ FILE *__nscd_query(int32_t req, const char *key, int32_t *buf, size_t len, int *
+ },
+ .msg_iovlen = 2
+ };
++ int errno_save = errno;
+
+ *swap = 0;
+ retry:
+@@ -50,11 +51,14 @@ retry:
+ return f;
+
+ if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
+- /* If there isn't a running nscd we return -1 to indicate that
+- * that is precisely what happened
+- */
+- if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT)
++ /* If there isn't a running nscd we simulate a "not found"
++ * result and the caller is responsible for calling
++ * fclose on the (unconnected) socket. The value of
++ * errno must be left unchanged in this case. */
++ if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT) {
++ errno = errno_save;
+ return f;
++ }
+ goto error;
+ }
+
+diff --git a/src/process/sh/vfork.s b/src/process/sh/vfork.s
+new file mode 100644
+index 0000000..48cc939
+--- /dev/null
++++ b/src/process/sh/vfork.s
+@@ -0,0 +1,23 @@
++.global __vfork
++.weak vfork
++.type __vfork,@function
++.type vfork,@function
++__vfork:
++vfork:
++ mov #95, r3
++ add r3, r3
++
++ trapa #31
++ or r0, r0
++ or r0, r0
++ or r0, r0
++ or r0, r0
++ or r0, r0
++
++ mov r0, r4
++ mov.l 1f, r0
++2: braf r0
++ nop
++ .align 2
++ .hidden __syscall_ret
++1: .long __syscall_ret@PLT-(2b+4-.)
+diff --git a/src/regex/fnmatch.c b/src/regex/fnmatch.c
+index 7f6b65f..978fff8 100644
+--- a/src/regex/fnmatch.c
++++ b/src/regex/fnmatch.c
+@@ -18,6 +18,7 @@
+ #include <stdlib.h>
+ #include <wchar.h>
+ #include <wctype.h>
++#include "locale_impl.h"
+
+ #define END 0
+ #define UNMATCHABLE -2
+@@ -229,7 +230,7 @@ static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n
+ * On illegal sequences we may get it wrong, but in that case
+ * we necessarily have a matching failure anyway. */
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
+- if (s[-1] < 128U) s--;
++ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ }
+ if (tailcnt) return FNM_NOMATCH;
+diff --git a/src/signal/sh/restore.s b/src/signal/sh/restore.s
+index ab26034..eaedcdf 100644
+--- a/src/signal/sh/restore.s
++++ b/src/signal/sh/restore.s
+@@ -2,7 +2,7 @@
+ .type __restore, @function
+ __restore:
+ mov #119, r3 !__NR_sigreturn
+- trapa #16
++ trapa #31
+
+ or r0, r0
+ or r0, r0
+@@ -15,7 +15,7 @@ __restore:
+ __restore_rt:
+ mov #100, r3 !__NR_rt_sigreturn
+ add #73, r3
+- trapa #16
++ trapa #31
+
+ or r0, r0
+ or r0, r0
+diff --git a/src/stdio/__fdopen.c b/src/stdio/__fdopen.c
+index ef8f47d..8d6ce81 100644
+--- a/src/stdio/__fdopen.c
++++ b/src/stdio/__fdopen.c
+@@ -54,13 +54,7 @@ FILE *__fdopen(int fd, const char *mode)
+ if (!libc.threaded) f->lock = -1;
+
+ /* Add new FILE to open file list */
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+
+ weak_alias(__fdopen, fdopen);
+diff --git a/src/stdio/__stdio_exit.c b/src/stdio/__stdio_exit.c
+index 716e5f7..191b445 100644
+--- a/src/stdio/__stdio_exit.c
++++ b/src/stdio/__stdio_exit.c
+@@ -16,8 +16,7 @@ static void close_file(FILE *f)
+ void __stdio_exit(void)
+ {
+ FILE *f;
+- OFLLOCK();
+- for (f=libc.ofl_head; f; f=f->next) close_file(f);
++ for (f=*__ofl_lock(); f; f=f->next) close_file(f);
+ close_file(__stdin_used);
+ close_file(__stdout_used);
+ }
+diff --git a/src/stdio/__stdio_read.c b/src/stdio/__stdio_read.c
+index 5947344..f8fa6d3 100644
+--- a/src/stdio/__stdio_read.c
++++ b/src/stdio/__stdio_read.c
+@@ -1,12 +1,5 @@
+ #include "stdio_impl.h"
+ #include <sys/uio.h>
+-#include <pthread.h>
+-
+-static void cleanup(void *p)
+-{
+- FILE *f = p;
+- if (!f->lockcount) __unlockfile(f);
+-}
+
+ size_t __stdio_read(FILE *f, unsigned char *buf, size_t len)
+ {
+@@ -16,9 +9,7 @@ size_t __stdio_read(FILE *f, unsigned char *buf, size_t len)
+ };
+ ssize_t cnt;
+
+- pthread_cleanup_push(cleanup, f);
+- cnt = syscall_cp(SYS_readv, f->fd, iov, 2);
+- pthread_cleanup_pop(0);
++ cnt = syscall(SYS_readv, f->fd, iov, 2);
+ if (cnt <= 0) {
+ f->flags |= F_EOF ^ ((F_ERR^F_EOF) & cnt);
+ return cnt;
+diff --git a/src/stdio/__stdio_write.c b/src/stdio/__stdio_write.c
+index 8c89389..d2d8947 100644
+--- a/src/stdio/__stdio_write.c
++++ b/src/stdio/__stdio_write.c
+@@ -1,12 +1,5 @@
+ #include "stdio_impl.h"
+ #include <sys/uio.h>
+-#include <pthread.h>
+-
+-static void cleanup(void *p)
+-{
+- FILE *f = p;
+- if (!f->lockcount) __unlockfile(f);
+-}
+
+ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
+ {
+@@ -19,9 +12,7 @@ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
+ int iovcnt = 2;
+ ssize_t cnt;
+ for (;;) {
+- pthread_cleanup_push(cleanup, f);
+- cnt = syscall_cp(SYS_writev, f->fd, iov, iovcnt);
+- pthread_cleanup_pop(0);
++ cnt = syscall(SYS_writev, f->fd, iov, iovcnt);
+ if (cnt == rem) {
+ f->wend = f->buf + f->buf_size;
+ f->wpos = f->wbase = f->buf;
+@@ -34,11 +25,8 @@ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
+ }
+ rem -= cnt;
+ if (cnt > iov[0].iov_len) {
+- f->wpos = f->wbase = f->buf;
+ cnt -= iov[0].iov_len;
+ iov++; iovcnt--;
+- } else if (iovcnt == 2) {
+- f->wbase += cnt;
+ }
+ iov[0].iov_base = (char *)iov[0].iov_base + cnt;
+ iov[0].iov_len -= cnt;
+diff --git a/src/stdio/fclose.c b/src/stdio/fclose.c
+index 317b3c9..839d88a 100644
+--- a/src/stdio/fclose.c
++++ b/src/stdio/fclose.c
+@@ -14,11 +14,11 @@ int fclose(FILE *f)
+ __unlist_locked_file(f);
+
+ if (!(perm = f->flags & F_PERM)) {
+- OFLLOCK();
++ FILE **head = __ofl_lock();
+ if (f->prev) f->prev->next = f->next;
+ if (f->next) f->next->prev = f->prev;
+- if (libc.ofl_head == f) libc.ofl_head = f->next;
+- OFLUNLOCK();
++ if (*head == f) *head = f->next;
++ __ofl_unlock();
+ }
+
+ r = fflush(f);
+diff --git a/src/stdio/fflush.c b/src/stdio/fflush.c
+index 7bf862a..3f462c8 100644
+--- a/src/stdio/fflush.c
++++ b/src/stdio/fflush.c
+@@ -35,13 +35,12 @@ int fflush(FILE *f)
+
+ r = __stdout_used ? fflush(__stdout_used) : 0;
+
+- OFLLOCK();
+- for (f=libc.ofl_head; f; f=f->next) {
++ for (f=*__ofl_lock(); f; f=f->next) {
+ FLOCK(f);
+ if (f->wpos > f->wbase) r |= __fflush_unlocked(f);
+ FUNLOCK(f);
+ }
+- OFLUNLOCK();
++ __ofl_unlock();
+
+ return r;
+ }
+diff --git a/src/stdio/fgetwc.c b/src/stdio/fgetwc.c
+index 8626d54..e455cfe 100644
+--- a/src/stdio/fgetwc.c
++++ b/src/stdio/fgetwc.c
+@@ -1,8 +1,9 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <errno.h>
+
+-wint_t __fgetwc_unlocked(FILE *f)
++static wint_t __fgetwc_unlocked_internal(FILE *f)
+ {
+ mbstate_t st = { 0 };
+ wchar_t wc;
+@@ -10,8 +11,6 @@ wint_t __fgetwc_unlocked(FILE *f)
+ unsigned char b;
+ size_t l;
+
+- f->mode |= f->mode+1;
+-
+ /* Convert character from buffer if possible */
+ if (f->rpos < f->rend) {
+ l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st);
+@@ -39,6 +38,16 @@ wint_t __fgetwc_unlocked(FILE *f)
+ return wc;
+ }
+
++wint_t __fgetwc_unlocked(FILE *f)
++{
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
++ if (f->mode <= 0) fwide(f, 1);
++ *ploc = f->locale;
++ wchar_t wc = __fgetwc_unlocked_internal(f);
++ *ploc = loc;
++ return wc;
++}
++
+ wint_t fgetwc(FILE *f)
+ {
+ wint_t c;
+diff --git a/src/stdio/fmemopen.c b/src/stdio/fmemopen.c
+index d784960..7c193a5 100644
+--- a/src/stdio/fmemopen.c
++++ b/src/stdio/fmemopen.c
+@@ -110,11 +110,5 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode)
+
+ if (!libc.threaded) f->lock = -1;
+
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+diff --git a/src/stdio/fopen.c b/src/stdio/fopen.c
+index 07bdb6e..252f082 100644
+--- a/src/stdio/fopen.c
++++ b/src/stdio/fopen.c
+@@ -18,7 +18,7 @@ FILE *fopen(const char *restrict filename, const char *restrict mode)
+ /* Compute the flags to pass to open() */
+ flags = __fmodeflags(mode);
+
+- fd = sys_open_cp(filename, flags, 0666);
++ fd = sys_open(filename, flags, 0666);
+ if (fd < 0) return 0;
+ if (flags & O_CLOEXEC)
+ __syscall(SYS_fcntl, fd, F_SETFD, FD_CLOEXEC);
+diff --git a/src/stdio/fputwc.c b/src/stdio/fputwc.c
+index 7b621dd..789fe9c 100644
+--- a/src/stdio/fputwc.c
++++ b/src/stdio/fputwc.c
+@@ -1,4 +1,5 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <limits.h>
+ #include <ctype.h>
+@@ -7,8 +8,10 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f)
+ {
+ char mbc[MB_LEN_MAX];
+ int l;
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+- f->mode |= f->mode+1;
++ if (f->mode <= 0) fwide(f, 1);
++ *ploc = f->locale;
+
+ if (isascii(c)) {
+ c = putc_unlocked(c, f);
+@@ -20,6 +23,8 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f)
+ l = wctomb(mbc, c);
+ if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF;
+ }
++ if (c==WEOF) f->flags |= F_ERR;
++ *ploc = loc;
+ return c;
+ }
+
+diff --git a/src/stdio/fputws.c b/src/stdio/fputws.c
+index 5723cbc..0ed02f1 100644
+--- a/src/stdio/fputws.c
++++ b/src/stdio/fputws.c
+@@ -1,23 +1,28 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+
+ int fputws(const wchar_t *restrict ws, FILE *restrict f)
+ {
+ unsigned char buf[BUFSIZ];
+ size_t l=0;
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+ FLOCK(f);
+
+- f->mode |= f->mode+1;
++ fwide(f, 1);
++ *ploc = f->locale;
+
+ while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1)
+ if (__fwritex(buf, l, f) < l) {
+ FUNLOCK(f);
++ *ploc = loc;
+ return -1;
+ }
+
+ FUNLOCK(f);
+
++ *ploc = loc;
+ return l; /* 0 or -1 */
+ }
+
+diff --git a/src/stdio/fwide.c b/src/stdio/fwide.c
+index 8088e7a..8410b15 100644
+--- a/src/stdio/fwide.c
++++ b/src/stdio/fwide.c
+@@ -1,13 +1,14 @@
+-#include <wchar.h>
+ #include "stdio_impl.h"
+-
+-#define SH (8*sizeof(int)-1)
+-#define NORMALIZE(x) ((x)>>SH | -((-(x))>>SH))
++#include "locale_impl.h"
+
+ int fwide(FILE *f, int mode)
+ {
+ FLOCK(f);
+- if (!f->mode) f->mode = NORMALIZE(mode);
++ if (mode) {
++ if (!f->locale) f->locale = MB_CUR_MAX==1
++ ? C_LOCALE : UTF8_LOCALE;
++ if (!f->mode) f->mode = mode>0 ? 1 : -1;
++ }
+ mode = f->mode;
+ FUNLOCK(f);
+ return mode;
+diff --git a/src/stdio/ofl.c b/src/stdio/ofl.c
+new file mode 100644
+index 0000000..b143999
+--- /dev/null
++++ b/src/stdio/ofl.c
+@@ -0,0 +1,16 @@
++#include "stdio_impl.h"
++#include "libc.h"
++
++static FILE *ofl_head;
++static volatile int ofl_lock[2];
++
++FILE **__ofl_lock()
++{
++ LOCK(ofl_lock);
++ return &ofl_head;
++}
++
++void __ofl_unlock()
++{
++ UNLOCK(ofl_lock);
++}
+diff --git a/src/stdio/ofl_add.c b/src/stdio/ofl_add.c
+new file mode 100644
+index 0000000..d7de9f1
+--- /dev/null
++++ b/src/stdio/ofl_add.c
+@@ -0,0 +1,11 @@
++#include "stdio_impl.h"
++
++FILE *__ofl_add(FILE *f)
++{
++ FILE **head = __ofl_lock();
++ f->next = *head;
++ if (*head) (*head)->prev = f;
++ *head = f;
++ __ofl_unlock();
++ return f;
++}
+diff --git a/src/stdio/open_memstream.c b/src/stdio/open_memstream.c
+index 9eafdfb..58504c9 100644
+--- a/src/stdio/open_memstream.c
++++ b/src/stdio/open_memstream.c
+@@ -79,11 +79,5 @@ FILE *open_memstream(char **bufp, size_t *sizep)
+
+ if (!libc.threaded) f->lock = -1;
+
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+diff --git a/src/stdio/open_wmemstream.c b/src/stdio/open_wmemstream.c
+index 3537030..7ab2c64 100644
+--- a/src/stdio/open_wmemstream.c
++++ b/src/stdio/open_wmemstream.c
+@@ -81,11 +81,5 @@ FILE *open_wmemstream(wchar_t **bufp, size_t *sizep)
+
+ if (!libc.threaded) f->lock = -1;
+
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+diff --git a/src/stdio/ungetwc.c b/src/stdio/ungetwc.c
+index 913f716..80d6e20 100644
+--- a/src/stdio/ungetwc.c
++++ b/src/stdio/ungetwc.c
+@@ -1,4 +1,5 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <limits.h>
+ #include <ctype.h>
+@@ -8,21 +9,19 @@ wint_t ungetwc(wint_t c, FILE *f)
+ {
+ unsigned char mbc[MB_LEN_MAX];
+ int l=1;
+-
+- if (c == WEOF) return c;
+-
+- /* Try conversion early so we can fail without locking if invalid */
+- if (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)
+- return WEOF;
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+ FLOCK(f);
+
+- f->mode |= f->mode+1;
++ if (f->mode <= 0) fwide(f, 1);
++ *ploc = f->locale;
+
+ if (!f->rpos) __toread(f);
+- if (!f->rpos || f->rpos < f->buf - UNGET + l) {
++ if (!f->rpos || f->rpos < f->buf - UNGET + l || c == WEOF ||
++ (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)) {
+ FUNLOCK(f);
+- return EOF;
++ *ploc = loc;
++ return WEOF;
+ }
+
+ if (isascii(c)) *--f->rpos = c;
+@@ -31,5 +30,6 @@ wint_t ungetwc(wint_t c, FILE *f)
+ f->flags &= ~F_EOF;
+
+ FUNLOCK(f);
++ *ploc = loc;
+ return c;
+ }
+diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
+index ebdff00..f06d5ae 100644
+--- a/src/stdio/vfwprintf.c
++++ b/src/stdio/vfwprintf.c
+@@ -293,7 +293,10 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
+ if ((fl&LEFT_ADJ)) fprintf(f, "%.*s", w-p, "");
+ l=w;
+ continue;
++ case 'm':
++ arg.p = strerror(errno);
+ case 's':
++ if (!arg.p) arg.p = "(null)";
+ bs = arg.p;
+ if (p<0) p = INT_MAX;
+ for (i=l=0; l<p && (i=mbtowc(&wc, bs, MB_LEN_MAX))>0; bs+=i, l++);
+@@ -356,7 +359,7 @@ int vfwprintf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
+ }
+
+ FLOCK(f);
+- f->mode |= f->mode+1;
++ fwide(f, 1);
+ olderr = f->flags & F_ERR;
+ f->flags &= ~F_ERR;
+ ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type);
+diff --git a/src/stdio/vfwscanf.c b/src/stdio/vfwscanf.c
+index ac5c2c2..223aad4 100644
+--- a/src/stdio/vfwscanf.c
++++ b/src/stdio/vfwscanf.c
+@@ -104,7 +104,7 @@ int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
+
+ FLOCK(f);
+
+- f->mode |= f->mode+1;
++ fwide(f, 1);
+
+ for (p=fmt; *p; p++) {
+
+diff --git a/src/string/strverscmp.c b/src/string/strverscmp.c
+index 6f37cc6..4daf276 100644
+--- a/src/string/strverscmp.c
++++ b/src/string/strverscmp.c
+@@ -2,40 +2,33 @@
+ #include <ctype.h>
+ #include <string.h>
+
+-int strverscmp(const char *l, const char *r)
++int strverscmp(const char *l0, const char *r0)
+ {
+- int haszero=1;
+- while (*l==*r) {
+- if (!*l) return 0;
++ const unsigned char *l = (const void *)l0;
++ const unsigned char *r = (const void *)r0;
++ size_t i, dp, j;
++ int z = 1;
+
+- if (*l=='0') {
+- if (haszero==1) {
+- haszero=0;
+- }
+- } else if (isdigit(*l)) {
+- if (haszero==1) {
+- haszero=2;
+- }
+- } else {
+- haszero=1;
+- }
+- l++; r++;
++ /* Find maximal matching prefix and track its maximal digit
++ * suffix and whether those digits are all zeros. */
++ for (dp=i=0; l[i]==r[i]; i++) {
++ int c = l[i];
++ if (!c) return 0;
++ if (!isdigit(c)) dp=i+1, z=1;
++ else if (c!='0') z=0;
+ }
+- if (haszero==1 && (*l=='0' || *r=='0')) {
+- haszero=0;
+- }
+- if ((isdigit(*l) && isdigit(*r) ) && haszero) {
+- size_t lenl=0, lenr=0;
+- while (isdigit(l[lenl]) ) lenl++;
+- while (isdigit(r[lenr]) ) lenr++;
+- if (lenl==lenr) {
+- return (*l - *r);
+- } else if (lenl>lenr) {
+- return 1;
+- } else {
+- return -1;
+- }
+- } else {
+- return (*l - *r);
++
++ if (l[dp]!='0' && r[dp]!='0') {
++ /* If we're not looking at a digit sequence that began
++ * with a zero, longest digit string is greater. */
++ for (j=i; isdigit(l[j]); j++)
++ if (!isdigit(r[j])) return 1;
++ if (isdigit(r[j])) return -1;
++ } else if (z && dp<i && (isdigit(l[i]) || isdigit(r[i]))) {
++ /* Otherwise, if common prefix of digit sequence is
++ * all zeros, digits order less than non-digits. */
++ return (unsigned char)(l[i]-'0') - (unsigned char)(r[i]-'0');
+ }
++
++ return l[i] - r[i];
+ }
+diff --git a/src/thread/__tls_get_addr.c b/src/thread/__tls_get_addr.c
+index 3633396..84a413d 100644
+--- a/src/thread/__tls_get_addr.c
++++ b/src/thread/__tls_get_addr.c
+@@ -8,9 +8,9 @@ void *__tls_get_addr(size_t *v)
+ __attribute__((__visibility__("hidden")))
+ void *__tls_get_new(size_t *);
+ if (v[0]<=(size_t)self->dtv[0])
+- return (char *)self->dtv[v[0]]+v[1];
++ return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
+ return __tls_get_new(v);
+ #else
+- return (char *)self->dtv[1]+v[1];
++ return (char *)self->dtv[1]+v[1]+DTP_OFFSET;
+ #endif
+ }
+diff --git a/src/thread/__unmapself.c b/src/thread/__unmapself.c
+index e69de29..1d3bee1 100644
+--- a/src/thread/__unmapself.c
++++ b/src/thread/__unmapself.c
+@@ -0,0 +1,29 @@
++#include "pthread_impl.h"
++#include "atomic.h"
++#include "syscall.h"
++/* cheat and reuse CRTJMP macro from dynlink code */
++#include "dynlink.h"
++
++static volatile int lock;
++static void *unmap_base;
++static size_t unmap_size;
++static char shared_stack[256];
++
++static void do_unmap()
++{
++ __syscall(SYS_munmap, unmap_base, unmap_size);
++ __syscall(SYS_exit);
++}
++
++void __unmapself(void *base, size_t size)
++{
++ int tid=__pthread_self()->tid;
++ char *stack = shared_stack + sizeof shared_stack;
++ stack -= (uintptr_t)stack % 16;
++ while (lock || a_cas(&lock, 0, tid))
++ a_spin();
++ __syscall(SYS_set_tid_address, &lock);
++ unmap_base = base;
++ unmap_size = size;
++ CRTJMP(do_unmap, stack);
++}
+diff --git a/src/thread/mips/__unmapself.s b/src/thread/mips/__unmapself.s
+index 9aa0371..ba139dc 100644
+--- a/src/thread/mips/__unmapself.s
++++ b/src/thread/mips/__unmapself.s
+@@ -2,6 +2,7 @@
+ .global __unmapself
+ .type __unmapself,@function
+ __unmapself:
++ move $sp, $25
+ li $2, 4091
+ syscall
+ li $4, 0
+diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
+index de72818..e7df34a 100644
+--- a/src/thread/pthread_create.c
++++ b/src/thread/pthread_create.c
+@@ -191,8 +191,9 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
+ if (!libc.can_do_threads) return ENOSYS;
+ self = __pthread_self();
+ if (!libc.threaded) {
+- for (FILE *f=libc.ofl_head; f; f=f->next)
++ for (FILE *f=*__ofl_lock(); f; f=f->next)
+ init_file_lock(f);
++ __ofl_unlock();
+ init_file_lock(__stdin_used);
+ init_file_lock(__stdout_used);
+ init_file_lock(__stderr_used);
+@@ -231,7 +232,8 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
+ if (guard) {
+ map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (map == MAP_FAILED) goto fail;
+- if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
++ if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)
++ && errno != ENOSYS) {
+ __munmap(map, size);
+ goto fail;
+ }
+diff --git a/src/thread/sh/__set_thread_area.s b/src/thread/sh/__set_thread_area.s
+index d9f1181..e69de29 100644
+--- a/src/thread/sh/__set_thread_area.s
++++ b/src/thread/sh/__set_thread_area.s
+@@ -1,6 +0,0 @@
+-.global __set_thread_area
+-.type __set_thread_area, @function
+-__set_thread_area:
+- ldc r4, gbr
+- rts
+- mov #0, r0
+diff --git a/src/thread/sh/__unmapself.s b/src/thread/sh/__unmapself.s
+index b34c3c8..0161d53 100644
+--- a/src/thread/sh/__unmapself.s
++++ b/src/thread/sh/__unmapself.s
+@@ -1,9 +1,9 @@
+ .text
+-.global __unmapself
+-.type __unmapself, @function
+-__unmapself:
++.global __unmapself_sh_mmu
++.type __unmapself_sh_mmu, @function
++__unmapself_sh_mmu:
+ mov #91, r3 ! SYS_munmap
+- trapa #18
++ trapa #31
+
+ or r0, r0
+ or r0, r0
+@@ -13,7 +13,7 @@ __unmapself:
+
+ mov #1, r3 ! SYS_exit
+ mov #0, r4
+- trapa #17
++ trapa #31
+
+ or r0, r0
+ or r0, r0
+diff --git a/src/thread/sh/clone.s b/src/thread/sh/clone.s
+index d6c9184..f8ad845 100644
+--- a/src/thread/sh/clone.s
++++ b/src/thread/sh/clone.s
+@@ -17,7 +17,7 @@ __clone:
+ mov.l @r15, r6 ! r6 = ptid
+ mov.l @(8,r15), r7 ! r7 = ctid
+ mov.l @(4,r15), r0 ! r0 = tls
+- trapa #21
++ trapa #31
+
+ or r0, r0
+ or r0, r0
+@@ -38,7 +38,7 @@ __clone:
+
+ mov #1, r3 ! __NR_exit
+ mov r0, r4
+- trapa #17
++ trapa #31
+
+ or r0, r0
+ or r0, r0
+diff --git a/src/thread/sh/syscall_cp.s b/src/thread/sh/syscall_cp.s
+index 6b28ddf..c3cafac 100644
+--- a/src/thread/sh/syscall_cp.s
++++ b/src/thread/sh/syscall_cp.s
+@@ -31,7 +31,7 @@ L1: .long __cancel@PLT-(1b-.)
+ mov.l @(4,r15), r7
+ mov.l @(8,r15), r0
+ mov.l @(12,r15), r1
+- trapa #22
++ trapa #31
+
+ __cp_end:
+ ! work around hardware bug
+diff --git a/src/unistd/sh/pipe.s b/src/unistd/sh/pipe.s
+index d865ae3..46c4908 100644
+--- a/src/unistd/sh/pipe.s
++++ b/src/unistd/sh/pipe.s
+@@ -2,7 +2,7 @@
+ .type pipe, @function
+ pipe:
+ mov #42, r3
+- trapa #17
++ trapa #31
+
+ ! work around hardware bug
+ or r0, r0