aboutsummaryrefslogtreecommitdiffstats
path: root/toolchain/musl/patches/001-git-2015-06-16.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain/musl/patches/001-git-2015-06-16.patch')
-rw-r--r--toolchain/musl/patches/001-git-2015-06-16.patch1578
1 files changed, 1578 insertions, 0 deletions
diff --git a/toolchain/musl/patches/001-git-2015-06-16.patch b/toolchain/musl/patches/001-git-2015-06-16.patch
new file mode 100644
index 0000000000..5941adc320
--- /dev/null
+++ b/toolchain/musl/patches/001-git-2015-06-16.patch
@@ -0,0 +1,1578 @@
+commit 1b0cdc8700d29ef018bf226d74b2b58b23bce91c
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 07:11:19 2015 +0000
+
+ refactor stdio open file list handling, move it out of global libc struct
+
+ functions which open in-memory FILE stream variants all shared a tail
+ with __fdopen, adding the FILE structure to stdio's open file list.
+ replacing this common tail with a function call reduces code size and
+ duplication of logic. the list is also partially encapsulated now.
+
+ function signatures were chosen to facilitate tail call optimization
+ and reduce the need for additional accessor functions.
+
+ with these changes, static linked programs that do not use stdio no
+ longer have an open file list at all.
+
+commit f22a9edaf8a6f2ca1d314d18b3785558279a5c03
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 06:18:00 2015 +0000
+
+ byte-based C locale, phase 3: make MB_CUR_MAX variable to activate code
+
+ this patch activates the new byte-based C locale (high bytes treated
+ as abstract code unit "characters" rather than decoded as multibyte
+ characters) by making the value of MB_CUR_MAX depend on the active
+ locale. for the C locale, the LC_CTYPE category pointer is null,
+ yielding a value of 1. all other locales yield a value of 4.
+
+commit 16f18d036d9a7bf590ee6eb86785c0a9658220b6
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 05:35:31 2015 +0000
+
+ byte-based C locale, phase 2: stdio and iconv (multibyte callers)
+
+ this patch adjusts libc components which use the multibyte functions
+ internally, and which depend on them operating in a particular
+ encoding, to make the appropriate locale changes before calling them
+ and restore the calling thread's locale afterwards. activating the
+ byte-based C locale without these changes would cause regressions in
+ stdio and iconv.
+
+ in the case of iconv, the current implementation was simply using the
+ multibyte functions as UTF-8 conversions. setting a multibyte UTF-8
+ locale for the duration of the iconv operation allows the code to
+ continue working.
+
+ in the case of stdio, POSIX requires that FILE streams have an
+ encoding rule bound at the time of setting wide orientation. as long
+ as all locales, including the C locale, used the same encoding,
+ treating high bytes as UTF-8, there was no need to store an encoding
+ rule as part of the stream's state.
+
+ a new locale field in the FILE structure points to the locale that
+ should be made active during fgetwc/fputwc/ungetwc on the stream. it
+ cannot point to the locale active at the time the stream becomes
+ oriented, because this locale could be mutable (the global locale) or
+ could be destroyed (locale_t objects produced by newlocale) before the
+ stream is closed. instead, a pointer to the static C or C.UTF-8 locale
+ object added in commit aeeac9ca5490d7d90fe061ab72da446c01ddf746
+ is used. this is valid since categories other than LC_CTYPE will not
+ affect these functions.
+
+commit 1507ebf837334e9e07cfab1ca1c2e88449069a80
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 04:44:17 2015 +0000
+
+ byte-based C locale, phase 1: multibyte character handling functions
+
+ this patch makes the functions which work directly on multibyte
+ characters treat the high bytes as individual abstract code units
+ rather than as multibyte sequences when MB_CUR_MAX is 1. since
+ MB_CUR_MAX is presently defined as a constant 4, all of the new code
+ added is dead code, and optimizing compilers' code generation should
+ not be affected at all. a future commit will activate the new code.
+
+ as abstract code units, bytes 0x80 to 0xff are represented by wchar_t
+ values 0xdf80 to 0xdfff, at the end of the surrogates range. this
+ ensures that they will never be misinterpreted as Unicode characters,
+ and that all wctype functions return false for these "characters"
+ without needing locale-specific logic. a high range outside of Unicode
+ such as 0x7fffff80 to 0x7fffffff was also considered, but since C11's
+ char16_t also needs to be able to represent conversions of these
+ bytes, the surrogate range was the natural choice.
+
+commit 38e2f727237230300fea6aff68802db04625fd23
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 16 04:21:38 2015 +0000
+
+ fix btowc corner case
+
+ btowc is required to interpret its argument by conversion to unsigned
+ char, unless the argument is equal to EOF. since the conversion of EOF
+ produces a non-character value anyway, we can just unconditionally
+ convert, for now.
+
+commit ee59c296d56bf26f49f354d6eb32b4b6d4190188
+Author: Szabolcs Nagy <nsz@port70.net>
+Date: Wed Jun 3 10:32:14 2015 +0100
+
+ arm: add vdso support
+
+ vdso will be available on arm in linux v4.2, the user-space code
+ for it is in kernel commit 8512287a8165592466cb9cb347ba94892e9c56a5
+
+commit e3bc22f1eff87b8f029a6ab31f1a269d69e4b053
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 14 01:59:02 2015 +0000
+
+ refactor malloc's expand_heap to share with __simple_malloc
+
+ this extends the brk/stack collision protection added to full malloc
+ in commit 276904c2f6bde3a31a24ebfa201482601d18b4f9 to also protect the
+ __simple_malloc function used in static-linked programs that don't
+ reference the free function.
+
+ it also extends support for using mmap when brk fails, which full
+ malloc got in commit 5446303328adf4b4e36d9fba21848e6feb55fab4, to
+ __simple_malloc.
+
+ since __simple_malloc may expand the heap by arbitrarily large
+ increments, the stack collision detection is enhanced to detect
+ interval overlap rather than just proximity of a single address to the
+ stack. code size is increased a bit, but this is partly offset by the
+ sharing of code between the two malloc implementations, which due to
+ linking semantics, both get linked in a program that needs the full
+ malloc with realloc/free support.
+
+commit 4ef9b828c1f39553a69e0635ac91f0fcadd6e8c6
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 20:53:02 2015 +0000
+
+ remove cancellation points in stdio
+
+ commit 58165923890865a6ac042fafce13f440ee986fd9 added these optional
+ cancellation points on the basis that cancellable stdio could be
+ useful, to unblock threads stuck on stdio operations that will never
+ complete. however, the only way to ensure that cancellation can
+ achieve this is to violate the rules for side effects when
+ cancellation is acted upon, discarding knowledge of any partial data
+ transfer already completed. our implementation exhibited this behavior
+ and was thus non-conforming.
+
+ in addition to improving correctness, removing these cancellation
+ points moderately reduces code size, and should significantly improve
+ performance on i386, where sysenter/syscall instructions can be used
+ instead of "int $128" for non-cancellable syscalls.
+
+commit 536c6d5a4205e2a3f161f2983ce1e0ac3082187d
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 05:17:16 2015 +0000
+
+ fix idiom for setting stdio stream orientation to wide
+
+ the old idiom, f->mode |= f->mode+1, was adapted from the idiom for
+ setting byte orientation, f->mode |= f->mode-1, but the adaptation was
+ incorrect. unless the stream was already set byte-oriented, this code
+ incremented f->mode each time it was executed, which would eventually
+ lead to overflow. it could be fixed by changing it to f->mode |= 1,
+ but upcoming changes will require slightly more work at the time of
+ wide orientation, so it makes sense to just call fwide. as an
+ optimization in the single-character functions, fwide is only called
+ if the stream is not already wide-oriented.
+
+commit f8f565df467c13248104223f99abf7f37cef7584
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 04:42:38 2015 +0000
+
+ add printing of null %s arguments as "(null)" in wide printf
+
+ this is undefined, but supported in our implementation of the normal
+ printf, so for consistency the wide variant should support it too.
+
+commit f9e25d813860d53cd1e9b6145cc63375d2fe2529
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 13 04:37:27 2015 +0000
+
+ add %m support to wide printf
+
+commit ec634aad91f57479ef17525e33ed446c780a61f4
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Thu Jun 11 05:01:04 2015 +0000
+
+ add sh asm for vfork
+
+commit c30cbcb0a646b1f13a22c645616dce624465b883
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Wed Jun 10 02:27:40 2015 +0000
+
+ implement arch-generic version of __unmapself
+
+ this can be used to put off writing an asm version of __unmapself for
+ new archs, or as a permanent solution on archs where it's not
+ practical or even possible to run momentarily with no stack.
+
+ the concept here is simple: the caller takes a lock on a global shared
+ stack and uses it to make the munmap and exit syscalls. the only trick
+ is unlocking, which must be done after the thread exits, and this is
+ achieved by using the set_tid_address syscall to have the kernel zero
+ and futex-wake the lock word as part of the exit syscall.
+
+commit 276904c2f6bde3a31a24ebfa201482601d18b4f9
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 9 20:30:35 2015 +0000
+
+ in malloc, refuse to use brk if it grows into stack
+
+ the linux/nommu fdpic ELF loader sets up the brk range to overlap
+ entirely with the main thread's stack (but growing from opposite
+ ends), so that the resulting failure mode for malloc is not to return
+ a null pointer but to start returning pointers to memory that overlaps
+ with the caller's stack. needless to say this is extremely dangerous and
+ makes brk unusable.
+
+ since it's non-trivial to detect execution environments that might be
+ affected by this kernel bug, and since the severity of the bug makes
+ any sort of detection that might yield false-negatives unsafe, we
+ instead check the proximity of the brk to the stack pointer each time
+ the brk is to be expanded. both the main thread's stack (where the
+ real known risk lies) and the calling thread's stack are checked. an
+ arbitrary gap distance of 8 MB is imposed, chosen to be larger than
+ linux default main-thread stack reservation sizes and larger than any
+ reasonable stack configuration on nommu.
+
+ the effectiveness of this patch relies on an assumption that the amount
+ by which the brk is being grown is smaller than the gap limit, which
+ is always true for malloc's use of brk. reliance on this assumption is
+ why the check is being done in malloc-specific code and not in __brk.
+
+commit bd1eaceaa3975bd2a2a34e211cff896affaecadf
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Tue Jun 9 20:09:27 2015 +0000
+
+ fix spurious errors from pwd/grp functions when nscd backend is absent
+
+ for several pwd/grp functions, the only way the caller can distinguish
+ between a successful negative result ("no such user/group") and an
+ internal error is by clearing errno before the call and checking errno
+ afterwards. the nscd backend support code correctly simulated a
+ not-found response on systems where such a backend is not running, but
+ failed to restore errno.
+
+ this commit also fixed an outdated/incorrect comment.
+
+commit 75ce4503950621b11fcc7f1fd1187dbcf3cde312
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 7 20:55:23 2015 +0000
+
+ fix regression in pre-v7 arm on kernels with kuser helper removed
+
+ the arm atomics/TLS runtime selection code is called from
+ __set_thread_area and depends on having libc.auxv and __hwcap
+ available. commit 71f099cb7db821c51d8f39dfac622c61e54d794c moved the
+ first call to __set_thread_area to the top of dynamic linking stage 3,
+ before this data is made available, causing the runtime detection code
+ to always see __hwcap as zero and thereby select the atomics/TLS
+ implementations based on kuser helper.
+
+ upcoming work on superh will use similar runtime detection.
+
+ ideally this early-init code should be cleanly refactored and shared
+ between the dynamic linker and static-linked startup.
+
+commit 32f3c4f70633488550c29a2444f819aafdf345ff
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 7 03:09:16 2015 +0000
+
+ add multiple inclusion guard to locale_impl.h
+
+commit 04b8360adbb6487f61aa0c00e53ec3a90a5a0d29
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sun Jun 7 02:59:49 2015 +0000
+
+ remove redefinition of MB_CUR_MAX in locale_impl.h
+
+ unless/until the byte-based C locale is implemented, defining
+ MB_CUR_MAX to 1 in the C locale is wrong. no internal code currently
+ uses the MB_CUR_MAX macro, but having it defined inconsistently is
+ error-prone. applications get the value from stdlib.h and were
+ unaffected.
+
+commit 16bf466532d7328e971012b0731ad493b017ad29
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:53:02 2015 +0000
+
+ make static C and C.UTF-8 locales available outside of newlocale
+
+commit 312eea2ea4f4363fb01b73660c08bfcf43dd3bb4
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:20:30 2015 +0000
+
+ remove another invalid skip of locking in ungetwc
+
+commit 3d7e32d28dc9962e9efc1c317c5b44b5b2df3008
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:16:22 2015 +0000
+
+ add macro version of ctype.h isascii function
+
+ presumably internal code (ungetwc and fputwc) was written assuming a
+ macro implementation existed; otherwise use of isascii is just a
+ pessimization.
+
+commit 7e816a6487932cbb3cb71d94b609e50e81f4e5bf
+Author: Rich Felker <dalias@aerifal.cx>
+Date: Sat Jun 6 18:11:17 2015 +0000
+
+ remove invalid skip of locking in ungetwc
+
+ aside from being invalid, the early check only optimized the error
+ case, and likely pessimized the common case by separating the
+ two branches on isascii(c) at opposite ends of the function.
+
+commit 63f4b9f18f3674124d8bcb119739fec85e6da005
+Author: Timo Teräs <timo.teras@iki.fi>
+Date: Fri Jun 5 10:39:42 2015 +0300
+
+ fix uselocale((locale_t)0) not to modify locale
+
+ commit 68630b55c0c7219fe9df70dc28ffbf9efc8021d8 caused the new locale to
+ be assigned unconditionally, resulting in crashes later on.
+
+--- a/arch/arm/syscall_arch.h
++++ b/arch/arm/syscall_arch.h
+@@ -72,3 +72,7 @@ static inline long __syscall6(long n, lo
+ register long r5 __asm__("r5") = f;
+ __asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
+ }
++
++#define VDSO_USEFUL
++#define VDSO_CGT_SYM "__vdso_clock_gettime"
++#define VDSO_CGT_VER "LINUX_2.6"
+--- a/include/ctype.h
++++ b/include/ctype.h
+@@ -64,6 +64,7 @@ int isascii(int);
+ int toascii(int);
+ #define _tolower(a) ((a)|0x20)
+ #define _toupper(a) ((a)&0x5f)
++#define isascii(a) (0 ? isascii(a) : (unsigned)(a) < 128)
+
+ #endif
+
+--- a/include/stdlib.h
++++ b/include/stdlib.h
+@@ -76,7 +76,8 @@ size_t wcstombs (char *__restrict, const
+ #define EXIT_FAILURE 1
+ #define EXIT_SUCCESS 0
+
+-#define MB_CUR_MAX ((size_t)+4)
++size_t __ctype_get_mb_cur_max(void);
++#define MB_CUR_MAX (__ctype_get_mb_cur_max())
+
+ #define RAND_MAX (0x7fffffff)
+
+--- a/src/ctype/__ctype_get_mb_cur_max.c
++++ b/src/ctype/__ctype_get_mb_cur_max.c
+@@ -1,6 +1,7 @@
+-#include <stddef.h>
++#include <stdlib.h>
++#include "locale_impl.h"
+
+ size_t __ctype_get_mb_cur_max()
+ {
+- return 4;
++ return MB_CUR_MAX;
+ }
+--- a/src/ctype/isascii.c
++++ b/src/ctype/isascii.c
+@@ -1,4 +1,5 @@
+ #include <ctype.h>
++#undef isascii
+
+ int isascii(int c)
+ {
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -17,8 +17,6 @@ struct __libc {
+ int secure;
+ volatile int threads_minus_1;
+ size_t *auxv;
+- FILE *ofl_head;
+- volatile int ofl_lock[2];
+ size_t tls_size;
+ size_t page_size;
+ struct __locale_struct global_locale;
+--- a/src/internal/locale_impl.h
++++ b/src/internal/locale_impl.h
+@@ -1,3 +1,6 @@
++#ifndef _LOCALE_IMPL_H
++#define _LOCALE_IMPL_H
++
+ #include <locale.h>
+ #include <stdlib.h>
+ #include "libc.h"
+@@ -12,6 +15,10 @@ struct __locale_map {
+ const struct __locale_map *next;
+ };
+
++extern const struct __locale_map __c_dot_utf8;
++extern const struct __locale_struct __c_locale;
++extern const struct __locale_struct __c_dot_utf8_locale;
++
+ const struct __locale_map *__get_locale(int, const char *);
+ const char *__mo_lookup(const void *, size_t, const char *);
+ const char *__lctrans(const char *, const struct __locale_map *);
+@@ -20,9 +27,14 @@ const char *__lctrans_cur(const char *);
+ #define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)])
+ #define LCTRANS_CUR(msg) __lctrans_cur(msg)
+
++#define C_LOCALE ((locale_t)&__c_locale)
++#define UTF8_LOCALE ((locale_t)&__c_dot_utf8_locale)
++
+ #define CURRENT_LOCALE (__pthread_self()->locale)
+
+ #define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE])
+
+ #undef MB_CUR_MAX
+ #define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
++
++#endif
+--- a/src/internal/stdio_impl.h
++++ b/src/internal/stdio_impl.h
+@@ -47,6 +47,7 @@ struct _IO_FILE {
+ unsigned char *shend;
+ off_t shlim, shcnt;
+ FILE *prev_locked, *next_locked;
++ struct __locale_struct *locale;
+ };
+
+ size_t __stdio_read(FILE *, unsigned char *, size_t);
+@@ -75,8 +76,9 @@ int __putc_unlocked(int, FILE *);
+ FILE *__fdopen(int, const char *);
+ int __fmodeflags(const char *);
+
+-#define OFLLOCK() LOCK(libc.ofl_lock)
+-#define OFLUNLOCK() UNLOCK(libc.ofl_lock)
++FILE *__ofl_add(FILE *f);
++FILE **__ofl_lock(void);
++void __ofl_unlock(void);
+
+ #define feof(f) ((f)->flags & F_EOF)
+ #define ferror(f) ((f)->flags & F_ERR)
+--- a/src/ldso/dynlink.c
++++ b/src/ldso/dynlink.c
+@@ -1192,6 +1192,17 @@ _Noreturn void __dls3(size_t *sp)
+ char **argv_orig = argv;
+ char **envp = argv+argc+1;
+
++ /* Find aux vector just past environ[] and use it to initialize
++ * global data that may be needed before we can make syscalls. */
++ __environ = envp;
++ for (i=argc+1; argv[i]; i++);
++ libc.auxv = auxv = (void *)(argv+i+1);
++ decode_vec(auxv, aux, AUX_CNT);
++ __hwcap = aux[AT_HWCAP];
++ libc.page_size = aux[AT_PAGESZ];
++ libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
++ || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
++
+ /* Setup early thread pointer in builtin_tls for ldso/libc itself to
+ * use during dynamic linking. If possible it will also serve as the
+ * thread pointer at runtime. */
+@@ -1200,25 +1211,11 @@ _Noreturn void __dls3(size_t *sp)
+ a_crash();
+ }
+
+- /* Find aux vector just past environ[] */
+- for (i=argc+1; argv[i]; i++)
+- if (!memcmp(argv[i], "LD_LIBRARY_PATH=", 16))
+- env_path = argv[i]+16;
+- else if (!memcmp(argv[i], "LD_PRELOAD=", 11))
+- env_preload = argv[i]+11;
+- auxv = (void *)(argv+i+1);
+-
+- decode_vec(auxv, aux, AUX_CNT);
+-
+ /* Only trust user/env if kernel says we're not suid/sgid */
+- if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
+- || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
+- env_path = 0;
+- env_preload = 0;
+- libc.secure = 1;
++ if (!libc.secure) {
++ env_path = getenv("LD_LIBRARY_PATH");
++ env_preload = getenv("LD_PRELOAD");
+ }
+- libc.page_size = aux[AT_PAGESZ];
+- libc.auxv = auxv;
+
+ /* If the main program was already loaded by the kernel,
+ * AT_PHDR will point to some location other than the dynamic
+--- /dev/null
++++ b/src/locale/c_locale.c
+@@ -0,0 +1,15 @@
++#include "locale_impl.h"
++#include <stdint.h>
++
++static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
++
++const struct __locale_map __c_dot_utf8 = {
++ .map = empty_mo,
++ .map_size = sizeof empty_mo,
++ .name = "C.UTF-8"
++};
++
++const struct __locale_struct __c_locale = { 0 };
++const struct __locale_struct __c_dot_utf8_locale = {
++ .cat[LC_CTYPE] = &__c_dot_utf8
++};
+--- a/src/locale/iconv.c
++++ b/src/locale/iconv.c
+@@ -5,6 +5,7 @@
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <stdint.h>
++#include "locale_impl.h"
+
+ #define UTF_32BE 0300
+ #define UTF_16LE 0301
+@@ -165,9 +166,12 @@ size_t iconv(iconv_t cd0, char **restric
+ int err;
+ unsigned char type = map[-1];
+ unsigned char totype = tomap[-1];
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+ if (!in || !*in || !*inb) return 0;
+
++ *ploc = UTF8_LOCALE;
++
+ for (; *inb; *in+=l, *inb-=l) {
+ c = *(unsigned char *)*in;
+ l = 1;
+@@ -431,6 +435,7 @@ size_t iconv(iconv_t cd0, char **restric
+ break;
+ }
+ }
++ *ploc = loc;
+ return x;
+ ilseq:
+ err = EILSEQ;
+@@ -445,5 +450,6 @@ starved:
+ x = -1;
+ end:
+ errno = err;
++ *ploc = loc;
+ return x;
+ }
+--- a/src/locale/langinfo.c
++++ b/src/locale/langinfo.c
+@@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, loca
+ int idx = item & 65535;
+ const char *str;
+
+- if (item == CODESET) return "UTF-8";
++ if (item == CODESET)
++ return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8";
+
+ switch (cat) {
+ case LC_NUMERIC:
+--- a/src/locale/locale_map.c
++++ b/src/locale/locale_map.c
+@@ -24,14 +24,6 @@ static const char envvars[][12] = {
+ "LC_MESSAGES",
+ };
+
+-static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
+-
+-const struct __locale_map __c_dot_utf8 = {
+- .map = empty_mo,
+- .map_size = sizeof empty_mo,
+- .name = "C.UTF-8"
+-};
+-
+ const struct __locale_map *__get_locale(int cat, const char *val)
+ {
+ static int lock[2];
+@@ -107,8 +99,8 @@ const struct __locale_map *__get_locale(
+ * sake of being able to do message translations at the
+ * application level. */
+ if (!new && (new = malloc(sizeof *new))) {
+- new->map = empty_mo;
+- new->map_size = sizeof empty_mo;
++ new->map = __c_dot_utf8.map;
++ new->map_size = __c_dot_utf8.map_size;
+ memcpy(new->name, val, n);
+ new->name[n] = 0;
+ new->next = loc_head;
+--- a/src/locale/newlocale.c
++++ b/src/locale/newlocale.c
+@@ -3,16 +3,9 @@
+ #include "locale_impl.h"
+ #include "libc.h"
+
+-extern const struct __locale_map __c_dot_utf8;
+-
+-static const struct __locale_struct c_locale = { 0 };
+-static const struct __locale_struct c_dot_utf8_locale = {
+- .cat[LC_CTYPE] = &__c_dot_utf8
+-};
+-
+ int __loc_is_allocated(locale_t loc)
+ {
+- return loc && loc != &c_locale && loc != &c_dot_utf8_locale;
++ return loc && loc != C_LOCALE && loc != UTF8_LOCALE;
+ }
+
+ locale_t __newlocale(int mask, const char *name, locale_t loc)
+@@ -44,9 +37,9 @@ locale_t __newlocale(int mask, const cha
+ }
+
+ if (!j)
+- return (locale_t)&c_locale;
+- if (j==1 && tmp.cat[LC_CTYPE]==c_dot_utf8_locale.cat[LC_CTYPE])
+- return (locale_t)&c_dot_utf8_locale;
++ return C_LOCALE;
++ if (j==1 && tmp.cat[LC_CTYPE]==&__c_dot_utf8)
++ return UTF8_LOCALE;
+
+ if ((loc = malloc(sizeof *loc))) *loc = tmp;
+
+--- a/src/locale/uselocale.c
++++ b/src/locale/uselocale.c
+@@ -8,9 +8,7 @@ locale_t __uselocale(locale_t new)
+ locale_t old = self->locale;
+ locale_t global = &libc.global_locale;
+
+- if (new == LC_GLOBAL_LOCALE) new = global;
+-
+- self->locale = new;
++ if (new) self->locale = new == LC_GLOBAL_LOCALE ? global : new;
+
+ return old == global ? LC_GLOBAL_LOCALE : old;
+ }
+--- /dev/null
++++ b/src/malloc/expand_heap.c
+@@ -0,0 +1,72 @@
++#include <limits.h>
++#include <stdint.h>
++#include <errno.h>
++#include <sys/mman.h>
++#include "libc.h"
++#include "syscall.h"
++
++/* This function returns true if the interval [old,new]
++ * intersects the 'len'-sized interval below libc.auxv
++ * (interpreted as the main-thread stack) or below &b
++ * (the current stack). It is used to defend against
++ * buggy brk implementations that can cross the stack. */
++
++static int traverses_stack_p(uintptr_t old, uintptr_t new)
++{
++ const uintptr_t len = 8<<20;
++ uintptr_t a, b;
++
++ b = (uintptr_t)libc.auxv;
++ a = b > len ? b-len : 0;
++ if (new>a && old<b) return 1;
++
++ b = (uintptr_t)&b;
++ a = b > len ? b-len : 0;
++ if (new>a && old<b) return 1;
++
++ return 0;
++}
++
++void *__mmap(void *, size_t, int, int, int, off_t);
++
++/* Expand the heap in-place if brk can be used, or otherwise via mmap,
++ * using an exponential lower bound on growth by mmap to make
++ * fragmentation asymptotically irrelevant. The size argument is both
++ * an input and an output, since the caller needs to know the size
++ * allocated, which will be larger than requested due to page alignment
++ * and mmap minimum size rules. The caller is responsible for locking
++ * to prevent concurrent calls. */
++
++void *__expand_heap(size_t *pn)
++{
++ static uintptr_t brk;
++ static unsigned mmap_step;
++ size_t n = *pn;
++
++ if (n > SIZE_MAX/2 - PAGE_SIZE) {
++ errno = ENOMEM;
++ return 0;
++ }
++ n += -n & PAGE_SIZE-1;
++
++ if (!brk) {
++ brk = __syscall(SYS_brk, 0);
++ brk += -brk & PAGE_SIZE-1;
++ }
++
++ if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
++ && __syscall(SYS_brk, brk+n)==brk+n) {
++ *pn = n;
++ brk += n;
++ return (void *)(brk-n);
++ }
++
++ size_t min = (size_t)PAGE_SIZE << mmap_step/2;
++ if (n < min) n = min;
++ void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
++ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
++ if (area == MAP_FAILED) return 0;
++ *pn = n;
++ mmap_step++;
++ return area;
++}
+--- a/src/malloc/lite_malloc.c
++++ b/src/malloc/lite_malloc.c
+@@ -4,43 +4,46 @@
+ #include <errno.h>
+ #include "libc.h"
+
+-uintptr_t __brk(uintptr_t);
+-
+ #define ALIGN 16
+
++void *__expand_heap(size_t *);
++
+ void *__simple_malloc(size_t n)
+ {
+- static uintptr_t cur, brk;
+- uintptr_t base, new;
++ static char *cur, *end;
+ static volatile int lock[2];
+- size_t align=1;
++ size_t align=1, pad;
++ void *p;
+
+ if (!n) n++;
+- if (n > SIZE_MAX/2) goto toobig;
+-
+ while (align<n && align<ALIGN)
+ align += align;
+- n = n + align - 1 & -align;
+
+ LOCK(lock);
+- if (!cur) cur = brk = __brk(0)+16;
+- base = cur + align-1 & -align;
+- if (n > SIZE_MAX - PAGE_SIZE - base) goto fail;
+- if (base+n > brk) {
+- new = base+n + PAGE_SIZE-1 & -PAGE_SIZE;
+- if (__brk(new) != new) goto fail;
+- brk = new;
+- }
+- cur = base+n;
+- UNLOCK(lock);
+
+- return (void *)base;
++ pad = -(uintptr_t)cur & align-1;
++
++ if (n <= SIZE_MAX/2 + ALIGN) n += pad;
++
++ if (n > end-cur) {
++ size_t m = n;
++ char *new = __expand_heap(&m);
++ if (!new) {
++ UNLOCK(lock);
++ return 0;
++ }
++ if (new != end) {
++ cur = new;
++ n -= pad;
++ pad = 0;
++ }
++ end = new + m;
++ }
+
+-fail:
++ p = cur + pad;
++ cur += n;
+ UNLOCK(lock);
+-toobig:
+- errno = ENOMEM;
+- return 0;
++ return p;
+ }
+
+ weak_alias(__simple_malloc, malloc);
+--- a/src/malloc/malloc.c
++++ b/src/malloc/malloc.c
+@@ -13,7 +13,6 @@
+ #define inline inline __attribute__((always_inline))
+ #endif
+
+-uintptr_t __brk(uintptr_t);
+ void *__mmap(void *, size_t, int, int, int, off_t);
+ int __munmap(void *, size_t);
+ void *__mremap(void *, size_t, size_t, int, ...);
+@@ -31,13 +30,9 @@ struct bin {
+ };
+
+ static struct {
+- uintptr_t brk;
+- size_t *heap;
+ volatile uint64_t binmap;
+ struct bin bins[64];
+- volatile int brk_lock[2];
+ volatile int free_lock[2];
+- unsigned mmap_step;
+ } mal;
+
+
+@@ -152,69 +147,52 @@ void __dump_heap(int x)
+ }
+ #endif
+
++void *__expand_heap(size_t *);
++
+ static struct chunk *expand_heap(size_t n)
+ {
+- static int init;
++ static int heap_lock[2];
++ static void *end;
++ void *p;
+ struct chunk *w;
+- uintptr_t new;
+-
+- lock(mal.brk_lock);
+
+- if (!init) {
+- mal.brk = __brk(0);
+-#ifdef SHARED
+- mal.brk = mal.brk + PAGE_SIZE-1 & -PAGE_SIZE;
+-#endif
+- mal.brk = mal.brk + 2*SIZE_ALIGN-1 & -SIZE_ALIGN;
+- mal.heap = (void *)mal.brk;
+- init = 1;
++ /* The argument n already accounts for the caller's chunk
++ * overhead needs, but if the heap can't be extended in-place,
++ * we need room for an extra zero-sized sentinel chunk. */
++ n += SIZE_ALIGN;
++
++ lock(heap_lock);
++
++ p = __expand_heap(&n);
++ if (!p) {
++ unlock(heap_lock);
++ return 0;
+ }
+
+- if (n > SIZE_MAX - mal.brk - 2*PAGE_SIZE) goto fail;
+- new = mal.brk + n + SIZE_ALIGN + PAGE_SIZE - 1 & -PAGE_SIZE;
+- n = new - mal.brk;
+-
+- if (__brk(new) != new) {
+- size_t min = (size_t)PAGE_SIZE << mal.mmap_step/2;
+- n += -n & PAGE_SIZE-1;
+- if (n < min) n = min;
+- void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
+- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+- if (area == MAP_FAILED) goto fail;
+-
+- mal.mmap_step++;
+- area = (char *)area + SIZE_ALIGN - OVERHEAD;
+- w = area;
++ /* If not just expanding existing space, we need to make a
++ * new sentinel chunk below the allocated space. */
++ if (p != end) {
++ /* Valid/safe because of the prologue increment. */
+ n -= SIZE_ALIGN;
++ p = (char *)p + SIZE_ALIGN;
++ w = MEM_TO_CHUNK(p);
+ w->psize = 0 | C_INUSE;
+- w->csize = n | C_INUSE;
+- w = NEXT_CHUNK(w);
+- w->psize = n | C_INUSE;
+- w->csize = 0 | C_INUSE;
+-
+- unlock(mal.brk_lock);
+-
+- return area;
+ }
+
+- w = MEM_TO_CHUNK(mal.heap);
+- w->psize = 0 | C_INUSE;
+-
+- w = MEM_TO_CHUNK(new);
++ /* Record new heap end and fill in footer. */
++ end = (char *)p + n;
++ w = MEM_TO_CHUNK(end);
+ w->psize = n | C_INUSE;
+ w->csize = 0 | C_INUSE;
+
+- w = MEM_TO_CHUNK(mal.brk);
++ /* Fill in header, which may be new or may be replacing a
++ * zero-size sentinel header at the old end-of-heap. */
++ w = MEM_TO_CHUNK(p);
+ w->csize = n | C_INUSE;
+- mal.brk = new;
+-
+- unlock(mal.brk_lock);
++
++ unlock(heap_lock);
+
+ return w;
+-fail:
+- unlock(mal.brk_lock);
+- errno = ENOMEM;
+- return 0;
+ }
+
+ static int adjust_size(size_t *n)
+--- a/src/multibyte/btowc.c
++++ b/src/multibyte/btowc.c
+@@ -1,7 +1,10 @@
+ #include <stdio.h>
+ #include <wchar.h>
++#include <stdlib.h>
++#include "internal.h"
+
+ wint_t btowc(int c)
+ {
+- return c<128U ? c : EOF;
++ int b = (unsigned char)c;
++ return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF;
+ }
+--- a/src/multibyte/internal.h
++++ b/src/multibyte/internal.h
+@@ -23,3 +23,10 @@ extern const uint32_t bittab[];
+
+ #define SA 0xc2u
+ #define SB 0xf4u
++
++/* Arbitrary encoding for representing code units instead of characters. */
++#define CODEUNIT(c) (0xdfff & (signed char)(c))
++#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80)
++
++/* Get inline definition of MB_CUR_MAX. */
++#include "locale_impl.h"
+--- a/src/multibyte/mbrtowc.c
++++ b/src/multibyte/mbrtowc.c
+@@ -4,6 +4,7 @@
+ * unnecessary.
+ */
+
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
+ #include "internal.h"
+@@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, con
+ if (!n) return -2;
+ if (!c) {
+ if (*s < 0x80) return !!(*wc = *s);
++ if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
+ if (*s-SA > SB-SA) goto ilseq;
+ c = bittab[*s++-SA]; n--;
+ }
+--- a/src/multibyte/mbsrtowcs.c
++++ b/src/multibyte/mbsrtowcs.c
+@@ -7,6 +7,8 @@
+ #include <stdint.h>
+ #include <wchar.h>
+ #include <errno.h>
++#include <string.h>
++#include <stdlib.h>
+ #include "internal.h"
+
+ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
+@@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, c
+ }
+ }
+
++ if (MB_CUR_MAX==1) {
++ if (!ws) return strlen((const char *)s);
++ for (;;) {
++ if (!wn) {
++ *src = (const void *)s;
++ return wn0;
++ }
++ if (!*s) break;
++ c = *s++;
++ *ws++ = CODEUNIT(c);
++ wn--;
++ }
++ *ws = 0;
++ *src = 0;
++ return wn0-wn;
++ }
++
+ if (!ws) for (;;) {
+ if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
+ while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+--- a/src/multibyte/mbtowc.c
++++ b/src/multibyte/mbtowc.c
+@@ -4,6 +4,7 @@
+ * unnecessary.
+ */
+
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
+ #include "internal.h"
+@@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const c
+ if (!wc) wc = &dummy;
+
+ if (*s < 0x80) return !!(*wc = *s);
++ if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
+ if (*s-SA > SB-SA) goto ilseq;
+ c = bittab[*s++-SA];
+
+--- a/src/multibyte/wcrtomb.c
++++ b/src/multibyte/wcrtomb.c
+@@ -4,8 +4,10 @@
+ * unnecessary.
+ */
+
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
++#include "internal.h"
+
+ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
+ {
+@@ -13,6 +15,13 @@ size_t wcrtomb(char *restrict s, wchar_t
+ if ((unsigned)wc < 0x80) {
+ *s = wc;
+ return 1;
++ } else if (MB_CUR_MAX == 1) {
++ if (!IS_CODEUNIT(wc)) {
++ errno = EILSEQ;
++ return -1;
++ }
++ *s = wc;
++ return 1;
+ } else if ((unsigned)wc < 0x800) {
+ *s++ = 0xc0 | (wc>>6);
+ *s = 0x80 | (wc&0x3f);
+--- a/src/multibyte/wctob.c
++++ b/src/multibyte/wctob.c
+@@ -1,8 +1,10 @@
+-#include <stdio.h>
+ #include <wchar.h>
++#include <stdlib.h>
++#include "internal.h"
+
+ int wctob(wint_t c)
+ {
+ if (c < 128U) return c;
++ if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c;
+ return EOF;
+ }
+--- a/src/passwd/nscd_query.c
++++ b/src/passwd/nscd_query.c
+@@ -32,6 +32,7 @@ FILE *__nscd_query(int32_t req, const ch
+ },
+ .msg_iovlen = 2
+ };
++ int errno_save = errno;
+
+ *swap = 0;
+ retry:
+@@ -50,11 +51,14 @@ retry:
+ return f;
+
+ if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
+- /* If there isn't a running nscd we return -1 to indicate that
+- * that is precisely what happened
+- */
+- if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT)
++ /* If there isn't a running nscd we simulate a "not found"
++ * result and the caller is responsible for calling
++ * fclose on the (unconnected) socket. The value of
++ * errno must be left unchanged in this case. */
++ if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT) {
++ errno = errno_save;
+ return f;
++ }
+ goto error;
+ }
+
+--- /dev/null
++++ b/src/process/sh/vfork.s
+@@ -0,0 +1,23 @@
++.global __vfork /* entry shared by __vfork and the weak alias vfork: trap with r3=190, then branch to __syscall_ret */
++.weak vfork
++.type __vfork,@function
++.type vfork,@function
++__vfork:
++vfork:
++ mov #95, r3 /* r3 = 95 */
++ add r3, r3 /* r3 = 190; presumably the vfork syscall number, doubled because it does not fit mov's immediate - confirm */
++
++ trapa #16 /* trap instruction; presumably the kernel syscall entry - confirm */
++ or r0, r0 /* r0 | r0 leaves r0 unchanged: five padding no-ops follow the trap */
++ or r0, r0
++ or r0, r0
++ or r0, r0
++ or r0, r0
++
++ mov r0, r4 /* copy the trap result into r4, presumably the first argument register for __syscall_ret */
++ mov.l 1f, r0 /* load the relative offset stored at label 1 */
++2: braf r0 /* PC-relative branch (no link) to __syscall_ret */
++ nop /* NOTE(review): presumably fills the branch delay slot */
++ .align 2
++ .hidden __syscall_ret
++1: .long __syscall_ret@PLT-(2b+4-.) /* offset of __syscall_ret measured from 2b+4, the base braf adds to */
+--- a/src/regex/fnmatch.c
++++ b/src/regex/fnmatch.c
+@@ -18,6 +18,7 @@
+ #include <stdlib.h>
+ #include <wchar.h>
+ #include <wctype.h>
++#include "locale_impl.h"
+
+ #define END 0
+ #define UNMATCHABLE -2
+@@ -229,7 +230,7 @@ static int fnmatch_internal(const char *
+ * On illegal sequences we may get it wrong, but in that case
+ * we necessarily have a matching failure anyway. */
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
+- if (s[-1] < 128U) s--;
++ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ }
+ if (tailcnt) return FNM_NOMATCH;
+--- a/src/stdio/__fdopen.c
++++ b/src/stdio/__fdopen.c
+@@ -54,13 +54,7 @@ FILE *__fdopen(int fd, const char *mode)
+ if (!libc.threaded) f->lock = -1;
+
+ /* Add new FILE to open file list */
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+
+ weak_alias(__fdopen, fdopen);
+--- a/src/stdio/__stdio_exit.c
++++ b/src/stdio/__stdio_exit.c
+@@ -16,8 +16,7 @@ static void close_file(FILE *f)
+ void __stdio_exit(void)
+ {
+ FILE *f;
+- OFLLOCK();
+- for (f=libc.ofl_head; f; f=f->next) close_file(f);
++ for (f=*__ofl_lock(); f; f=f->next) close_file(f);
+ close_file(__stdin_used);
+ close_file(__stdout_used);
+ }
+--- a/src/stdio/__stdio_read.c
++++ b/src/stdio/__stdio_read.c
+@@ -1,12 +1,5 @@
+ #include "stdio_impl.h"
+ #include <sys/uio.h>
+-#include <pthread.h>
+-
+-static void cleanup(void *p)
+-{
+- FILE *f = p;
+- if (!f->lockcount) __unlockfile(f);
+-}
+
+ size_t __stdio_read(FILE *f, unsigned char *buf, size_t len)
+ {
+@@ -16,9 +9,7 @@ size_t __stdio_read(FILE *f, unsigned ch
+ };
+ ssize_t cnt;
+
+- pthread_cleanup_push(cleanup, f);
+- cnt = syscall_cp(SYS_readv, f->fd, iov, 2);
+- pthread_cleanup_pop(0);
++ cnt = syscall(SYS_readv, f->fd, iov, 2);
+ if (cnt <= 0) {
+ f->flags |= F_EOF ^ ((F_ERR^F_EOF) & cnt);
+ return cnt;
+--- a/src/stdio/__stdio_write.c
++++ b/src/stdio/__stdio_write.c
+@@ -1,12 +1,5 @@
+ #include "stdio_impl.h"
+ #include <sys/uio.h>
+-#include <pthread.h>
+-
+-static void cleanup(void *p)
+-{
+- FILE *f = p;
+- if (!f->lockcount) __unlockfile(f);
+-}
+
+ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
+ {
+@@ -19,9 +12,7 @@ size_t __stdio_write(FILE *f, const unsi
+ int iovcnt = 2;
+ ssize_t cnt;
+ for (;;) {
+- pthread_cleanup_push(cleanup, f);
+- cnt = syscall_cp(SYS_writev, f->fd, iov, iovcnt);
+- pthread_cleanup_pop(0);
++ cnt = syscall(SYS_writev, f->fd, iov, iovcnt);
+ if (cnt == rem) {
+ f->wend = f->buf + f->buf_size;
+ f->wpos = f->wbase = f->buf;
+@@ -34,11 +25,8 @@ size_t __stdio_write(FILE *f, const unsi
+ }
+ rem -= cnt;
+ if (cnt > iov[0].iov_len) {
+- f->wpos = f->wbase = f->buf;
+ cnt -= iov[0].iov_len;
+ iov++; iovcnt--;
+- } else if (iovcnt == 2) {
+- f->wbase += cnt;
+ }
+ iov[0].iov_base = (char *)iov[0].iov_base + cnt;
+ iov[0].iov_len -= cnt;
+--- a/src/stdio/fclose.c
++++ b/src/stdio/fclose.c
+@@ -14,11 +14,11 @@ int fclose(FILE *f)
+ __unlist_locked_file(f);
+
+ if (!(perm = f->flags & F_PERM)) {
+- OFLLOCK();
++ FILE **head = __ofl_lock();
+ if (f->prev) f->prev->next = f->next;
+ if (f->next) f->next->prev = f->prev;
+- if (libc.ofl_head == f) libc.ofl_head = f->next;
+- OFLUNLOCK();
++ if (*head == f) *head = f->next;
++ __ofl_unlock();
+ }
+
+ r = fflush(f);
+--- a/src/stdio/fflush.c
++++ b/src/stdio/fflush.c
+@@ -35,13 +35,12 @@ int fflush(FILE *f)
+
+ r = __stdout_used ? fflush(__stdout_used) : 0;
+
+- OFLLOCK();
+- for (f=libc.ofl_head; f; f=f->next) {
++ for (f=*__ofl_lock(); f; f=f->next) {
+ FLOCK(f);
+ if (f->wpos > f->wbase) r |= __fflush_unlocked(f);
+ FUNLOCK(f);
+ }
+- OFLUNLOCK();
++ __ofl_unlock();
+
+ return r;
+ }
+--- a/src/stdio/fgetwc.c
++++ b/src/stdio/fgetwc.c
+@@ -1,8 +1,9 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <errno.h>
+
+-wint_t __fgetwc_unlocked(FILE *f)
++static wint_t __fgetwc_unlocked_internal(FILE *f)
+ {
+ mbstate_t st = { 0 };
+ wchar_t wc;
+@@ -10,8 +11,6 @@ wint_t __fgetwc_unlocked(FILE *f)
+ unsigned char b;
+ size_t l;
+
+- f->mode |= f->mode+1;
+-
+ /* Convert character from buffer if possible */
+ if (f->rpos < f->rend) {
+ l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st);
+@@ -39,6 +38,16 @@ wint_t __fgetwc_unlocked(FILE *f)
+ return wc;
+ }
+
++wint_t __fgetwc_unlocked(FILE *f) /* read one wide character from f, decoding with the locale stored in the stream */
++{
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc; /* remember the caller's current locale */
++ if (f->mode <= 0) fwide(f, 1); /* not wide-oriented yet: let fwide() bind a locale (and the orientation if still unset) */
++ *ploc = f->locale; /* temporarily switch to the stream's locale for the conversion */
++ wint_t wc = __fgetwc_unlocked_internal(f); /* wint_t rather than wchar_t so WEOF is carried through without a narrowing round-trip */
++ *ploc = loc; /* restore the caller's locale */
++ return wc;
++}
++
+ wint_t fgetwc(FILE *f)
+ {
+ wint_t c;
+--- a/src/stdio/fmemopen.c
++++ b/src/stdio/fmemopen.c
+@@ -110,11 +110,5 @@ FILE *fmemopen(void *restrict buf, size_
+
+ if (!libc.threaded) f->lock = -1;
+
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+--- a/src/stdio/fopen.c
++++ b/src/stdio/fopen.c
+@@ -18,7 +18,7 @@ FILE *fopen(const char *restrict filenam
+ /* Compute the flags to pass to open() */
+ flags = __fmodeflags(mode);
+
+- fd = sys_open_cp(filename, flags, 0666);
++ fd = sys_open(filename, flags, 0666);
+ if (fd < 0) return 0;
+ if (flags & O_CLOEXEC)
+ __syscall(SYS_fcntl, fd, F_SETFD, FD_CLOEXEC);
+--- a/src/stdio/fputwc.c
++++ b/src/stdio/fputwc.c
+@@ -1,4 +1,5 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <limits.h>
+ #include <ctype.h>
+@@ -7,8 +8,10 @@ wint_t __fputwc_unlocked(wchar_t c, FILE
+ {
+ char mbc[MB_LEN_MAX];
+ int l;
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+- f->mode |= f->mode+1;
++ if (f->mode <= 0) fwide(f, 1);
++ *ploc = f->locale;
+
+ if (isascii(c)) {
+ c = putc_unlocked(c, f);
+@@ -20,6 +23,8 @@ wint_t __fputwc_unlocked(wchar_t c, FILE
+ l = wctomb(mbc, c);
+ if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF;
+ }
++ if (c==WEOF) f->flags |= F_ERR;
++ *ploc = loc;
+ return c;
+ }
+
+--- a/src/stdio/fputws.c
++++ b/src/stdio/fputws.c
+@@ -1,23 +1,28 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+
+ int fputws(const wchar_t *restrict ws, FILE *restrict f)
+ {
+ unsigned char buf[BUFSIZ];
+ size_t l=0;
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+ FLOCK(f);
+
+- f->mode |= f->mode+1;
++ fwide(f, 1);
++ *ploc = f->locale;
+
+ while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1)
+ if (__fwritex(buf, l, f) < l) {
+ FUNLOCK(f);
++ *ploc = loc;
+ return -1;
+ }
+
+ FUNLOCK(f);
+
++ *ploc = loc;
+ return l; /* 0 or -1 */
+ }
+
+--- a/src/stdio/fwide.c
++++ b/src/stdio/fwide.c
+@@ -1,13 +1,14 @@
+-#include <wchar.h>
+ #include "stdio_impl.h"
+-
+-#define SH (8*sizeof(int)-1)
+-#define NORMALIZE(x) ((x)>>SH | -((-(x))>>SH))
++#include "locale_impl.h"
+
+ int fwide(FILE *f, int mode)
+ {
+ FLOCK(f);
+- if (!f->mode) f->mode = NORMALIZE(mode);
++ if (mode) {
++ if (!f->locale) f->locale = MB_CUR_MAX==1
++ ? C_LOCALE : UTF8_LOCALE;
++ if (!f->mode) f->mode = mode>0 ? 1 : -1;
++ }
+ mode = f->mode;
+ FUNLOCK(f);
+ return mode;
+--- /dev/null
++++ b/src/stdio/ofl.c
+@@ -0,0 +1,16 @@
++#include "stdio_impl.h"
++#include "libc.h"
++
++static FILE *ofl_head; /* head of stdio's open file list */
++static volatile int ofl_lock[2]; /* lock object handed to LOCK/UNLOCK to guard ofl_head */
++
++FILE **__ofl_lock(void) /* take the list lock and return the address of the head pointer */
++{
++ LOCK(ofl_lock);
++ return &ofl_head; /* caller reads or rewrites *head, then calls __ofl_unlock() */
++}
++
++void __ofl_unlock(void) /* release the lock taken by __ofl_lock() */
++{
++ UNLOCK(ofl_lock);
++}
+--- /dev/null
++++ b/src/stdio/ofl_add.c
+@@ -0,0 +1,11 @@
++#include "stdio_impl.h"
++
++FILE *__ofl_add(FILE *f) /* push f onto the front of the open file list and return f */
++{
++ FILE **list = __ofl_lock(), *old_first = *list; /* list is locked from here until __ofl_unlock() */
++ f->next = old_first;
++ if (old_first) old_first->prev = f;
++ *list = f;
++ __ofl_unlock();
++ return f;
++}
+--- a/src/stdio/open_memstream.c
++++ b/src/stdio/open_memstream.c
+@@ -79,11 +79,5 @@ FILE *open_memstream(char **bufp, size_t
+
+ if (!libc.threaded) f->lock = -1;
+
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+--- a/src/stdio/open_wmemstream.c
++++ b/src/stdio/open_wmemstream.c
+@@ -81,11 +81,5 @@ FILE *open_wmemstream(wchar_t **bufp, si
+
+ if (!libc.threaded) f->lock = -1;
+
+- OFLLOCK();
+- f->next = libc.ofl_head;
+- if (libc.ofl_head) libc.ofl_head->prev = f;
+- libc.ofl_head = f;
+- OFLUNLOCK();
+-
+- return f;
++ return __ofl_add(f);
+ }
+--- a/src/stdio/ungetwc.c
++++ b/src/stdio/ungetwc.c
+@@ -1,4 +1,5 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <limits.h>
+ #include <ctype.h>
+@@ -8,21 +9,19 @@ wint_t ungetwc(wint_t c, FILE *f)
+ {
+ unsigned char mbc[MB_LEN_MAX];
+ int l=1;
+-
+- if (c == WEOF) return c;
+-
+- /* Try conversion early so we can fail without locking if invalid */
+- if (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)
+- return WEOF;
++ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+
+ FLOCK(f);
+
+- f->mode |= f->mode+1;
++ if (f->mode <= 0) fwide(f, 1);
++ *ploc = f->locale;
+
+ if (!f->rpos) __toread(f);
+- if (!f->rpos || f->rpos < f->buf - UNGET + l) {
++ if (!f->rpos || f->rpos < f->buf - UNGET + l || c == WEOF ||
++ (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)) {
+ FUNLOCK(f);
+- return EOF;
++ *ploc = loc;
++ return WEOF;
+ }
+
+ if (isascii(c)) *--f->rpos = c;
+@@ -31,5 +30,6 @@ wint_t ungetwc(wint_t c, FILE *f)
+ f->flags &= ~F_EOF;
+
+ FUNLOCK(f);
++ *ploc = loc;
+ return c;
+ }
+--- a/src/stdio/vfwprintf.c
++++ b/src/stdio/vfwprintf.c
+@@ -293,7 +293,10 @@ static int wprintf_core(FILE *f, const w
+ if ((fl&LEFT_ADJ)) fprintf(f, "%.*s", w-p, "");
+ l=w;
+ continue;
++ case 'm':
++ arg.p = strerror(errno);
+ case 's':
++ if (!arg.p) arg.p = "(null)";
+ bs = arg.p;
+ if (p<0) p = INT_MAX;
+ for (i=l=0; l<p && (i=mbtowc(&wc, bs, MB_LEN_MAX))>0; bs+=i, l++);
+@@ -356,7 +359,7 @@ int vfwprintf(FILE *restrict f, const wc
+ }
+
+ FLOCK(f);
+- f->mode |= f->mode+1;
++ fwide(f, 1);
+ olderr = f->flags & F_ERR;
+ f->flags &= ~F_ERR;
+ ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type);
+--- a/src/stdio/vfwscanf.c
++++ b/src/stdio/vfwscanf.c
+@@ -104,7 +104,7 @@ int vfwscanf(FILE *restrict f, const wch
+
+ FLOCK(f);
+
+- f->mode |= f->mode+1;
++ fwide(f, 1);
+
+ for (p=fmt; *p; p++) {
+
+--- /dev/null
++++ b/src/thread/__unmapself.c
+@@ -0,0 +1,29 @@
++#include "pthread_impl.h"
++#include "atomic.h"
++#include "syscall.h"
++/* cheat and reuse CRTJMP macro from dynlink code */
++#include "dynlink.h"
++
++static volatile int lock; /* 0 when free; holds the tid of the thread currently using shared_stack */
++static void *unmap_base; /* arguments handed from __unmapself to do_unmap */
++static size_t unmap_size;
++static char shared_stack[256]; /* static buffer whose end is passed to CRTJMP as the stack for do_unmap */
++
++static void do_unmap(void) /* entered via CRTJMP from __unmapself: unmap the saved region, then exit */
++{
++ __syscall(SYS_munmap, unmap_base, unmap_size);
++ __syscall(SYS_exit);
++}
++
++void __unmapself(void *base, size_t size) /* unmap size bytes at base and exit, by jumping to do_unmap with a stack inside shared_stack */
++{
++ int tid=__pthread_self()->tid;
++ char *stack = shared_stack + sizeof shared_stack; /* start from the end of the static buffer */
++ stack -= (uintptr_t)stack % 16; /* round down to a 16-byte boundary */
++ while (lock || a_cas(&lock, 0, tid)) /* spin until lock is 0 and this thread's tid is swapped in; assumes a_cas returns the previous value */
++ a_spin();
++ __syscall(SYS_set_tid_address, &lock); /* NOTE(review): presumably makes the kernel zero lock when this thread exits, releasing it - confirm */
++ unmap_base = base; /* pass the arguments to do_unmap through file statics */
++ unmap_size = size;
++ CRTJMP(do_unmap, stack); /* macro from dynlink.h; presumably jumps to do_unmap with stack as the stack pointer - confirm */
++}
+--- a/src/thread/pthread_create.c
++++ b/src/thread/pthread_create.c
+@@ -191,8 +191,9 @@ int __pthread_create(pthread_t *restrict
+ if (!libc.can_do_threads) return ENOSYS;
+ self = __pthread_self();
+ if (!libc.threaded) {
+- for (FILE *f=libc.ofl_head; f; f=f->next)
++ for (FILE *f=*__ofl_lock(); f; f=f->next)
+ init_file_lock(f);
++ __ofl_unlock();
+ init_file_lock(__stdin_used);
+ init_file_lock(__stdout_used);
+ init_file_lock(__stderr_used);