From eb73f4aed798280e9012f535b6b7c227d4496b8f Mon Sep 17 00:00:00 2001
From: Steven Barth <cyrus@openwrt.org>
Date: Sat, 20 Jun 2015 18:36:50 +0000
Subject: musl: update musl to 2015-06-20

Fixes a MIPS regression and a missing SSP function

Signed-off-by: Steven Barth <steven@midlink.org>

SVN-Revision: 46075
---
 toolchain/musl/patches/001-git-2015-06-16.patch | 1578 ----------------
 toolchain/musl/patches/001-git-2015-06-20.patch | 2268 +++++++++++++++++++++++
 2 files changed, 2268 insertions(+), 1578 deletions(-)
 delete mode 100644 toolchain/musl/patches/001-git-2015-06-16.patch
 create mode 100644 toolchain/musl/patches/001-git-2015-06-20.patch

(limited to 'toolchain')

diff --git a/toolchain/musl/patches/001-git-2015-06-16.patch b/toolchain/musl/patches/001-git-2015-06-16.patch
deleted file mode 100644
index 5941adc320..0000000000
--- a/toolchain/musl/patches/001-git-2015-06-16.patch
+++ /dev/null
@@ -1,1578 +0,0 @@
-commit 1b0cdc8700d29ef018bf226d74b2b58b23bce91c
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Tue Jun 16 07:11:19 2015 +0000
-
-    refactor stdio open file list handling, move it out of global libc struct
-    
-    functions which open in-memory FILE stream variants all shared a tail
-    with __fdopen, adding the FILE structure to stdio's open file list.
-    replacing this common tail with a function call reduces code size and
-    duplication of logic. the list is also partially encapsulated now.
-    
-    function signatures were chosen to facilitate tail call optimization
-    and reduce the need for additional accessor functions.
-    
-    with these changes, static linked programs that do not use stdio no
-    longer have an open file list at all.
-
-commit f22a9edaf8a6f2ca1d314d18b3785558279a5c03
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Tue Jun 16 06:18:00 2015 +0000
-
-    byte-based C locale, phase 3: make MB_CUR_MAX variable to activate code
-    
-    this patch activates the new byte-based C locale (high bytes treated
-    as abstract code unit "characters" rather than decoded as multibyte
-    characters) by making the value of MB_CUR_MAX depend on the active
-    locale. for the C locale, the LC_CTYPE category pointer is null,
-    yielding a value of 1. all other locales yield a value of 4.
-
-commit 16f18d036d9a7bf590ee6eb86785c0a9658220b6
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Tue Jun 16 05:35:31 2015 +0000
-
-    byte-based C locale, phase 2: stdio and iconv (multibyte callers)
-    
-    this patch adjusts libc components which use the multibyte functions
-    internally, and which depend on them operating in a particular
-    encoding, to make the appropriate locale changes before calling them
-    and restore the calling thread's locale afterwards. activating the
-    byte-based C locale without these changes would cause regressions in
-    stdio and iconv.
-    
-    in the case of iconv, the current implementation was simply using the
-    multibyte functions as UTF-8 conversions. setting a multibyte UTF-8
-    locale for the duration of the iconv operation allows the code to
-    continue working.
-    
-    in the case of stdio, POSIX requires that FILE streams have an
-    encoding rule bound at the time of setting wide orientation. as long
-    as all locales, including the C locale, used the same encoding,
-    treating high bytes as UTF-8, there was no need to store an encoding
-    rule as part of the stream's state.
-    
-    a new locale field in the FILE structure points to the locale that
-    should be made active during fgetwc/fputwc/ungetwc on the stream. it
-    cannot point to the locale active at the time the stream becomes
-    oriented, because this locale could be mutable (the global locale) or
-    could be destroyed (locale_t objects produced by newlocale) before the
-    stream is closed. instead, a pointer to the static C or C.UTF-8 locale
-    object added in commit commit aeeac9ca5490d7d90fe061ab72da446c01ddf746
-    is used. this is valid since categories other than LC_CTYPE will not
-    affect these functions.
-
-commit 1507ebf837334e9e07cfab1ca1c2e88449069a80
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Tue Jun 16 04:44:17 2015 +0000
-
-    byte-based C locale, phase 1: multibyte character handling functions
-    
-    this patch makes the functions which work directly on multibyte
-    characters treat the high bytes as individual abstract code units
-    rather than as multibyte sequences when MB_CUR_MAX is 1. since
-    MB_CUR_MAX is presently defined as a constant 4, all of the new code
-    added is dead code, and optimizing compilers' code generation should
-    not be affected at all. a future commit will activate the new code.
-    
-    as abstract code units, bytes 0x80 to 0xff are represented by wchar_t
-    values 0xdf80 to 0xdfff, at the end of the surrogates range. this
-    ensures that they will never be misinterpreted as Unicode characters,
-    and that all wctype functions return false for these "characters"
-    without needing locale-specific logic. a high range outside of Unicode
-    such as 0x7fffff80 to 0x7fffffff was also considered, but since C11's
-    char16_t also needs to be able to represent conversions of these
-    bytes, the surrogate range was the natural choice.
-
-commit 38e2f727237230300fea6aff68802db04625fd23
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Tue Jun 16 04:21:38 2015 +0000
-
-    fix btowc corner case
-    
-    btowc is required to interpret its argument by conversion to unsigned
-    char, unless the argument is equal to EOF. since the conversion to
-    produces a non-character value anyway, we can just unconditionally
-    convert, for now.
-
-commit ee59c296d56bf26f49f354d6eb32b4b6d4190188
-Author: Szabolcs Nagy <nsz@port70.net>
-Date:   Wed Jun 3 10:32:14 2015 +0100
-
-    arm: add vdso support
-    
-    vdso will be available on arm in linux v4.2, the user-space code
-    for it is in kernel commit 8512287a8165592466cb9cb347ba94892e9c56a5
-
-commit e3bc22f1eff87b8f029a6ab31f1a269d69e4b053
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sun Jun 14 01:59:02 2015 +0000
-
-    refactor malloc's expand_heap to share with __simple_malloc
-    
-    this extends the brk/stack collision protection added to full malloc
-    in commit 276904c2f6bde3a31a24ebfa201482601d18b4f9 to also protect the
-    __simple_malloc function used in static-linked programs that don't
-    reference the free function.
-    
-    it also extends support for using mmap when brk fails, which full
-    malloc got in commit 5446303328adf4b4e36d9fba21848e6feb55fab4, to
-    __simple_malloc.
-    
-    since __simple_malloc may expand the heap by arbitrarily large
-    increments, the stack collision detection is enhanced to detect
-    interval overlap rather than just proximity of a single address to the
-    stack. code size is increased a bit, but this is partly offset by the
-    sharing of code between the two malloc implementations, which due to
-    linking semantics, both get linked in a program that needs the full
-    malloc with realloc/free support.
-
-commit 4ef9b828c1f39553a69e0635ac91f0fcadd6e8c6
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 13 20:53:02 2015 +0000
-
-    remove cancellation points in stdio
-    
-    commit 58165923890865a6ac042fafce13f440ee986fd9 added these optional
-    cancellation points on the basis that cancellable stdio could be
-    useful, to unblock threads stuck on stdio operations that will never
-    complete. however, the only way to ensure that cancellation can
-    achieve this is to violate the rules for side effects when
-    cancellation is acted upon, discarding knowledge of any partial data
-    transfer already completed. our implementation exhibited this behavior
-    and was thus non-conforming.
-    
-    in addition to improving correctness, removing these cancellation
-    points moderately reduces code size, and should significantly improve
-    performance on i386, where sysenter/syscall instructions can be used
-    instead of "int $128" for non-cancellable syscalls.
-
-commit 536c6d5a4205e2a3f161f2983ce1e0ac3082187d
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 13 05:17:16 2015 +0000
-
-    fix idiom for setting stdio stream orientation to wide
-    
-    the old idiom, f->mode |= f->mode+1, was adapted from the idiom for
-    setting byte orientation, f->mode |= f->mode-1, but the adaptation was
-    incorrect. unless the stream was alreasdy set byte-oriented, this code
-    incremented f->mode each time it was executed, which would eventually
-    lead to overflow. it could be fixed by changing it to f->mode |= 1,
-    but upcoming changes will require slightly more work at the time of
-    wide orientation, so it makes sense to just call fwide. as an
-    optimization in the single-character functions, fwide is only called
-    if the stream is not already wide-oriented.
-
-commit f8f565df467c13248104223f99abf7f37cef7584
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 13 04:42:38 2015 +0000
-
-    add printing of null %s arguments as "(null)" in wide printf
-    
-    this is undefined, but supported in our implementation of the normal
-    printf, so for consistency the wide variant should support it too.
-
-commit f9e25d813860d53cd1e9b6145cc63375d2fe2529
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 13 04:37:27 2015 +0000
-
-    add %m support to wide printf
-
-commit ec634aad91f57479ef17525e33ed446c780a61f4
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Thu Jun 11 05:01:04 2015 +0000
-
-    add sh asm for vfork
-
-commit c30cbcb0a646b1f13a22c645616dce624465b883
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Wed Jun 10 02:27:40 2015 +0000
-
-    implement arch-generic version of __unmapself
-    
-    this can be used to put off writing an asm version of __unmapself for
-    new archs, or as a permanent solution on archs where it's not
-    practical or even possible to run momentarily with no stack.
-    
-    the concept here is simple: the caller takes a lock on a global shared
-    stack and uses it to make the munmap and exit syscalls. the only trick
-    is unlocking, which must be done after the thread exits, and this is
-    achieved by using the set_tid_address syscall to have the kernel zero
-    and futex-wake the lock word as part of the exit syscall.
-
-commit 276904c2f6bde3a31a24ebfa201482601d18b4f9
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Tue Jun 9 20:30:35 2015 +0000
-
-    in malloc, refuse to use brk if it grows into stack
-    
-    the linux/nommu fdpic ELF loader sets up the brk range to overlap
-    entirely with the main thread's stack (but growing from opposite
-    ends), so that the resulting failure mode for malloc is not to return
-    a null pointer but to start returning pointers to memory that overlaps
-    with the caller's stack. needless to say this extremely dangerous and
-    makes brk unusable.
-    
-    since it's non-trivial to detect execution environments that might be
-    affected by this kernel bug, and since the severity of the bug makes
-    any sort of detection that might yield false-negatives unsafe, we
-    instead check the proximity of the brk to the stack pointer each time
-    the brk is to be expanded. both the main thread's stack (where the
-    real known risk lies) and the calling thread's stack are checked. an
-    arbitrary gap distance of 8 MB is imposed, chosen to be larger than
-    linux default main-thread stack reservation sizes and larger than any
-    reasonable stack configuration on nommu.
-    
-    the effeciveness of this patch relies on an assumption that the amount
-    by which the brk is being grown is smaller than the gap limit, which
-    is always true for malloc's use of brk. reliance on this assumption is
-    why the check is being done in malloc-specific code and not in __brk.
-
-commit bd1eaceaa3975bd2a2a34e211cff896affaecadf
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Tue Jun 9 20:09:27 2015 +0000
-
-    fix spurious errors from pwd/grp functions when nscd backend is absent
-    
-    for several pwd/grp functions, the only way the caller can distinguish
-    between a successful negative result ("no such user/group") and an
-    internal error is by clearing errno before the call and checking errno
-    afterwards. the nscd backend support code correctly simulated a
-    not-found response on systems where such a backend is not running, but
-    failed to restore errno.
-    
-    this commit also fixed an outdated/incorrect comment.
-
-commit 75ce4503950621b11fcc7f1fd1187dbcf3cde312
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sun Jun 7 20:55:23 2015 +0000
-
-    fix regression in pre-v7 arm on kernels with kuser helper removed
-    
-    the arm atomics/TLS runtime selection code is called from
-    __set_thread_area and depends on having libc.auxv and __hwcap
-    available. commit 71f099cb7db821c51d8f39dfac622c61e54d794c moved the
-    first call to __set_thread_area to the top of dynamic linking stage 3,
-    before this data is made available, causing the runtime detection code
-    to always see __hwcap as zero and thereby select the atomics/TLS
-    implementations based on kuser helper.
-    
-    upcoming work on superh will use similar runtime detection.
-    
-    ideally this early-init code should be cleanly refactored and shared
-    between the dynamic linker and static-linked startup.
-
-commit 32f3c4f70633488550c29a2444f819aafdf345ff
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sun Jun 7 03:09:16 2015 +0000
-
-    add multiple inclusion guard to locale_impl.h
-
-commit 04b8360adbb6487f61aa0c00e53ec3a90a5a0d29
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sun Jun 7 02:59:49 2015 +0000
-
-    remove redefinition of MB_CUR_MAX in locale_impl.h
-    
-    unless/until the byte-based C locale is implemented, defining
-    MB_CUR_MAX to 1 in the C locale is wrong. no internal code currently
-    uses the MB_CUR_MAX macro, but having it defined inconsistently is
-    error-prone. applications get the value from stdlib.h and were
-    unaffected.
-
-commit 16bf466532d7328e971012b0731ad493b017ad29
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 6 18:53:02 2015 +0000
-
-    make static C and C.UTF-8 locales available outside of newlocale
-
-commit 312eea2ea4f4363fb01b73660c08bfcf43dd3bb4
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 6 18:20:30 2015 +0000
-
-    remove another invalid skip of locking in ungetwc
-
-commit 3d7e32d28dc9962e9efc1c317c5b44b5b2df3008
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 6 18:16:22 2015 +0000
-
-    add macro version of ctype.h isascii function
-    
-    presumably internal code (ungetwc and fputwc) was written assuming a
-    macro implementation existed; otherwise use of isascii is just a
-    pessimization.
-
-commit 7e816a6487932cbb3cb71d94b609e50e81f4e5bf
-Author: Rich Felker <dalias@aerifal.cx>
-Date:   Sat Jun 6 18:11:17 2015 +0000
-
-    remove invalid skip of locking in ungetwc
-    
-    aside from being invalid, the early check only optimized the error
-    case, and likely pessimized the common case by separating the
-    two branches on isascii(c) at opposite ends of the function.
-
-commit 63f4b9f18f3674124d8bcb119739fec85e6da005
-Author: Timo Teräs <timo.teras@iki.fi>
-Date:   Fri Jun 5 10:39:42 2015 +0300
-
-    fix uselocale((locale_t)0) not to modify locale
-    
-    commit 68630b55c0c7219fe9df70dc28ffbf9efc8021d8 made the new locale to
-    be assigned unconditonally resulting in crashes later on.
-
---- a/arch/arm/syscall_arch.h
-+++ b/arch/arm/syscall_arch.h
-@@ -72,3 +72,7 @@ static inline long __syscall6(long n, lo
- 	register long r5 __asm__("r5") = f;
- 	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
- }
-+
-+#define VDSO_USEFUL
-+#define VDSO_CGT_SYM "__vdso_clock_gettime"
-+#define VDSO_CGT_VER "LINUX_2.6"
---- a/include/ctype.h
-+++ b/include/ctype.h
-@@ -64,6 +64,7 @@ int   isascii(int);
- int   toascii(int);
- #define _tolower(a) ((a)|0x20)
- #define _toupper(a) ((a)&0x5f)
-+#define isascii(a) (0 ? isascii(a) : (unsigned)(a) < 128)
- 
- #endif
- 
---- a/include/stdlib.h
-+++ b/include/stdlib.h
-@@ -76,7 +76,8 @@ size_t wcstombs (char *__restrict, const
- #define EXIT_FAILURE 1
- #define EXIT_SUCCESS 0
- 
--#define MB_CUR_MAX ((size_t)+4)
-+size_t __ctype_get_mb_cur_max(void);
-+#define MB_CUR_MAX (__ctype_get_mb_cur_max())
- 
- #define RAND_MAX (0x7fffffff)
- 
---- a/src/ctype/__ctype_get_mb_cur_max.c
-+++ b/src/ctype/__ctype_get_mb_cur_max.c
-@@ -1,6 +1,7 @@
--#include <stddef.h>
-+#include <stdlib.h>
-+#include "locale_impl.h"
- 
- size_t __ctype_get_mb_cur_max()
- {
--	return 4;
-+	return MB_CUR_MAX;
- }
---- a/src/ctype/isascii.c
-+++ b/src/ctype/isascii.c
-@@ -1,4 +1,5 @@
- #include <ctype.h>
-+#undef isascii
- 
- int isascii(int c)
- {
---- a/src/internal/libc.h
-+++ b/src/internal/libc.h
-@@ -17,8 +17,6 @@ struct __libc {
- 	int secure;
- 	volatile int threads_minus_1;
- 	size_t *auxv;
--	FILE *ofl_head;
--	volatile int ofl_lock[2];
- 	size_t tls_size;
- 	size_t page_size;
- 	struct __locale_struct global_locale;
---- a/src/internal/locale_impl.h
-+++ b/src/internal/locale_impl.h
-@@ -1,3 +1,6 @@
-+#ifndef _LOCALE_IMPL_H
-+#define _LOCALE_IMPL_H
-+
- #include <locale.h>
- #include <stdlib.h>
- #include "libc.h"
-@@ -12,6 +15,10 @@ struct __locale_map {
- 	const struct __locale_map *next;
- };
- 
-+extern const struct __locale_map __c_dot_utf8;
-+extern const struct __locale_struct __c_locale;
-+extern const struct __locale_struct __c_dot_utf8_locale;
-+
- const struct __locale_map *__get_locale(int, const char *);
- const char *__mo_lookup(const void *, size_t, const char *);
- const char *__lctrans(const char *, const struct __locale_map *);
-@@ -20,9 +27,14 @@ const char *__lctrans_cur(const char *);
- #define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)])
- #define LCTRANS_CUR(msg) __lctrans_cur(msg)
- 
-+#define C_LOCALE ((locale_t)&__c_locale)
-+#define UTF8_LOCALE ((locale_t)&__c_dot_utf8_locale)
-+
- #define CURRENT_LOCALE (__pthread_self()->locale)
- 
- #define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE])
- 
- #undef MB_CUR_MAX
- #define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
-+
-+#endif
---- a/src/internal/stdio_impl.h
-+++ b/src/internal/stdio_impl.h
-@@ -47,6 +47,7 @@ struct _IO_FILE {
- 	unsigned char *shend;
- 	off_t shlim, shcnt;
- 	FILE *prev_locked, *next_locked;
-+	struct __locale_struct *locale;
- };
- 
- size_t __stdio_read(FILE *, unsigned char *, size_t);
-@@ -75,8 +76,9 @@ int __putc_unlocked(int, FILE *);
- FILE *__fdopen(int, const char *);
- int __fmodeflags(const char *);
- 
--#define OFLLOCK() LOCK(libc.ofl_lock)
--#define OFLUNLOCK() UNLOCK(libc.ofl_lock)
-+FILE *__ofl_add(FILE *f);
-+FILE **__ofl_lock(void);
-+void __ofl_unlock(void);
- 
- #define feof(f) ((f)->flags & F_EOF)
- #define ferror(f) ((f)->flags & F_ERR)
---- a/src/ldso/dynlink.c
-+++ b/src/ldso/dynlink.c
-@@ -1192,6 +1192,17 @@ _Noreturn void __dls3(size_t *sp)
- 	char **argv_orig = argv;
- 	char **envp = argv+argc+1;
- 
-+	/* Find aux vector just past environ[] and use it to initialize
-+	 * global data that may be needed before we can make syscalls. */
-+	__environ = envp;
-+	for (i=argc+1; argv[i]; i++);
-+	libc.auxv = auxv = (void *)(argv+i+1);
-+	decode_vec(auxv, aux, AUX_CNT);
-+	__hwcap = aux[AT_HWCAP];
-+	libc.page_size = aux[AT_PAGESZ];
-+	libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
-+		|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
-+
- 	/* Setup early thread pointer in builtin_tls for ldso/libc itself to
- 	 * use during dynamic linking. If possible it will also serve as the
- 	 * thread pointer at runtime. */
-@@ -1200,25 +1211,11 @@ _Noreturn void __dls3(size_t *sp)
- 		a_crash();
- 	}
- 
--	/* Find aux vector just past environ[] */
--	for (i=argc+1; argv[i]; i++)
--		if (!memcmp(argv[i], "LD_LIBRARY_PATH=", 16))
--			env_path = argv[i]+16;
--		else if (!memcmp(argv[i], "LD_PRELOAD=", 11))
--			env_preload = argv[i]+11;
--	auxv = (void *)(argv+i+1);
--
--	decode_vec(auxv, aux, AUX_CNT);
--
- 	/* Only trust user/env if kernel says we're not suid/sgid */
--	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
--	  || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
--		env_path = 0;
--		env_preload = 0;
--		libc.secure = 1;
-+	if (!libc.secure) {
-+		env_path = getenv("LD_LIBRARY_PATH");
-+		env_preload = getenv("LD_PRELOAD");
- 	}
--	libc.page_size = aux[AT_PAGESZ];
--	libc.auxv = auxv;
- 
- 	/* If the main program was already loaded by the kernel,
- 	 * AT_PHDR will point to some location other than the dynamic
---- /dev/null
-+++ b/src/locale/c_locale.c
-@@ -0,0 +1,15 @@
-+#include "locale_impl.h"
-+#include <stdint.h>
-+
-+static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
-+
-+const struct __locale_map __c_dot_utf8 = {
-+	.map = empty_mo,
-+	.map_size = sizeof empty_mo,
-+	.name = "C.UTF-8"
-+};
-+
-+const struct __locale_struct __c_locale = { 0 };
-+const struct __locale_struct __c_dot_utf8_locale = {
-+	.cat[LC_CTYPE] = &__c_dot_utf8
-+};
---- a/src/locale/iconv.c
-+++ b/src/locale/iconv.c
-@@ -5,6 +5,7 @@
- #include <stdlib.h>
- #include <limits.h>
- #include <stdint.h>
-+#include "locale_impl.h"
- 
- #define UTF_32BE    0300
- #define UTF_16LE    0301
-@@ -165,9 +166,12 @@ size_t iconv(iconv_t cd0, char **restric
- 	int err;
- 	unsigned char type = map[-1];
- 	unsigned char totype = tomap[-1];
-+	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
- 
- 	if (!in || !*in || !*inb) return 0;
- 
-+	*ploc = UTF8_LOCALE;
-+
- 	for (; *inb; *in+=l, *inb-=l) {
- 		c = *(unsigned char *)*in;
- 		l = 1;
-@@ -431,6 +435,7 @@ size_t iconv(iconv_t cd0, char **restric
- 			break;
- 		}
- 	}
-+	*ploc = loc;
- 	return x;
- ilseq:
- 	err = EILSEQ;
-@@ -445,5 +450,6 @@ starved:
- 	x = -1;
- end:
- 	errno = err;
-+	*ploc = loc;
- 	return x;
- }
---- a/src/locale/langinfo.c
-+++ b/src/locale/langinfo.c
-@@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, loca
- 	int idx = item & 65535;
- 	const char *str;
- 
--	if (item == CODESET) return "UTF-8";
-+	if (item == CODESET)
-+		return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8";
- 	
- 	switch (cat) {
- 	case LC_NUMERIC:
---- a/src/locale/locale_map.c
-+++ b/src/locale/locale_map.c
-@@ -24,14 +24,6 @@ static const char envvars[][12] = {
- 	"LC_MESSAGES",
- };
- 
--static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
--
--const struct __locale_map __c_dot_utf8 = {
--	.map = empty_mo,
--	.map_size = sizeof empty_mo,
--	.name = "C.UTF-8"
--};
--
- const struct __locale_map *__get_locale(int cat, const char *val)
- {
- 	static int lock[2];
-@@ -107,8 +99,8 @@ const struct __locale_map *__get_locale(
- 	 * sake of being able to do message translations at the
- 	 * application level. */
- 	if (!new && (new = malloc(sizeof *new))) {
--		new->map = empty_mo;
--		new->map_size = sizeof empty_mo;
-+		new->map = __c_dot_utf8.map;
-+		new->map_size = __c_dot_utf8.map_size;
- 		memcpy(new->name, val, n);
- 		new->name[n] = 0;
- 		new->next = loc_head;
---- a/src/locale/newlocale.c
-+++ b/src/locale/newlocale.c
-@@ -3,16 +3,9 @@
- #include "locale_impl.h"
- #include "libc.h"
- 
--extern const struct __locale_map __c_dot_utf8;
--
--static const struct __locale_struct c_locale = { 0 };
--static const struct __locale_struct c_dot_utf8_locale = {
--	.cat[LC_CTYPE] = &__c_dot_utf8
--};
--
- int __loc_is_allocated(locale_t loc)
- {
--	return loc && loc != &c_locale && loc != &c_dot_utf8_locale;
-+	return loc && loc != C_LOCALE && loc != UTF8_LOCALE;
- }
- 
- locale_t __newlocale(int mask, const char *name, locale_t loc)
-@@ -44,9 +37,9 @@ locale_t __newlocale(int mask, const cha
- 	}
- 
- 	if (!j)
--		return (locale_t)&c_locale;
--	if (j==1 && tmp.cat[LC_CTYPE]==c_dot_utf8_locale.cat[LC_CTYPE])
--		return (locale_t)&c_dot_utf8_locale;
-+		return C_LOCALE;
-+	if (j==1 && tmp.cat[LC_CTYPE]==&__c_dot_utf8)
-+		return UTF8_LOCALE;
- 
- 	if ((loc = malloc(sizeof *loc))) *loc = tmp;
- 
---- a/src/locale/uselocale.c
-+++ b/src/locale/uselocale.c
-@@ -8,9 +8,7 @@ locale_t __uselocale(locale_t new)
- 	locale_t old = self->locale;
- 	locale_t global = &libc.global_locale;
- 
--	if (new == LC_GLOBAL_LOCALE) new = global;
--
--	self->locale = new;
-+	if (new) self->locale = new == LC_GLOBAL_LOCALE ? global : new;
- 
- 	return old == global ? LC_GLOBAL_LOCALE : old;
- }
---- /dev/null
-+++ b/src/malloc/expand_heap.c
-@@ -0,0 +1,72 @@
-+#include <limits.h>
-+#include <stdint.h>
-+#include <errno.h>
-+#include <sys/mman.h>
-+#include "libc.h"
-+#include "syscall.h"
-+
-+/* This function returns true if the interval [old,new]
-+ * intersects the 'len'-sized interval below &libc.auxv
-+ * (interpreted as the main-thread stack) or below &b
-+ * (the current stack). It is used to defend against
-+ * buggy brk implementations that can cross the stack. */
-+
-+static int traverses_stack_p(uintptr_t old, uintptr_t new)
-+{
-+	const uintptr_t len = 8<<20;
-+	uintptr_t a, b;
-+
-+	b = (uintptr_t)libc.auxv;
-+	a = b > len ? b-len : 0;
-+	if (new>a && old<b) return 1;
-+
-+	b = (uintptr_t)&b;
-+	a = b > len ? b-len : 0;
-+	if (new>a && old<b) return 1;
-+
-+	return 0;
-+}
-+
-+void *__mmap(void *, size_t, int, int, int, off_t);
-+
-+/* Expand the heap in-place if brk can be used, or otherwise via mmap,
-+ * using an exponential lower bound on growth by mmap to make
-+ * fragmentation asymptotically irrelevant. The size argument is both
-+ * an input and an output, since the caller needs to know the size
-+ * allocated, which will be larger than requested due to page alignment
-+ * and mmap minimum size rules. The caller is responsible for locking
-+ * to prevent concurrent calls. */
-+
-+void *__expand_heap(size_t *pn)
-+{
-+	static uintptr_t brk;
-+	static unsigned mmap_step;
-+	size_t n = *pn;
-+
-+	if (n > SIZE_MAX/2 - PAGE_SIZE) {
-+		errno = ENOMEM;
-+		return 0;
-+	}
-+	n += -n & PAGE_SIZE-1;
-+
-+	if (!brk) {
-+		brk = __syscall(SYS_brk, 0);
-+		brk += -brk & PAGE_SIZE-1;
-+	}
-+
-+	if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
-+	    && __syscall(SYS_brk, brk+n)==brk+n) {
-+		*pn = n;
-+		brk += n;
-+		return (void *)(brk-n);
-+	}
-+
-+	size_t min = (size_t)PAGE_SIZE << mmap_step/2;
-+	if (n < min) n = min;
-+	void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
-+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-+	if (area == MAP_FAILED) return 0;
-+	*pn = n;
-+	mmap_step++;
-+	return area;
-+}
---- a/src/malloc/lite_malloc.c
-+++ b/src/malloc/lite_malloc.c
-@@ -4,43 +4,46 @@
- #include <errno.h>
- #include "libc.h"
- 
--uintptr_t __brk(uintptr_t);
--
- #define ALIGN 16
- 
-+void *__expand_heap(size_t *);
-+
- void *__simple_malloc(size_t n)
- {
--	static uintptr_t cur, brk;
--	uintptr_t base, new;
-+	static char *cur, *end;
- 	static volatile int lock[2];
--	size_t align=1;
-+	size_t align=1, pad;
-+	void *p;
- 
- 	if (!n) n++;
--	if (n > SIZE_MAX/2) goto toobig;
--
- 	while (align<n && align<ALIGN)
- 		align += align;
--	n = n + align - 1 & -align;
- 
- 	LOCK(lock);
--	if (!cur) cur = brk = __brk(0)+16;
--	base = cur + align-1 & -align;
--	if (n > SIZE_MAX - PAGE_SIZE - base) goto fail;
--	if (base+n > brk) {
--		new = base+n + PAGE_SIZE-1 & -PAGE_SIZE;
--		if (__brk(new) != new) goto fail;
--		brk = new;
--	}
--	cur = base+n;
--	UNLOCK(lock);
- 
--	return (void *)base;
-+	pad = -(uintptr_t)cur & align-1;
-+
-+	if (n <= SIZE_MAX/2 + ALIGN) n += pad;
-+
-+	if (n > end-cur) {
-+		size_t m = n;
-+		char *new = __expand_heap(&m);
-+		if (!new) {
-+			UNLOCK(lock);
-+			return 0;
-+		}
-+		if (new != end) {
-+			cur = new;
-+			n -= pad;
-+			pad = 0;
-+		}
-+		end = new + m;
-+	}
- 
--fail:
-+	p = cur + pad;
-+	cur += n;
- 	UNLOCK(lock);
--toobig:
--	errno = ENOMEM;
--	return 0;
-+	return p;
- }
- 
- weak_alias(__simple_malloc, malloc);
---- a/src/malloc/malloc.c
-+++ b/src/malloc/malloc.c
-@@ -13,7 +13,6 @@
- #define inline inline __attribute__((always_inline))
- #endif
- 
--uintptr_t __brk(uintptr_t);
- void *__mmap(void *, size_t, int, int, int, off_t);
- int __munmap(void *, size_t);
- void *__mremap(void *, size_t, size_t, int, ...);
-@@ -31,13 +30,9 @@ struct bin {
- };
- 
- static struct {
--	uintptr_t brk;
--	size_t *heap;
- 	volatile uint64_t binmap;
- 	struct bin bins[64];
--	volatile int brk_lock[2];
- 	volatile int free_lock[2];
--	unsigned mmap_step;
- } mal;
- 
- 
-@@ -152,69 +147,52 @@ void __dump_heap(int x)
- }
- #endif
- 
-+void *__expand_heap(size_t *);
-+
- static struct chunk *expand_heap(size_t n)
- {
--	static int init;
-+	static int heap_lock[2];
-+	static void *end;
-+	void *p;
- 	struct chunk *w;
--	uintptr_t new;
--
--	lock(mal.brk_lock);
- 
--	if (!init) {
--		mal.brk = __brk(0);
--#ifdef SHARED
--		mal.brk = mal.brk + PAGE_SIZE-1 & -PAGE_SIZE;
--#endif
--		mal.brk = mal.brk + 2*SIZE_ALIGN-1 & -SIZE_ALIGN;
--		mal.heap = (void *)mal.brk;
--		init = 1;
-+	/* The argument n already accounts for the caller's chunk
-+	 * overhead needs, but if the heap can't be extended in-place,
-+	 * we need room for an extra zero-sized sentinel chunk. */
-+	n += SIZE_ALIGN;
-+
-+	lock(heap_lock);
-+
-+	p = __expand_heap(&n);
-+	if (!p) {
-+		unlock(heap_lock);
-+		return 0;
- 	}
- 
--	if (n > SIZE_MAX - mal.brk - 2*PAGE_SIZE) goto fail;
--	new = mal.brk + n + SIZE_ALIGN + PAGE_SIZE - 1 & -PAGE_SIZE;
--	n = new - mal.brk;
--
--	if (__brk(new) != new) {
--		size_t min = (size_t)PAGE_SIZE << mal.mmap_step/2;
--		n += -n & PAGE_SIZE-1;
--		if (n < min) n = min;
--		void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
--			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
--		if (area == MAP_FAILED) goto fail;
--
--		mal.mmap_step++;
--		area = (char *)area + SIZE_ALIGN - OVERHEAD;
--		w = area;
-+	/* If not just expanding existing space, we need to make a
-+	 * new sentinel chunk below the allocated space. */
-+	if (p != end) {
-+		/* Valid/safe because of the prologue increment. */
- 		n -= SIZE_ALIGN;
-+		p = (char *)p + SIZE_ALIGN;
-+		w = MEM_TO_CHUNK(p);
- 		w->psize = 0 | C_INUSE;
--		w->csize = n | C_INUSE;
--		w = NEXT_CHUNK(w);
--		w->psize = n | C_INUSE;
--		w->csize = 0 | C_INUSE;
--
--		unlock(mal.brk_lock);
--
--		return area;
- 	}
- 
--	w = MEM_TO_CHUNK(mal.heap);
--	w->psize = 0 | C_INUSE;
--
--	w = MEM_TO_CHUNK(new);
-+	/* Record new heap end and fill in footer. */
-+	end = (char *)p + n;
-+	w = MEM_TO_CHUNK(end);
- 	w->psize = n | C_INUSE;
- 	w->csize = 0 | C_INUSE;
- 
--	w = MEM_TO_CHUNK(mal.brk);
-+	/* Fill in header, which may be new or may be replacing a
-+	 * zero-size sentinel header at the old end-of-heap. */
-+	w = MEM_TO_CHUNK(p);
- 	w->csize = n | C_INUSE;
--	mal.brk = new;
--	
--	unlock(mal.brk_lock);
-+
-+	unlock(heap_lock);
- 
- 	return w;
--fail:
--	unlock(mal.brk_lock);
--	errno = ENOMEM;
--	return 0;
- }
- 
- static int adjust_size(size_t *n)
---- a/src/multibyte/btowc.c
-+++ b/src/multibyte/btowc.c
-@@ -1,7 +1,10 @@
- #include <stdio.h>
- #include <wchar.h>
-+#include <stdlib.h>
-+#include "internal.h"
- 
- wint_t btowc(int c)
- {
--	return c<128U ? c : EOF;
-+	int b = (unsigned char)c;
-+	return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF;
- }
---- a/src/multibyte/internal.h
-+++ b/src/multibyte/internal.h
-@@ -23,3 +23,10 @@ extern const uint32_t bittab[];
- 
- #define SA 0xc2u
- #define SB 0xf4u
-+
-+/* Arbitrary encoding for representing code units instead of characters. */
-+#define CODEUNIT(c) (0xdfff & (signed char)(c))
-+#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80)
-+
-+/* Get inline definition of MB_CUR_MAX. */
-+#include "locale_impl.h"
---- a/src/multibyte/mbrtowc.c
-+++ b/src/multibyte/mbrtowc.c
-@@ -4,6 +4,7 @@
-  * unnecessary.
-  */
- 
-+#include <stdlib.h>
- #include <wchar.h>
- #include <errno.h>
- #include "internal.h"
-@@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, con
- 	if (!n) return -2;
- 	if (!c) {
- 		if (*s < 0x80) return !!(*wc = *s);
-+		if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
- 		if (*s-SA > SB-SA) goto ilseq;
- 		c = bittab[*s++-SA]; n--;
- 	}
---- a/src/multibyte/mbsrtowcs.c
-+++ b/src/multibyte/mbsrtowcs.c
-@@ -7,6 +7,8 @@
- #include <stdint.h>
- #include <wchar.h>
- #include <errno.h>
-+#include <string.h>
-+#include <stdlib.h>
- #include "internal.h"
- 
- size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
-@@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, c
- 		}
- 	}
- 
-+	if (MB_CUR_MAX==1) {
-+		if (!ws) return strlen((const char *)s);
-+		for (;;) {
-+			if (!wn) {
-+				*src = (const void *)s;
-+				return wn0;
-+			}
-+			if (!*s) break;
-+			c = *s++;
-+			*ws++ = CODEUNIT(c);
-+			wn--;
-+		}
-+		*ws = 0;
-+		*src = 0;
-+		return wn0-wn;
-+	}
-+
- 	if (!ws) for (;;) {
- 		if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
- 			while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
---- a/src/multibyte/mbtowc.c
-+++ b/src/multibyte/mbtowc.c
-@@ -4,6 +4,7 @@
-  * unnecessary.
-  */
- 
-+#include <stdlib.h>
- #include <wchar.h>
- #include <errno.h>
- #include "internal.h"
-@@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const c
- 	if (!wc) wc = &dummy;
- 
- 	if (*s < 0x80) return !!(*wc = *s);
-+	if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
- 	if (*s-SA > SB-SA) goto ilseq;
- 	c = bittab[*s++-SA];
- 
---- a/src/multibyte/wcrtomb.c
-+++ b/src/multibyte/wcrtomb.c
-@@ -4,8 +4,10 @@
-  * unnecessary.
-  */
- 
-+#include <stdlib.h>
- #include <wchar.h>
- #include <errno.h>
-+#include "internal.h"
- 
- size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
- {
-@@ -13,6 +15,13 @@ size_t wcrtomb(char *restrict s, wchar_t
- 	if ((unsigned)wc < 0x80) {
- 		*s = wc;
- 		return 1;
-+	} else if (MB_CUR_MAX == 1) {
-+		if (!IS_CODEUNIT(wc)) {
-+			errno = EILSEQ;
-+			return -1;
-+		}
-+		*s = wc;
-+		return 1;
- 	} else if ((unsigned)wc < 0x800) {
- 		*s++ = 0xc0 | (wc>>6);
- 		*s = 0x80 | (wc&0x3f);
---- a/src/multibyte/wctob.c
-+++ b/src/multibyte/wctob.c
-@@ -1,8 +1,10 @@
--#include <stdio.h>
- #include <wchar.h>
-+#include <stdlib.h>
-+#include "internal.h"
- 
- int wctob(wint_t c)
- {
- 	if (c < 128U) return c;
-+	if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c;
- 	return EOF;
- }
---- a/src/passwd/nscd_query.c
-+++ b/src/passwd/nscd_query.c
-@@ -32,6 +32,7 @@ FILE *__nscd_query(int32_t req, const ch
- 		},
- 		.msg_iovlen = 2
- 	};
-+	int errno_save = errno;
- 
- 	*swap = 0;
- retry:
-@@ -50,11 +51,14 @@ retry:
- 		return f;
- 
- 	if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
--		/* If there isn't a running nscd we return -1 to indicate that
--		 * that is precisely what happened
--		 */
--		if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT)
-+		/* If there isn't a running nscd we simulate a "not found"
-+		 * result and the caller is responsible for calling
-+		 * fclose on the (unconnected) socket. The value of
-+		 * errno must be left unchanged in this case.  */
-+		if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT) {
-+			errno = errno_save;
- 			return f;
-+		}
- 		goto error;
- 	}
- 
---- /dev/null
-+++ b/src/process/sh/vfork.s
-@@ -0,0 +1,23 @@
-+.global __vfork
-+.weak vfork
-+.type __vfork,@function
-+.type vfork,@function
-+__vfork:
-+vfork:
-+	mov #95, r3
-+	add r3, r3
-+
-+	trapa #16
-+	or    r0, r0
-+	or    r0, r0
-+	or    r0, r0
-+	or    r0, r0
-+	or    r0, r0
-+
-+	mov r0, r4
-+	mov.l 1f, r0
-+2:	braf r0
-+	 nop
-+	.align 2
-+	.hidden __syscall_ret
-+1:	.long __syscall_ret@PLT-(2b+4-.)
---- a/src/regex/fnmatch.c
-+++ b/src/regex/fnmatch.c
-@@ -18,6 +18,7 @@
- #include <stdlib.h>
- #include <wchar.h>
- #include <wctype.h>
-+#include "locale_impl.h"
- 
- #define END 0
- #define UNMATCHABLE -2
-@@ -229,7 +230,7 @@ static int fnmatch_internal(const char *
- 	 * On illegal sequences we may get it wrong, but in that case
- 	 * we necessarily have a matching failure anyway. */
- 	for (s=endstr; s>str && tailcnt; tailcnt--) {
--		if (s[-1] < 128U) s--;
-+		if (s[-1] < 128U || MB_CUR_MAX==1) s--;
- 		else while ((unsigned char)*--s-0x80U<0x40 && s>str);
- 	}
- 	if (tailcnt) return FNM_NOMATCH;
---- a/src/stdio/__fdopen.c
-+++ b/src/stdio/__fdopen.c
-@@ -54,13 +54,7 @@ FILE *__fdopen(int fd, const char *mode)
- 	if (!libc.threaded) f->lock = -1;
- 
- 	/* Add new FILE to open file list */
--	OFLLOCK();
--	f->next = libc.ofl_head;
--	if (libc.ofl_head) libc.ofl_head->prev = f;
--	libc.ofl_head = f;
--	OFLUNLOCK();
--
--	return f;
-+	return __ofl_add(f);
- }
- 
- weak_alias(__fdopen, fdopen);
---- a/src/stdio/__stdio_exit.c
-+++ b/src/stdio/__stdio_exit.c
-@@ -16,8 +16,7 @@ static void close_file(FILE *f)
- void __stdio_exit(void)
- {
- 	FILE *f;
--	OFLLOCK();
--	for (f=libc.ofl_head; f; f=f->next) close_file(f);
-+	for (f=*__ofl_lock(); f; f=f->next) close_file(f);
- 	close_file(__stdin_used);
- 	close_file(__stdout_used);
- }
---- a/src/stdio/__stdio_read.c
-+++ b/src/stdio/__stdio_read.c
-@@ -1,12 +1,5 @@
- #include "stdio_impl.h"
- #include <sys/uio.h>
--#include <pthread.h>
--
--static void cleanup(void *p)
--{
--	FILE *f = p;
--	if (!f->lockcount) __unlockfile(f);
--}
- 
- size_t __stdio_read(FILE *f, unsigned char *buf, size_t len)
- {
-@@ -16,9 +9,7 @@ size_t __stdio_read(FILE *f, unsigned ch
- 	};
- 	ssize_t cnt;
- 
--	pthread_cleanup_push(cleanup, f);
--	cnt = syscall_cp(SYS_readv, f->fd, iov, 2);
--	pthread_cleanup_pop(0);
-+	cnt = syscall(SYS_readv, f->fd, iov, 2);
- 	if (cnt <= 0) {
- 		f->flags |= F_EOF ^ ((F_ERR^F_EOF) & cnt);
- 		return cnt;
---- a/src/stdio/__stdio_write.c
-+++ b/src/stdio/__stdio_write.c
-@@ -1,12 +1,5 @@
- #include "stdio_impl.h"
- #include <sys/uio.h>
--#include <pthread.h>
--
--static void cleanup(void *p)
--{
--	FILE *f = p;
--	if (!f->lockcount) __unlockfile(f);
--}
- 
- size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
- {
-@@ -19,9 +12,7 @@ size_t __stdio_write(FILE *f, const unsi
- 	int iovcnt = 2;
- 	ssize_t cnt;
- 	for (;;) {
--		pthread_cleanup_push(cleanup, f);
--		cnt = syscall_cp(SYS_writev, f->fd, iov, iovcnt);
--		pthread_cleanup_pop(0);
-+		cnt = syscall(SYS_writev, f->fd, iov, iovcnt);
- 		if (cnt == rem) {
- 			f->wend = f->buf + f->buf_size;
- 			f->wpos = f->wbase = f->buf;
-@@ -34,11 +25,8 @@ size_t __stdio_write(FILE *f, const unsi
- 		}
- 		rem -= cnt;
- 		if (cnt > iov[0].iov_len) {
--			f->wpos = f->wbase = f->buf;
- 			cnt -= iov[0].iov_len;
- 			iov++; iovcnt--;
--		} else if (iovcnt == 2) {
--			f->wbase += cnt;
- 		}
- 		iov[0].iov_base = (char *)iov[0].iov_base + cnt;
- 		iov[0].iov_len -= cnt;
---- a/src/stdio/fclose.c
-+++ b/src/stdio/fclose.c
-@@ -14,11 +14,11 @@ int fclose(FILE *f)
- 	__unlist_locked_file(f);
- 
- 	if (!(perm = f->flags & F_PERM)) {
--		OFLLOCK();
-+		FILE **head = __ofl_lock();
- 		if (f->prev) f->prev->next = f->next;
- 		if (f->next) f->next->prev = f->prev;
--		if (libc.ofl_head == f) libc.ofl_head = f->next;
--		OFLUNLOCK();
-+		if (*head == f) *head = f->next;
-+		__ofl_unlock();
- 	}
- 
- 	r = fflush(f);
---- a/src/stdio/fflush.c
-+++ b/src/stdio/fflush.c
-@@ -35,13 +35,12 @@ int fflush(FILE *f)
- 
- 	r = __stdout_used ? fflush(__stdout_used) : 0;
- 
--	OFLLOCK();
--	for (f=libc.ofl_head; f; f=f->next) {
-+	for (f=*__ofl_lock(); f; f=f->next) {
- 		FLOCK(f);
- 		if (f->wpos > f->wbase) r |= __fflush_unlocked(f);
- 		FUNLOCK(f);
- 	}
--	OFLUNLOCK();
-+	__ofl_unlock();
- 	
- 	return r;
- }
---- a/src/stdio/fgetwc.c
-+++ b/src/stdio/fgetwc.c
-@@ -1,8 +1,9 @@
- #include "stdio_impl.h"
-+#include "locale_impl.h"
- #include <wchar.h>
- #include <errno.h>
- 
--wint_t __fgetwc_unlocked(FILE *f)
-+static wint_t __fgetwc_unlocked_internal(FILE *f)
- {
- 	mbstate_t st = { 0 };
- 	wchar_t wc;
-@@ -10,8 +11,6 @@ wint_t __fgetwc_unlocked(FILE *f)
- 	unsigned char b;
- 	size_t l;
- 
--	f->mode |= f->mode+1;
--
- 	/* Convert character from buffer if possible */
- 	if (f->rpos < f->rend) {
- 		l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st);
-@@ -39,6 +38,16 @@ wint_t __fgetwc_unlocked(FILE *f)
- 	return wc;
- }
- 
-+wint_t __fgetwc_unlocked(FILE *f)
-+{
-+	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
-+	if (f->mode <= 0) fwide(f, 1);
-+	*ploc = f->locale;
-+	wchar_t wc = __fgetwc_unlocked_internal(f);
-+	*ploc = loc;
-+	return wc;
-+}
-+
- wint_t fgetwc(FILE *f)
- {
- 	wint_t c;
---- a/src/stdio/fmemopen.c
-+++ b/src/stdio/fmemopen.c
-@@ -110,11 +110,5 @@ FILE *fmemopen(void *restrict buf, size_
- 
- 	if (!libc.threaded) f->lock = -1;
- 
--	OFLLOCK();
--	f->next = libc.ofl_head;
--	if (libc.ofl_head) libc.ofl_head->prev = f;
--	libc.ofl_head = f;
--	OFLUNLOCK();
--
--	return f;
-+	return __ofl_add(f);
- }
---- a/src/stdio/fopen.c
-+++ b/src/stdio/fopen.c
-@@ -18,7 +18,7 @@ FILE *fopen(const char *restrict filenam
- 	/* Compute the flags to pass to open() */
- 	flags = __fmodeflags(mode);
- 
--	fd = sys_open_cp(filename, flags, 0666);
-+	fd = sys_open(filename, flags, 0666);
- 	if (fd < 0) return 0;
- 	if (flags & O_CLOEXEC)
- 		__syscall(SYS_fcntl, fd, F_SETFD, FD_CLOEXEC);
---- a/src/stdio/fputwc.c
-+++ b/src/stdio/fputwc.c
-@@ -1,4 +1,5 @@
- #include "stdio_impl.h"
-+#include "locale_impl.h"
- #include <wchar.h>
- #include <limits.h>
- #include <ctype.h>
-@@ -7,8 +8,10 @@ wint_t __fputwc_unlocked(wchar_t c, FILE
- {
- 	char mbc[MB_LEN_MAX];
- 	int l;
-+	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
- 
--	f->mode |= f->mode+1;
-+	if (f->mode <= 0) fwide(f, 1);
-+	*ploc = f->locale;
- 
- 	if (isascii(c)) {
- 		c = putc_unlocked(c, f);
-@@ -20,6 +23,8 @@ wint_t __fputwc_unlocked(wchar_t c, FILE
- 		l = wctomb(mbc, c);
- 		if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF;
- 	}
-+	if (c==WEOF) f->flags |= F_ERR;
-+	*ploc = loc;
- 	return c;
- }
- 
---- a/src/stdio/fputws.c
-+++ b/src/stdio/fputws.c
-@@ -1,23 +1,28 @@
- #include "stdio_impl.h"
-+#include "locale_impl.h"
- #include <wchar.h>
- 
- int fputws(const wchar_t *restrict ws, FILE *restrict f)
- {
- 	unsigned char buf[BUFSIZ];
- 	size_t l=0;
-+	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
- 
- 	FLOCK(f);
- 
--	f->mode |= f->mode+1;
-+	fwide(f, 1);
-+	*ploc = f->locale;
- 
- 	while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1)
- 		if (__fwritex(buf, l, f) < l) {
- 			FUNLOCK(f);
-+			*ploc = loc;
- 			return -1;
- 		}
- 
- 	FUNLOCK(f);
- 
-+	*ploc = loc;
- 	return l; /* 0 or -1 */
- }
- 
---- a/src/stdio/fwide.c
-+++ b/src/stdio/fwide.c
-@@ -1,13 +1,14 @@
--#include <wchar.h>
- #include "stdio_impl.h"
--
--#define SH (8*sizeof(int)-1)
--#define NORMALIZE(x) ((x)>>SH | -((-(x))>>SH))
-+#include "locale_impl.h"
- 
- int fwide(FILE *f, int mode)
- {
- 	FLOCK(f);
--	if (!f->mode) f->mode = NORMALIZE(mode);
-+	if (mode) {
-+		if (!f->locale) f->locale = MB_CUR_MAX==1
-+			? C_LOCALE : UTF8_LOCALE;
-+		if (!f->mode) f->mode = mode>0 ? 1 : -1;
-+	}
- 	mode = f->mode;
- 	FUNLOCK(f);
- 	return mode;
---- /dev/null
-+++ b/src/stdio/ofl.c
-@@ -0,0 +1,16 @@
-+#include "stdio_impl.h"
-+#include "libc.h"
-+
-+static FILE *ofl_head;
-+static volatile int ofl_lock[2];
-+
-+FILE **__ofl_lock()
-+{
-+	LOCK(ofl_lock);
-+	return &ofl_head;
-+}
-+
-+void __ofl_unlock()
-+{
-+	UNLOCK(ofl_lock);
-+}
---- /dev/null
-+++ b/src/stdio/ofl_add.c
-@@ -0,0 +1,11 @@
-+#include "stdio_impl.h"
-+
-+FILE *__ofl_add(FILE *f)
-+{
-+	FILE **head = __ofl_lock();
-+	f->next = *head;
-+	if (*head) (*head)->prev = f;
-+	*head = f;
-+	__ofl_unlock();
-+	return f;
-+}
---- a/src/stdio/open_memstream.c
-+++ b/src/stdio/open_memstream.c
-@@ -79,11 +79,5 @@ FILE *open_memstream(char **bufp, size_t
- 
- 	if (!libc.threaded) f->lock = -1;
- 
--	OFLLOCK();
--	f->next = libc.ofl_head;
--	if (libc.ofl_head) libc.ofl_head->prev = f;
--	libc.ofl_head = f;
--	OFLUNLOCK();
--
--	return f;
-+	return __ofl_add(f);
- }
---- a/src/stdio/open_wmemstream.c
-+++ b/src/stdio/open_wmemstream.c
-@@ -81,11 +81,5 @@ FILE *open_wmemstream(wchar_t **bufp, si
- 
- 	if (!libc.threaded) f->lock = -1;
- 
--	OFLLOCK();
--	f->next = libc.ofl_head;
--	if (libc.ofl_head) libc.ofl_head->prev = f;
--	libc.ofl_head = f;
--	OFLUNLOCK();
--
--	return f;
-+	return __ofl_add(f);
- }
---- a/src/stdio/ungetwc.c
-+++ b/src/stdio/ungetwc.c
-@@ -1,4 +1,5 @@
- #include "stdio_impl.h"
-+#include "locale_impl.h"
- #include <wchar.h>
- #include <limits.h>
- #include <ctype.h>
-@@ -8,21 +9,19 @@ wint_t ungetwc(wint_t c, FILE *f)
- {
- 	unsigned char mbc[MB_LEN_MAX];
- 	int l=1;
--
--	if (c == WEOF) return c;
--
--	/* Try conversion early so we can fail without locking if invalid */
--	if (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)
--		return WEOF;
-+	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
- 
- 	FLOCK(f);
- 
--	f->mode |= f->mode+1;
-+	if (f->mode <= 0) fwide(f, 1);
-+	*ploc = f->locale;
- 
- 	if (!f->rpos) __toread(f);
--	if (!f->rpos || f->rpos < f->buf - UNGET + l) {
-+	if (!f->rpos || f->rpos < f->buf - UNGET + l || c == WEOF ||
-+	    (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)) {
- 		FUNLOCK(f);
--		return EOF;
-+		*ploc = loc;
-+		return WEOF;
- 	}
- 
- 	if (isascii(c)) *--f->rpos = c;
-@@ -31,5 +30,6 @@ wint_t ungetwc(wint_t c, FILE *f)
- 	f->flags &= ~F_EOF;
- 
- 	FUNLOCK(f);
-+	*ploc = loc;
- 	return c;
- }
---- a/src/stdio/vfwprintf.c
-+++ b/src/stdio/vfwprintf.c
-@@ -293,7 +293,10 @@ static int wprintf_core(FILE *f, const w
- 			if ((fl&LEFT_ADJ)) fprintf(f, "%.*s", w-p, "");
- 			l=w;
- 			continue;
-+		case 'm':
-+			arg.p = strerror(errno);
- 		case 's':
-+			if (!arg.p) arg.p = "(null)";
- 			bs = arg.p;
- 			if (p<0) p = INT_MAX;
- 			for (i=l=0; l<p && (i=mbtowc(&wc, bs, MB_LEN_MAX))>0; bs+=i, l++);
-@@ -356,7 +359,7 @@ int vfwprintf(FILE *restrict f, const wc
- 	}
- 
- 	FLOCK(f);
--	f->mode |= f->mode+1;
-+	fwide(f, 1);
- 	olderr = f->flags & F_ERR;
- 	f->flags &= ~F_ERR;
- 	ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type);
---- a/src/stdio/vfwscanf.c
-+++ b/src/stdio/vfwscanf.c
-@@ -104,7 +104,7 @@ int vfwscanf(FILE *restrict f, const wch
- 
- 	FLOCK(f);
- 
--	f->mode |= f->mode+1;
-+	fwide(f, 1);
- 
- 	for (p=fmt; *p; p++) {
- 
---- /dev/null
-+++ b/src/thread/__unmapself.c
-@@ -0,0 +1,29 @@
-+#include "pthread_impl.h"
-+#include "atomic.h"
-+#include "syscall.h"
-+/* cheat and reuse CRTJMP macro from dynlink code */
-+#include "dynlink.h"
-+
-+static volatile int lock;
-+static void *unmap_base;
-+static size_t unmap_size;
-+static char shared_stack[256];
-+
-+static void do_unmap()
-+{
-+	__syscall(SYS_munmap, unmap_base, unmap_size);
-+	__syscall(SYS_exit);
-+}
-+
-+void __unmapself(void *base, size_t size)
-+{
-+	int tid=__pthread_self()->tid;
-+	char *stack = shared_stack + sizeof shared_stack;
-+	stack -= (uintptr_t)stack % 16;
-+	while (lock || a_cas(&lock, 0, tid))
-+		a_spin();
-+	__syscall(SYS_set_tid_address, &lock);
-+	unmap_base = base;
-+	unmap_size = size;
-+	CRTJMP(do_unmap, stack);
-+}
---- a/src/thread/pthread_create.c
-+++ b/src/thread/pthread_create.c
-@@ -191,8 +191,9 @@ int __pthread_create(pthread_t *restrict
- 	if (!libc.can_do_threads) return ENOSYS;
- 	self = __pthread_self();
- 	if (!libc.threaded) {
--		for (FILE *f=libc.ofl_head; f; f=f->next)
-+		for (FILE *f=*__ofl_lock(); f; f=f->next)
- 			init_file_lock(f);
-+		__ofl_unlock();
- 		init_file_lock(__stdin_used);
- 		init_file_lock(__stdout_used);
- 		init_file_lock(__stderr_used);
diff --git a/toolchain/musl/patches/001-git-2015-06-20.patch b/toolchain/musl/patches/001-git-2015-06-20.patch
new file mode 100644
index 0000000000..33768a59d8
--- /dev/null
+++ b/toolchain/musl/patches/001-git-2015-06-20.patch
@@ -0,0 +1,2268 @@
+From bafa38541e911806b74a1ab094a404bbdd692ade Mon Sep 17 00:00:00 2001
+From: Steven Barth <steven@midlink.org>
+Date: Sat, 20 Jun 2015 16:59:48 +0200
+Subject: [PATCH] commit 55d061f031085f24d138664c897791aebe9a2fab Author: Rich
+ Felker <dalias@aerifal.cx> Date:   Sat Jun 20 03:01:07 2015 +0000
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+    provide __stack_chk_fail_local in libc.a
+
+    this symbol is needed only on archs where the PLT call ABI is klunky,
+    and only for position-independent code compiled with stack protector.
+    thus references usually only appear in shared libraries or PIE
+    executables, but they can also appear when linking statically if some
+    of the object files being linked were built as PIC/PIE.
+
+    normally libssp_nonshared.a from the compiler toolchain should provide
+    __stack_chk_fail_local, but reportedly it appears prior to -lc in the
+    link order, thus failing to satisfy references from libc itself (which
+    arise only if libc.a was built as PIC/PIE with stack protector
+    enabled).
+
+commit ce3688eca920aa77549323f84e21f33522397115
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 20 02:54:30 2015 +0000
+
+    work around mips detached thread exit breakage due to kernel regression
+
+    linux kernel commit 46e12c07b3b9603c60fc1d421ff18618241cb081 caused
+    the mips syscall mechanism to fail with EFAULT when the userspace
+    stack pointer is invalid, breaking __unmapself used for detached
+    thread exit. the workaround is to set $sp to a known-valid, readable
+    address, and the simplest one to obtain is the address of the current
+    function, which is available (per o32 calling convention) in $25.
+
+commit 75eceb3ae824d54e865686c0c538551aeebf3372
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed Jun 17 17:21:46 2015 +0000
+
+    ignore ENOSYS error from mprotect in pthread_create and dynamic linker
+
+    this error simply indicated a system without memory protection (NOMMU)
+    and should not cause failure in the caller.
+
+commit 10d0268ccfab9152250eeeed3952ce3fed44131a
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 15:25:02 2015 +0000
+
+    switch to using trap number 31 for syscalls on sh
+
+    nominally the low bits of the trap number on sh are the number of
+    syscall arguments, but they have never been used by the kernel, and
+    some code making syscalls does not even know the number of arguments
+    and needs to pass an arbitrary high number anyway.
+
+    sh3/sh4 traditionally used the trap range 16-31 for syscalls, but part
+    of this range overlapped with hardware exceptions/interrupts on sh2
+    hardware, so an incompatible range 32-47 was chosen for sh2.
+
+    using trap number 31 everywhere, since it's in the existing sh3/sh4
+    range and does not conflict with sh2 hardware, is a proposed
+    unification of the kernel syscall convention that will allow binaries
+    to be shared between sh2 and sh3/sh4. if this is not accepted into the
+    kernel, we can refit the sh2 target with runtime selection mechanisms
+    for the trap number, but doing so would be invasive and would entail
+    non-trivial overhead.
+
+commit 3366a99b17847b58f2d8cc52cbb5d65deb824f8a
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 14:55:06 2015 +0000
+
+    switch sh port's __unmapself to generic version when running on sh2/nommu
+
+    due to the way the interrupt and syscall trap mechanism works,
+    userspace on sh2 must never set the stack pointer to an invalid value.
+    thus, the approach used on most archs, where __unmapself executes with
+    no stack for the interval between SYS_munmap and SYS_exit, is not
+    viable on sh2.
+
+    in order not to pessimize sh3/sh4, the sh asm version of __unmapself
+    is not removed. instead it's renamed and redirected through code that
+    calls either the generic (safe) __unmapself or the sh3/sh4 asm,
+    depending on compile-time and run-time conditions.
+
+commit f9d84554bae0fa17c9a1d724549c4408022228a5
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 14:28:30 2015 +0000
+
+    add support for sh2 interrupt-masking-based atomics to sh port
+
+    the sh2 target is being considered an ISA subset of sh3/sh4, in the
+    sense that binaries built for sh2 are intended to be usable on later
+    cpu models/kernels with mmu support. so rather than hard-coding
+    sh2-specific atomics, the runtime atomic selection mechanism that was
+    already in place has been extended to add sh2 atomics.
+
+    at this time, the sh2 atomics are not SMP-compatible; since the ISA
+    lacks actual atomic operations, the new code instead masks interrupts
+    for the duration of the atomic operation, producing an atomic result
+    on single-core. this is only possible because the kernel/hardware does
+    not impose protections against userspace doing so. additional changes
+    will be needed to support future SMP systems.
+
+    care has been taken to avoid producing significant additional code
+    size in the case where it's known at compile-time that the target is
+    not sh2 and does not need sh2-specific code.
+
+commit 1b0cdc8700d29ef018bf226d74b2b58b23bce91c
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 07:11:19 2015 +0000
+
+    refactor stdio open file list handling, move it out of global libc struct
+
+    functions which open in-memory FILE stream variants all shared a tail
+    with __fdopen, adding the FILE structure to stdio's open file list.
+    replacing this common tail with a function call reduces code size and
+    duplication of logic. the list is also partially encapsulated now.
+
+    function signatures were chosen to facilitate tail call optimization
+    and reduce the need for additional accessor functions.
+
+    with these changes, static linked programs that do not use stdio no
+    longer have an open file list at all.
+
+commit f22a9edaf8a6f2ca1d314d18b3785558279a5c03
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 06:18:00 2015 +0000
+
+    byte-based C locale, phase 3: make MB_CUR_MAX variable to activate code
+
+    this patch activates the new byte-based C locale (high bytes treated
+    as abstract code unit "characters" rather than decoded as multibyte
+    characters) by making the value of MB_CUR_MAX depend on the active
+    locale. for the C locale, the LC_CTYPE category pointer is null,
+    yielding a value of 1. all other locales yield a value of 4.
+
+commit 16f18d036d9a7bf590ee6eb86785c0a9658220b6
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 05:35:31 2015 +0000
+
+    byte-based C locale, phase 2: stdio and iconv (multibyte callers)
+
+    this patch adjusts libc components which use the multibyte functions
+    internally, and which depend on them operating in a particular
+    encoding, to make the appropriate locale changes before calling them
+    and restore the calling thread's locale afterwards. activating the
+    byte-based C locale without these changes would cause regressions in
+    stdio and iconv.
+
+    in the case of iconv, the current implementation was simply using the
+    multibyte functions as UTF-8 conversions. setting a multibyte UTF-8
+    locale for the duration of the iconv operation allows the code to
+    continue working.
+
+    in the case of stdio, POSIX requires that FILE streams have an
+    encoding rule bound at the time of setting wide orientation. as long
+    as all locales, including the C locale, used the same encoding,
+    treating high bytes as UTF-8, there was no need to store an encoding
+    rule as part of the stream's state.
+
+    a new locale field in the FILE structure points to the locale that
+    should be made active during fgetwc/fputwc/ungetwc on the stream. it
+    cannot point to the locale active at the time the stream becomes
+    oriented, because this locale could be mutable (the global locale) or
+    could be destroyed (locale_t objects produced by newlocale) before the
+    stream is closed. instead, a pointer to the static C or C.UTF-8 locale
+    object added in commit aeeac9ca5490d7d90fe061ab72da446c01ddf746
+    is used. this is valid since categories other than LC_CTYPE will not
+    affect these functions.
+
+commit 1507ebf837334e9e07cfab1ca1c2e88449069a80
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 04:44:17 2015 +0000
+
+    byte-based C locale, phase 1: multibyte character handling functions
+
+    this patch makes the functions which work directly on multibyte
+    characters treat the high bytes as individual abstract code units
+    rather than as multibyte sequences when MB_CUR_MAX is 1. since
+    MB_CUR_MAX is presently defined as a constant 4, all of the new code
+    added is dead code, and optimizing compilers' code generation should
+    not be affected at all. a future commit will activate the new code.
+
+    as abstract code units, bytes 0x80 to 0xff are represented by wchar_t
+    values 0xdf80 to 0xdfff, at the end of the surrogates range. this
+    ensures that they will never be misinterpreted as Unicode characters,
+    and that all wctype functions return false for these "characters"
+    without needing locale-specific logic. a high range outside of Unicode
+    such as 0x7fffff80 to 0x7fffffff was also considered, but since C11's
+    char16_t also needs to be able to represent conversions of these
+    bytes, the surrogate range was the natural choice.
+
+commit 38e2f727237230300fea6aff68802db04625fd23
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 16 04:21:38 2015 +0000
+
+    fix btowc corner case
+
+    btowc is required to interpret its argument by conversion to unsigned
+    char, unless the argument is equal to EOF. since the conversion
+    produces a non-character value anyway, we can just unconditionally
+    convert, for now.
+
+commit ee59c296d56bf26f49f354d6eb32b4b6d4190188
+Author: Szabolcs Nagy <nsz@port70.net>
+Date:   Wed Jun 3 10:32:14 2015 +0100
+
+    arm: add vdso support
+
+    vdso will be available on arm in linux v4.2, the user-space code
+    for it is in kernel commit 8512287a8165592466cb9cb347ba94892e9c56a5
+
+commit e3bc22f1eff87b8f029a6ab31f1a269d69e4b053
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sun Jun 14 01:59:02 2015 +0000
+
+    refactor malloc's expand_heap to share with __simple_malloc
+
+    this extends the brk/stack collision protection added to full malloc
+    in commit 276904c2f6bde3a31a24ebfa201482601d18b4f9 to also protect the
+    __simple_malloc function used in static-linked programs that don't
+    reference the free function.
+
+    it also extends support for using mmap when brk fails, which full
+    malloc got in commit 5446303328adf4b4e36d9fba21848e6feb55fab4, to
+    __simple_malloc.
+
+    since __simple_malloc may expand the heap by arbitrarily large
+    increments, the stack collision detection is enhanced to detect
+    interval overlap rather than just proximity of a single address to the
+    stack. code size is increased a bit, but this is partly offset by the
+    sharing of code between the two malloc implementations, which due to
+    linking semantics, both get linked in a program that needs the full
+    malloc with realloc/free support.
+
+commit 4ef9b828c1f39553a69e0635ac91f0fcadd6e8c6
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 13 20:53:02 2015 +0000
+
+    remove cancellation points in stdio
+
+    commit 58165923890865a6ac042fafce13f440ee986fd9 added these optional
+    cancellation points on the basis that cancellable stdio could be
+    useful, to unblock threads stuck on stdio operations that will never
+    complete. however, the only way to ensure that cancellation can
+    achieve this is to violate the rules for side effects when
+    cancellation is acted upon, discarding knowledge of any partial data
+    transfer already completed. our implementation exhibited this behavior
+    and was thus non-conforming.
+
+    in addition to improving correctness, removing these cancellation
+    points moderately reduces code size, and should significantly improve
+    performance on i386, where sysenter/syscall instructions can be used
+    instead of "int $128" for non-cancellable syscalls.
+
+commit 536c6d5a4205e2a3f161f2983ce1e0ac3082187d
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 13 05:17:16 2015 +0000
+
+    fix idiom for setting stdio stream orientation to wide
+
+    the old idiom, f->mode |= f->mode+1, was adapted from the idiom for
+    setting byte orientation, f->mode |= f->mode-1, but the adaptation was
+    incorrect. unless the stream was already set byte-oriented, this code
+    incremented f->mode each time it was executed, which would eventually
+    lead to overflow. it could be fixed by changing it to f->mode |= 1,
+    but upcoming changes will require slightly more work at the time of
+    wide orientation, so it makes sense to just call fwide. as an
+    optimization in the single-character functions, fwide is only called
+    if the stream is not already wide-oriented.
+
+commit f8f565df467c13248104223f99abf7f37cef7584
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 13 04:42:38 2015 +0000
+
+    add printing of null %s arguments as "(null)" in wide printf
+
+    this is undefined, but supported in our implementation of the normal
+    printf, so for consistency the wide variant should support it too.
+
+commit f9e25d813860d53cd1e9b6145cc63375d2fe2529
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 13 04:37:27 2015 +0000
+
+    add %m support to wide printf
+
+commit ec634aad91f57479ef17525e33ed446c780a61f4
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu Jun 11 05:01:04 2015 +0000
+
+    add sh asm for vfork
+
+commit c30cbcb0a646b1f13a22c645616dce624465b883
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed Jun 10 02:27:40 2015 +0000
+
+    implement arch-generic version of __unmapself
+
+    this can be used to put off writing an asm version of __unmapself for
+    new archs, or as a permanent solution on archs where it's not
+    practical or even possible to run momentarily with no stack.
+
+    the concept here is simple: the caller takes a lock on a global shared
+    stack and uses it to make the munmap and exit syscalls. the only trick
+    is unlocking, which must be done after the thread exits, and this is
+    achieved by using the set_tid_address syscall to have the kernel zero
+    and futex-wake the lock word as part of the exit syscall.
+
+commit 276904c2f6bde3a31a24ebfa201482601d18b4f9
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 9 20:30:35 2015 +0000
+
+    in malloc, refuse to use brk if it grows into stack
+
+    the linux/nommu fdpic ELF loader sets up the brk range to overlap
+    entirely with the main thread's stack (but growing from opposite
+    ends), so that the resulting failure mode for malloc is not to return
+    a null pointer but to start returning pointers to memory that overlaps
+    with the caller's stack. needless to say this is extremely dangerous and
+    makes brk unusable.
+
+    since it's non-trivial to detect execution environments that might be
+    affected by this kernel bug, and since the severity of the bug makes
+    any sort of detection that might yield false-negatives unsafe, we
+    instead check the proximity of the brk to the stack pointer each time
+    the brk is to be expanded. both the main thread's stack (where the
+    real known risk lies) and the calling thread's stack are checked. an
+    arbitrary gap distance of 8 MB is imposed, chosen to be larger than
+    linux default main-thread stack reservation sizes and larger than any
+    reasonable stack configuration on nommu.
+
+    the effectiveness of this patch relies on an assumption that the amount
+    by which the brk is being grown is smaller than the gap limit, which
+    is always true for malloc's use of brk. reliance on this assumption is
+    why the check is being done in malloc-specific code and not in __brk.
+
+commit bd1eaceaa3975bd2a2a34e211cff896affaecadf
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue Jun 9 20:09:27 2015 +0000
+
+    fix spurious errors from pwd/grp functions when nscd backend is absent
+
+    for several pwd/grp functions, the only way the caller can distinguish
+    between a successful negative result ("no such user/group") and an
+    internal error is by clearing errno before the call and checking errno
+    afterwards. the nscd backend support code correctly simulated a
+    not-found response on systems where such a backend is not running, but
+    failed to restore errno.
+
+    this commit also fixed an outdated/incorrect comment.
+
+commit 75ce4503950621b11fcc7f1fd1187dbcf3cde312
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sun Jun 7 20:55:23 2015 +0000
+
+    fix regression in pre-v7 arm on kernels with kuser helper removed
+
+    the arm atomics/TLS runtime selection code is called from
+    __set_thread_area and depends on having libc.auxv and __hwcap
+    available. commit 71f099cb7db821c51d8f39dfac622c61e54d794c moved the
+    first call to __set_thread_area to the top of dynamic linking stage 3,
+    before this data is made available, causing the runtime detection code
+    to always see __hwcap as zero and thereby select the atomics/TLS
+    implementations based on kuser helper.
+
+    upcoming work on superh will use similar runtime detection.
+
+    ideally this early-init code should be cleanly refactored and shared
+    between the dynamic linker and static-linked startup.
+
+commit 32f3c4f70633488550c29a2444f819aafdf345ff
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sun Jun 7 03:09:16 2015 +0000
+
+    add multiple inclusion guard to locale_impl.h
+
+commit 04b8360adbb6487f61aa0c00e53ec3a90a5a0d29
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sun Jun 7 02:59:49 2015 +0000
+
+    remove redefinition of MB_CUR_MAX in locale_impl.h
+
+    unless/until the byte-based C locale is implemented, defining
+    MB_CUR_MAX to 1 in the C locale is wrong. no internal code currently
+    uses the MB_CUR_MAX macro, but having it defined inconsistently is
+    error-prone. applications get the value from stdlib.h and were
+    unaffected.
+
+commit 16bf466532d7328e971012b0731ad493b017ad29
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 6 18:53:02 2015 +0000
+
+    make static C and C.UTF-8 locales available outside of newlocale
+
+commit 312eea2ea4f4363fb01b73660c08bfcf43dd3bb4
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 6 18:20:30 2015 +0000
+
+    remove another invalid skip of locking in ungetwc
+
+commit 3d7e32d28dc9962e9efc1c317c5b44b5b2df3008
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 6 18:16:22 2015 +0000
+
+    add macro version of ctype.h isascii function
+
+    presumably internal code (ungetwc and fputwc) was written assuming a
+    macro implementation existed; otherwise use of isascii is just a
+    pessimization.
+
+commit 7e816a6487932cbb3cb71d94b609e50e81f4e5bf
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat Jun 6 18:11:17 2015 +0000
+
+    remove invalid skip of locking in ungetwc
+
+    aside from being invalid, the early check only optimized the error
+    case, and likely pessimized the common case by separating the
+    two branches on isascii(c) at opposite ends of the function.
+
+commit 63f4b9f18f3674124d8bcb119739fec85e6da005
+Author: Timo Teräs <timo.teras@iki.fi>
+Date:   Fri Jun 5 10:39:42 2015 +0300
+
+    fix uselocale((locale_t)0) not to modify locale
+
+    commit 68630b55c0c7219fe9df70dc28ffbf9efc8021d8 made the new locale to
+    be assigned unconditionally resulting in crashes later on.
+---
+ arch/arm/syscall_arch.h            |  4 ++
+ arch/sh/src/__set_thread_area.c    | 34 ++++++++++++++++
+ arch/sh/src/__unmapself.c          | 19 +++++++++
+ arch/sh/src/atomic.c               | 72 ++++++++++++++++++++++++++++++----
+ arch/sh/src/sh_atomic.h            | 15 +++++++
+ arch/sh/syscall_arch.h             |  2 +-
+ include/ctype.h                    |  1 +
+ include/stdlib.h                   |  3 +-
+ src/ctype/__ctype_get_mb_cur_max.c |  5 ++-
+ src/ctype/isascii.c                |  1 +
+ src/env/__stack_chk_fail.c         |  4 ++
+ src/internal/libc.h                |  2 -
+ src/internal/locale_impl.h         | 12 ++++++
+ src/internal/sh/syscall.s          |  2 +-
+ src/internal/stdio_impl.h          |  6 ++-
+ src/ldso/dynlink.c                 | 37 +++++++++---------
+ src/locale/c_locale.c              | 15 +++++++
+ src/locale/iconv.c                 |  6 +++
+ src/locale/langinfo.c              |  3 +-
+ src/locale/locale_map.c            | 12 +-----
+ src/locale/newlocale.c             | 15 ++-----
+ src/locale/uselocale.c             |  4 +-
+ src/malloc/expand_heap.c           | 72 ++++++++++++++++++++++++++++++++++
+ src/malloc/lite_malloc.c           | 49 ++++++++++++-----------
+ src/malloc/malloc.c                | 80 ++++++++++++++------------------------
+ src/multibyte/btowc.c              |  5 ++-
+ src/multibyte/internal.h           |  7 ++++
+ src/multibyte/mbrtowc.c            |  2 +
+ src/multibyte/mbsrtowcs.c          | 19 +++++++++
+ src/multibyte/mbtowc.c             |  2 +
+ src/multibyte/wcrtomb.c            |  9 +++++
+ src/multibyte/wctob.c              |  4 +-
+ src/passwd/nscd_query.c            | 12 ++++--
+ src/process/sh/vfork.s             | 23 +++++++++++
+ src/regex/fnmatch.c                |  3 +-
+ src/signal/sh/restore.s            |  4 +-
+ src/stdio/__fdopen.c               |  8 +---
+ src/stdio/__stdio_exit.c           |  3 +-
+ src/stdio/__stdio_read.c           | 11 +-----
+ src/stdio/__stdio_write.c          | 14 +------
+ src/stdio/fclose.c                 |  6 +--
+ src/stdio/fflush.c                 |  5 +--
+ src/stdio/fgetwc.c                 | 15 +++++--
+ src/stdio/fmemopen.c               |  8 +---
+ src/stdio/fopen.c                  |  2 +-
+ src/stdio/fputwc.c                 |  7 +++-
+ src/stdio/fputws.c                 |  7 +++-
+ src/stdio/fwide.c                  | 11 +++---
+ src/stdio/ofl.c                    | 16 ++++++++
+ src/stdio/ofl_add.c                | 11 ++++++
+ src/stdio/open_memstream.c         |  8 +---
+ src/stdio/open_wmemstream.c        |  8 +---
+ src/stdio/ungetwc.c                | 18 ++++-----
+ src/stdio/vfwprintf.c              |  5 ++-
+ src/stdio/vfwscanf.c               |  2 +-
+ src/thread/__unmapself.c           | 29 ++++++++++++++
+ src/thread/mips/__unmapself.s      |  1 +
+ src/thread/pthread_create.c        |  6 ++-
+ src/thread/sh/__set_thread_area.s  |  6 ---
+ src/thread/sh/__unmapself.s        | 10 ++---
+ src/thread/sh/clone.s              |  4 +-
+ src/thread/sh/syscall_cp.s         |  2 +-
+ src/unistd/sh/pipe.s               |  2 +-
+ 63 files changed, 548 insertions(+), 242 deletions(-)
+ create mode 100644 arch/sh/src/__set_thread_area.c
+ create mode 100644 arch/sh/src/__unmapself.c
+ create mode 100644 arch/sh/src/sh_atomic.h
+ create mode 100644 src/locale/c_locale.c
+ create mode 100644 src/malloc/expand_heap.c
+ create mode 100644 src/process/sh/vfork.s
+ create mode 100644 src/stdio/ofl.c
+ create mode 100644 src/stdio/ofl_add.c
+
+diff --git a/arch/arm/syscall_arch.h b/arch/arm/syscall_arch.h
+index 199ad2a..64461ec 100644
+--- a/arch/arm/syscall_arch.h
++++ b/arch/arm/syscall_arch.h
+@@ -72,3 +72,7 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
+ 	register long r5 __asm__("r5") = f;
+ 	__asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
+ }
++
++#define VDSO_USEFUL
++#define VDSO_CGT_SYM "__vdso_clock_gettime"
++#define VDSO_CGT_VER "LINUX_2.6"
+diff --git a/arch/sh/src/__set_thread_area.c b/arch/sh/src/__set_thread_area.c
+new file mode 100644
+index 0000000..1d3e022
+--- /dev/null
++++ b/arch/sh/src/__set_thread_area.c
+@@ -0,0 +1,34 @@
++#include "pthread_impl.h"
++#include "libc.h"
++#include "sh_atomic.h"
++#include <elf.h>
++
++/* Also perform sh-specific init */
++
++#define CPU_HAS_LLSC 0x0040
++
++__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu;
++
++int __set_thread_area(void *p)
++{
++	size_t *aux;
++	__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
++#ifndef __SH4A__
++	if (__hwcap & CPU_HAS_LLSC) {
++		__sh_atomic_model = SH_A_LLSC;
++		return 0;
++	}
++#if !defined(__SH3__) && !defined(__SH4__)
++	for (aux=libc.auxv; *aux; aux+=2) {
++		if (*aux != AT_PLATFORM) continue;
++		const char *s = (void *)aux[1];
++		if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
++		__sh_atomic_model = SH_A_IMASK;
++		__sh_nommu = 1;
++		return 0;
++	}
++#endif
++	/* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */
++#endif
++	return 0;
++}
+diff --git a/arch/sh/src/__unmapself.c b/arch/sh/src/__unmapself.c
+new file mode 100644
+index 0000000..4df9e7b
+--- /dev/null
++++ b/arch/sh/src/__unmapself.c
+@@ -0,0 +1,19 @@
++#include "pthread_impl.h"
++
++void __unmapself_sh_mmu(void *, size_t);
++void __unmapself_sh_nommu(void *, size_t);
++
++#if !defined(__SH3__) && !defined(__SH4__)
++#define __unmapself __unmapself_sh_nommu
++#include "../../../src/thread/__unmapself.c"
++#undef __unmapself
++extern __attribute__((__visibility__("hidden"))) unsigned __sh_nommu;
++#else
++#define __sh_nommu 0
++#endif
++
++void __unmapself(void *base, size_t size)
++{
++	if (__sh_nommu) __unmapself_sh_nommu(base, size);
++	else __unmapself_sh_mmu(base, size);
++}
+diff --git a/arch/sh/src/atomic.c b/arch/sh/src/atomic.c
+index f8c615f..7fd7307 100644
+--- a/arch/sh/src/atomic.c
++++ b/arch/sh/src/atomic.c
+@@ -1,8 +1,26 @@
+ #ifndef __SH4A__
+ 
++#include "sh_atomic.h"
+ #include "atomic.h"
+ #include "libc.h"
+ 
++static inline unsigned mask()
++{
++	unsigned sr;
++	__asm__ __volatile__ ( "\n"
++	"	stc sr,r0 \n"
++	"	mov r0,%0 \n"
++	"	or #0xf0,r0 \n"
++	"	ldc r0,sr \n"
++	: "=&r"(sr) : : "memory", "r0" );
++	return sr;
++}
++
++static inline void unmask(unsigned sr)
++{
++	__asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" );
++}
++
+ /* gusa is a hack in the kernel which lets you create a sequence of instructions
+  * which will be restarted if the process is preempted in the middle of the
+  * sequence. It will do for implementing atomics on non-smp systems. ABI is:
+@@ -25,11 +43,17 @@
+ 	"	mov.l " new ", @" mem "\n" \
+ 	"1:	mov r1, r15\n"
+ 
+-#define CPU_HAS_LLSC 0x0040
+-
+ int __sh_cas(volatile int *p, int t, int s)
+ {
+-	if (__hwcap & CPU_HAS_LLSC) return __sh_cas_llsc(p, t, s);
++	if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s);
++
++	if (__sh_atomic_model == SH_A_IMASK) {
++		unsigned sr = mask();
++		int old = *p;
++		if (old==t) *p = s;
++		unmask(sr);
++		return old;
++	}
+ 
+ 	int old;
+ 	__asm__ __volatile__(
+@@ -43,7 +67,15 @@ int __sh_cas(volatile int *p, int t, int s)
+ 
+ int __sh_swap(volatile int *x, int v)
+ {
+-	if (__hwcap & CPU_HAS_LLSC) return __sh_swap_llsc(x, v);
++	if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v);
++
++	if (__sh_atomic_model == SH_A_IMASK) {
++		unsigned sr = mask();
++		int old = *x;
++		*x = v;
++		unmask(sr);
++		return old;
++	}
+ 
+ 	int old;
+ 	__asm__ __volatile__(
+@@ -55,7 +87,15 @@ int __sh_swap(volatile int *x, int v)
+ 
+ int __sh_fetch_add(volatile int *x, int v)
+ {
+-	if (__hwcap & CPU_HAS_LLSC) return __sh_fetch_add_llsc(x, v);
++	if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v);
++
++	if (__sh_atomic_model == SH_A_IMASK) {
++		unsigned sr = mask();
++		int old = *x;
++		*x = old + v;
++		unmask(sr);
++		return old;
++	}
+ 
+ 	int old, dummy;
+ 	__asm__ __volatile__(
+@@ -69,7 +109,7 @@ int __sh_fetch_add(volatile int *x, int v)
+ 
+ void __sh_store(volatile int *p, int x)
+ {
+-	if (__hwcap & CPU_HAS_LLSC) return __sh_store_llsc(p, x);
++	if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x);
+ 	__asm__ __volatile__(
+ 		"	mov.l %1, @%0\n"
+ 		: : "r"(p), "r"(x) : "memory");
+@@ -77,7 +117,15 @@ void __sh_store(volatile int *p, int x)
+ 
+ void __sh_and(volatile int *x, int v)
+ {
+-	if (__hwcap & CPU_HAS_LLSC) return __sh_and_llsc(x, v);
++	if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v);
++
++	if (__sh_atomic_model == SH_A_IMASK) {
++		unsigned sr = mask();
++		int old = *x;
++		*x = old & v;
++		unmask(sr);
++		return;
++	}
+ 
+ 	int dummy;
+ 	__asm__ __volatile__(
+@@ -89,7 +137,15 @@ void __sh_and(volatile int *x, int v)
+ 
+ void __sh_or(volatile int *x, int v)
+ {
+-	if (__hwcap & CPU_HAS_LLSC) return __sh_or_llsc(x, v);
++	if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v);
++
++	if (__sh_atomic_model == SH_A_IMASK) {
++		unsigned sr = mask();
++		int old = *x;
++		*x = old | v;
++		unmask(sr);
++		return;
++	}
+ 
+ 	int dummy;
+ 	__asm__ __volatile__(
+diff --git a/arch/sh/src/sh_atomic.h b/arch/sh/src/sh_atomic.h
+new file mode 100644
+index 0000000..054c2a3
+--- /dev/null
++++ b/arch/sh/src/sh_atomic.h
+@@ -0,0 +1,15 @@
++#ifndef _SH_ATOMIC_H
++#define _SH_ATOMIC_H
++
++#define SH_A_GUSA 0
++#define SH_A_LLSC 1
++#define SH_A_CAS 2
++#if !defined(__SH3__) && !defined(__SH4__)
++#define SH_A_IMASK 3
++#else
++#define SH_A_IMASK -1LL /* unmatchable by unsigned int */
++#endif
++
++extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model;
++
++#endif
+diff --git a/arch/sh/syscall_arch.h b/arch/sh/syscall_arch.h
+index 7ee21a5..f63675a 100644
+--- a/arch/sh/syscall_arch.h
++++ b/arch/sh/syscall_arch.h
+@@ -8,7 +8,7 @@
+  */
+ #define __asm_syscall(trapno, ...) do {   \
+ 	__asm__ __volatile__ (                \
+-		"trapa #" #trapno "\n"            \
++		"trapa #31\n"            \
+ 		"or r0, r0\n"                     \
+ 		"or r0, r0\n"                     \
+ 		"or r0, r0\n"                     \
+diff --git a/include/ctype.h b/include/ctype.h
+index cd2e016..7936536 100644
+--- a/include/ctype.h
++++ b/include/ctype.h
+@@ -64,6 +64,7 @@ int   isascii(int);
+ int   toascii(int);
+ #define _tolower(a) ((a)|0x20)
+ #define _toupper(a) ((a)&0x5f)
++#define isascii(a) (0 ? isascii(a) : (unsigned)(a) < 128)
+ 
+ #endif
+ 
+diff --git a/include/stdlib.h b/include/stdlib.h
+index 97ce5a7..d2c911f 100644
+--- a/include/stdlib.h
++++ b/include/stdlib.h
+@@ -76,7 +76,8 @@ size_t wcstombs (char *__restrict, const wchar_t *__restrict, size_t);
+ #define EXIT_FAILURE 1
+ #define EXIT_SUCCESS 0
+ 
+-#define MB_CUR_MAX ((size_t)+4)
++size_t __ctype_get_mb_cur_max(void);
++#define MB_CUR_MAX (__ctype_get_mb_cur_max())
+ 
+ #define RAND_MAX (0x7fffffff)
+ 
+diff --git a/src/ctype/__ctype_get_mb_cur_max.c b/src/ctype/__ctype_get_mb_cur_max.c
+index d235f4d..8e946fc 100644
+--- a/src/ctype/__ctype_get_mb_cur_max.c
++++ b/src/ctype/__ctype_get_mb_cur_max.c
+@@ -1,6 +1,7 @@
+-#include <stddef.h>
++#include <stdlib.h>
++#include "locale_impl.h"
+ 
+ size_t __ctype_get_mb_cur_max()
+ {
+-	return 4;
++	return MB_CUR_MAX;
+ }
+diff --git a/src/ctype/isascii.c b/src/ctype/isascii.c
+index 3af0a10..54ad3bf 100644
+--- a/src/ctype/isascii.c
++++ b/src/ctype/isascii.c
+@@ -1,4 +1,5 @@
+ #include <ctype.h>
++#undef isascii
+ 
+ int isascii(int c)
+ {
+diff --git a/src/env/__stack_chk_fail.c b/src/env/__stack_chk_fail.c
+index 47784c6..be0c184 100644
+--- a/src/env/__stack_chk_fail.c
++++ b/src/env/__stack_chk_fail.c
+@@ -25,4 +25,8 @@ void __stack_chk_fail_local(void)
+ 	a_crash();
+ }
+ 
++#else
++
++weak_alias(__stack_chk_fail, __stack_chk_fail_local);
++
+ #endif
+diff --git a/src/internal/libc.h b/src/internal/libc.h
+index 6810cd8..98c7535 100644
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -17,8 +17,6 @@ struct __libc {
+ 	int secure;
+ 	volatile int threads_minus_1;
+ 	size_t *auxv;
+-	FILE *ofl_head;
+-	volatile int ofl_lock[2];
+ 	size_t tls_size;
+ 	size_t page_size;
+ 	struct __locale_struct global_locale;
+diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h
+index 9b8385e..f5e4d9b 100644
+--- a/src/internal/locale_impl.h
++++ b/src/internal/locale_impl.h
+@@ -1,3 +1,6 @@
++#ifndef _LOCALE_IMPL_H
++#define _LOCALE_IMPL_H
++
+ #include <locale.h>
+ #include <stdlib.h>
+ #include "libc.h"
+@@ -12,6 +15,10 @@ struct __locale_map {
+ 	const struct __locale_map *next;
+ };
+ 
++extern const struct __locale_map __c_dot_utf8;
++extern const struct __locale_struct __c_locale;
++extern const struct __locale_struct __c_dot_utf8_locale;
++
+ const struct __locale_map *__get_locale(int, const char *);
+ const char *__mo_lookup(const void *, size_t, const char *);
+ const char *__lctrans(const char *, const struct __locale_map *);
+@@ -20,9 +27,14 @@ const char *__lctrans_cur(const char *);
+ #define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)])
+ #define LCTRANS_CUR(msg) __lctrans_cur(msg)
+ 
++#define C_LOCALE ((locale_t)&__c_locale)
++#define UTF8_LOCALE ((locale_t)&__c_dot_utf8_locale)
++
+ #define CURRENT_LOCALE (__pthread_self()->locale)
+ 
+ #define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE])
+ 
+ #undef MB_CUR_MAX
+ #define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
++
++#endif
+diff --git a/src/internal/sh/syscall.s b/src/internal/sh/syscall.s
+index d00712a..331918a 100644
+--- a/src/internal/sh/syscall.s
++++ b/src/internal/sh/syscall.s
+@@ -13,7 +13,7 @@ __syscall:
+ 	mov.l @r15, r7
+ 	mov.l @(4,r15), r0
+ 	mov.l @(8,r15), r1
+-	trapa #22
++	trapa #31
+ 	or r0, r0
+ 	or r0, r0
+ 	or r0, r0
+diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h
+index e1325fe..0dd7fb5 100644
+--- a/src/internal/stdio_impl.h
++++ b/src/internal/stdio_impl.h
+@@ -47,6 +47,7 @@ struct _IO_FILE {
+ 	unsigned char *shend;
+ 	off_t shlim, shcnt;
+ 	FILE *prev_locked, *next_locked;
++	struct __locale_struct *locale;
+ };
+ 
+ size_t __stdio_read(FILE *, unsigned char *, size_t);
+@@ -75,8 +76,9 @@ int __putc_unlocked(int, FILE *);
+ FILE *__fdopen(int, const char *);
+ int __fmodeflags(const char *);
+ 
+-#define OFLLOCK() LOCK(libc.ofl_lock)
+-#define OFLUNLOCK() UNLOCK(libc.ofl_lock)
++FILE *__ofl_add(FILE *f);
++FILE **__ofl_lock(void);
++void __ofl_unlock(void);
+ 
+ #define feof(f) ((f)->flags & F_EOF)
+ #define ferror(f) ((f)->flags & F_ERR)
+diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
+index 42b056d..7e56693 100644
+--- a/src/ldso/dynlink.c
++++ b/src/ldso/dynlink.c
+@@ -536,7 +536,8 @@ static void *map_library(int fd, struct dso *dso)
+ 	}
+ 	for (i=0; ((size_t *)(base+dyn))[i]; i+=2)
+ 		if (((size_t *)(base+dyn))[i]==DT_TEXTREL) {
+-			if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC) < 0)
++			if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC)
++			    && errno != ENOSYS)
+ 				goto error;
+ 			break;
+ 		}
+@@ -927,7 +928,8 @@ static void reloc_all(struct dso *p)
+ 		do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
+ 
+ 		if (head != &ldso && p->relro_start != p->relro_end &&
+-		    mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ) < 0) {
++		    mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ)
++		    && errno != ENOSYS) {
+ 			error("Error relocating %s: RELRO protection failed: %m",
+ 				p->name);
+ 			if (runtime) longjmp(*rtld_fail, 1);
+@@ -1192,6 +1194,17 @@ _Noreturn void __dls3(size_t *sp)
+ 	char **argv_orig = argv;
+ 	char **envp = argv+argc+1;
+ 
++	/* Find aux vector just past environ[] and use it to initialize
++	 * global data that may be needed before we can make syscalls. */
++	__environ = envp;
++	for (i=argc+1; argv[i]; i++);
++	libc.auxv = auxv = (void *)(argv+i+1);
++	decode_vec(auxv, aux, AUX_CNT);
++	__hwcap = aux[AT_HWCAP];
++	libc.page_size = aux[AT_PAGESZ];
++	libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
++		|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
++
+ 	/* Setup early thread pointer in builtin_tls for ldso/libc itself to
+ 	 * use during dynamic linking. If possible it will also serve as the
+ 	 * thread pointer at runtime. */
+@@ -1200,25 +1213,11 @@ _Noreturn void __dls3(size_t *sp)
+ 		a_crash();
+ 	}
+ 
+-	/* Find aux vector just past environ[] */
+-	for (i=argc+1; argv[i]; i++)
+-		if (!memcmp(argv[i], "LD_LIBRARY_PATH=", 16))
+-			env_path = argv[i]+16;
+-		else if (!memcmp(argv[i], "LD_PRELOAD=", 11))
+-			env_preload = argv[i]+11;
+-	auxv = (void *)(argv+i+1);
+-
+-	decode_vec(auxv, aux, AUX_CNT);
+-
+ 	/* Only trust user/env if kernel says we're not suid/sgid */
+-	if ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
+-	  || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
+-		env_path = 0;
+-		env_preload = 0;
+-		libc.secure = 1;
++	if (!libc.secure) {
++		env_path = getenv("LD_LIBRARY_PATH");
++		env_preload = getenv("LD_PRELOAD");
+ 	}
+-	libc.page_size = aux[AT_PAGESZ];
+-	libc.auxv = auxv;
+ 
+ 	/* If the main program was already loaded by the kernel,
+ 	 * AT_PHDR will point to some location other than the dynamic
+diff --git a/src/locale/c_locale.c b/src/locale/c_locale.c
+new file mode 100644
+index 0000000..77ccf58
+--- /dev/null
++++ b/src/locale/c_locale.c
+@@ -0,0 +1,15 @@
++#include "locale_impl.h"
++#include <stdint.h>
++
++static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
++
++const struct __locale_map __c_dot_utf8 = {
++	.map = empty_mo,
++	.map_size = sizeof empty_mo,
++	.name = "C.UTF-8"
++};
++
++const struct __locale_struct __c_locale = { 0 };
++const struct __locale_struct __c_dot_utf8_locale = {
++	.cat[LC_CTYPE] = &__c_dot_utf8
++};
+diff --git a/src/locale/iconv.c b/src/locale/iconv.c
+index e6121ae..1eeea94 100644
+--- a/src/locale/iconv.c
++++ b/src/locale/iconv.c
+@@ -5,6 +5,7 @@
+ #include <stdlib.h>
+ #include <limits.h>
+ #include <stdint.h>
++#include "locale_impl.h"
+ 
+ #define UTF_32BE    0300
+ #define UTF_16LE    0301
+@@ -165,9 +166,12 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
+ 	int err;
+ 	unsigned char type = map[-1];
+ 	unsigned char totype = tomap[-1];
++	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+ 
+ 	if (!in || !*in || !*inb) return 0;
+ 
++	*ploc = UTF8_LOCALE;
++
+ 	for (; *inb; *in+=l, *inb-=l) {
+ 		c = *(unsigned char *)*in;
+ 		l = 1;
+@@ -431,6 +435,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
+ 			break;
+ 		}
+ 	}
++	*ploc = loc;
+ 	return x;
+ ilseq:
+ 	err = EILSEQ;
+@@ -445,5 +450,6 @@ starved:
+ 	x = -1;
+ end:
+ 	errno = err;
++	*ploc = loc;
+ 	return x;
+ }
+diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c
+index a1ada24..776b447 100644
+--- a/src/locale/langinfo.c
++++ b/src/locale/langinfo.c
+@@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, locale_t loc)
+ 	int idx = item & 65535;
+ 	const char *str;
+ 
+-	if (item == CODESET) return "UTF-8";
++	if (item == CODESET)
++		return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8";
+ 	
+ 	switch (cat) {
+ 	case LC_NUMERIC:
+diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c
+index 4346bb0..c3e5917 100644
+--- a/src/locale/locale_map.c
++++ b/src/locale/locale_map.c
+@@ -24,14 +24,6 @@ static const char envvars[][12] = {
+ 	"LC_MESSAGES",
+ };
+ 
+-static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
+-
+-const struct __locale_map __c_dot_utf8 = {
+-	.map = empty_mo,
+-	.map_size = sizeof empty_mo,
+-	.name = "C.UTF-8"
+-};
+-
+ const struct __locale_map *__get_locale(int cat, const char *val)
+ {
+ 	static int lock[2];
+@@ -107,8 +99,8 @@ const struct __locale_map *__get_locale(int cat, const char *val)
+ 	 * sake of being able to do message translations at the
+ 	 * application level. */
+ 	if (!new && (new = malloc(sizeof *new))) {
+-		new->map = empty_mo;
+-		new->map_size = sizeof empty_mo;
++		new->map = __c_dot_utf8.map;
++		new->map_size = __c_dot_utf8.map_size;
+ 		memcpy(new->name, val, n);
+ 		new->name[n] = 0;
+ 		new->next = loc_head;
+diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c
+index 89d36b1..f50bbe9 100644
+--- a/src/locale/newlocale.c
++++ b/src/locale/newlocale.c
+@@ -3,16 +3,9 @@
+ #include "locale_impl.h"
+ #include "libc.h"
+ 
+-extern const struct __locale_map __c_dot_utf8;
+-
+-static const struct __locale_struct c_locale = { 0 };
+-static const struct __locale_struct c_dot_utf8_locale = {
+-	.cat[LC_CTYPE] = &__c_dot_utf8
+-};
+-
+ int __loc_is_allocated(locale_t loc)
+ {
+-	return loc && loc != &c_locale && loc != &c_dot_utf8_locale;
++	return loc && loc != C_LOCALE && loc != UTF8_LOCALE;
+ }
+ 
+ locale_t __newlocale(int mask, const char *name, locale_t loc)
+@@ -44,9 +37,9 @@ locale_t __newlocale(int mask, const char *name, locale_t loc)
+ 	}
+ 
+ 	if (!j)
+-		return (locale_t)&c_locale;
+-	if (j==1 && tmp.cat[LC_CTYPE]==c_dot_utf8_locale.cat[LC_CTYPE])
+-		return (locale_t)&c_dot_utf8_locale;
++		return C_LOCALE;
++	if (j==1 && tmp.cat[LC_CTYPE]==&__c_dot_utf8)
++		return UTF8_LOCALE;
+ 
+ 	if ((loc = malloc(sizeof *loc))) *loc = tmp;
+ 
+diff --git a/src/locale/uselocale.c b/src/locale/uselocale.c
+index b70a0c1..0fc5ecb 100644
+--- a/src/locale/uselocale.c
++++ b/src/locale/uselocale.c
+@@ -8,9 +8,7 @@ locale_t __uselocale(locale_t new)
+ 	locale_t old = self->locale;
+ 	locale_t global = &libc.global_locale;
+ 
+-	if (new == LC_GLOBAL_LOCALE) new = global;
+-
+-	self->locale = new;
++	if (new) self->locale = new == LC_GLOBAL_LOCALE ? global : new;
+ 
+ 	return old == global ? LC_GLOBAL_LOCALE : old;
+ }
+diff --git a/src/malloc/expand_heap.c b/src/malloc/expand_heap.c
+new file mode 100644
+index 0000000..d8c0be7
+--- /dev/null
++++ b/src/malloc/expand_heap.c
+@@ -0,0 +1,72 @@
++#include <limits.h>
++#include <stdint.h>
++#include <errno.h>
++#include <sys/mman.h>
++#include "libc.h"
++#include "syscall.h"
++
++/* This function returns true if the interval [old,new]
++ * intersects the 'len'-sized interval below &libc.auxv
++ * (interpreted as the main-thread stack) or below &b
++ * (the current stack). It is used to defend against
++ * buggy brk implementations that can cross the stack. */
++
++static int traverses_stack_p(uintptr_t old, uintptr_t new)
++{
++	const uintptr_t len = 8<<20;
++	uintptr_t a, b;
++
++	b = (uintptr_t)libc.auxv;
++	a = b > len ? b-len : 0;
++	if (new>a && old<b) return 1;
++
++	b = (uintptr_t)&b;
++	a = b > len ? b-len : 0;
++	if (new>a && old<b) return 1;
++
++	return 0;
++}
++
++void *__mmap(void *, size_t, int, int, int, off_t);
++
++/* Expand the heap in-place if brk can be used, or otherwise via mmap,
++ * using an exponential lower bound on growth by mmap to make
++ * fragmentation asymptotically irrelevant. The size argument is both
++ * an input and an output, since the caller needs to know the size
++ * allocated, which will be larger than requested due to page alignment
++ * and mmap minimum size rules. The caller is responsible for locking
++ * to prevent concurrent calls. */
++
++void *__expand_heap(size_t *pn)
++{
++	static uintptr_t brk;
++	static unsigned mmap_step;
++	size_t n = *pn;
++
++	if (n > SIZE_MAX/2 - PAGE_SIZE) {
++		errno = ENOMEM;
++		return 0;
++	}
++	n += -n & PAGE_SIZE-1;
++
++	if (!brk) {
++		brk = __syscall(SYS_brk, 0);
++		brk += -brk & PAGE_SIZE-1;
++	}
++
++	if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
++	    && __syscall(SYS_brk, brk+n)==brk+n) {
++		*pn = n;
++		brk += n;
++		return (void *)(brk-n);
++	}
++
++	size_t min = (size_t)PAGE_SIZE << mmap_step/2;
++	if (n < min) n = min;
++	void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
++		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
++	if (area == MAP_FAILED) return 0;
++	*pn = n;
++	mmap_step++;
++	return area;
++}
+diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c
+index 7643fc2..008549d 100644
+--- a/src/malloc/lite_malloc.c
++++ b/src/malloc/lite_malloc.c
+@@ -4,43 +4,46 @@
+ #include <errno.h>
+ #include "libc.h"
+ 
+-uintptr_t __brk(uintptr_t);
+-
+ #define ALIGN 16
+ 
++void *__expand_heap(size_t *);
++
+ void *__simple_malloc(size_t n)
+ {
+-	static uintptr_t cur, brk;
+-	uintptr_t base, new;
++	static char *cur, *end;
+ 	static volatile int lock[2];
+-	size_t align=1;
++	size_t align=1, pad;
++	void *p;
+ 
+ 	if (!n) n++;
+-	if (n > SIZE_MAX/2) goto toobig;
+-
+ 	while (align<n && align<ALIGN)
+ 		align += align;
+-	n = n + align - 1 & -align;
+ 
+ 	LOCK(lock);
+-	if (!cur) cur = brk = __brk(0)+16;
+-	base = cur + align-1 & -align;
+-	if (n > SIZE_MAX - PAGE_SIZE - base) goto fail;
+-	if (base+n > brk) {
+-		new = base+n + PAGE_SIZE-1 & -PAGE_SIZE;
+-		if (__brk(new) != new) goto fail;
+-		brk = new;
+-	}
+-	cur = base+n;
+-	UNLOCK(lock);
+ 
+-	return (void *)base;
++	pad = -(uintptr_t)cur & align-1;
++
++	if (n <= SIZE_MAX/2 + ALIGN) n += pad;
++
++	if (n > end-cur) {
++		size_t m = n;
++		char *new = __expand_heap(&m);
++		if (!new) {
++			UNLOCK(lock);
++			return 0;
++		}
++		if (new != end) {
++			cur = new;
++			n -= pad;
++			pad = 0;
++		}
++		end = new + m;
++	}
+ 
+-fail:
++	p = cur + pad;
++	cur += n;
+ 	UNLOCK(lock);
+-toobig:
+-	errno = ENOMEM;
+-	return 0;
++	return p;
+ }
+ 
+ weak_alias(__simple_malloc, malloc);
+diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
+index d4de2dc..290fda1 100644
+--- a/src/malloc/malloc.c
++++ b/src/malloc/malloc.c
+@@ -13,7 +13,6 @@
+ #define inline inline __attribute__((always_inline))
+ #endif
+ 
+-uintptr_t __brk(uintptr_t);
+ void *__mmap(void *, size_t, int, int, int, off_t);
+ int __munmap(void *, size_t);
+ void *__mremap(void *, size_t, size_t, int, ...);
+@@ -31,13 +30,9 @@ struct bin {
+ };
+ 
+ static struct {
+-	uintptr_t brk;
+-	size_t *heap;
+ 	volatile uint64_t binmap;
+ 	struct bin bins[64];
+-	volatile int brk_lock[2];
+ 	volatile int free_lock[2];
+-	unsigned mmap_step;
+ } mal;
+ 
+ 
+@@ -152,69 +147,52 @@ void __dump_heap(int x)
+ }
+ #endif
+ 
++void *__expand_heap(size_t *);
++
+ static struct chunk *expand_heap(size_t n)
+ {
+-	static int init;
++	static int heap_lock[2];
++	static void *end;
++	void *p;
+ 	struct chunk *w;
+-	uintptr_t new;
+-
+-	lock(mal.brk_lock);
+ 
+-	if (!init) {
+-		mal.brk = __brk(0);
+-#ifdef SHARED
+-		mal.brk = mal.brk + PAGE_SIZE-1 & -PAGE_SIZE;
+-#endif
+-		mal.brk = mal.brk + 2*SIZE_ALIGN-1 & -SIZE_ALIGN;
+-		mal.heap = (void *)mal.brk;
+-		init = 1;
+-	}
++	/* The argument n already accounts for the caller's chunk
++	 * overhead needs, but if the heap can't be extended in-place,
++	 * we need room for an extra zero-sized sentinel chunk. */
++	n += SIZE_ALIGN;
+ 
+-	if (n > SIZE_MAX - mal.brk - 2*PAGE_SIZE) goto fail;
+-	new = mal.brk + n + SIZE_ALIGN + PAGE_SIZE - 1 & -PAGE_SIZE;
+-	n = new - mal.brk;
++	lock(heap_lock);
+ 
+-	if (__brk(new) != new) {
+-		size_t min = (size_t)PAGE_SIZE << mal.mmap_step/2;
+-		n += -n & PAGE_SIZE-1;
+-		if (n < min) n = min;
+-		void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
+-			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+-		if (area == MAP_FAILED) goto fail;
++	p = __expand_heap(&n);
++	if (!p) {
++		unlock(heap_lock);
++		return 0;
++	}
+ 
+-		mal.mmap_step++;
+-		area = (char *)area + SIZE_ALIGN - OVERHEAD;
+-		w = area;
++	/* If not just expanding existing space, we need to make a
++	 * new sentinel chunk below the allocated space. */
++	if (p != end) {
++		/* Valid/safe because of the prologue increment. */
+ 		n -= SIZE_ALIGN;
++		p = (char *)p + SIZE_ALIGN;
++		w = MEM_TO_CHUNK(p);
+ 		w->psize = 0 | C_INUSE;
+-		w->csize = n | C_INUSE;
+-		w = NEXT_CHUNK(w);
+-		w->psize = n | C_INUSE;
+-		w->csize = 0 | C_INUSE;
+-
+-		unlock(mal.brk_lock);
+-
+-		return area;
+ 	}
+ 
+-	w = MEM_TO_CHUNK(mal.heap);
+-	w->psize = 0 | C_INUSE;
+-
+-	w = MEM_TO_CHUNK(new);
++	/* Record new heap end and fill in footer. */
++	end = (char *)p + n;
++	w = MEM_TO_CHUNK(end);
+ 	w->psize = n | C_INUSE;
+ 	w->csize = 0 | C_INUSE;
+ 
+-	w = MEM_TO_CHUNK(mal.brk);
++	/* Fill in header, which may be new or may be replacing a
++	 * zero-size sentinel header at the old end-of-heap. */
++	w = MEM_TO_CHUNK(p);
+ 	w->csize = n | C_INUSE;
+-	mal.brk = new;
+-	
+-	unlock(mal.brk_lock);
++
++	unlock(heap_lock);
+ 
+ 	return w;
+-fail:
+-	unlock(mal.brk_lock);
+-	errno = ENOMEM;
+-	return 0;
+ }
+ 
+ static int adjust_size(size_t *n)
+diff --git a/src/multibyte/btowc.c b/src/multibyte/btowc.c
+index 9d2c3b1..8acd0a2 100644
+--- a/src/multibyte/btowc.c
++++ b/src/multibyte/btowc.c
+@@ -1,7 +1,10 @@
+ #include <stdio.h>
+ #include <wchar.h>
++#include <stdlib.h>
++#include "internal.h"
+ 
+ wint_t btowc(int c)
+ {
+-	return c<128U ? c : EOF;
++	int b = (unsigned char)c;
++	return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF;
+ }
+diff --git a/src/multibyte/internal.h b/src/multibyte/internal.h
+index cc017fa..53d62ed 100644
+--- a/src/multibyte/internal.h
++++ b/src/multibyte/internal.h
+@@ -23,3 +23,10 @@ extern const uint32_t bittab[];
+ 
+ #define SA 0xc2u
+ #define SB 0xf4u
++
++/* Arbitrary encoding for representing code units instead of characters. */
++#define CODEUNIT(c) (0xdfff & (signed char)(c))
++#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80)
++
++/* Get inline definition of MB_CUR_MAX. */
++#include "locale_impl.h"
+diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c
+index e7b3654..ca7da70 100644
+--- a/src/multibyte/mbrtowc.c
++++ b/src/multibyte/mbrtowc.c
+@@ -4,6 +4,7 @@
+  * unnecessary.
+  */
+ 
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
+ #include "internal.h"
+@@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate
+ 	if (!n) return -2;
+ 	if (!c) {
+ 		if (*s < 0x80) return !!(*wc = *s);
++		if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
+ 		if (*s-SA > SB-SA) goto ilseq;
+ 		c = bittab[*s++-SA]; n--;
+ 	}
+diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c
+index 3c1343a..e23083d 100644
+--- a/src/multibyte/mbsrtowcs.c
++++ b/src/multibyte/mbsrtowcs.c
+@@ -7,6 +7,8 @@
+ #include <stdint.h>
+ #include <wchar.h>
+ #include <errno.h>
++#include <string.h>
++#include <stdlib.h>
+ #include "internal.h"
+ 
+ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
+@@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs
+ 		}
+ 	}
+ 
++	if (MB_CUR_MAX==1) {
++		if (!ws) return strlen((const char *)s);
++		for (;;) {
++			if (!wn) {
++				*src = (const void *)s;
++				return wn0;
++			}
++			if (!*s) break;
++			c = *s++;
++			*ws++ = CODEUNIT(c);
++			wn--;
++		}
++		*ws = 0;
++		*src = 0;
++		return wn0-wn;
++	}
++
+ 	if (!ws) for (;;) {
+ 		if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
+ 			while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c
+index 803d221..71a9506 100644
+--- a/src/multibyte/mbtowc.c
++++ b/src/multibyte/mbtowc.c
+@@ -4,6 +4,7 @@
+  * unnecessary.
+  */
+ 
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
+ #include "internal.h"
+@@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n)
+ 	if (!wc) wc = &dummy;
+ 
+ 	if (*s < 0x80) return !!(*wc = *s);
++	if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1;
+ 	if (*s-SA > SB-SA) goto ilseq;
+ 	c = bittab[*s++-SA];
+ 
+diff --git a/src/multibyte/wcrtomb.c b/src/multibyte/wcrtomb.c
+index 59f733d..ddc37a5 100644
+--- a/src/multibyte/wcrtomb.c
++++ b/src/multibyte/wcrtomb.c
+@@ -4,8 +4,10 @@
+  * unnecessary.
+  */
+ 
++#include <stdlib.h>
+ #include <wchar.h>
+ #include <errno.h>
++#include "internal.h"
+ 
+ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
+ {
+@@ -13,6 +15,13 @@ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st)
+ 	if ((unsigned)wc < 0x80) {
+ 		*s = wc;
+ 		return 1;
++	} else if (MB_CUR_MAX == 1) {
++		if (!IS_CODEUNIT(wc)) {
++			errno = EILSEQ;
++			return -1;
++		}
++		*s = wc;
++		return 1;
+ 	} else if ((unsigned)wc < 0x800) {
+ 		*s++ = 0xc0 | (wc>>6);
+ 		*s = 0x80 | (wc&0x3f);
+diff --git a/src/multibyte/wctob.c b/src/multibyte/wctob.c
+index d6353ee..4aeda6a 100644
+--- a/src/multibyte/wctob.c
++++ b/src/multibyte/wctob.c
+@@ -1,8 +1,10 @@
+-#include <stdio.h>
+ #include <wchar.h>
++#include <stdlib.h>
++#include "internal.h"
+ 
+ int wctob(wint_t c)
+ {
+ 	if (c < 128U) return c;
++	if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c;
+ 	return EOF;
+ }
+diff --git a/src/passwd/nscd_query.c b/src/passwd/nscd_query.c
+index 69a7815..d38e371 100644
+--- a/src/passwd/nscd_query.c
++++ b/src/passwd/nscd_query.c
+@@ -32,6 +32,7 @@ FILE *__nscd_query(int32_t req, const char *key, int32_t *buf, size_t len, int *
+ 		},
+ 		.msg_iovlen = 2
+ 	};
++	int errno_save = errno;
+ 
+ 	*swap = 0;
+ retry:
+@@ -50,11 +51,14 @@ retry:
+ 		return f;
+ 
+ 	if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
+-		/* If there isn't a running nscd we return -1 to indicate that
+-		 * that is precisely what happened
+-		 */
+-		if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT)
++		/* If there isn't a running nscd we simulate a "not found"
++		 * result and the caller is responsible for calling
++		 * fclose on the (unconnected) socket. The value of
++		 * errno must be left unchanged in this case.  */
++		if (errno == EACCES || errno == ECONNREFUSED || errno == ENOENT) {
++			errno = errno_save;
+ 			return f;
++		}
+ 		goto error;
+ 	}
+ 
+diff --git a/src/process/sh/vfork.s b/src/process/sh/vfork.s
+new file mode 100644
+index 0000000..48cc939
+--- /dev/null
++++ b/src/process/sh/vfork.s
+@@ -0,0 +1,23 @@
++.global __vfork
++.weak vfork
++.type __vfork,@function
++.type vfork,@function
++__vfork:
++vfork:
++	mov #95, r3
++	add r3, r3
++
++	trapa #31
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
++
++	mov r0, r4
++	mov.l 1f, r0
++2:	braf r0
++	 nop
++	.align 2
++	.hidden __syscall_ret
++1:	.long __syscall_ret@PLT-(2b+4-.)
+diff --git a/src/regex/fnmatch.c b/src/regex/fnmatch.c
+index 7f6b65f..978fff8 100644
+--- a/src/regex/fnmatch.c
++++ b/src/regex/fnmatch.c
+@@ -18,6 +18,7 @@
+ #include <stdlib.h>
+ #include <wchar.h>
+ #include <wctype.h>
++#include "locale_impl.h"
+ 
+ #define END 0
+ #define UNMATCHABLE -2
+@@ -229,7 +230,7 @@ static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n
+ 	 * On illegal sequences we may get it wrong, but in that case
+ 	 * we necessarily have a matching failure anyway. */
+ 	for (s=endstr; s>str && tailcnt; tailcnt--) {
+-		if (s[-1] < 128U) s--;
++		if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+ 		else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ 	}
+ 	if (tailcnt) return FNM_NOMATCH;
+diff --git a/src/signal/sh/restore.s b/src/signal/sh/restore.s
+index ab26034..eaedcdf 100644
+--- a/src/signal/sh/restore.s
++++ b/src/signal/sh/restore.s
+@@ -2,7 +2,7 @@
+ .type   __restore, @function
+ __restore:
+ 	mov   #119, r3  !__NR_sigreturn
+-	trapa #16
++	trapa #31
+ 
+ 	or    r0, r0
+ 	or    r0, r0
+@@ -15,7 +15,7 @@ __restore:
+ __restore_rt:
+ 	mov   #100, r3  !__NR_rt_sigreturn
+ 	add   #73, r3
+-	trapa #16
++	trapa #31
+ 
+ 	or    r0, r0
+ 	or    r0, r0
+diff --git a/src/stdio/__fdopen.c b/src/stdio/__fdopen.c
+index ef8f47d..8d6ce81 100644
+--- a/src/stdio/__fdopen.c
++++ b/src/stdio/__fdopen.c
+@@ -54,13 +54,7 @@ FILE *__fdopen(int fd, const char *mode)
+ 	if (!libc.threaded) f->lock = -1;
+ 
+ 	/* Add new FILE to open file list */
+-	OFLLOCK();
+-	f->next = libc.ofl_head;
+-	if (libc.ofl_head) libc.ofl_head->prev = f;
+-	libc.ofl_head = f;
+-	OFLUNLOCK();
+-
+-	return f;
++	return __ofl_add(f);
+ }
+ 
+ weak_alias(__fdopen, fdopen);
+diff --git a/src/stdio/__stdio_exit.c b/src/stdio/__stdio_exit.c
+index 716e5f7..191b445 100644
+--- a/src/stdio/__stdio_exit.c
++++ b/src/stdio/__stdio_exit.c
+@@ -16,8 +16,7 @@ static void close_file(FILE *f)
+ void __stdio_exit(void)
+ {
+ 	FILE *f;
+-	OFLLOCK();
+-	for (f=libc.ofl_head; f; f=f->next) close_file(f);
++	for (f=*__ofl_lock(); f; f=f->next) close_file(f);
+ 	close_file(__stdin_used);
+ 	close_file(__stdout_used);
+ }
+diff --git a/src/stdio/__stdio_read.c b/src/stdio/__stdio_read.c
+index 5947344..f8fa6d3 100644
+--- a/src/stdio/__stdio_read.c
++++ b/src/stdio/__stdio_read.c
+@@ -1,12 +1,5 @@
+ #include "stdio_impl.h"
+ #include <sys/uio.h>
+-#include <pthread.h>
+-
+-static void cleanup(void *p)
+-{
+-	FILE *f = p;
+-	if (!f->lockcount) __unlockfile(f);
+-}
+ 
+ size_t __stdio_read(FILE *f, unsigned char *buf, size_t len)
+ {
+@@ -16,9 +9,7 @@ size_t __stdio_read(FILE *f, unsigned char *buf, size_t len)
+ 	};
+ 	ssize_t cnt;
+ 
+-	pthread_cleanup_push(cleanup, f);
+-	cnt = syscall_cp(SYS_readv, f->fd, iov, 2);
+-	pthread_cleanup_pop(0);
++	cnt = syscall(SYS_readv, f->fd, iov, 2);
+ 	if (cnt <= 0) {
+ 		f->flags |= F_EOF ^ ((F_ERR^F_EOF) & cnt);
+ 		return cnt;
+diff --git a/src/stdio/__stdio_write.c b/src/stdio/__stdio_write.c
+index 8c89389..d2d8947 100644
+--- a/src/stdio/__stdio_write.c
++++ b/src/stdio/__stdio_write.c
+@@ -1,12 +1,5 @@
+ #include "stdio_impl.h"
+ #include <sys/uio.h>
+-#include <pthread.h>
+-
+-static void cleanup(void *p)
+-{
+-	FILE *f = p;
+-	if (!f->lockcount) __unlockfile(f);
+-}
+ 
+ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
+ {
+@@ -19,9 +12,7 @@ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
+ 	int iovcnt = 2;
+ 	ssize_t cnt;
+ 	for (;;) {
+-		pthread_cleanup_push(cleanup, f);
+-		cnt = syscall_cp(SYS_writev, f->fd, iov, iovcnt);
+-		pthread_cleanup_pop(0);
++		cnt = syscall(SYS_writev, f->fd, iov, iovcnt);
+ 		if (cnt == rem) {
+ 			f->wend = f->buf + f->buf_size;
+ 			f->wpos = f->wbase = f->buf;
+@@ -34,11 +25,8 @@ size_t __stdio_write(FILE *f, const unsigned char *buf, size_t len)
+ 		}
+ 		rem -= cnt;
+ 		if (cnt > iov[0].iov_len) {
+-			f->wpos = f->wbase = f->buf;
+ 			cnt -= iov[0].iov_len;
+ 			iov++; iovcnt--;
+-		} else if (iovcnt == 2) {
+-			f->wbase += cnt;
+ 		}
+ 		iov[0].iov_base = (char *)iov[0].iov_base + cnt;
+ 		iov[0].iov_len -= cnt;
+diff --git a/src/stdio/fclose.c b/src/stdio/fclose.c
+index 317b3c9..839d88a 100644
+--- a/src/stdio/fclose.c
++++ b/src/stdio/fclose.c
+@@ -14,11 +14,11 @@ int fclose(FILE *f)
+ 	__unlist_locked_file(f);
+ 
+ 	if (!(perm = f->flags & F_PERM)) {
+-		OFLLOCK();
++		FILE **head = __ofl_lock();
+ 		if (f->prev) f->prev->next = f->next;
+ 		if (f->next) f->next->prev = f->prev;
+-		if (libc.ofl_head == f) libc.ofl_head = f->next;
+-		OFLUNLOCK();
++		if (*head == f) *head = f->next;
++		__ofl_unlock();
+ 	}
+ 
+ 	r = fflush(f);
+diff --git a/src/stdio/fflush.c b/src/stdio/fflush.c
+index 7bf862a..3f462c8 100644
+--- a/src/stdio/fflush.c
++++ b/src/stdio/fflush.c
+@@ -35,13 +35,12 @@ int fflush(FILE *f)
+ 
+ 	r = __stdout_used ? fflush(__stdout_used) : 0;
+ 
+-	OFLLOCK();
+-	for (f=libc.ofl_head; f; f=f->next) {
++	for (f=*__ofl_lock(); f; f=f->next) {
+ 		FLOCK(f);
+ 		if (f->wpos > f->wbase) r |= __fflush_unlocked(f);
+ 		FUNLOCK(f);
+ 	}
+-	OFLUNLOCK();
++	__ofl_unlock();
+ 	
+ 	return r;
+ }
+diff --git a/src/stdio/fgetwc.c b/src/stdio/fgetwc.c
+index 8626d54..e455cfe 100644
+--- a/src/stdio/fgetwc.c
++++ b/src/stdio/fgetwc.c
+@@ -1,8 +1,9 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <errno.h>
+ 
+-wint_t __fgetwc_unlocked(FILE *f)
++static wint_t __fgetwc_unlocked_internal(FILE *f)
+ {
+ 	mbstate_t st = { 0 };
+ 	wchar_t wc;
+@@ -10,8 +11,6 @@ wint_t __fgetwc_unlocked(FILE *f)
+ 	unsigned char b;
+ 	size_t l;
+ 
+-	f->mode |= f->mode+1;
+-
+ 	/* Convert character from buffer if possible */
+ 	if (f->rpos < f->rend) {
+ 		l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st);
+@@ -39,6 +38,16 @@ wint_t __fgetwc_unlocked(FILE *f)
+ 	return wc;
+ }
+ 
++wint_t __fgetwc_unlocked(FILE *f)
++{
++	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
++	if (f->mode <= 0) fwide(f, 1);
++	*ploc = f->locale;
++	wchar_t wc = __fgetwc_unlocked_internal(f);
++	*ploc = loc;
++	return wc;
++}
++
+ wint_t fgetwc(FILE *f)
+ {
+ 	wint_t c;
+diff --git a/src/stdio/fmemopen.c b/src/stdio/fmemopen.c
+index d784960..7c193a5 100644
+--- a/src/stdio/fmemopen.c
++++ b/src/stdio/fmemopen.c
+@@ -110,11 +110,5 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode)
+ 
+ 	if (!libc.threaded) f->lock = -1;
+ 
+-	OFLLOCK();
+-	f->next = libc.ofl_head;
+-	if (libc.ofl_head) libc.ofl_head->prev = f;
+-	libc.ofl_head = f;
+-	OFLUNLOCK();
+-
+-	return f;
++	return __ofl_add(f);
+ }
+diff --git a/src/stdio/fopen.c b/src/stdio/fopen.c
+index 07bdb6e..252f082 100644
+--- a/src/stdio/fopen.c
++++ b/src/stdio/fopen.c
+@@ -18,7 +18,7 @@ FILE *fopen(const char *restrict filename, const char *restrict mode)
+ 	/* Compute the flags to pass to open() */
+ 	flags = __fmodeflags(mode);
+ 
+-	fd = sys_open_cp(filename, flags, 0666);
++	fd = sys_open(filename, flags, 0666);
+ 	if (fd < 0) return 0;
+ 	if (flags & O_CLOEXEC)
+ 		__syscall(SYS_fcntl, fd, F_SETFD, FD_CLOEXEC);
+diff --git a/src/stdio/fputwc.c b/src/stdio/fputwc.c
+index 7b621dd..789fe9c 100644
+--- a/src/stdio/fputwc.c
++++ b/src/stdio/fputwc.c
+@@ -1,4 +1,5 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <limits.h>
+ #include <ctype.h>
+@@ -7,8 +8,10 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f)
+ {
+ 	char mbc[MB_LEN_MAX];
+ 	int l;
++	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+ 
+-	f->mode |= f->mode+1;
++	if (f->mode <= 0) fwide(f, 1);
++	*ploc = f->locale;
+ 
+ 	if (isascii(c)) {
+ 		c = putc_unlocked(c, f);
+@@ -20,6 +23,8 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f)
+ 		l = wctomb(mbc, c);
+ 		if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF;
+ 	}
++	if (c==WEOF) f->flags |= F_ERR;
++	*ploc = loc;
+ 	return c;
+ }
+ 
+diff --git a/src/stdio/fputws.c b/src/stdio/fputws.c
+index 5723cbc..0ed02f1 100644
+--- a/src/stdio/fputws.c
++++ b/src/stdio/fputws.c
+@@ -1,23 +1,28 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ 
+ int fputws(const wchar_t *restrict ws, FILE *restrict f)
+ {
+ 	unsigned char buf[BUFSIZ];
+ 	size_t l=0;
++	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+ 
+ 	FLOCK(f);
+ 
+-	f->mode |= f->mode+1;
++	fwide(f, 1);
++	*ploc = f->locale;
+ 
+ 	while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1)
+ 		if (__fwritex(buf, l, f) < l) {
+ 			FUNLOCK(f);
++			*ploc = loc;
+ 			return -1;
+ 		}
+ 
+ 	FUNLOCK(f);
+ 
++	*ploc = loc;
+ 	return l; /* 0 or -1 */
+ }
+ 
+diff --git a/src/stdio/fwide.c b/src/stdio/fwide.c
+index 8088e7a..8410b15 100644
+--- a/src/stdio/fwide.c
++++ b/src/stdio/fwide.c
+@@ -1,13 +1,14 @@
+-#include <wchar.h>
+ #include "stdio_impl.h"
+-
+-#define SH (8*sizeof(int)-1)
+-#define NORMALIZE(x) ((x)>>SH | -((-(x))>>SH))
++#include "locale_impl.h"
+ 
+ int fwide(FILE *f, int mode)
+ {
+ 	FLOCK(f);
+-	if (!f->mode) f->mode = NORMALIZE(mode);
++	if (mode) {
++		if (!f->locale) f->locale = MB_CUR_MAX==1
++			? C_LOCALE : UTF8_LOCALE;
++		if (!f->mode) f->mode = mode>0 ? 1 : -1;
++	}
+ 	mode = f->mode;
+ 	FUNLOCK(f);
+ 	return mode;
+diff --git a/src/stdio/ofl.c b/src/stdio/ofl.c
+new file mode 100644
+index 0000000..b143999
+--- /dev/null
++++ b/src/stdio/ofl.c
+@@ -0,0 +1,16 @@
++#include "stdio_impl.h"
++#include "libc.h"
++
++static FILE *ofl_head;
++static volatile int ofl_lock[2];
++
++FILE **__ofl_lock()
++{
++	LOCK(ofl_lock);
++	return &ofl_head;
++}
++
++void __ofl_unlock()
++{
++	UNLOCK(ofl_lock);
++}
+diff --git a/src/stdio/ofl_add.c b/src/stdio/ofl_add.c
+new file mode 100644
+index 0000000..d7de9f1
+--- /dev/null
++++ b/src/stdio/ofl_add.c
+@@ -0,0 +1,11 @@
++#include "stdio_impl.h"
++
++FILE *__ofl_add(FILE *f)
++{
++	FILE **head = __ofl_lock();
++	f->next = *head;
++	if (*head) (*head)->prev = f;
++	*head = f;
++	__ofl_unlock();
++	return f;
++}
+diff --git a/src/stdio/open_memstream.c b/src/stdio/open_memstream.c
+index 9eafdfb..58504c9 100644
+--- a/src/stdio/open_memstream.c
++++ b/src/stdio/open_memstream.c
+@@ -79,11 +79,5 @@ FILE *open_memstream(char **bufp, size_t *sizep)
+ 
+ 	if (!libc.threaded) f->lock = -1;
+ 
+-	OFLLOCK();
+-	f->next = libc.ofl_head;
+-	if (libc.ofl_head) libc.ofl_head->prev = f;
+-	libc.ofl_head = f;
+-	OFLUNLOCK();
+-
+-	return f;
++	return __ofl_add(f);
+ }
+diff --git a/src/stdio/open_wmemstream.c b/src/stdio/open_wmemstream.c
+index 3537030..7ab2c64 100644
+--- a/src/stdio/open_wmemstream.c
++++ b/src/stdio/open_wmemstream.c
+@@ -81,11 +81,5 @@ FILE *open_wmemstream(wchar_t **bufp, size_t *sizep)
+ 
+ 	if (!libc.threaded) f->lock = -1;
+ 
+-	OFLLOCK();
+-	f->next = libc.ofl_head;
+-	if (libc.ofl_head) libc.ofl_head->prev = f;
+-	libc.ofl_head = f;
+-	OFLUNLOCK();
+-
+-	return f;
++	return __ofl_add(f);
+ }
+diff --git a/src/stdio/ungetwc.c b/src/stdio/ungetwc.c
+index 913f716..80d6e20 100644
+--- a/src/stdio/ungetwc.c
++++ b/src/stdio/ungetwc.c
+@@ -1,4 +1,5 @@
+ #include "stdio_impl.h"
++#include "locale_impl.h"
+ #include <wchar.h>
+ #include <limits.h>
+ #include <ctype.h>
+@@ -8,21 +9,19 @@ wint_t ungetwc(wint_t c, FILE *f)
+ {
+ 	unsigned char mbc[MB_LEN_MAX];
+ 	int l=1;
+-
+-	if (c == WEOF) return c;
+-
+-	/* Try conversion early so we can fail without locking if invalid */
+-	if (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)
+-		return WEOF;
++	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+ 
+ 	FLOCK(f);
+ 
+-	f->mode |= f->mode+1;
++	if (f->mode <= 0) fwide(f, 1);
++	*ploc = f->locale;
+ 
+ 	if (!f->rpos) __toread(f);
+-	if (!f->rpos || f->rpos < f->buf - UNGET + l) {
++	if (!f->rpos || f->rpos < f->buf - UNGET + l || c == WEOF ||
++	    (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)) {
+ 		FUNLOCK(f);
+-		return EOF;
++		*ploc = loc;
++		return WEOF;
+ 	}
+ 
+ 	if (isascii(c)) *--f->rpos = c;
+@@ -31,5 +30,6 @@ wint_t ungetwc(wint_t c, FILE *f)
+ 	f->flags &= ~F_EOF;
+ 
+ 	FUNLOCK(f);
++	*ploc = loc;
+ 	return c;
+ }
+diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
+index ebdff00..f06d5ae 100644
+--- a/src/stdio/vfwprintf.c
++++ b/src/stdio/vfwprintf.c
+@@ -293,7 +293,10 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
+ 			if ((fl&LEFT_ADJ)) fprintf(f, "%.*s", w-p, "");
+ 			l=w;
+ 			continue;
++		case 'm':
++			arg.p = strerror(errno);
+ 		case 's':
++			if (!arg.p) arg.p = "(null)";
+ 			bs = arg.p;
+ 			if (p<0) p = INT_MAX;
+ 			for (i=l=0; l<p && (i=mbtowc(&wc, bs, MB_LEN_MAX))>0; bs+=i, l++);
+@@ -356,7 +359,7 @@ int vfwprintf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
+ 	}
+ 
+ 	FLOCK(f);
+-	f->mode |= f->mode+1;
++	fwide(f, 1);
+ 	olderr = f->flags & F_ERR;
+ 	f->flags &= ~F_ERR;
+ 	ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type);
+diff --git a/src/stdio/vfwscanf.c b/src/stdio/vfwscanf.c
+index ac5c2c2..223aad4 100644
+--- a/src/stdio/vfwscanf.c
++++ b/src/stdio/vfwscanf.c
+@@ -104,7 +104,7 @@ int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
+ 
+ 	FLOCK(f);
+ 
+-	f->mode |= f->mode+1;
++	fwide(f, 1);
+ 
+ 	for (p=fmt; *p; p++) {
+ 
+diff --git a/src/thread/__unmapself.c b/src/thread/__unmapself.c
+index e69de29..1d3bee1 100644
+--- a/src/thread/__unmapself.c
++++ b/src/thread/__unmapself.c
+@@ -0,0 +1,29 @@
++#include "pthread_impl.h"
++#include "atomic.h"
++#include "syscall.h"
++/* cheat and reuse CRTJMP macro from dynlink code */
++#include "dynlink.h"
++
++static volatile int lock;
++static void *unmap_base;
++static size_t unmap_size;
++static char shared_stack[256];
++
++static void do_unmap()
++{
++	__syscall(SYS_munmap, unmap_base, unmap_size);
++	__syscall(SYS_exit);
++}
++
++void __unmapself(void *base, size_t size)
++{
++	int tid=__pthread_self()->tid;
++	char *stack = shared_stack + sizeof shared_stack;
++	stack -= (uintptr_t)stack % 16;
++	while (lock || a_cas(&lock, 0, tid))
++		a_spin();
++	__syscall(SYS_set_tid_address, &lock);
++	unmap_base = base;
++	unmap_size = size;
++	CRTJMP(do_unmap, stack);
++}
+diff --git a/src/thread/mips/__unmapself.s b/src/thread/mips/__unmapself.s
+index 9aa0371..ba139dc 100644
+--- a/src/thread/mips/__unmapself.s
++++ b/src/thread/mips/__unmapself.s
+@@ -2,6 +2,7 @@
+ .global __unmapself
+ .type   __unmapself,@function
+ __unmapself:
++	move $sp, $25
+ 	li $2, 4091
+ 	syscall
+ 	li $4, 0
+diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
+index de72818..e7df34a 100644
+--- a/src/thread/pthread_create.c
++++ b/src/thread/pthread_create.c
+@@ -191,8 +191,9 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
+ 	if (!libc.can_do_threads) return ENOSYS;
+ 	self = __pthread_self();
+ 	if (!libc.threaded) {
+-		for (FILE *f=libc.ofl_head; f; f=f->next)
++		for (FILE *f=*__ofl_lock(); f; f=f->next)
+ 			init_file_lock(f);
++		__ofl_unlock();
+ 		init_file_lock(__stdin_used);
+ 		init_file_lock(__stdout_used);
+ 		init_file_lock(__stderr_used);
+@@ -231,7 +232,8 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
+ 		if (guard) {
+ 			map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
+ 			if (map == MAP_FAILED) goto fail;
+-			if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
++			if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)
++			    && errno != ENOSYS) {
+ 				__munmap(map, size);
+ 				goto fail;
+ 			}
+diff --git a/src/thread/sh/__set_thread_area.s b/src/thread/sh/__set_thread_area.s
+index d9f1181..e69de29 100644
+--- a/src/thread/sh/__set_thread_area.s
++++ b/src/thread/sh/__set_thread_area.s
+@@ -1,6 +0,0 @@
+-.global __set_thread_area
+-.type   __set_thread_area, @function
+-__set_thread_area:
+-	ldc r4, gbr
+-	rts
+-	 mov #0, r0
+diff --git a/src/thread/sh/__unmapself.s b/src/thread/sh/__unmapself.s
+index b34c3c8..0161d53 100644
+--- a/src/thread/sh/__unmapself.s
++++ b/src/thread/sh/__unmapself.s
+@@ -1,9 +1,9 @@
+ .text
+-.global __unmapself
+-.type   __unmapself, @function
+-__unmapself:
++.global __unmapself_sh_mmu
++.type   __unmapself_sh_mmu, @function
++__unmapself_sh_mmu:
+ 	mov   #91, r3  ! SYS_munmap
+-	trapa #18
++	trapa #31
+ 
+ 	or    r0, r0
+ 	or    r0, r0
+@@ -13,7 +13,7 @@ __unmapself:
+ 
+ 	mov   #1, r3   ! SYS_exit
+ 	mov   #0, r4
+-	trapa #17
++	trapa #31
+ 
+ 	or    r0, r0
+ 	or    r0, r0
+diff --git a/src/thread/sh/clone.s b/src/thread/sh/clone.s
+index d6c9184..f8ad845 100644
+--- a/src/thread/sh/clone.s
++++ b/src/thread/sh/clone.s
+@@ -17,7 +17,7 @@ __clone:
+ 	mov.l @r15,     r6   ! r6 = ptid
+ 	mov.l @(8,r15), r7   ! r7 = ctid
+ 	mov.l @(4,r15), r0   ! r0 = tls
+-	trapa #21
++	trapa #31
+ 
+ 	or r0, r0
+ 	or r0, r0
+@@ -38,7 +38,7 @@ __clone:
+ 
+ 	mov   #1, r3   ! __NR_exit
+ 	mov   r0, r4
+-	trapa #17
++	trapa #31
+ 
+ 	or   r0, r0
+ 	or   r0, r0
+diff --git a/src/thread/sh/syscall_cp.s b/src/thread/sh/syscall_cp.s
+index 6b28ddf..c3cafac 100644
+--- a/src/thread/sh/syscall_cp.s
++++ b/src/thread/sh/syscall_cp.s
+@@ -31,7 +31,7 @@ L1:	.long __cancel@PLT-(1b-.)
+ 	mov.l @(4,r15), r7
+ 	mov.l @(8,r15), r0
+ 	mov.l @(12,r15), r1
+-	trapa #22
++	trapa #31
+ 
+ __cp_end:
+ 	! work around hardware bug
+diff --git a/src/unistd/sh/pipe.s b/src/unistd/sh/pipe.s
+index d865ae3..46c4908 100644
+--- a/src/unistd/sh/pipe.s
++++ b/src/unistd/sh/pipe.s
+@@ -2,7 +2,7 @@
+ .type   pipe, @function
+ pipe:
+ 	mov    #42, r3
+-	trapa  #17
++	trapa  #31
+ 
+ 	! work around hardware bug
+ 	or     r0, r0
+-- 
+2.1.4
+
-- 
cgit v1.2.3