diff options
Diffstat (limited to 'toolchain/musl/patches/000-update-to-git-2016-01-22.patch')
-rw-r--r-- | toolchain/musl/patches/000-update-to-git-2016-01-22.patch | 7636 |
1 files changed, 0 insertions, 7636 deletions
diff --git a/toolchain/musl/patches/000-update-to-git-2016-01-22.patch b/toolchain/musl/patches/000-update-to-git-2016-01-22.patch deleted file mode 100644 index f5fc159ad1..0000000000 --- a/toolchain/musl/patches/000-update-to-git-2016-01-22.patch +++ /dev/null @@ -1,7636 +0,0 @@ ---- a/.gitignore -+++ b/.gitignore -@@ -5,9 +5,6 @@ - *.so.1 - arch/*/bits/alltypes.h - config.mak --include/bits --tools/musl-gcc --tools/musl-clang --tools/ld.musl-clang - lib/musl-gcc.specs - src/internal/version.h -+/obj/ ---- a/Makefile -+++ b/Makefile -@@ -8,6 +8,7 @@ - # Do not make changes here. - # - -+srcdir = . - exec_prefix = /usr/local - bindir = $(exec_prefix)/bin - -@@ -16,31 +17,38 @@ includedir = $(prefix)/include - libdir = $(prefix)/lib - syslibdir = /lib - --SRCS = $(sort $(wildcard src/*/*.c arch/$(ARCH)/src/*.c)) --OBJS = $(SRCS:.c=.o) -+BASE_SRCS = $(sort $(wildcard $(srcdir)/src/*/*.c $(srcdir)/arch/$(ARCH)/src/*.[csS])) -+BASE_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(BASE_SRCS))) -+ARCH_SRCS = $(wildcard $(srcdir)/src/*/$(ARCH)/*.[csS]) -+ARCH_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(ARCH_SRCS))) -+REPLACED_OBJS = $(sort $(subst /$(ARCH)/,/,$(ARCH_OBJS))) -+OBJS = $(addprefix obj/, $(filter-out $(REPLACED_OBJS), $(sort $(BASE_OBJS) $(ARCH_OBJS)))) - LOBJS = $(OBJS:.o=.lo) --GENH = include/bits/alltypes.h --GENH_INT = src/internal/version.h --IMPH = src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h -+GENH = obj/include/bits/alltypes.h -+GENH_INT = obj/src/internal/version.h -+IMPH = $(addprefix $(srcdir)/, src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h) - --LDFLAGS = -+LDFLAGS = -+LDFLAGS_AUTO = - LIBCC = -lgcc - CPPFLAGS = --CFLAGS = -Os -pipe -+CFLAGS = -+CFLAGS_AUTO = -Os -pipe - CFLAGS_C99FSE = -std=c99 -ffreestanding -nostdinc - - CFLAGS_ALL = $(CFLAGS_C99FSE) --CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I./arch/$(ARCH) -I./src/internal -I./include --CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS) --CFLAGS_ALL_STATIC = $(CFLAGS_ALL) --CFLAGS_ALL_SHARED = $(CFLAGS_ALL) -fPIC -DSHARED -+CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I$(srcdir)/arch/$(ARCH) -Iobj/src/internal -I$(srcdir)/src/internal -Iobj/include -I$(srcdir)/include -+CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS_AUTO) $(CFLAGS) -+ -+LDFLAGS_ALL = $(LDFLAGS_AUTO) $(LDFLAGS) - - AR = $(CROSS_COMPILE)ar - RANLIB = $(CROSS_COMPILE)ranlib --INSTALL = ./tools/install.sh -+INSTALL = $(srcdir)/tools/install.sh - --ARCH_INCLUDES = $(wildcard arch/$(ARCH)/bits/*.h) --ALL_INCLUDES = $(sort $(wildcard include/*.h include/*/*.h) $(GENH) $(ARCH_INCLUDES:arch/$(ARCH)/%=include/%)) -+ARCH_INCLUDES = $(wildcard $(srcdir)/arch/$(ARCH)/bits/*.h) -+INCLUDES = $(wildcard $(srcdir)/include/*.h $(srcdir)/include/*/*.h) -+ALL_INCLUDES = $(sort $(INCLUDES:$(srcdir)/%=%) $(GENH:obj/%=%) $(ARCH_INCLUDES:$(srcdir)/arch/$(ARCH)/%=include/%)) - - EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv dl - EMPTY_LIBS = $(EMPTY_LIB_NAMES:%=lib/lib%.a) -@@ -49,7 +57,7 @@ STATIC_LIBS = lib/libc.a - SHARED_LIBS = lib/libc.so - TOOL_LIBS = lib/musl-gcc.specs - ALL_LIBS = $(CRT_LIBS) $(STATIC_LIBS) $(SHARED_LIBS) $(EMPTY_LIBS) $(TOOL_LIBS) --ALL_TOOLS = tools/musl-gcc -+ALL_TOOLS = obj/musl-gcc - - WRAPCC_GCC = gcc - WRAPCC_CLANG = clang -@@ -58,95 +66,93 @@ LDSO_PATHNAME = $(syslibdir)/ld-musl-$(A - - -include config.mak - -+ifeq ($(ARCH),) -+$(error Please set ARCH in config.mak before running make.) 
-+endif -+ - all: $(ALL_LIBS) $(ALL_TOOLS) - -+OBJ_DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_LIBS) $(ALL_TOOLS) $(OBJS) $(GENH) $(GENH_INT))) $(addprefix obj/, crt crt/$(ARCH) include)) -+ -+$(ALL_LIBS) $(ALL_TOOLS) $(CRT_LIBS:lib/%=obj/crt/%) $(OBJS) $(LOBJS) $(GENH) $(GENH_INT): | $(OBJ_DIRS) -+ -+$(OBJ_DIRS): -+ mkdir -p $@ -+ - install: install-libs install-headers install-tools - - clean: -- rm -f crt/*.o -- rm -f $(OBJS) -- rm -f $(LOBJS) -- rm -f $(ALL_LIBS) lib/*.[ao] lib/*.so -- rm -f $(ALL_TOOLS) -- rm -f $(GENH) $(GENH_INT) -- rm -f include/bits -+ rm -rf obj lib - - distclean: clean - rm -f config.mak - --include/bits: -- @test "$(ARCH)" || { echo "Please set ARCH in config.mak before running make." ; exit 1 ; } -- ln -sf ../arch/$(ARCH)/bits $@ -+obj/include/bits/alltypes.h: $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in $(srcdir)/tools/mkalltypes.sed -+ sed -f $(srcdir)/tools/mkalltypes.sed $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in > $@ - --include/bits/alltypes.h.in: include/bits -+obj/src/internal/version.h: $(wildcard $(srcdir)/VERSION $(srcdir)/.git) -+ printf '#define VERSION "%s"\n' "$$(cd $(srcdir); sh tools/version.sh)" > $@ - --include/bits/alltypes.h: include/bits/alltypes.h.in include/alltypes.h.in tools/mkalltypes.sed -- sed -f tools/mkalltypes.sed include/bits/alltypes.h.in include/alltypes.h.in > $@ -+obj/src/internal/version.o obj/src/internal/version.lo: obj/src/internal/version.h - --src/internal/version.h: $(wildcard VERSION .git) -- printf '#define VERSION "%s"\n' "$$(sh tools/version.sh)" > $@ -+obj/crt/rcrt1.o obj/src/ldso/dlstart.lo obj/src/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h - --src/internal/version.lo: src/internal/version.h -+obj/crt/crt1.o obj/crt/scrt1.o obj/crt/rcrt1.o obj/src/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h - --crt/rcrt1.o src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h -+obj/crt/rcrt1.o: $(srcdir)/src/ldso/dlstart.c - --crt/crt1.o crt/Scrt1.o crt/rcrt1.o src/ldso/dlstart.lo: $(wildcard arch/$(ARCH)/crt_arch.h) -+obj/crt/Scrt1.o obj/crt/rcrt1.o: CFLAGS_ALL += -fPIC - --crt/rcrt1.o: src/ldso/dlstart.c -+obj/crt/$(ARCH)/crti.o: $(srcdir)/crt/$(ARCH)/crti.s - --crt/Scrt1.o crt/rcrt1.o: CFLAGS += -fPIC -+obj/crt/$(ARCH)/crtn.o: $(srcdir)/crt/$(ARCH)/crtn.s - --OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=src/%)) --$(OPTIMIZE_SRCS:%.c=%.o) $(OPTIMIZE_SRCS:%.c=%.lo): CFLAGS += -O3 -+OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=$(srcdir)/src/%)) -+$(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.o) $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.lo): CFLAGS += -O3 - - MEMOPS_SRCS = src/string/memcpy.c src/string/memmove.c src/string/memcmp.c src/string/memset.c --$(MEMOPS_SRCS:%.c=%.o) $(MEMOPS_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_MEMOPS) -+$(MEMOPS_SRCS:%.c=obj/%.o) $(MEMOPS_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_MEMOPS) - - NOSSP_SRCS = $(wildcard crt/*.c) \ - src/env/__libc_start_main.c src/env/__init_tls.c \ - src/thread/__set_thread_area.c src/env/__stack_chk_fail.c \ - src/string/memset.c src/string/memcpy.c \ - src/ldso/dlstart.c src/ldso/dynlink.c --$(NOSSP_SRCS:%.c=%.o) $(NOSSP_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_NOSSP) -+$(NOSSP_SRCS:%.c=obj/%.o) $(NOSSP_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP) -+ -+$(CRT_LIBS:lib/%=obj/crt/%): CFLAGS_ALL += -DCRT - --$(CRT_LIBS:lib/%=crt/%): CFLAGS += -DCRT -+$(LOBJS): CFLAGS_ALL += -fPIC -DSHARED - --# This incantation ensures that changes to any subarch asm files 
will --# force the corresponding object file to be rebuilt, even if the implicit --# rule below goes indirectly through a .sub file. --define mkasmdep --$(dir $(patsubst %/,%,$(dir $(1))))$(notdir $(1:.s=.o)): $(1) --endef --$(foreach s,$(wildcard src/*/$(ARCH)*/*.s),$(eval $(call mkasmdep,$(s)))) -+CC_CMD = $(CC) $(CFLAGS_ALL) -c -o $@ $< - - # Choose invocation of assembler to be used --# $(1) is input file, $(2) is output file, $(3) is assembler flags - ifeq ($(ADD_CFI),yes) -- AS_CMD = LC_ALL=C awk -f tools/add-cfi.common.awk -f tools/add-cfi.$(ARCH).awk $< | $(CC) -x assembler -c -o $@ - -+ AS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ - - else -- AS_CMD = $(CC) -c -o $@ $< -+ AS_CMD = $(CC_CMD) - endif - --%.o: $(ARCH)$(ASMSUBARCH)/%.sub -- $(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $(dir $<)$(shell cat $<) -+obj/%.o: $(srcdir)/%.s -+ $(AS_CMD) - --%.o: $(ARCH)/%.s -- $(AS_CMD) $(CFLAGS_ALL_STATIC) -+obj/%.o: $(srcdir)/%.S -+ $(CC_CMD) - --%.o: %.c $(GENH) $(IMPH) -- $(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $< -+obj/%.o: $(srcdir)/%.c $(GENH) $(IMPH) -+ $(CC_CMD) - --%.lo: $(ARCH)$(ASMSUBARCH)/%.sub -- $(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $(dir $<)$(shell cat $<) -+obj/%.lo: $(srcdir)/%.s -+ $(AS_CMD) - --%.lo: $(ARCH)/%.s -- $(AS_CMD) $(CFLAGS_ALL_SHARED) -+obj/%.lo: $(srcdir)/%.S -+ $(CC_CMD) - --%.lo: %.c $(GENH) $(IMPH) -- $(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $< -+obj/%.lo: $(srcdir)/%.c $(GENH) $(IMPH) -+ $(CC_CMD) - - lib/libc.so: $(LOBJS) -- $(CC) $(CFLAGS_ALL_SHARED) $(LDFLAGS) -nostdlib -shared \ -+ $(CC) $(CFLAGS_ALL) $(LDFLAGS_ALL) -nostdlib -shared \ - -Wl,-e,_dlstart -Wl,-Bsymbolic-functions \ - -o $@ $(LOBJS) $(LIBCC) - -@@ -159,21 +165,27 @@ $(EMPTY_LIBS): - rm -f $@ - $(AR) rc $@ - --lib/%.o: crt/%.o -+lib/%.o: obj/crt/%.o - cp $< $@ - --lib/musl-gcc.specs: tools/musl-gcc.specs.sh config.mak -+lib/crti.o: obj/crt/$(ARCH)/crti.o -+ cp $< $@ -+ -+lib/crtn.o: obj/crt/$(ARCH)/crtn.o -+ cp $< $@ -+ -+lib/musl-gcc.specs: $(srcdir)/tools/musl-gcc.specs.sh config.mak - sh $< "$(includedir)" "$(libdir)" "$(LDSO_PATHNAME)" > $@ - --tools/musl-gcc: config.mak -+obj/musl-gcc: config.mak - printf '#!/bin/sh\nexec "$${REALGCC:-$(WRAPCC_GCC)}" "$$@" -specs "%s/musl-gcc.specs"\n' "$(libdir)" > $@ - chmod +x $@ - --tools/%-clang: tools/%-clang.in config.mak -+obj/%-clang: $(srcdir)/tools/%-clang.in config.mak - sed -e 's!@CC@!$(WRAPCC_CLANG)!g' -e 's!@PREFIX@!$(prefix)!g' -e 's!@INCDIR@!$(includedir)!g' -e 's!@LIBDIR@!$(libdir)!g' -e 's!@LDSO@!$(LDSO_PATHNAME)!g' $< > $@ - chmod +x $@ - --$(DESTDIR)$(bindir)/%: tools/% -+$(DESTDIR)$(bindir)/%: obj/% - $(INSTALL) -D $< $@ - - $(DESTDIR)$(libdir)/%.so: lib/%.so -@@ -182,10 +194,13 @@ $(DESTDIR)$(libdir)/%.so: lib/%.so - $(DESTDIR)$(libdir)/%: lib/% - $(INSTALL) -D -m 644 $< $@ - --$(DESTDIR)$(includedir)/bits/%: arch/$(ARCH)/bits/% -+$(DESTDIR)$(includedir)/bits/%: $(srcdir)/arch/$(ARCH)/bits/% -+ $(INSTALL) -D -m 644 $< $@ -+ -+$(DESTDIR)$(includedir)/bits/%: obj/include/bits/% - $(INSTALL) -D -m 644 $< $@ - --$(DESTDIR)$(includedir)/%: include/% -+$(DESTDIR)$(includedir)/%: $(srcdir)/include/% - $(INSTALL) -D -m 644 $< $@ - - $(DESTDIR)$(LDSO_PATHNAME): $(DESTDIR)$(libdir)/libc.so -@@ -195,12 +210,12 @@ install-libs: $(ALL_LIBS:lib/%=$(DESTDIR - - install-headers: $(ALL_INCLUDES:include/%=$(DESTDIR)$(includedir)/%) - --install-tools: $(ALL_TOOLS:tools/%=$(DESTDIR)$(bindir)/%) -+install-tools: $(ALL_TOOLS:obj/%=$(DESTDIR)$(bindir)/%) - - 
musl-git-%.tar.gz: .git -- git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@) -+ git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@) - - musl-%.tar.gz: .git -- git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@) -+ git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@) - - .PHONY: all clean install install-libs install-headers install-tools ---- a/arch/aarch64/atomic.h -+++ /dev/null -@@ -1,206 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_64(uint64_t x) --{ -- __asm__( -- " rbit %0, %1\n" -- " clz %0, %0\n" -- : "=r"(x) : "r"(x)); -- return x; --} -- --static inline int a_ctz_l(unsigned long x) --{ -- return a_ctz_64(x); --} -- --static inline void a_barrier() --{ -- __asm__ __volatile__("dmb ish"); --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- void *old; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %0,%3\n" -- " cmp %0,%1\n" -- " b.ne 1f\n" -- " stxr %w0,%2,%3\n" -- " cbnz %w0,1b\n" -- " mov %0,%1\n" -- "1: dmb ish\n" -- : "=&r"(old) -- : "r"(t), "r"(s), "Q"(*(long*)p) -- : "memory", "cc"); -- return old; --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- int old; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %w0,%3\n" -- " cmp %w0,%w1\n" -- " b.ne 1f\n" -- " stxr %w0,%w2,%3\n" -- " cbnz %w0,1b\n" -- " mov %w0,%w1\n" -- "1: dmb ish\n" -- : "=&r"(old) -- : "r"(t), "r"(s), "Q"(*p) -- : "memory", "cc"); -- return old; --} -- --static inline int a_swap(volatile int *x, int v) --{ -- int old, tmp; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %w0,%3\n" -- " stxr %w1,%w2,%3\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(old), "=&r"(tmp) -- : "r"(v), "Q"(*x) -- : "memory", "cc" ); -- return old; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- int old, tmp; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %w0,%3\n" -- " add %w0,%w0,%w2\n" -- " stxr %w1,%w0,%3\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(old), "=&r"(tmp) -- : "r"(v), "Q"(*x) -- : "memory", "cc" ); -- return old-v; --} -- --static inline void a_inc(volatile int *x) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %w0,%2\n" -- " add %w0,%w0,#1\n" -- " stxr %w1,%w0,%2\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "Q"(*x) -- : "memory", "cc" ); --} -- --static inline void a_dec(volatile int *x) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %w0,%2\n" -- " sub %w0,%w0,#1\n" -- " stxr %w1,%w0,%2\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "Q"(*x) -- : "memory", "cc" ); --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %0,%3\n" -- " and %0,%0,%2\n" -- " stxr %w1,%0,%3\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "r"(v), "Q"(*p) -- : "memory", "cc" ); --} -- --static inline void a_and(volatile int *p, int v) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %w0,%3\n" -- " and %w0,%w0,%w2\n" -- " stxr %w1,%w0,%3\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "r"(v), "Q"(*p) -- : "memory", "cc" ); --} -- --static inline void 
a_or_64(volatile uint64_t *p, uint64_t v) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %0,%3\n" -- " orr %0,%0,%2\n" -- " stxr %w1,%0,%3\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "r"(v), "Q"(*p) -- : "memory", "cc" ); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- return a_or_64(p, v); --} -- --static inline void a_or(volatile int *p, int v) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldxr %w0,%3\n" -- " orr %w0,%w0,%w2\n" -- " stxr %w1,%w0,%3\n" -- " cbnz %w1,1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "r"(v), "Q"(*p) -- : "memory", "cc" ); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__ __volatile__( -- " dmb ish\n" -- " str %w1,%0\n" -- " dmb ish\n" -- : "=m"(*p) -- : "r"(x) -- : "memory", "cc" ); --} -- --#define a_spin a_barrier -- --static inline void a_crash() --{ -- *(volatile char *)0=0; --} -- -- --#endif ---- /dev/null -+++ b/arch/aarch64/atomic_arch.h -@@ -0,0 +1,53 @@ -+#define a_ll a_ll -+static inline int a_ll(volatile int *p) -+{ -+ int v; -+ __asm__ __volatile__ ("ldxr %0, %1" : "=r"(v) : "Q"(*p)); -+ return v; -+} -+ -+#define a_sc a_sc -+static inline int a_sc(volatile int *p, int v) -+{ -+ int r; -+ __asm__ __volatile__ ("stxr %w0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory"); -+ return !r; -+} -+ -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ __asm__ __volatile__ ("dmb ish" : : : "memory"); -+} -+ -+#define a_pre_llsc a_barrier -+#define a_post_llsc a_barrier -+ -+#define a_cas_p a_cas_p -+static inline void *a_cas_p(volatile void *p, void *t, void *s) -+{ -+ void *old; -+ __asm__ __volatile__( -+ " dmb ish\n" -+ "1: ldxr %0,%3\n" -+ " cmp %0,%1\n" -+ " b.ne 1f\n" -+ " stxr %w0,%2,%3\n" -+ " cbnz %w0,1b\n" -+ " mov %0,%1\n" -+ "1: dmb ish\n" -+ : "=&r"(old) -+ : "r"(t), "r"(s), "Q"(*(void *volatile *)p) -+ : "memory", "cc"); -+ return old; -+} -+ -+#define a_ctz_64 a_ctz_64 -+static inline int a_ctz_64(uint64_t x) -+{ -+ __asm__( -+ " rbit %0, %1\n" -+ " clz %0, %0\n" -+ : "=r"(x) : "r"(x)); -+ return x; -+} ---- a/arch/aarch64/pthread_arch.h -+++ b/arch/aarch64/pthread_arch.h -@@ -8,4 +8,4 @@ static inline struct pthread *__pthread_ - #define TLS_ABOVE_TP - #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16) - --#define CANCEL_REG_IP 33 -+#define MC_PC pc ---- a/arch/arm/atomic.h -+++ /dev/null -@@ -1,261 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_l(unsigned long x) --{ -- static const char debruijn32[32] = { -- 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -- 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -- }; -- return debruijn32[(x&-x)*0x076be629 >> 27]; --} -- --static inline int a_ctz_64(uint64_t x) --{ -- uint32_t y = x; -- if (!y) { -- y = x>>32; -- return 32 + a_ctz_l(y); -- } -- return a_ctz_l(y); --} -- --#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 -- --static inline void a_barrier() --{ -- __asm__ __volatile__("dmb ish"); --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- int old; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldrex %0,%3\n" -- " cmp %0,%1\n" -- " bne 1f\n" -- " strex %0,%2,%3\n" -- " cmp %0, #0\n" -- " bne 1b\n" -- " mov %0, %1\n" -- "1: dmb ish\n" -- : "=&r"(old) -- : "r"(t), "r"(s), "Q"(*p) -- : "memory", "cc" ); -- return old; --} -- --static inline int a_swap(volatile int *x, int v) --{ -- int old, tmp; -- __asm__ 
__volatile__( -- " dmb ish\n" -- "1: ldrex %0,%3\n" -- " strex %1,%2,%3\n" -- " cmp %1, #0\n" -- " bne 1b\n" -- " dmb ish\n" -- : "=&r"(old), "=&r"(tmp) -- : "r"(v), "Q"(*x) -- : "memory", "cc" ); -- return old; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- int old, tmp; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldrex %0,%3\n" -- " add %0,%0,%2\n" -- " strex %1,%0,%3\n" -- " cmp %1, #0\n" -- " bne 1b\n" -- " dmb ish\n" -- : "=&r"(old), "=&r"(tmp) -- : "r"(v), "Q"(*x) -- : "memory", "cc" ); -- return old-v; --} -- --static inline void a_inc(volatile int *x) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldrex %0,%2\n" -- " add %0,%0,#1\n" -- " strex %1,%0,%2\n" -- " cmp %1, #0\n" -- " bne 1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "Q"(*x) -- : "memory", "cc" ); --} -- --static inline void a_dec(volatile int *x) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldrex %0,%2\n" -- " sub %0,%0,#1\n" -- " strex %1,%0,%2\n" -- " cmp %1, #0\n" -- " bne 1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "Q"(*x) -- : "memory", "cc" ); --} -- --static inline void a_and(volatile int *x, int v) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldrex %0,%3\n" -- " and %0,%0,%2\n" -- " strex %1,%0,%3\n" -- " cmp %1, #0\n" -- " bne 1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "r"(v), "Q"(*x) -- : "memory", "cc" ); --} -- --static inline void a_or(volatile int *x, int v) --{ -- int tmp, tmp2; -- __asm__ __volatile__( -- " dmb ish\n" -- "1: ldrex %0,%3\n" -- " orr %0,%0,%2\n" -- " strex %1,%0,%3\n" -- " cmp %1, #0\n" -- " bne 1b\n" -- " dmb ish\n" -- : "=&r"(tmp), "=&r"(tmp2) -- : "r"(v), "Q"(*x) -- : "memory", "cc" ); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__ __volatile__( -- " dmb ish\n" -- " str %1,%0\n" -- " dmb ish\n" -- : "=m"(*p) -- : "r"(x) -- : "memory", "cc" ); --} -- --#else -- --int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden"))); --#define __k_cas __a_cas -- --static inline void a_barrier() --{ -- __asm__ __volatile__("bl __a_barrier" -- : : : "memory", "cc", "ip", "lr" ); --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- int old; -- for (;;) { -- if (!__k_cas(t, s, p)) -- return t; -- if ((old=*p) != t) -- return old; -- } --} -- --static inline int a_swap(volatile int *x, int v) --{ -- int old; -- do old = *x; -- while (__k_cas(old, v, x)); -- return old; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- int old; -- do old = *x; -- while (__k_cas(old, old+v, x)); -- return old; --} -- --static inline void a_inc(volatile int *x) --{ -- a_fetch_add(x, 1); --} -- --static inline void a_dec(volatile int *x) --{ -- a_fetch_add(x, -1); --} -- --static inline void a_store(volatile int *p, int x) --{ -- a_barrier(); -- *p = x; -- a_barrier(); --} -- --static inline void a_and(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (__k_cas(old, old&v, p)); --} -- --static inline void a_or(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (__k_cas(old, old|v, p)); --} -- --#endif -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- return (void *)a_cas(p, (int)t, (int)s); --} -- --#define a_spin a_barrier -- --static inline void a_crash() --{ -- *(volatile char *)0=0; --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- a_or(p, v); --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- union { 
uint64_t v; uint32_t r[2]; } u = { v }; -- a_and((int *)p, u.r[0]); -- a_and((int *)p+1, u.r[1]); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_or((int *)p, u.r[0]); -- a_or((int *)p+1, u.r[1]); --} -- --#endif ---- /dev/null -+++ b/arch/arm/atomic_arch.h -@@ -0,0 +1,64 @@ -+__attribute__((__visibility__("hidden"))) -+extern const void *__arm_atomics[3]; /* gettp, cas, barrier */ -+ -+#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \ -+ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 -+ -+#define a_ll a_ll -+static inline int a_ll(volatile int *p) -+{ -+ int v; -+ __asm__ __volatile__ ("ldrex %0, %1" : "=r"(v) : "Q"(*p)); -+ return v; -+} -+ -+#define a_sc a_sc -+static inline int a_sc(volatile int *p, int v) -+{ -+ int r; -+ __asm__ __volatile__ ("strex %0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory"); -+ return !r; -+} -+ -+#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 -+ -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ __asm__ __volatile__ ("dmb ish" : : : "memory"); -+} -+ -+#endif -+ -+#define a_pre_llsc a_barrier -+#define a_post_llsc a_barrier -+ -+#else -+ -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ for (;;) { -+ register int r0 __asm__("r0") = t; -+ register int r1 __asm__("r1") = s; -+ register volatile int *r2 __asm__("r2") = p; -+ int old; -+ __asm__ __volatile__ ( -+ "bl __a_cas" -+ : "+r"(r0) : "r"(r1), "r"(r2) -+ : "memory", "r3", "lr", "ip", "cc" ); -+ if (!r0) return t; -+ if ((old=*p)!=t) return old; -+ } -+} -+ -+#endif -+ -+#ifndef a_barrier -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ __asm__ __volatile__("bl __a_barrier" -+ : : : "memory", "cc", "ip", "lr" ); -+} -+#endif ---- a/arch/arm/pthread_arch.h -+++ b/arch/arm/pthread_arch.h -@@ -27,4 +27,4 @@ static inline pthread_t __pthread_self() - #define TLS_ABOVE_TP - #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8) - --#define CANCEL_REG_IP 18 -+#define MC_PC arm_pc ---- a/arch/arm/reloc.h -+++ b/arch/arm/reloc.h -@@ -6,10 +6,10 @@ - #define ENDIAN_SUFFIX "" - #endif - --#if __SOFTFP__ --#define FP_SUFFIX "" --#else -+#if __ARM_PCS_VFP - #define FP_SUFFIX "hf" -+#else -+#define FP_SUFFIX "" - #endif - - #define LDSO_ARCH "arm" ENDIAN_SUFFIX FP_SUFFIX -@@ -28,10 +28,5 @@ - #define REL_TPOFF R_ARM_TLS_TPOFF32 - //#define REL_TLSDESC R_ARM_TLS_DESC - --#ifdef __thumb__ - #define CRTJMP(pc,sp) __asm__ __volatile__( \ - "mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" ) --#else --#define CRTJMP(pc,sp) __asm__ __volatile__( \ -- "mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" ) --#endif ---- a/arch/arm/src/__aeabi_atexit.c -+++ /dev/null -@@ -1,6 +0,0 @@ --int __cxa_atexit(void (*func)(void *), void *arg, void *dso); -- --int __aeabi_atexit (void *obj, void (*func) (void *), void *d) --{ -- return __cxa_atexit (func, obj, d); --} ---- a/arch/arm/src/__aeabi_memclr.c -+++ /dev/null -@@ -1,9 +0,0 @@ --#include <string.h> --#include "libc.h" -- --void __aeabi_memclr(void *dest, size_t n) --{ -- memset(dest, 0, n); --} --weak_alias(__aeabi_memclr, __aeabi_memclr4); --weak_alias(__aeabi_memclr, __aeabi_memclr8); ---- a/arch/arm/src/__aeabi_memcpy.c -+++ /dev/null -@@ -1,9 +0,0 @@ --#include <string.h> --#include "libc.h" -- --void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n) --{ -- memcpy(dest, src, n); --} --weak_alias(__aeabi_memcpy, 
__aeabi_memcpy4); --weak_alias(__aeabi_memcpy, __aeabi_memcpy8); ---- a/arch/arm/src/__aeabi_memmove.c -+++ /dev/null -@@ -1,9 +0,0 @@ --#include <string.h> --#include "libc.h" -- --void __aeabi_memmove(void *dest, const void *src, size_t n) --{ -- memmove(dest, src, n); --} --weak_alias(__aeabi_memmove, __aeabi_memmove4); --weak_alias(__aeabi_memmove, __aeabi_memmove8); ---- a/arch/arm/src/__aeabi_memset.c -+++ /dev/null -@@ -1,9 +0,0 @@ --#include <string.h> --#include "libc.h" -- --void __aeabi_memset(void *dest, size_t n, int c) --{ -- memset(dest, c, n); --} --weak_alias(__aeabi_memset, __aeabi_memset4); --weak_alias(__aeabi_memset, __aeabi_memset8); ---- a/arch/arm/src/__set_thread_area.c -+++ /dev/null -@@ -1,49 +0,0 @@ --#include <stdint.h> --#include <elf.h> --#include "pthread_impl.h" --#include "libc.h" -- --#define HWCAP_TLS (1 << 15) -- --extern const unsigned char __attribute__((__visibility__("hidden"))) -- __a_barrier_dummy[], __a_barrier_oldkuser[], -- __a_barrier_v6[], __a_barrier_v7[], -- __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[], -- __a_gettp_dummy[]; -- --#define __a_barrier_kuser 0xffff0fa0 --#define __a_cas_kuser 0xffff0fc0 --#define __a_gettp_kuser 0xffff0fe0 -- --extern uintptr_t __attribute__((__visibility__("hidden"))) -- __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr; -- --#define SET(op,ver) (__a_##op##_ptr = \ -- (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy) -- --int __set_thread_area(void *p) --{ --#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 -- if (__hwcap & HWCAP_TLS) { -- size_t *aux; -- SET(cas, v7); -- SET(barrier, v7); -- for (aux=libc.auxv; *aux; aux+=2) { -- if (*aux != AT_PLATFORM) continue; -- const char *s = (void *)aux[1]; -- if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break; -- SET(cas, v6); -- SET(barrier, v6); -- break; -- } -- } else { -- int ver = *(int *)0xffff0ffc; -- SET(gettp, kuser); -- SET(cas, kuser); -- SET(barrier, kuser); -- if (ver < 2) a_crash(); -- if (ver < 3) SET(barrier, oldkuser); -- } --#endif -- return __syscall(0xf0005, p); --} ---- a/arch/arm/src/arm/atomics.s -+++ /dev/null -@@ -1,116 +0,0 @@ --.text -- --.global __a_barrier --.hidden __a_barrier --.type __a_barrier,%function --__a_barrier: -- ldr ip,1f -- ldr ip,[pc,ip] -- add pc,pc,ip --1: .word __a_barrier_ptr-1b --.global __a_barrier_dummy --.hidden __a_barrier_dummy --__a_barrier_dummy: -- tst lr,#1 -- moveq pc,lr -- bx lr --.global __a_barrier_oldkuser --.hidden __a_barrier_oldkuser --__a_barrier_oldkuser: -- push {r0,r1,r2,r3,ip,lr} -- mov r1,r0 -- mov r2,sp -- ldr ip,=0xffff0fc0 -- mov lr,pc -- mov pc,ip -- pop {r0,r1,r2,r3,ip,lr} -- tst lr,#1 -- moveq pc,lr -- bx lr --.global __a_barrier_v6 --.hidden __a_barrier_v6 --__a_barrier_v6: -- mcr p15,0,r0,c7,c10,5 -- bx lr --.global __a_barrier_v7 --.hidden __a_barrier_v7 --__a_barrier_v7: -- .word 0xf57ff05b /* dmb ish */ -- bx lr -- --.global __a_cas --.hidden __a_cas --.type __a_cas,%function --__a_cas: -- ldr ip,1f -- ldr ip,[pc,ip] -- add pc,pc,ip --1: .word __a_cas_ptr-1b --.global __a_cas_dummy --.hidden __a_cas_dummy --__a_cas_dummy: -- mov r3,r0 -- ldr r0,[r2] -- subs r0,r3,r0 -- streq r1,[r2] -- tst lr,#1 -- moveq pc,lr -- bx lr --.global __a_cas_v6 --.hidden __a_cas_v6 --__a_cas_v6: -- mov r3,r0 -- mcr p15,0,r0,c7,c10,5 --1: .word 0xe1920f9f /* ldrex r0,[r2] */ -- subs r0,r3,r0 -- .word 0x01820f91 /* strexeq r0,r1,[r2] */ -- teqeq r0,#1 -- beq 1b -- mcr p15,0,r0,c7,c10,5 -- bx lr --.global __a_cas_v7 --.hidden __a_cas_v7 --__a_cas_v7: -- mov r3,r0 -- .word 0xf57ff05b /* dmb ish */ 
--1: .word 0xe1920f9f /* ldrex r0,[r2] */ -- subs r0,r3,r0 -- .word 0x01820f91 /* strexeq r0,r1,[r2] */ -- teqeq r0,#1 -- beq 1b -- .word 0xf57ff05b /* dmb ish */ -- bx lr -- --.global __aeabi_read_tp --.type __aeabi_read_tp,%function --__aeabi_read_tp: -- --.global __a_gettp --.hidden __a_gettp --.type __a_gettp,%function --__a_gettp: -- ldr r0,1f -- ldr r0,[pc,r0] -- add pc,pc,r0 --1: .word __a_gettp_ptr-1b --.global __a_gettp_dummy --.hidden __a_gettp_dummy --__a_gettp_dummy: -- mrc p15,0,r0,c13,c0,3 -- bx lr -- --.data --.global __a_barrier_ptr --.hidden __a_barrier_ptr --__a_barrier_ptr: -- .word 0 -- --.global __a_cas_ptr --.hidden __a_cas_ptr --__a_cas_ptr: -- .word 0 -- --.global __a_gettp_ptr --.hidden __a_gettp_ptr --__a_gettp_ptr: -- .word 0 ---- a/arch/arm/src/find_exidx.c -+++ /dev/null -@@ -1,42 +0,0 @@ --#define _GNU_SOURCE --#include <link.h> --#include <stdint.h> -- --struct find_exidx_data { -- uintptr_t pc, exidx_start; -- int exidx_len; --}; -- --static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr) --{ -- struct find_exidx_data *data = ptr; -- const ElfW(Phdr) *phdr = info->dlpi_phdr; -- uintptr_t addr, exidx_start = 0; -- int i, match = 0, exidx_len = 0; -- -- for (i = info->dlpi_phnum; i > 0; i--, phdr++) { -- addr = info->dlpi_addr + phdr->p_vaddr; -- switch (phdr->p_type) { -- case PT_LOAD: -- match |= data->pc >= addr && data->pc < addr + phdr->p_memsz; -- break; -- case PT_ARM_EXIDX: -- exidx_start = addr; -- exidx_len = phdr->p_memsz; -- break; -- } -- } -- data->exidx_start = exidx_start; -- data->exidx_len = exidx_len; -- return match; --} -- --uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount) --{ -- struct find_exidx_data data; -- data.pc = pc; -- if (dl_iterate_phdr(find_exidx, &data) <= 0) -- return 0; -- *pcount = data.exidx_len / 8; -- return data.exidx_start; --} ---- a/arch/i386/atomic.h -+++ /dev/null -@@ -1,110 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_64(uint64_t x) --{ -- int r; -- __asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:" -- : "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) ); -- return r; --} -- --static inline int a_ctz_l(unsigned long x) --{ -- long r; -- __asm__( "bsf %1,%0" : "=r"(r) : "r"(x) ); -- return r; --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- __asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)" -- : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- __asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)" -- : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- __asm__( "lock ; orl %1, %0" -- : "=m"(*(long *)p) : "r"(v) : "memory" ); --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- __asm__( "lock ; cmpxchg %3, %1" -- : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); -- return t; --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- __asm__( "lock ; cmpxchg %3, %1" -- : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); -- return t; --} -- --static inline void a_or(volatile int *p, int v) --{ -- __asm__( "lock ; orl %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_and(volatile int *p, int v) --{ -- __asm__( "lock ; andl %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline int a_swap(volatile int 
*x, int v) --{ -- __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -- return v; --} -- --#define a_xchg a_swap -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -- return v; --} -- --static inline void a_inc(volatile int *x) --{ -- __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); --} -- --static inline void a_dec(volatile int *x) --{ -- __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" ); --} -- --static inline void a_spin() --{ -- __asm__ __volatile__( "pause" : : : "memory" ); --} -- --static inline void a_barrier() --{ -- __asm__ __volatile__( "" : : : "memory" ); --} -- --static inline void a_crash() --{ -- __asm__ __volatile__( "hlt" : : : "memory" ); --} -- -- --#endif ---- /dev/null -+++ b/arch/i386/atomic_arch.h -@@ -0,0 +1,109 @@ -+#define a_ctz_64 a_ctz_64 -+static inline int a_ctz_64(uint64_t x) -+{ -+ int r; -+ __asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:" -+ : "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) ); -+ return r; -+} -+ -+#define a_ctz_l a_ctz_l -+static inline int a_ctz_l(unsigned long x) -+{ -+ long r; -+ __asm__( "bsf %1,%0" : "=r"(r) : "r"(x) ); -+ return r; -+} -+ -+#define a_and_64 a_and_64 -+static inline void a_and_64(volatile uint64_t *p, uint64_t v) -+{ -+ __asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)" -+ : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); -+} -+ -+#define a_or_64 a_or_64 -+static inline void a_or_64(volatile uint64_t *p, uint64_t v) -+{ -+ __asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)" -+ : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); -+} -+ -+#define a_or_l a_or_l -+static inline void a_or_l(volatile void *p, long v) -+{ -+ __asm__( "lock ; orl %1, %0" -+ : "=m"(*(long *)p) : "r"(v) : "memory" ); -+} -+ -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ __asm__( "lock ; cmpxchg %3, %1" -+ : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); -+ return t; -+} -+ -+#define a_or a_or -+static inline void a_or(volatile int *p, int v) -+{ -+ __asm__( "lock ; orl %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_and a_and -+static inline void a_and(volatile int *p, int v) -+{ -+ __asm__( "lock ; andl %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_swap a_swap -+static inline int a_swap(volatile int *x, int v) -+{ -+ __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -+ return v; -+} -+ -+#define a_fetch_add a_fetch_add -+static inline int a_fetch_add(volatile int *x, int v) -+{ -+ __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -+ return v; -+} -+ -+#define a_inc a_inc -+static inline void a_inc(volatile int *x) -+{ -+ __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); -+} -+ -+#define a_dec a_dec -+static inline void a_dec(volatile int *x) -+{ -+ __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); -+} -+ -+#define a_store a_store -+static inline void a_store(volatile int *p, int x) -+{ -+ __asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" ); -+} -+ -+#define a_spin a_spin -+static inline void a_spin() -+{ -+ __asm__ __volatile__( "pause" : : : "memory" ); -+} -+ -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ __asm__ 
__volatile__( "" : : : "memory" ); -+} -+ -+#define a_crash a_crash -+static inline void a_crash() -+{ -+ __asm__ __volatile__( "hlt" : : : "memory" ); -+} ---- a/arch/i386/bits/alltypes.h.in -+++ b/arch/i386/bits/alltypes.h.in -@@ -26,10 +26,12 @@ TYPEDEF long double float_t; - TYPEDEF long double double_t; - #endif - --#ifdef __cplusplus --TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t; --#else -+#if !defined(__cplusplus) - TYPEDEF struct { _Alignas(8) long long __ll; long double __ld; } max_align_t; -+#elif defined(__GNUC__) -+TYPEDEF struct { __attribute__((__aligned__(8))) long long __ll; long double __ld; } max_align_t; -+#else -+TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t; - #endif - - TYPEDEF long time_t; ---- a/arch/i386/pthread_arch.h -+++ b/arch/i386/pthread_arch.h -@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_ - - #define TP_ADJ(p) (p) - --#define CANCEL_REG_IP 14 -+#define MC_PC gregs[REG_EIP] ---- a/arch/microblaze/atomic.h -+++ /dev/null -@@ -1,143 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_l(unsigned long x) --{ -- static const char debruijn32[32] = { -- 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -- 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -- }; -- return debruijn32[(x&-x)*0x076be629 >> 27]; --} -- --static inline int a_ctz_64(uint64_t x) --{ -- uint32_t y = x; -- if (!y) { -- y = x>>32; -- return 32 + a_ctz_l(y); -- } -- return a_ctz_l(y); --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- register int old, tmp; -- __asm__ __volatile__ ( -- " addi %0, r0, 0\n" -- "1: lwx %0, %2, r0\n" -- " rsubk %1, %0, %3\n" -- " bnei %1, 1f\n" -- " swx %4, %2, r0\n" -- " addic %1, r0, 0\n" -- " bnei %1, 1b\n" -- "1: " -- : "=&r"(old), "=&r"(tmp) -- : "r"(p), "r"(t), "r"(s) -- : "cc", "memory" ); -- return old; --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- return (void *)a_cas(p, (int)t, (int)s); --} -- --static inline int a_swap(volatile int *x, int v) --{ -- register int old, tmp; -- __asm__ __volatile__ ( -- " addi %0, r0, 0\n" -- "1: lwx %0, %2, r0\n" -- " swx %3, %2, r0\n" -- " addic %1, r0, 0\n" -- " bnei %1, 1b\n" -- "1: " -- : "=&r"(old), "=&r"(tmp) -- : "r"(x), "r"(v) -- : "cc", "memory" ); -- return old; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- register int new, tmp; -- __asm__ __volatile__ ( -- " addi %0, r0, 0\n" -- "1: lwx %0, %2, r0\n" -- " addk %0, %0, %3\n" -- " swx %0, %2, r0\n" -- " addic %1, r0, 0\n" -- " bnei %1, 1b\n" -- "1: " -- : "=&r"(new), "=&r"(tmp) -- : "r"(x), "r"(v) -- : "cc", "memory" ); -- return new-v; --} -- --static inline void a_inc(volatile int *x) --{ -- a_fetch_add(x, 1); --} -- --static inline void a_dec(volatile int *x) --{ -- a_fetch_add(x, -1); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__ __volatile__ ( -- "swi %1, %0" -- : "=m"(*p) : "r"(x) : "memory" ); --} -- --#define a_spin a_barrier -- --static inline void a_barrier() --{ -- a_cas(&(int){0}, 0, 0); --} -- --static inline void a_crash() --{ -- *(volatile char *)0=0; --} -- --static inline void a_and(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (a_cas(p, old, old&v) != old); --} -- --static inline void a_or(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (a_cas(p, old, old|v) != old); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- a_or(p, v); --} 
-- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_and((int *)p, u.r[0]); -- a_and((int *)p+1, u.r[1]); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_or((int *)p, u.r[0]); -- a_or((int *)p+1, u.r[1]); --} -- --#endif ---- /dev/null -+++ b/arch/microblaze/atomic_arch.h -@@ -0,0 +1,53 @@ -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ register int old, tmp; -+ __asm__ __volatile__ ( -+ " addi %0, r0, 0\n" -+ "1: lwx %0, %2, r0\n" -+ " rsubk %1, %0, %3\n" -+ " bnei %1, 1f\n" -+ " swx %4, %2, r0\n" -+ " addic %1, r0, 0\n" -+ " bnei %1, 1b\n" -+ "1: " -+ : "=&r"(old), "=&r"(tmp) -+ : "r"(p), "r"(t), "r"(s) -+ : "cc", "memory" ); -+ return old; -+} -+ -+#define a_swap a_swap -+static inline int a_swap(volatile int *x, int v) -+{ -+ register int old, tmp; -+ __asm__ __volatile__ ( -+ " addi %0, r0, 0\n" -+ "1: lwx %0, %2, r0\n" -+ " swx %3, %2, r0\n" -+ " addic %1, r0, 0\n" -+ " bnei %1, 1b\n" -+ "1: " -+ : "=&r"(old), "=&r"(tmp) -+ : "r"(x), "r"(v) -+ : "cc", "memory" ); -+ return old; -+} -+ -+#define a_fetch_add a_fetch_add -+static inline int a_fetch_add(volatile int *x, int v) -+{ -+ register int new, tmp; -+ __asm__ __volatile__ ( -+ " addi %0, r0, 0\n" -+ "1: lwx %0, %2, r0\n" -+ " addk %0, %0, %3\n" -+ " swx %0, %2, r0\n" -+ " addic %1, r0, 0\n" -+ " bnei %1, 1b\n" -+ "1: " -+ : "=&r"(new), "=&r"(tmp) -+ : "r"(x), "r"(v) -+ : "cc", "memory" ); -+ return new-v; -+} ---- a/arch/microblaze/pthread_arch.h -+++ b/arch/microblaze/pthread_arch.h -@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_ - - #define TP_ADJ(p) (p) - --#define CANCEL_REG_IP 32 -+#define MC_PC regs.pc ---- a/arch/mips/atomic.h -+++ /dev/null -@@ -1,205 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_l(unsigned long x) --{ -- static const char debruijn32[32] = { -- 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -- 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -- }; -- return debruijn32[(x&-x)*0x076be629 >> 27]; --} -- --static inline int a_ctz_64(uint64_t x) --{ -- uint32_t y = x; -- if (!y) { -- y = x>>32; -- return 32 + a_ctz_l(y); -- } -- return a_ctz_l(y); --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- int dummy; -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- "1: ll %0, %2\n" -- " bne %0, %3, 1f\n" -- " addu %1, %4, $0\n" -- " sc %1, %2\n" -- " beq %1, $0, 1b\n" -- " nop\n" -- " sync\n" -- "1: \n" -- ".set pop\n" -- : "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" ); -- return t; --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- return (void *)a_cas(p, (int)t, (int)s); --} -- --static inline int a_swap(volatile int *x, int v) --{ -- int old, dummy; -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- "1: ll %0, %2\n" -- " addu %1, %3, $0\n" -- " sc %1, %2\n" -- " beq %1, $0, 1b\n" -- " nop\n" -- " sync\n" -- ".set pop\n" -- : "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" ); -- return old; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- int old, dummy; -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- "1: ll %0, %2\n" -- " addu %1, %0, %3\n" -- " sc %1, %2\n" -- " beq %1, $0, 1b\n" -- " 
nop\n" -- " sync\n" -- ".set pop\n" -- : "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" ); -- return old; --} -- --static inline void a_inc(volatile int *x) --{ -- int dummy; -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- "1: ll %0, %1\n" -- " addu %0, %0, 1\n" -- " sc %0, %1\n" -- " beq %0, $0, 1b\n" -- " nop\n" -- " sync\n" -- ".set pop\n" -- : "=&r"(dummy), "+m"(*x) : : "memory" ); --} -- --static inline void a_dec(volatile int *x) --{ -- int dummy; -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- "1: ll %0, %1\n" -- " subu %0, %0, 1\n" -- " sc %0, %1\n" -- " beq %0, $0, 1b\n" -- " nop\n" -- " sync\n" -- ".set pop\n" -- : "=&r"(dummy), "+m"(*x) : : "memory" ); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- " sw %1, %0\n" -- " sync\n" -- ".set pop\n" -- : "+m"(*p) : "r"(x) : "memory" ); --} -- --#define a_spin a_barrier -- --static inline void a_barrier() --{ -- a_cas(&(int){0}, 0, 0); --} -- --static inline void a_crash() --{ -- *(volatile char *)0=0; --} -- --static inline void a_and(volatile int *p, int v) --{ -- int dummy; -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- "1: ll %0, %1\n" -- " and %0, %0, %2\n" -- " sc %0, %1\n" -- " beq %0, $0, 1b\n" -- " nop\n" -- " sync\n" -- ".set pop\n" -- : "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_or(volatile int *p, int v) --{ -- int dummy; -- __asm__ __volatile__( -- ".set push\n" -- ".set mips2\n" -- ".set noreorder\n" -- " sync\n" -- "1: ll %0, %1\n" -- " or %0, %0, %2\n" -- " sc %0, %1\n" -- " beq %0, $0, 1b\n" -- " nop\n" -- " sync\n" -- ".set pop\n" -- : "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- a_or(p, v); --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_and((int *)p, u.r[0]); -- a_and((int *)p+1, u.r[1]); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_or((int *)p, u.r[0]); -- a_or((int *)p+1, u.r[1]); --} -- --#endif ---- /dev/null -+++ b/arch/mips/atomic_arch.h -@@ -0,0 +1,39 @@ -+#define a_ll a_ll -+static inline int a_ll(volatile int *p) -+{ -+ int v; -+ __asm__ __volatile__ ( -+ ".set push ; .set mips2\n\t" -+ "ll %0, %1" -+ "\n\t.set pop" -+ : "=r"(v) : "m"(*p)); -+ return v; -+} -+ -+#define a_sc a_sc -+static inline int a_sc(volatile int *p, int v) -+{ -+ int r; -+ __asm__ __volatile__ ( -+ ".set push ; .set mips2\n\t" -+ "sc %0, %1" -+ "\n\t.set pop" -+ : "=r"(r), "=m"(*p) : "0"(v) : "memory"); -+ return r; -+} -+ -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ /* mips2 sync, but using too many directives causes -+ * gcc not to inline it, so encode with .long instead. 
*/ -+ __asm__ __volatile__ (".long 0xf" : : : "memory"); -+#if 0 -+ __asm__ __volatile__ ( -+ ".set push ; .set mips2 ; sync ; .set pop" -+ : : : "memory"); -+#endif -+} -+ -+#define a_pre_llsc a_barrier -+#define a_post_llsc a_barrier ---- a/arch/mips/crt_arch.h -+++ b/arch/mips/crt_arch.h -@@ -4,13 +4,16 @@ __asm__( - ".text \n" - ".global _" START "\n" - ".global " START "\n" -+".global " START "_data\n" - ".type _" START ", @function\n" - ".type " START ", @function\n" -+".type " START "_data, @function\n" - "_" START ":\n" - "" START ":\n" - " bal 1f \n" - " move $fp, $0 \n" --"2: .gpword 2b \n" -+"" START "_data: \n" -+" .gpword " START "_data \n" - " .gpword " START "_c \n" - ".weak _DYNAMIC \n" - ".hidden _DYNAMIC \n" ---- a/arch/mips/pthread_arch.h -+++ b/arch/mips/pthread_arch.h -@@ -16,4 +16,4 @@ static inline struct pthread *__pthread_ - - #define DTP_OFFSET 0x8000 - --#define CANCEL_REG_IP (3-(union {int __i; char __b;}){1}.__b) -+#define MC_PC pc ---- a/arch/mips/syscall_arch.h -+++ b/arch/mips/syscall_arch.h -@@ -3,9 +3,7 @@ - ((union { long long ll; long l[2]; }){ .ll = x }).l[1] - #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x)) - --#ifdef SHARED - __attribute__((visibility("hidden"))) --#endif - long (__syscall)(long, ...); - - #define SYSCALL_RLIM_INFINITY (-1UL/2) ---- a/arch/or1k/atomic.h -+++ /dev/null -@@ -1,120 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_l(unsigned long x) --{ -- static const char debruijn32[32] = { -- 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -- 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -- }; -- return debruijn32[(x&-x)*0x076be629 >> 27]; --} -- --static inline int a_ctz_64(uint64_t x) --{ -- uint32_t y = x; -- if (!y) { -- y = x>>32; -- return 32 + a_ctz_l(y); -- } -- return a_ctz_l(y); --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- __asm__("1: l.lwa %0, %1\n" -- " l.sfeq %0, %2\n" -- " l.bnf 1f\n" -- " l.nop\n" -- " l.swa %1, %3\n" -- " l.bnf 1b\n" -- " l.nop\n" -- "1: \n" -- : "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" ); -- return t; --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- return (void *)a_cas(p, (int)t, (int)s); --} -- --static inline int a_swap(volatile int *x, int v) --{ -- int old; -- do old = *x; -- while (a_cas(x, old, v) != old); -- return old; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- int old; -- do old = *x; -- while (a_cas(x, old, old+v) != old); -- return old; --} -- --static inline void a_inc(volatile int *x) --{ -- a_fetch_add(x, 1); --} -- --static inline void a_dec(volatile int *x) --{ -- a_fetch_add(x, -1); --} -- --static inline void a_store(volatile int *p, int x) --{ -- a_swap(p, x); --} -- --#define a_spin a_barrier -- --static inline void a_barrier() --{ -- a_cas(&(int){0}, 0, 0); --} -- --static inline void a_crash() --{ -- *(volatile char *)0=0; --} -- --static inline void a_and(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (a_cas(p, old, old&v) != old); --} -- --static inline void a_or(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (a_cas(p, old, old|v) != old); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- a_or(p, v); --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_and((int *)p, u.r[0]); -- a_and((int *)p+1, u.r[1]); --} -- --static inline void a_or_64(volatile uint64_t *p, 
uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_or((int *)p, u.r[0]); -- a_or((int *)p+1, u.r[1]); --} -- --#endif ---- /dev/null -+++ b/arch/or1k/atomic_arch.h -@@ -0,0 +1,14 @@ -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ __asm__("1: l.lwa %0, %1\n" -+ " l.sfeq %0, %2\n" -+ " l.bnf 1f\n" -+ " l.nop\n" -+ " l.swa %1, %3\n" -+ " l.bnf 1b\n" -+ " l.nop\n" -+ "1: \n" -+ : "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" ); -+ return t; -+} ---- a/arch/or1k/pthread_arch.h -+++ b/arch/or1k/pthread_arch.h -@@ -14,5 +14,4 @@ static inline struct pthread *__pthread_ - #define TLS_ABOVE_TP - #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread)) - --/* word-offset to 'pc' in mcontext_t */ --#define CANCEL_REG_IP 32 -+#define MC_PC regs.pc ---- a/arch/powerpc/atomic.h -+++ /dev/null -@@ -1,126 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> --#include <endian.h> -- --static inline int a_ctz_l(unsigned long x) --{ -- static const char debruijn32[32] = { -- 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -- 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -- }; -- return debruijn32[(x&-x)*0x076be629 >> 27]; --} -- --static inline int a_ctz_64(uint64_t x) --{ -- uint32_t y = x; -- if (!y) { -- y = x>>32; -- return 32 + a_ctz_l(y); -- } -- return a_ctz_l(y); --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- __asm__("\n" -- " sync\n" -- "1: lwarx %0, 0, %4\n" -- " cmpw %0, %2\n" -- " bne 1f\n" -- " stwcx. %3, 0, %4\n" -- " bne- 1b\n" -- " isync\n" -- "1: \n" -- : "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" ); -- return t; --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- return (void *)a_cas(p, (int)t, (int)s); --} -- --static inline int a_swap(volatile int *x, int v) --{ -- int old; -- do old = *x; -- while (a_cas(x, old, v) != old); -- return old; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- int old; -- do old = *x; -- while (a_cas(x, old, old+v) != old); -- return old; --} -- --static inline void a_inc(volatile int *x) --{ -- a_fetch_add(x, 1); --} -- --static inline void a_dec(volatile int *x) --{ -- a_fetch_add(x, -1); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__ __volatile__ ("\n" -- " sync\n" -- " stw %1, %0\n" -- " isync\n" -- : "=m"(*p) : "r"(x) : "memory" ); --} -- --#define a_spin a_barrier -- --static inline void a_barrier() --{ -- a_cas(&(int){0}, 0, 0); --} -- --static inline void a_crash() --{ -- *(volatile char *)0=0; --} -- --static inline void a_and(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (a_cas(p, old, old&v) != old); --} -- --static inline void a_or(volatile int *p, int v) --{ -- int old; -- do old = *p; -- while (a_cas(p, old, old|v) != old); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- a_or(p, v); --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_and((int *)p, u.r[0]); -- a_and((int *)p+1, u.r[1]); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_or((int *)p, u.r[0]); -- a_or((int *)p+1, u.r[1]); --} -- --#endif ---- /dev/null -+++ b/arch/powerpc/atomic_arch.h -@@ -0,0 +1,15 @@ -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ __asm__("\n" -+ " sync\n" -+ "1: lwarx %0, 0, %4\n" -+ " cmpw 
%0, %2\n" -+ " bne 1f\n" -+ " stwcx. %3, 0, %4\n" -+ " bne- 1b\n" -+ " isync\n" -+ "1: \n" -+ : "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" ); -+ return t; -+} ---- a/arch/powerpc/pthread_arch.h -+++ b/arch/powerpc/pthread_arch.h -@@ -15,9 +15,8 @@ static inline struct pthread *__pthread_ - - #define DTP_OFFSET 0x8000 - --// offset of the PC register in mcontext_t, divided by the system wordsize - // the kernel calls the ip "nip", it's the first saved value after the 32 - // GPRs. --#define CANCEL_REG_IP 32 -+#define MC_PC gregs[32] - - #define CANARY canary_at_end ---- a/arch/sh/atomic.h -+++ /dev/null -@@ -1,168 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_l(unsigned long x) --{ -- static const char debruijn32[32] = { -- 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -- 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -- }; -- return debruijn32[(x&-x)*0x076be629 >> 27]; --} -- --static inline int a_ctz_64(uint64_t x) --{ -- uint32_t y = x; -- if (!y) { -- y = x>>32; -- return 32 + a_ctz_l(y); -- } -- return a_ctz_l(y); --} -- --#define LLSC_CLOBBERS "r0", "t", "memory" --#define LLSC_START(mem) "synco\n" \ -- "0: movli.l @" mem ", r0\n" --#define LLSC_END(mem) \ -- "1: movco.l r0, @" mem "\n" \ -- " bf 0b\n" \ -- " synco\n" -- --static inline int __sh_cas_llsc(volatile int *p, int t, int s) --{ -- int old; -- __asm__ __volatile__( -- LLSC_START("%1") -- " mov r0, %0\n" -- " cmp/eq %0, %2\n" -- " bf 1f\n" -- " mov %3, r0\n" -- LLSC_END("%1") -- : "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS); -- return old; --} -- --static inline int __sh_swap_llsc(volatile int *x, int v) --{ -- int old; -- __asm__ __volatile__( -- LLSC_START("%1") -- " mov r0, %0\n" -- " mov %2, r0\n" -- LLSC_END("%1") -- : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS); -- return old; --} -- --static inline int __sh_fetch_add_llsc(volatile int *x, int v) --{ -- int old; -- __asm__ __volatile__( -- LLSC_START("%1") -- " mov r0, %0\n" -- " add %2, r0\n" -- LLSC_END("%1") -- : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS); -- return old; --} -- --static inline void __sh_store_llsc(volatile int *p, int x) --{ -- __asm__ __volatile__( -- " synco\n" -- " mov.l %1, @%0\n" -- " synco\n" -- : : "r"(p), "r"(x) : "memory"); --} -- --static inline void __sh_and_llsc(volatile int *x, int v) --{ -- __asm__ __volatile__( -- LLSC_START("%0") -- " and %1, r0\n" -- LLSC_END("%0") -- : : "r"(x), "r"(v) : LLSC_CLOBBERS); --} -- --static inline void __sh_or_llsc(volatile int *x, int v) --{ -- __asm__ __volatile__( -- LLSC_START("%0") -- " or %1, r0\n" -- LLSC_END("%0") -- : : "r"(x), "r"(v) : LLSC_CLOBBERS); --} -- --#ifdef __SH4A__ --#define a_cas(p,t,s) __sh_cas_llsc(p,t,s) --#define a_swap(x,v) __sh_swap_llsc(x,v) --#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v) --#define a_store(x,v) __sh_store_llsc(x, v) --#define a_and(x,v) __sh_and_llsc(x, v) --#define a_or(x,v) __sh_or_llsc(x, v) --#else -- --int __sh_cas(volatile int *, int, int); --int __sh_swap(volatile int *, int); --int __sh_fetch_add(volatile int *, int); --void __sh_store(volatile int *, int); --void __sh_and(volatile int *, int); --void __sh_or(volatile int *, int); -- --#define a_cas(p,t,s) __sh_cas(p,t,s) --#define a_swap(x,v) __sh_swap(x,v) --#define a_fetch_add(x,v) __sh_fetch_add(x, v) --#define a_store(x,v) __sh_store(x, v) --#define a_and(x,v) __sh_and(x, v) --#define a_or(x,v) __sh_or(x, v) --#endif -- --static inline void *a_cas_p(volatile void 
*p, void *t, void *s) --{ -- return (void *)a_cas(p, (int)t, (int)s); --} -- --static inline void a_inc(volatile int *x) --{ -- a_fetch_add(x, 1); --} -- --static inline void a_dec(volatile int *x) --{ -- a_fetch_add(x, -1); --} -- --#define a_spin a_barrier -- --static inline void a_barrier() --{ -- a_cas(&(int){0}, 0, 0); --} -- --static inline void a_crash() --{ -- *(volatile char *)0=0; --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- a_or(p, v); --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_and((int *)p, u.r[0]); -- a_and((int *)p+1, u.r[1]); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- union { uint64_t v; uint32_t r[2]; } u = { v }; -- a_or((int *)p, u.r[0]); -- a_or((int *)p+1, u.r[1]); --} -- --#endif ---- /dev/null -+++ b/arch/sh/atomic_arch.h -@@ -0,0 +1,46 @@ -+#if defined(__SH4A__) -+ -+#define a_ll a_ll -+static inline int a_ll(volatile int *p) -+{ -+ int v; -+ __asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p)); -+ return v; -+} -+ -+#define a_sc a_sc -+static inline int a_sc(volatile int *p, int v) -+{ -+ int r; -+ __asm__ __volatile__ ( -+ "movco.l %2, @%3 ; movt %0" -+ : "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc"); -+ return r; -+} -+ -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ __asm__ __volatile__ ("synco" : : "memory"); -+} -+ -+#define a_pre_llsc a_barrier -+#define a_post_llsc a_barrier -+ -+#else -+ -+#define a_cas a_cas -+__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr; -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ register int r1 __asm__("r1"); -+ register int r2 __asm__("r2") = t; -+ register int r3 __asm__("r3") = s; -+ __asm__ __volatile__ ( -+ "jsr @%4 ; nop" -+ : "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr) -+ : "memory", "pr", "cc"); -+ return r3; -+} -+ -+#endif ---- a/arch/sh/crt_arch.h -+++ b/arch/sh/crt_arch.h -@@ -22,7 +22,8 @@ START ": \n" - " mov.l 1f, r5 \n" - " mov.l 1f+4, r6 \n" - " add r0, r5 \n" --" bsr __fdpic_fixup \n" -+" mov.l 4f, r1 \n" -+"5: bsrf r1 \n" - " add r0, r6 \n" - " mov r0, r12 \n" - #endif -@@ -31,11 +32,16 @@ START ": \n" - " mov.l r9, @-r15 \n" - " mov.l r8, @-r15 \n" - " mov #-16, r0 \n" --" bsr " START "_c \n" -+" mov.l 2f, r1 \n" -+"3: bsrf r1 \n" - " and r0, r15 \n" - ".align 2 \n" - "1: .long __ROFIXUP_LIST__@PCREL \n" - " .long __ROFIXUP_END__@PCREL + 4 \n" -+"2: .long " START "_c@PCREL - (3b+4-.) \n" -+#ifndef SHARED -+"4: .long __fdpic_fixup@PCREL - (5b+4-.) \n" -+#endif - ); - - #ifndef SHARED -@@ -53,13 +59,14 @@ START ": \n" - " add r0, r5 \n" - " mov r15, r4 \n" - " mov #-16, r0 \n" --" and r0, r15 \n" --" bsr " START "_c \n" --" nop \n" -+" mov.l 2f, r1 \n" -+"3: bsrf r1 \n" -+" and r0, r15 \n" - ".align 2 \n" - ".weak _DYNAMIC \n" - ".hidden _DYNAMIC \n" - "1: .long _DYNAMIC-. \n" -+"2: .long " START "_c@PCREL - (3b+4-.) 
\n" - ); - - #endif ---- a/arch/sh/pthread_arch.h -+++ b/arch/sh/pthread_arch.h -@@ -8,4 +8,4 @@ static inline struct pthread *__pthread_ - #define TLS_ABOVE_TP - #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8) - --#define CANCEL_REG_IP 17 -+#define MC_PC sc_pc ---- a/arch/sh/reloc.h -+++ b/arch/sh/reloc.h -@@ -32,6 +32,8 @@ - #define REL_DTPOFF R_SH_TLS_DTPOFF32 - #define REL_TPOFF R_SH_TLS_TPOFF32 - -+#define DL_NOMMU_SUPPORT 1 -+ - #if __SH_FDPIC__ - #define REL_FUNCDESC R_SH_FUNCDESC - #define REL_FUNCDESC_VAL R_SH_FUNCDESC_VALUE ---- a/arch/sh/src/__set_thread_area.c -+++ /dev/null -@@ -1,34 +0,0 @@ --#include "pthread_impl.h" --#include "libc.h" --#include "sh_atomic.h" --#include <elf.h> -- --/* Also perform sh-specific init */ -- --#define CPU_HAS_LLSC 0x0040 -- --__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu; -- --int __set_thread_area(void *p) --{ -- size_t *aux; -- __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" ); --#ifndef __SH4A__ -- if (__hwcap & CPU_HAS_LLSC) { -- __sh_atomic_model = SH_A_LLSC; -- return 0; -- } --#if !defined(__SH3__) && !defined(__SH4__) -- for (aux=libc.auxv; *aux; aux+=2) { -- if (*aux != AT_PLATFORM) continue; -- const char *s = (void *)aux[1]; -- if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break; -- __sh_atomic_model = SH_A_IMASK; -- __sh_nommu = 1; -- return 0; -- } --#endif -- /* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */ --#endif -- return 0; --} ---- a/arch/sh/src/atomic.c -+++ /dev/null -@@ -1,158 +0,0 @@ --#ifndef __SH4A__ -- --#include "sh_atomic.h" --#include "atomic.h" --#include "libc.h" -- --static inline unsigned mask() --{ -- unsigned sr; -- __asm__ __volatile__ ( "\n" -- " stc sr,r0 \n" -- " mov r0,%0 \n" -- " or #0xf0,r0 \n" -- " ldc r0,sr \n" -- : "=&r"(sr) : : "memory", "r0" ); -- return sr; --} -- --static inline void unmask(unsigned sr) --{ -- __asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" ); --} -- --/* gusa is a hack in the kernel which lets you create a sequence of instructions -- * which will be restarted if the process is preempted in the middle of the -- * sequence. It will do for implementing atomics on non-smp systems. 
ABI is: -- * r0 = address of first instruction after the atomic sequence -- * r1 = original stack pointer -- * r15 = -1 * length of atomic sequence in bytes -- */ --#define GUSA_CLOBBERS "r0", "r1", "memory" --#define GUSA_START(mem,old,nop) \ -- " .align 2\n" \ -- " mova 1f, r0\n" \ -- nop \ -- " mov r15, r1\n" \ -- " mov #(0f-1f), r15\n" \ -- "0: mov.l @" mem ", " old "\n" --/* the target of mova must be 4 byte aligned, so we may need a nop */ --#define GUSA_START_ODD(mem,old) GUSA_START(mem,old,"") --#define GUSA_START_EVEN(mem,old) GUSA_START(mem,old,"\tnop\n") --#define GUSA_END(mem,new) \ -- " mov.l " new ", @" mem "\n" \ -- "1: mov r1, r15\n" -- --int __sh_cas(volatile int *p, int t, int s) --{ -- if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s); -- -- if (__sh_atomic_model == SH_A_IMASK) { -- unsigned sr = mask(); -- int old = *p; -- if (old==t) *p = s; -- unmask(sr); -- return old; -- } -- -- int old; -- __asm__ __volatile__( -- GUSA_START_EVEN("%1", "%0") -- " cmp/eq %0, %2\n" -- " bf 1f\n" -- GUSA_END("%1", "%3") -- : "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t"); -- return old; --} -- --int __sh_swap(volatile int *x, int v) --{ -- if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v); -- -- if (__sh_atomic_model == SH_A_IMASK) { -- unsigned sr = mask(); -- int old = *x; -- *x = v; -- unmask(sr); -- return old; -- } -- -- int old; -- __asm__ __volatile__( -- GUSA_START_EVEN("%1", "%0") -- GUSA_END("%1", "%2") -- : "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS); -- return old; --} -- --int __sh_fetch_add(volatile int *x, int v) --{ -- if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v); -- -- if (__sh_atomic_model == SH_A_IMASK) { -- unsigned sr = mask(); -- int old = *x; -- *x = old + v; -- unmask(sr); -- return old; -- } -- -- int old, dummy; -- __asm__ __volatile__( -- GUSA_START_EVEN("%2", "%0") -- " mov %0, %1\n" -- " add %3, %1\n" -- GUSA_END("%2", "%1") -- : "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); -- return old; --} -- --void __sh_store(volatile int *p, int x) --{ -- if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x); -- __asm__ __volatile__( -- " mov.l %1, @%0\n" -- : : "r"(p), "r"(x) : "memory"); --} -- --void __sh_and(volatile int *x, int v) --{ -- if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v); -- -- if (__sh_atomic_model == SH_A_IMASK) { -- unsigned sr = mask(); -- int old = *x; -- *x = old & v; -- unmask(sr); -- return; -- } -- -- int dummy; -- __asm__ __volatile__( -- GUSA_START_ODD("%1", "%0") -- " and %2, %0\n" -- GUSA_END("%1", "%0") -- : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); --} -- --void __sh_or(volatile int *x, int v) --{ -- if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v); -- -- if (__sh_atomic_model == SH_A_IMASK) { -- unsigned sr = mask(); -- int old = *x; -- *x = old | v; -- unmask(sr); -- return; -- } -- -- int dummy; -- __asm__ __volatile__( -- GUSA_START_ODD("%1", "%0") -- " or %2, %0\n" -- GUSA_END("%1", "%0") -- : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); --} -- --#endif ---- a/arch/sh/src/sh_atomic.h -+++ /dev/null -@@ -1,15 +0,0 @@ --#ifndef _SH_ATOMIC_H --#define _SH_ATOMIC_H -- --#define SH_A_GUSA 0 --#define SH_A_LLSC 1 --#define SH_A_CAS 2 --#if !defined(__SH3__) && !defined(__SH4__) --#define SH_A_IMASK 3 --#else --#define SH_A_IMASK -1LL /* unmatchable by unsigned int */ --#endif -- --extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model; -- --#endif ---- a/arch/x32/atomic.h -+++ /dev/null 
-@@ -1,105 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_64(uint64_t x) --{ -- __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); -- return x; --} -- --static inline int a_ctz_l(unsigned long x) --{ -- __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); -- return x; --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- __asm__( "lock ; and %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- __asm__( "lock ; or %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- __asm__( "lock ; or %1, %0" -- : "=m"(*(long *)p) : "r"(v) : "memory" ); --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- __asm__( "lock ; cmpxchg %3, %1" -- : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); -- return t; --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- __asm__( "lock ; cmpxchg %3, %1" -- : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); -- return t; --} -- --static inline void a_or(volatile int *p, int v) --{ -- __asm__( "lock ; or %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_and(volatile int *p, int v) --{ -- __asm__( "lock ; and %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline int a_swap(volatile int *x, int v) --{ -- __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -- return v; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -- return v; --} -- --static inline void a_inc(volatile int *x) --{ -- __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); --} -- --static inline void a_dec(volatile int *x) --{ -- __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); --} -- --static inline void a_spin() --{ -- __asm__ __volatile__( "pause" : : : "memory" ); --} -- --static inline void a_barrier() --{ -- __asm__ __volatile__( "" : : : "memory" ); --} -- --static inline void a_crash() --{ -- __asm__ __volatile__( "hlt" : : : "memory" ); --} -- -- --#endif ---- /dev/null -+++ b/arch/x32/atomic_arch.h -@@ -0,0 +1,106 @@ -+#define a_ctz_64 a_ctz_64 -+static inline int a_ctz_64(uint64_t x) -+{ -+ __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); -+ return x; -+} -+ -+#define a_ctz_l a_ctz_l -+static inline int a_ctz_l(unsigned long x) -+{ -+ __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); -+ return x; -+} -+ -+#define a_and_64 a_and_64 -+static inline void a_and_64(volatile uint64_t *p, uint64_t v) -+{ -+ __asm__( "lock ; and %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_or_64 a_or_64 -+static inline void a_or_64(volatile uint64_t *p, uint64_t v) -+{ -+ __asm__( "lock ; or %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_or_l a_or_l -+static inline void a_or_l(volatile void *p, long v) -+{ -+ __asm__( "lock ; or %1, %0" -+ : "=m"(*(long *)p) : "r"(v) : "memory" ); -+} -+ -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ __asm__( "lock ; cmpxchg %3, %1" -+ : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); -+ return t; -+} -+ -+#define a_or a_or -+static inline void a_or(volatile int *p, int v) -+{ -+ __asm__( "lock ; or %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_and 
a_and -+static inline void a_and(volatile int *p, int v) -+{ -+ __asm__( "lock ; and %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_swap a_swap -+static inline int a_swap(volatile int *x, int v) -+{ -+ __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -+ return v; -+} -+ -+#define a_fetch_add a_fetch_add -+static inline int a_fetch_add(volatile int *x, int v) -+{ -+ __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -+ return v; -+} -+ -+#define a_inc a_inc -+static inline void a_inc(volatile int *x) -+{ -+ __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); -+} -+ -+#define a_dec a_dec -+static inline void a_dec(volatile int *x) -+{ -+ __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); -+} -+ -+#define a_store a_store -+static inline void a_store(volatile int *p, int x) -+{ -+ __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); -+} -+ -+#define a_spin a_spin -+static inline void a_spin() -+{ -+ __asm__ __volatile__( "pause" : : : "memory" ); -+} -+ -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ __asm__ __volatile__( "" : : : "memory" ); -+} -+ -+#define a_crash a_crash -+static inline void a_crash() -+{ -+ __asm__ __volatile__( "hlt" : : : "memory" ); -+} ---- a/arch/x32/pthread_arch.h -+++ b/arch/x32/pthread_arch.h -@@ -7,6 +7,6 @@ static inline struct pthread *__pthread_ - - #define TP_ADJ(p) (p) - --#define CANCEL_REG_IP 32 -+#define MC_PC gregs[REG_RIP] - - #define CANARY canary2 ---- a/arch/x32/src/syscall_cp_fixup.c -+++ b/arch/x32/src/syscall_cp_fixup.c -@@ -1,8 +1,6 @@ - #include <sys/syscall.h> - --#ifdef SHARED - __attribute__((__visibility__("hidden"))) --#endif - long __syscall_cp_internal(volatile void*, long long, long long, long long, long long, - long long, long long, long long); - -@@ -14,9 +12,7 @@ struct __timespec_kernel { long long tv_ - ts->tv_nsec = __tsc(X)->tv_nsec; \ - (X) = (unsigned long)ts; } } while(0) - --#ifdef SHARED - __attribute__((__visibility__("hidden"))) --#endif - long __syscall_cp_asm (volatile void * foo, long long n, long long a1, long long a2, long long a3, - long long a4, long long a5, long long a6) - { ---- a/arch/x86_64/atomic.h -+++ /dev/null -@@ -1,105 +0,0 @@ --#ifndef _INTERNAL_ATOMIC_H --#define _INTERNAL_ATOMIC_H -- --#include <stdint.h> -- --static inline int a_ctz_64(uint64_t x) --{ -- __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); -- return x; --} -- --static inline int a_ctz_l(unsigned long x) --{ -- __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); -- return x; --} -- --static inline void a_and_64(volatile uint64_t *p, uint64_t v) --{ -- __asm__( "lock ; and %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_or_64(volatile uint64_t *p, uint64_t v) --{ -- __asm__( "lock ; or %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_or_l(volatile void *p, long v) --{ -- __asm__( "lock ; or %1, %0" -- : "=m"(*(long *)p) : "r"(v) : "memory" ); --} -- --static inline void *a_cas_p(volatile void *p, void *t, void *s) --{ -- __asm__( "lock ; cmpxchg %3, %1" -- : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); -- return t; --} -- --static inline int a_cas(volatile int *p, int t, int s) --{ -- __asm__( "lock ; cmpxchg %3, %1" -- : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); -- return t; --} -- --static inline void a_or(volatile int *p, int v) --{ -- __asm__( "lock ; or %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline void a_and(volatile int *p, int v) --{ 
-- __asm__( "lock ; and %1, %0" -- : "=m"(*p) : "r"(v) : "memory" ); --} -- --static inline int a_swap(volatile int *x, int v) --{ -- __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -- return v; --} -- --static inline int a_fetch_add(volatile int *x, int v) --{ -- __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -- return v; --} -- --static inline void a_inc(volatile int *x) --{ -- __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); --} -- --static inline void a_dec(volatile int *x) --{ -- __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); --} -- --static inline void a_store(volatile int *p, int x) --{ -- __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); --} -- --static inline void a_spin() --{ -- __asm__ __volatile__( "pause" : : : "memory" ); --} -- --static inline void a_barrier() --{ -- __asm__ __volatile__( "" : : : "memory" ); --} -- --static inline void a_crash() --{ -- __asm__ __volatile__( "hlt" : : : "memory" ); --} -- -- --#endif ---- /dev/null -+++ b/arch/x86_64/atomic_arch.h -@@ -0,0 +1,107 @@ -+#define a_ctz_64 a_ctz_64 -+static inline int a_ctz_64(uint64_t x) -+{ -+ __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); -+ return x; -+} -+ -+#define a_and_64 a_and_64 -+static inline void a_and_64(volatile uint64_t *p, uint64_t v) -+{ -+ __asm__( "lock ; and %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_or_64 a_or_64 -+static inline void a_or_64(volatile uint64_t *p, uint64_t v) -+{ -+ __asm__( "lock ; or %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_or_l a_or_l -+static inline void a_or_l(volatile void *p, long v) -+{ -+ __asm__( "lock ; or %1, %0" -+ : "=m"(*(long *)p) : "r"(v) : "memory" ); -+} -+ -+#define a_cas_p a_cas_p -+static inline void *a_cas_p(volatile void *p, void *t, void *s) -+{ -+ __asm__( "lock ; cmpxchg %3, %1" -+ : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); -+ return t; -+} -+ -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ __asm__( "lock ; cmpxchg %3, %1" -+ : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); -+ return t; -+} -+ -+#define a_or a_or -+static inline void a_or(volatile int *p, int v) -+{ -+ __asm__( "lock ; or %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_and a_and -+static inline void a_and(volatile int *p, int v) -+{ -+ __asm__( "lock ; and %1, %0" -+ : "=m"(*p) : "r"(v) : "memory" ); -+} -+ -+#define a_swap a_swap -+static inline int a_swap(volatile int *x, int v) -+{ -+ __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -+ return v; -+} -+ -+#define a_fetch_add a_fetch_add -+static inline int a_fetch_add(volatile int *x, int v) -+{ -+ __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); -+ return v; -+} -+ -+#define a_inc a_inc -+static inline void a_inc(volatile int *x) -+{ -+ __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); -+} -+ -+#define a_dec a_dec -+static inline void a_dec(volatile int *x) -+{ -+ __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); -+} -+ -+#define a_store a_store -+static inline void a_store(volatile int *p, int x) -+{ -+ __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); -+} -+ -+#define a_spin a_spin -+static inline void a_spin() -+{ -+ __asm__ __volatile__( "pause" : : : "memory" ); -+} -+ -+#define a_barrier a_barrier -+static inline void a_barrier() -+{ -+ __asm__ __volatile__( "" : : : "memory" ); -+} -+ -+#define a_crash a_crash -+static 
inline void a_crash() -+{ -+ __asm__ __volatile__( "hlt" : : : "memory" ); -+} ---- a/arch/x86_64/pthread_arch.h -+++ b/arch/x86_64/pthread_arch.h -@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_ - - #define TP_ADJ(p) (p) - --#define CANCEL_REG_IP 16 -+#define MC_PC gregs[REG_RIP] ---- a/configure -+++ b/configure -@@ -9,6 +9,9 @@ VAR=VALUE. See below for descriptions o - - Defaults for the options are specified in brackets. - -+Configuration: -+ --srcdir=DIR source directory [detected] -+ - Installation directories: - --prefix=PREFIX main installation prefix [/usr/local/musl] - --exec-prefix=EPREFIX installation prefix for executable files [PREFIX] -@@ -117,6 +120,7 @@ CFLAGS_TRY= - LDFLAGS_AUTO= - LDFLAGS_TRY= - OPTIMIZE_GLOBS= -+srcdir= - prefix=/usr/local/musl - exec_prefix='$(prefix)' - bindir='$(exec_prefix)/bin' -@@ -139,6 +143,7 @@ clang_wrapper=no - for arg ; do - case "$arg" in - --help) usage ;; -+--srcdir=*) srcdir=${arg#*=} ;; - --prefix=*) prefix=${arg#*=} ;; - --exec-prefix=*) exec_prefix=${arg#*=} ;; - --bindir=*) bindir=${arg#*=} ;; -@@ -179,11 +184,23 @@ LIBCC=*) LIBCC=${arg#*=} ;; - esac - done - --for i in prefix exec_prefix bindir libdir includedir syslibdir ; do -+for i in srcdir prefix exec_prefix bindir libdir includedir syslibdir ; do - stripdir $i - done - - # -+# Get the source dir for out-of-tree builds -+# -+if test -z "$srcdir" ; then -+srcdir="${0%/configure}" -+stripdir srcdir -+fi -+abs_builddir="$(pwd)" || fail "$0: cannot determine working directory" -+abs_srcdir="$(cd $srcdir && pwd)" || fail "$0: invalid source directory $srcdir" -+test "$abs_srcdir" = "$abs_builddir" && srcdir=. -+test "$srcdir" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory" -+ -+# - # Get a temp filename we can use - # - i=0 -@@ -263,11 +280,11 @@ fi - fi - - if test "$gcc_wrapper" = yes ; then --tools="$tools tools/musl-gcc" -+tools="$tools obj/musl-gcc" - tool_libs="$tool_libs lib/musl-gcc.specs" - fi - if test "$clang_wrapper" = yes ; then --tools="$tools tools/musl-clang tools/ld.musl-clang" -+tools="$tools obj/musl-clang obj/ld.musl-clang" - fi - - # -@@ -321,7 +338,7 @@ __attribute__((__may_alias__)) - #endif - x; - EOF --if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \ -+if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \ - -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then - printf "no\n" - else -@@ -330,6 +347,13 @@ CFLAGS_C99FSE="$CFLAGS_C99FSE -D__may_al - fi - - # -+# The GNU toolchain defaults to assuming unmarked files need an -+# executable stack, potentially exposing vulnerabilities in programs -+# linked with such object files. Fix this. -+# -+tryflag CFLAGS_C99FSE -Wa,--noexecstack -+ -+# - # Check for options to disable stack protector, which needs to be - # disabled for a few early-bootstrap translation units. If not found, - # this is not an error; we assume the toolchain does not do ssp. -@@ -430,11 +454,15 @@ tryflag CFLAGS_AUTO -fno-unwind-tables - tryflag CFLAGS_AUTO -fno-asynchronous-unwind-tables - - # --# The GNU toolchain defaults to assuming unmarked files need an --# executable stack, potentially exposing vulnerabilities in programs --# linked with such object files. Fix this. -+# Attempt to put each function and each data object in its own -+# section. 
This both allows additional size optimizations at link -+# time and works around a dangerous class of compiler/assembler bugs -+# whereby relative address expressions are constant-folded by the -+# assembler even when one or more of the symbols involved is -+# replaceable. See gas pr 18561 and gcc pr 66609, 68178, etc. - # --tryflag CFLAGS_AUTO -Wa,--noexecstack -+tryflag CFLAGS_AUTO -ffunction-sections -+tryflag CFLAGS_AUTO -fdata-sections - - # - # On x86, make sure we don't have incompatible instruction set -@@ -489,7 +517,7 @@ int foo(void) { } - int bar(void) { fp = foo; return foo(); } - EOF - if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS \ -- -DSHARED -fPIC -I./src/internal -include vis.h \ -+ -DSHARED -fPIC -I$srcdir/src/internal -include vis.h \ - -nostdlib -shared -Wl,-Bsymbolic-functions \ - -o /dev/null "$tmpc" >/dev/null 2>&1 ; then - visibility=yes -@@ -504,6 +532,16 @@ CFLAGS_AUTO="$CFLAGS_AUTO -include vis.h - CFLAGS_AUTO="${CFLAGS_AUTO# }" - fi - -+# Reduce space lost to padding for alignment purposes by sorting data -+# objects according to their alignment requirements. This approximates -+# optimal packing. -+tryldflag LDFLAGS_AUTO -Wl,--sort-section,alignment -+tryldflag LDFLAGS_AUTO -Wl,--sort-common -+ -+# When linking a shared library, drop dummy weak definitions that were -+# replaced by strong definitions from other translation units. -+tryldflag LDFLAGS_AUTO -Wl,--gc-sections -+ - # Some patched GCC builds have these defaults messed up... - tryldflag LDFLAGS_AUTO -Wl,--hash-style=both - -@@ -513,6 +551,11 @@ tryldflag LDFLAGS_AUTO -Wl,--hash-style= - # runtime library; implementation error is also a possibility. - tryldflag LDFLAGS_AUTO -Wl,--no-undefined - -+# Avoid exporting symbols from compiler runtime libraries. They -+# should be hidden anyway, but some toolchains including old gcc -+# versions built without shared library support and pcc are broken. -+tryldflag LDFLAGS_AUTO -Wl,--exclude-libs=ALL -+ - test "$shared" = "no" || { - # Disable dynamic linking if ld is broken and can't do -Bsymbolic-functions - LDFLAGS_DUMMY= -@@ -599,7 +642,7 @@ echo '#include <float.h>' > "$tmpc" - echo '#if LDBL_MANT_DIG == 53' >> "$tmpc" - echo 'typedef char ldcheck[9-(int)sizeof(long double)];' >> "$tmpc" - echo '#endif' >> "$tmpc" --if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \ -+if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \ - -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then - printf "yes\n" - else -@@ -622,6 +665,7 @@ cat << EOF - ARCH = $ARCH - SUBARCH = $SUBARCH - ASMSUBARCH = $ASMSUBARCH -+srcdir = $srcdir - prefix = $prefix - exec_prefix = $exec_prefix - bindir = $bindir -@@ -629,12 +673,14 @@ libdir = $libdir - includedir = $includedir - syslibdir = $syslibdir - CC = $CC --CFLAGS = $CFLAGS_AUTO $CFLAGS -+CFLAGS = $CFLAGS -+CFLAGS_AUTO = $CFLAGS_AUTO - CFLAGS_C99FSE = $CFLAGS_C99FSE - CFLAGS_MEMOPS = $CFLAGS_MEMOPS - CFLAGS_NOSSP = $CFLAGS_NOSSP - CPPFLAGS = $CPPFLAGS --LDFLAGS = $LDFLAGS_AUTO $LDFLAGS -+LDFLAGS = $LDFLAGS -+LDFLAGS_AUTO = $LDFLAGS_AUTO - CROSS_COMPILE = $CROSS_COMPILE - LIBCC = $LIBCC - OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS -@@ -648,4 +694,6 @@ test "x$cc_family" = xgcc && echo 'WRAPC - test "x$cc_family" = xclang && echo 'WRAPCC_CLANG = $(CC)' - exec 1>&3 3>&- - -+test "$srcdir" = "." || ln -sf $srcdir/Makefile .
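Every probe in this hunk works the same way: configure writes a few lines of C to "$tmpc", runs the compiler with the candidate flag, and keeps the flag only when the command exits successfully. The following is a rough standalone sketch of the -Bsymbolic-functions/visibility probe whose tail is visible above; the declaration of fp lies outside the hunk context shown here, so that line is an assumption, and the pragma stands in for the protected visibility that the force-included vis.h normally supplies:

#pragma GCC visibility push(protected)
int (*fp)(void); /* assumed declaration; only its use is visible in the hunk */
int foo(void) { return 0; }
int bar(void) { fp = foo; return foo(); }

If this links as a shared object with -fPIC -nostdlib -shared -Wl,-Bsymbolic-functions, configure concludes that protected visibility is usable and sets visibility=yes.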
-+ - printf "done\n" ---- a/crt/arm/crti.s -+++ b/crt/arm/crti.s -@@ -1,3 +1,5 @@ -+.syntax unified -+ - .section .init - .global _init - .type _init,%function ---- a/crt/arm/crtn.s -+++ b/crt/arm/crtn.s -@@ -1,11 +1,9 @@ -+.syntax unified -+ - .section .init - pop {r0,lr} -- tst lr,#1 -- moveq pc,lr - bx lr - - .section .fini - pop {r0,lr} -- tst lr,#1 -- moveq pc,lr - bx lr ---- a/include/complex.h -+++ b/include/complex.h -@@ -116,7 +116,7 @@ long double creall(long double complex); - - #if __STDC_VERSION__ >= 201112L - #if defined(_Imaginary_I) --#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y))) -+#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y)) - #elif defined(__clang__) - #define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) }) - #else ---- a/include/netinet/tcp.h -+++ b/include/netinet/tcp.h -@@ -41,7 +41,20 @@ - #define TCP_CLOSING 11 - - #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) -+#define TCPOPT_EOL 0 -+#define TCPOPT_NOP 1 -+#define TCPOPT_MAXSEG 2 -+#define TCPOPT_WINDOW 3 -+#define TCPOPT_SACK_PERMITTED 4 -+#define TCPOPT_SACK 5 -+#define TCPOPT_TIMESTAMP 8 -+#define TCPOLEN_SACK_PERMITTED 2 -+#define TCPOLEN_WINDOW 3 -+#define TCPOLEN_MAXSEG 4 -+#define TCPOLEN_TIMESTAMP 10 -+ - #define SOL_TCP 6 -+ - #include <sys/types.h> - #include <sys/socket.h> - #include <stdint.h> ---- a/src/env/__init_tls.c -+++ b/src/env/__init_tls.c -@@ -8,9 +8,6 @@ - #include "atomic.h" - #include "syscall.h" - --#ifndef SHARED --static --#endif - int __init_tp(void *p) - { - pthread_t td = p; -@@ -24,8 +21,6 @@ int __init_tp(void *p) - return 0; - } - --#ifndef SHARED -- - static struct builtin_tls { - char c; - struct pthread pt; -@@ -33,33 +28,40 @@ static struct builtin_tls { - } builtin_tls[1]; - #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt) - --struct tls_image { -- void *image; -- size_t len, size, align; --} __static_tls; -- --#define T __static_tls -+static struct tls_module main_tls; - - void *__copy_tls(unsigned char *mem) - { - pthread_t td; -- if (!T.image) return mem; -- void **dtv = (void *)mem; -- dtv[0] = (void *)1; -+ struct tls_module *p; -+ size_t i; -+ void **dtv; -+ - #ifdef TLS_ABOVE_TP -- mem += sizeof(void *) * 2; -- mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1); -+ dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1); -+ -+ mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1); - td = (pthread_t)mem; - mem += sizeof(struct pthread); -+ -+ for (i=1, p=libc.tls_head; p; i++, p=p->next) { -+ dtv[i] = mem + p->offset; -+ memcpy(dtv[i], p->image, p->len); -+ } - #else -+ dtv = (void **)mem; -+ - mem += libc.tls_size - sizeof(struct pthread); -- mem -= (uintptr_t)mem & (T.align-1); -+ mem -= (uintptr_t)mem & (libc.tls_align-1); - td = (pthread_t)mem; -- mem -= T.size; -+ -+ for (i=1, p=libc.tls_head; p; i++, p=p->next) { -+ dtv[i] = mem - p->offset; -+ memcpy(dtv[i], p->image, p->len); -+ } - #endif -+ dtv[0] = (void *)libc.tls_cnt; - td->dtv = td->dtv_copy = dtv; -- dtv[1] = mem; -- memcpy(mem, T.image, T.len); - return td; - } - -@@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr; - typedef Elf64_Phdr Phdr; - #endif - --void __init_tls(size_t *aux) -+static void static_init_tls(size_t *aux) - { - unsigned char *p; - size_t n; -@@ -86,16 +88,24 @@ void __init_tls(size_t *aux) - } - - if (tls_phdr) { -- T.image = (void *)(base + tls_phdr->p_vaddr); -- T.len = tls_phdr->p_filesz; -- T.size = tls_phdr->p_memsz; -- T.align = tls_phdr->p_align; -+ main_tls.image = (void *)(base + tls_phdr->p_vaddr); -+ main_tls.len = 
tls_phdr->p_filesz; -+ main_tls.size = tls_phdr->p_memsz; -+ main_tls.align = tls_phdr->p_align; -+ libc.tls_cnt = 1; -+ libc.tls_head = &main_tls; - } - -- T.size += (-T.size - (uintptr_t)T.image) & (T.align-1); -- if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN; -+ main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image) -+ & (main_tls.align-1); -+ if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN; -+#ifndef TLS_ABOVE_TP -+ main_tls.offset = main_tls.size; -+#endif - -- libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread) -+ libc.tls_align = main_tls.align; -+ libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread) -+ + main_tls.size + main_tls.align - + MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN; - - if (libc.tls_size > sizeof builtin_tls) { -@@ -117,6 +127,5 @@ void __init_tls(size_t *aux) - if (__init_tp(__copy_tls(mem)) < 0) - a_crash(); - } --#else --void __init_tls(size_t *auxv) { } --#endif -+ -+weak_alias(static_init_tls, __init_tls); ---- a/src/env/__libc_start_main.c -+++ b/src/env/__libc_start_main.c -@@ -8,21 +8,17 @@ - - void __init_tls(size_t *); - --#ifndef SHARED --static void dummy() {} -+static void dummy(void) {} - weak_alias(dummy, _init); --extern void (*const __init_array_start)() __attribute__((weak)); --extern void (*const __init_array_end)() __attribute__((weak)); --#endif -+ -+__attribute__((__weak__, __visibility__("hidden"))) -+extern void (*const __init_array_start)(void), (*const __init_array_end)(void); - - static void dummy1(void *p) {} - weak_alias(dummy1, __init_ssp); - - #define AUX_CNT 38 - --#ifndef SHARED --static --#endif - void __init_libc(char **envp, char *pn) - { - size_t i, *auxv, aux[AUX_CNT] = { 0 }; -@@ -57,20 +53,22 @@ void __init_libc(char **envp, char *pn) - libc.secure = 1; - } - --int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv) -+static void libc_start_init(void) - { -- char **envp = argv+argc+1; -- --#ifndef SHARED -- __init_libc(envp, argv[0]); - _init(); - uintptr_t a = (uintptr_t)&__init_array_start; - for (; a<(uintptr_t)&__init_array_end; a+=sizeof(void(*)())) - (*(void (**)())a)(); --#else -- void __libc_start_init(void); -+} -+ -+weak_alias(libc_start_init, __libc_start_init); -+ -+int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv) -+{ -+ char **envp = argv+argc+1; -+ -+ __init_libc(envp, argv[0]); - __libc_start_init(); --#endif - - /* Pass control to the application */ - exit(main(argc, argv, envp)); ---- a/src/env/__reset_tls.c -+++ b/src/env/__reset_tls.c -@@ -1,21 +1,16 @@ --#ifndef SHARED -- - #include <string.h> - #include "pthread_impl.h" -- --extern struct tls_image { -- void *image; -- size_t len, size, align; --} __static_tls; -- --#define T __static_tls -+#include "libc.h" - - void __reset_tls() - { -- if (!T.size) return; - pthread_t self = __pthread_self(); -- memcpy(self->dtv[1], T.image, T.len); -- memset((char *)self->dtv[1]+T.len, 0, T.size-T.len); -+ struct tls_module *p; -+ size_t i, n = (size_t)self->dtv[0]; -+ if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) { -+ if (!self->dtv[i]) continue; -+ memcpy(self->dtv[i], p->image, p->len); -+ memset((char *)self->dtv[i]+p->len, 0, -+ p->size - p->len); -+ } - } -- --#endif ---- a/src/env/__stack_chk_fail.c -+++ b/src/env/__stack_chk_fail.c -@@ -17,16 +17,7 @@ void __stack_chk_fail(void) - a_crash(); - } - --#ifdef SHARED -- - __attribute__((__visibility__("hidden"))) --void __stack_chk_fail_local(void) --{ -- a_crash(); --} -- --#else -+void 
__stack_chk_fail_local(void); - - weak_alias(__stack_chk_fail, __stack_chk_fail_local); -- --#endif ---- /dev/null -+++ b/src/exit/arm/__aeabi_atexit.c -@@ -0,0 +1,6 @@ -+int __cxa_atexit(void (*func)(void *), void *arg, void *dso); -+ -+int __aeabi_atexit (void *obj, void (*func) (void *), void *d) -+{ -+ return __cxa_atexit (func, obj, d); -+} ---- a/src/exit/exit.c -+++ b/src/exit/exit.c -@@ -10,25 +10,25 @@ static void dummy() - * as a consequence of linking either __toread.c or __towrite.c. */ - weak_alias(dummy, __funcs_on_exit); - weak_alias(dummy, __stdio_exit); -- --#ifndef SHARED - weak_alias(dummy, _fini); --extern void (*const __fini_array_start)() __attribute__((weak)); --extern void (*const __fini_array_end)() __attribute__((weak)); --#endif - --_Noreturn void exit(int code) --{ -- __funcs_on_exit(); -+__attribute__((__weak__, __visibility__("hidden"))) -+extern void (*const __fini_array_start)(void), (*const __fini_array_end)(void); - --#ifndef SHARED -+static void libc_exit_fini(void) -+{ - uintptr_t a = (uintptr_t)&__fini_array_end; - for (; a>(uintptr_t)&__fini_array_start; a-=sizeof(void(*)())) - (*(void (**)())(a-sizeof(void(*)())))(); - _fini(); --#endif -+} - -- __stdio_exit(); -+weak_alias(libc_exit_fini, __libc_exit_fini); - -+_Noreturn void exit(int code) -+{ -+ __funcs_on_exit(); -+ __libc_exit_fini(); -+ __stdio_exit(); - _Exit(code); - } ---- /dev/null -+++ b/src/fenv/arm/fenv-hf.S -@@ -0,0 +1,69 @@ -+#if __ARM_PCS_VFP -+ -+.syntax unified -+.fpu vfp -+ -+.global fegetround -+.type fegetround,%function -+fegetround: -+ fmrx r0, fpscr -+ and r0, r0, #0xc00000 -+ bx lr -+ -+.global __fesetround -+.type __fesetround,%function -+__fesetround: -+ fmrx r3, fpscr -+ bic r3, r3, #0xc00000 -+ orr r3, r3, r0 -+ fmxr fpscr, r3 -+ mov r0, #0 -+ bx lr -+ -+.global fetestexcept -+.type fetestexcept,%function -+fetestexcept: -+ and r0, r0, #0x1f -+ fmrx r3, fpscr -+ and r0, r0, r3 -+ bx lr -+ -+.global feclearexcept -+.type feclearexcept,%function -+feclearexcept: -+ and r0, r0, #0x1f -+ fmrx r3, fpscr -+ bic r3, r3, r0 -+ fmxr fpscr, r3 -+ mov r0, #0 -+ bx lr -+ -+.global feraiseexcept -+.type feraiseexcept,%function -+feraiseexcept: -+ and r0, r0, #0x1f -+ fmrx r3, fpscr -+ orr r3, r3, r0 -+ fmxr fpscr, r3 -+ mov r0, #0 -+ bx lr -+ -+.global fegetenv -+.type fegetenv,%function -+fegetenv: -+ fmrx r3, fpscr -+ str r3, [r0] -+ mov r0, #0 -+ bx lr -+ -+.global fesetenv -+.type fesetenv,%function -+fesetenv: -+ cmn r0, #1 -+ moveq r3, #0 -+ ldrne r3, [r0] -+ fmxr fpscr, r3 -+ mov r0, #0 -+ bx lr -+ -+#endif ---- /dev/null -+++ b/src/fenv/arm/fenv.c -@@ -0,0 +1,3 @@ -+#if !__ARM_PCS_VFP -+#include "../fenv.c" -+#endif ---- a/src/fenv/armebhf/fenv.sub -+++ /dev/null -@@ -1 +0,0 @@ --../armhf/fenv.s ---- a/src/fenv/armhf/fenv.s -+++ /dev/null -@@ -1,64 +0,0 @@ --.fpu vfp -- --.global fegetround --.type fegetround,%function --fegetround: -- mrc p10, 7, r0, cr1, cr0, 0 -- and r0, r0, #0xc00000 -- bx lr -- --.global __fesetround --.type __fesetround,%function --__fesetround: -- mrc p10, 7, r3, cr1, cr0, 0 -- bic r3, r3, #0xc00000 -- orr r3, r3, r0 -- mcr p10, 7, r3, cr1, cr0, 0 -- mov r0, #0 -- bx lr -- --.global fetestexcept --.type fetestexcept,%function --fetestexcept: -- and r0, r0, #0x1f -- mrc p10, 7, r3, cr1, cr0, 0 -- and r0, r0, r3 -- bx lr -- --.global feclearexcept --.type feclearexcept,%function --feclearexcept: -- and r0, r0, #0x1f -- mrc p10, 7, r3, cr1, cr0, 0 -- bic r3, r3, r0 -- mcr p10, 7, r3, cr1, cr0, 0 -- mov r0, #0 -- bx lr -- --.global feraiseexcept --.type 
feraiseexcept,%function --feraiseexcept: -- and r0, r0, #0x1f -- mrc p10, 7, r3, cr1, cr0, 0 -- orr r3, r3, r0 -- mcr p10, 7, r3, cr1, cr0, 0 -- mov r0, #0 -- bx lr -- --.global fegetenv --.type fegetenv,%function --fegetenv: -- mrc p10, 7, r3, cr1, cr0, 0 -- str r3, [r0] -- mov r0, #0 -- bx lr -- --.global fesetenv --.type fesetenv,%function --fesetenv: -- cmn r0, #1 -- moveq r3, #0 -- ldrne r3, [r0] -- mcr p10, 7, r3, cr1, cr0, 0 -- mov r0, #0 -- bx lr ---- a/src/fenv/armhf/fenv.sub -+++ /dev/null -@@ -1 +0,0 @@ --fenv.s ---- a/src/fenv/mips-sf/fenv.sub -+++ /dev/null -@@ -1 +0,0 @@ --../fenv.c ---- /dev/null -+++ b/src/fenv/mips/fenv-sf.c -@@ -0,0 +1,3 @@ -+#ifdef __mips_soft_float -+#include "../fenv.c" -+#endif ---- /dev/null -+++ b/src/fenv/mips/fenv.S -@@ -0,0 +1,71 @@ -+#ifndef __mips_soft_float -+ -+.set noreorder -+ -+.global feclearexcept -+.type feclearexcept,@function -+feclearexcept: -+ and $4, $4, 0x7c -+ cfc1 $5, $31 -+ or $5, $5, $4 -+ xor $5, $5, $4 -+ ctc1 $5, $31 -+ jr $ra -+ li $2, 0 -+ -+.global feraiseexcept -+.type feraiseexcept,@function -+feraiseexcept: -+ and $4, $4, 0x7c -+ cfc1 $5, $31 -+ or $5, $5, $4 -+ ctc1 $5, $31 -+ jr $ra -+ li $2, 0 -+ -+.global fetestexcept -+.type fetestexcept,@function -+fetestexcept: -+ and $4, $4, 0x7c -+ cfc1 $2, $31 -+ jr $ra -+ and $2, $2, $4 -+ -+.global fegetround -+.type fegetround,@function -+fegetround: -+ cfc1 $2, $31 -+ jr $ra -+ andi $2, $2, 3 -+ -+.global __fesetround -+.type __fesetround,@function -+__fesetround: -+ cfc1 $5, $31 -+ li $6, -4 -+ and $5, $5, $6 -+ or $5, $5, $4 -+ ctc1 $5, $31 -+ jr $ra -+ li $2, 0 -+ -+.global fegetenv -+.type fegetenv,@function -+fegetenv: -+ cfc1 $5, $31 -+ sw $5, 0($4) -+ jr $ra -+ li $2, 0 -+ -+.global fesetenv -+.type fesetenv,@function -+fesetenv: -+ addiu $5, $4, 1 -+ beq $5, $0, 1f -+ nop -+ lw $5, 0($4) -+1: ctc1 $5, $31 -+ jr $ra -+ li $2, 0 -+ -+#endif ---- a/src/fenv/mips/fenv.s -+++ /dev/null -@@ -1,67 +0,0 @@ --.set noreorder -- --.global feclearexcept --.type feclearexcept,@function --feclearexcept: -- and $4, $4, 0x7c -- cfc1 $5, $31 -- or $5, $5, $4 -- xor $5, $5, $4 -- ctc1 $5, $31 -- jr $ra -- li $2, 0 -- --.global feraiseexcept --.type feraiseexcept,@function --feraiseexcept: -- and $4, $4, 0x7c -- cfc1 $5, $31 -- or $5, $5, $4 -- ctc1 $5, $31 -- jr $ra -- li $2, 0 -- --.global fetestexcept --.type fetestexcept,@function --fetestexcept: -- and $4, $4, 0x7c -- cfc1 $2, $31 -- jr $ra -- and $2, $2, $4 -- --.global fegetround --.type fegetround,@function --fegetround: -- cfc1 $2, $31 -- jr $ra -- andi $2, $2, 3 -- --.global __fesetround --.type __fesetround,@function --__fesetround: -- cfc1 $5, $31 -- li $6, -4 -- and $5, $5, $6 -- or $5, $5, $4 -- ctc1 $5, $31 -- jr $ra -- li $2, 0 -- --.global fegetenv --.type fegetenv,@function --fegetenv: -- cfc1 $5, $31 -- sw $5, 0($4) -- jr $ra -- li $2, 0 -- --.global fesetenv --.type fesetenv,@function --fesetenv: -- addiu $5, $4, 1 -- beq $5, $0, 1f -- nop -- lw $5, 0($4) --1: ctc1 $5, $31 -- jr $ra -- li $2, 0 ---- a/src/fenv/mipsel-sf/fenv.sub -+++ /dev/null -@@ -1 +0,0 @@ --../fenv.c ---- a/src/fenv/sh-nofpu/fenv.sub -+++ /dev/null -@@ -1 +0,0 @@ --../fenv.c ---- /dev/null -+++ b/src/fenv/sh/fenv-nofpu.c -@@ -0,0 +1,3 @@ -+#if !__SH_FPU_ANY__ && !__SH4__ -+#include "../fenv.c" -+#endif ---- /dev/null -+++ b/src/fenv/sh/fenv.S -@@ -0,0 +1,78 @@ -+#if __SH_FPU_ANY__ || __SH4__ -+ -+.global fegetround -+.type fegetround, @function -+fegetround: -+ sts fpscr, r0 -+ rts -+ and #3, r0 -+ -+.global __fesetround -+.type __fesetround, 
@function -+__fesetround: -+ sts fpscr, r0 -+ or r4, r0 -+ lds r0, fpscr -+ rts -+ mov #0, r0 -+ -+.global fetestexcept -+.type fetestexcept, @function -+fetestexcept: -+ sts fpscr, r0 -+ and r4, r0 -+ rts -+ and #0x7c, r0 -+ -+.global feclearexcept -+.type feclearexcept, @function -+feclearexcept: -+ mov r4, r0 -+ and #0x7c, r0 -+ not r0, r4 -+ sts fpscr, r0 -+ and r4, r0 -+ lds r0, fpscr -+ rts -+ mov #0, r0 -+ -+.global feraiseexcept -+.type feraiseexcept, @function -+feraiseexcept: -+ mov r4, r0 -+ and #0x7c, r0 -+ sts fpscr, r4 -+ or r4, r0 -+ lds r0, fpscr -+ rts -+ mov #0, r0 -+ -+.global fegetenv -+.type fegetenv, @function -+fegetenv: -+ sts fpscr, r0 -+ mov.l r0, @r4 -+ rts -+ mov #0, r0 -+ -+.global fesetenv -+.type fesetenv, @function -+fesetenv: -+ mov r4, r0 -+ cmp/eq #-1, r0 -+ bf 1f -+ -+ ! the default environment is complicated by the fact that we need to -+ ! preserve the current precision bit, which we do not know a priori -+ sts fpscr, r0 -+ mov #8, r1 -+ swap.w r1, r1 -+ bra 2f -+ and r1, r0 -+ -+1: mov.l @r4, r0 ! non-default environment -+2: lds r0, fpscr -+ rts -+ mov #0, r0 -+ -+#endif ---- a/src/fenv/sh/fenv.s -+++ /dev/null -@@ -1,74 +0,0 @@ --.global fegetround --.type fegetround, @function --fegetround: -- sts fpscr, r0 -- rts -- and #3, r0 -- --.global __fesetround --.type __fesetround, @function --__fesetround: -- sts fpscr, r0 -- or r4, r0 -- lds r0, fpscr -- rts -- mov #0, r0 -- --.global fetestexcept --.type fetestexcept, @function --fetestexcept: -- sts fpscr, r0 -- and r4, r0 -- rts -- and #0x7c, r0 -- --.global feclearexcept --.type feclearexcept, @function --feclearexcept: -- mov r4, r0 -- and #0x7c, r0 -- not r0, r4 -- sts fpscr, r0 -- and r4, r0 -- lds r0, fpscr -- rts -- mov #0, r0 -- --.global feraiseexcept --.type feraiseexcept, @function --feraiseexcept: -- mov r4, r0 -- and #0x7c, r0 -- sts fpscr, r4 -- or r4, r0 -- lds r0, fpscr -- rts -- mov #0, r0 -- --.global fegetenv --.type fegetenv, @function --fegetenv: -- sts fpscr, r0 -- mov.l r0, @r4 -- rts -- mov #0, r0 -- --.global fesetenv --.type fesetenv, @function --fesetenv: -- mov r4, r0 -- cmp/eq #-1, r0 -- bf 1f -- -- ! the default environment is complicated by the fact that we need to -- ! preserve the current precision bit, which we do not know a priori -- sts fpscr, r0 -- mov #8, r1 -- swap.w r1, r1 -- bra 2f -- and r1, r0 -- --1: mov.l @r4, r0 ! 
non-default environment --2: lds r0, fpscr -- rts -- mov #0, r0 ---- a/src/fenv/sheb-nofpu/fenv.sub -+++ /dev/null -@@ -1 +0,0 @@ --../fenv.c ---- a/src/internal/arm/syscall.s -+++ b/src/internal/arm/syscall.s -@@ -1,3 +1,4 @@ -+.syntax unified - .global __syscall - .hidden __syscall - .type __syscall,%function -@@ -11,6 +12,4 @@ __syscall: - ldmfd ip,{r3,r4,r5,r6} - svc 0 - ldmfd sp!,{r4,r5,r6,r7} -- tst lr,#1 -- moveq pc,lr - bx lr ---- /dev/null -+++ b/src/internal/atomic.h -@@ -0,0 +1,275 @@ -+#ifndef _ATOMIC_H -+#define _ATOMIC_H -+ -+#include <stdint.h> -+ -+#include "atomic_arch.h" -+ -+#ifdef a_ll -+ -+#ifndef a_pre_llsc -+#define a_pre_llsc() -+#endif -+ -+#ifndef a_post_llsc -+#define a_post_llsc() -+#endif -+ -+#ifndef a_cas -+#define a_cas a_cas -+static inline int a_cas(volatile int *p, int t, int s) -+{ -+ int old; -+ a_pre_llsc(); -+ do old = a_ll(p); -+ while (old==t && !a_sc(p, s)); -+ a_post_llsc(); -+ return old; -+} -+#endif -+ -+#ifndef a_swap -+#define a_swap a_swap -+static inline int a_swap(volatile int *p, int v) -+{ -+ int old; -+ a_pre_llsc(); -+ do old = a_ll(p); -+ while (!a_sc(p, v)); -+ a_post_llsc(); -+ return old; -+} -+#endif -+ -+#ifndef a_fetch_add -+#define a_fetch_add a_fetch_add -+static inline int a_fetch_add(volatile int *p, int v) -+{ -+ int old; -+ a_pre_llsc(); -+ do old = a_ll(p); -+ while (!a_sc(p, (unsigned)old + v)); -+ a_post_llsc(); -+ return old; -+} -+#endif -+ -+#ifndef a_fetch_and -+#define a_fetch_and a_fetch_and -+static inline int a_fetch_and(volatile int *p, int v) -+{ -+ int old; -+ a_pre_llsc(); -+ do old = a_ll(p); -+ while (!a_sc(p, old & v)); -+ a_post_llsc(); -+ return old; -+} -+#endif -+ -+#ifndef a_fetch_or -+#define a_fetch_or a_fetch_or -+static inline int a_fetch_or(volatile int *p, int v) -+{ -+ int old; -+ a_pre_llsc(); -+ do old = a_ll(p); -+ while (!a_sc(p, old | v)); -+ a_post_llsc(); -+ return old; -+} -+#endif -+ -+#endif -+ -+#ifndef a_cas -+#error missing definition of a_cas -+#endif -+ -+#ifndef a_swap -+#define a_swap a_swap -+static inline int a_swap(volatile int *p, int v) -+{ -+ int old; -+ do old = *p; -+ while (a_cas(p, old, v) != old); -+ return old; -+} -+#endif -+ -+#ifndef a_fetch_add -+#define a_fetch_add a_fetch_add -+static inline int a_fetch_add(volatile int *p, int v) -+{ -+ int old; -+ do old = *p; -+ while (a_cas(p, old, (unsigned)old+v) != old); -+ return old; -+} -+#endif -+ -+#ifndef a_fetch_and -+#define a_fetch_and a_fetch_and -+static inline int a_fetch_and(volatile int *p, int v) -+{ -+ int old; -+ do old = *p; -+ while (a_cas(p, old, old&v) != old); -+ return old; -+} -+#endif -+#ifndef a_fetch_or -+#define a_fetch_or a_fetch_or -+static inline int a_fetch_or(volatile int *p, int v) -+{ -+ int old; -+ do old = *p; -+ while (a_cas(p, old, old|v) != old); -+ return old; -+} -+#endif -+ -+#ifndef a_and -+#define a_and a_and -+static inline void a_and(volatile int *p, int v) -+{ -+ a_fetch_and(p, v); -+} -+#endif -+ -+#ifndef a_or -+#define a_or a_or -+static inline void a_or(volatile int *p, int v) -+{ -+ a_fetch_or(p, v); -+} -+#endif -+ -+#ifndef a_inc -+#define a_inc a_inc -+static inline void a_inc(volatile int *p) -+{ -+ a_fetch_add(p, 1); -+} -+#endif -+ -+#ifndef a_dec -+#define a_dec a_dec -+static inline void a_dec(volatile int *p) -+{ -+ a_fetch_add(p, -1); -+} -+#endif -+ -+#ifndef a_store -+#define a_store a_store -+static inline void a_store(volatile int *p, int v) -+{ -+#ifdef a_barrier -+ a_barrier(); -+ *p = v; -+ a_barrier(); -+#else -+ a_swap(p, v); -+#endif -+} -+#endif 
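The chains of #ifdef a_ll and #ifndef a_cas above are the heart of this refactor: an arch's atomic_arch.h now supplies either an LL/SC pair a_ll/a_sc (with optional a_pre_llsc/a_post_llsc barriers) or a bare a_cas, and src/internal/atomic.h synthesizes every other primitive from that. As a hedged sketch, a minimal atomic_arch.h for a hypothetical CAS-only machine could be just the following, using a GCC builtin purely for illustration where the real ports in this patch use inline asm:

#define a_cas a_cas
static inline int a_cas(volatile int *p, int t, int s)
{
	/* atomically: if *p == t, store s; either way return the old value,
	 * which is all the generic fallback loops above need */
	return __sync_val_compare_and_swap(p, t, s);
}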
-+ -+#ifndef a_barrier -+#define a_barrier a_barrier -+static void a_barrier() -+{ -+ volatile int tmp = 0; -+ a_cas(&tmp, 0, 0); -+} -+#endif -+ -+#ifndef a_spin -+#define a_spin a_barrier -+#endif -+ -+#ifndef a_and_64 -+#define a_and_64 a_and_64 -+static inline void a_and_64(volatile uint64_t *p, uint64_t v) -+{ -+ union { uint64_t v; uint32_t r[2]; } u = { v }; -+ if (u.r[0]+1) a_and((int *)p, u.r[0]); -+ if (u.r[1]+1) a_and((int *)p+1, u.r[1]); -+} -+#endif -+ -+#ifndef a_or_64 -+#define a_or_64 a_or_64 -+static inline void a_or_64(volatile uint64_t *p, uint64_t v) -+{ -+ union { uint64_t v; uint32_t r[2]; } u = { v }; -+ if (u.r[0]) a_or((int *)p, u.r[0]); -+ if (u.r[1]) a_or((int *)p+1, u.r[1]); -+} -+#endif -+ -+#ifndef a_cas_p -+#define a_cas_p a_cas_p -+static inline void *a_cas_p(volatile void *p, void *t, void *s) -+{ -+ return (void *)a_cas((volatile int *)p, (int)t, (int)s); -+} -+#endif -+ -+#ifndef a_or_l -+#define a_or_l a_or_l -+static inline void a_or_l(volatile void *p, long v) -+{ -+ if (sizeof(long) == sizeof(int)) a_or(p, v); -+ else a_or_64(p, v); -+} -+#endif -+ -+#ifndef a_crash -+#define a_crash a_crash -+static inline void a_crash() -+{ -+ *(volatile char *)0=0; -+} -+#endif -+ -+#ifndef a_ctz_64 -+#define a_ctz_64 a_ctz_64 -+static inline int a_ctz_64(uint64_t x) -+{ -+ static const char debruijn64[64] = { -+ 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, -+ 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, -+ 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, -+ 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 -+ }; -+ static const char debruijn32[32] = { -+ 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -+ 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -+ }; -+ if (sizeof(long) < 8) { -+ uint32_t y = x; -+ if (!y) { -+ y = x>>32; -+ return 32 + debruijn32[(y&-y)*0x076be629 >> 27]; -+ } -+ return debruijn32[(y&-y)*0x076be629 >> 27]; -+ } -+ return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58]; -+} -+#endif -+ -+#ifndef a_ctz_l -+#define a_ctz_l a_ctz_l -+static inline int a_ctz_l(unsigned long x) -+{ -+ static const char debruijn32[32] = { -+ 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, -+ 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 -+ }; -+ if (sizeof(long) == 8) return a_ctz_64(x); -+ return debruijn32[(x&-x)*0x076be629 >> 27]; -+} -+#endif -+ -+#endif ---- a/src/internal/dynlink.h -+++ b/src/internal/dynlink.h -@@ -64,6 +64,10 @@ struct fdpic_dummy_loadmap { - #define DL_FDPIC 0 - #endif - -+#ifndef DL_NOMMU_SUPPORT -+#define DL_NOMMU_SUPPORT 0 -+#endif -+ - #if !DL_FDPIC - #define IS_RELATIVE(x,s) ( \ - (R_TYPE(x) == REL_RELATIVE) || \ ---- a/src/internal/libc.h -+++ b/src/internal/libc.h -@@ -11,13 +11,20 @@ struct __locale_struct { - const struct __locale_map *volatile cat[6]; - }; - -+struct tls_module { -+ struct tls_module *next; -+ void *image; -+ size_t len, size, align, offset; -+}; -+ - struct __libc { - int can_do_threads; - int threaded; - int secure; - volatile int threads_minus_1; - size_t *auxv; -- size_t tls_size; -+ struct tls_module *tls_head; -+ size_t tls_size, tls_align, tls_cnt; - size_t page_size; - struct __locale_struct global_locale; - }; ---- a/src/internal/syscall.h -+++ b/src/internal/syscall.h -@@ -17,9 +17,7 @@ - typedef long syscall_arg_t; - #endif - --#ifdef SHARED - __attribute__((visibility("hidden"))) --#endif - long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...), - __syscall_cp(syscall_arg_t, syscall_arg_t, 
syscall_arg_t, syscall_arg_t, - syscall_arg_t, syscall_arg_t, syscall_arg_t); ---- a/src/internal/version.c -+++ b/src/internal/version.c -@@ -1,12 +1,9 @@ --#ifdef SHARED -- - #include "version.h" - - static const char version[] = VERSION; - -+__attribute__((__visibility__("hidden"))) - const char *__libc_get_version() - { - return version; - } -- --#endif ---- a/src/internal/vis.h -+++ b/src/internal/vis.h -@@ -4,10 +4,9 @@ - * override default visibilities to reduce the size and performance costs - * of position-independent code. */ - --#ifndef CRT --#ifdef SHARED -+#if !defined(CRT) && !defined(__ASSEMBLER__) - --/* For shared libc.so, all symbols should be protected, but some toolchains -+/* Conceptually, all symbols should be protected, but some toolchains - * fail to support copy relocations for protected data, so exclude all - * exported data symbols. */ - -@@ -25,16 +24,4 @@ extern char *optarg, **environ, **__envi - - #pragma GCC visibility push(protected) - --#elif defined(__PIC__) -- --/* If building static libc.a as position-independent code, try to make -- * everything hidden except possibly-undefined weak references. */ -- --__attribute__((__visibility__("default"))) --extern void (*const __init_array_start)(), (*const __init_array_end)(), -- (*const __fini_array_start)(), (*const __fini_array_end)(); -- --#pragma GCC visibility push(hidden) -- --#endif - #endif ---- a/src/ldso/arm/dlsym.s -+++ b/src/ldso/arm/dlsym.s -@@ -1,3 +1,4 @@ -+.syntax unified - .text - .global dlsym - .hidden __dlsym ---- /dev/null -+++ b/src/ldso/arm/find_exidx.c -@@ -0,0 +1,42 @@ -+#define _GNU_SOURCE -+#include <link.h> -+#include <stdint.h> -+ -+struct find_exidx_data { -+ uintptr_t pc, exidx_start; -+ int exidx_len; -+}; -+ -+static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr) -+{ -+ struct find_exidx_data *data = ptr; -+ const ElfW(Phdr) *phdr = info->dlpi_phdr; -+ uintptr_t addr, exidx_start = 0; -+ int i, match = 0, exidx_len = 0; -+ -+ for (i = info->dlpi_phnum; i > 0; i--, phdr++) { -+ addr = info->dlpi_addr + phdr->p_vaddr; -+ switch (phdr->p_type) { -+ case PT_LOAD: -+ match |= data->pc >= addr && data->pc < addr + phdr->p_memsz; -+ break; -+ case PT_ARM_EXIDX: -+ exidx_start = addr; -+ exidx_len = phdr->p_memsz; -+ break; -+ } -+ } -+ data->exidx_start = exidx_start; -+ data->exidx_len = exidx_len; -+ return match; -+} -+ -+uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount) -+{ -+ struct find_exidx_data data; -+ data.pc = pc; -+ if (dl_iterate_phdr(find_exidx, &data) <= 0) -+ return 0; -+ *pcount = data.exidx_len / 8; -+ return data.exidx_start; -+} ---- a/src/ldso/dynlink.c -+++ b/src/ldso/dynlink.c -@@ -70,8 +70,8 @@ struct dso { - char kernel_mapped; - struct dso **deps, *needed_by; - char *rpath_orig, *rpath; -- void *tls_image; -- size_t tls_len, tls_size, tls_align, tls_id, tls_offset; -+ struct tls_module tls; -+ size_t tls_id; - size_t relro_start, relro_end; - void **new_dtv; - unsigned char *new_tls; -@@ -99,7 +99,9 @@ struct symdef { - - int __init_tp(void *); - void __init_libc(char **, char *); -+void *__copy_tls(unsigned char *); - -+__attribute__((__visibility__("hidden"))) - const char *__libc_get_version(void); - - static struct builtin_tls { -@@ -123,6 +125,7 @@ static int noload; - static jmp_buf *rtld_fail; - static pthread_rwlock_t lock; - static struct debug debug; -+static struct tls_module *tls_tail; - static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN; - static size_t static_tls_cnt; - static pthread_mutex_t 
init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE }; -@@ -131,6 +134,15 @@ static struct fdpic_dummy_loadmap app_du - - struct debug *_dl_debug_addr = &debug; - -+__attribute__((__visibility__("hidden"))) -+void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0; -+ -+__attribute__((__visibility__("hidden"))) -+extern void (*const __init_array_end)(void), (*const __fini_array_end)(void); -+ -+weak_alias(__init_array_start, __init_array_end); -+weak_alias(__fini_array_start, __fini_array_end); -+ - static int dl_strcmp(const char *l, const char *r) - { - for (; *l==*r && *l; l++, r++); -@@ -397,14 +409,14 @@ static void do_relocs(struct dso *dso, s - break; - #ifdef TLS_ABOVE_TP - case REL_TPOFF: -- *reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend; -+ *reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend; - break; - #else - case REL_TPOFF: -- *reloc_addr = tls_val - def.dso->tls_offset + addend; -+ *reloc_addr = tls_val - def.dso->tls.offset + addend; - break; - case REL_TPOFF_NEG: -- *reloc_addr = def.dso->tls_offset - tls_val + addend; -+ *reloc_addr = def.dso->tls.offset - tls_val + addend; - break; - #endif - case REL_TLSDESC: -@@ -426,10 +438,10 @@ static void do_relocs(struct dso *dso, s - } else { - reloc_addr[0] = (size_t)__tlsdesc_static; - #ifdef TLS_ABOVE_TP -- reloc_addr[1] = tls_val + def.dso->tls_offset -+ reloc_addr[1] = tls_val + def.dso->tls.offset - + TPOFF_K + addend; - #else -- reloc_addr[1] = tls_val - def.dso->tls_offset -+ reloc_addr[1] = tls_val - def.dso->tls.offset - + addend; - #endif - } -@@ -482,8 +494,14 @@ static void reclaim_gaps(struct dso *dso - - static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off) - { -- char *q = mmap(p, n, prot, flags, fd, off); -- if (q != MAP_FAILED || errno != EINVAL) return q; -+ static int no_map_fixed; -+ char *q; -+ if (!no_map_fixed) { -+ q = mmap(p, n, prot, flags|MAP_FIXED, fd, off); -+ if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL) -+ return q; -+ no_map_fixed = 1; -+ } - /* Fallbacks for MAP_FIXED failure on NOMMU kernels. */ - if (flags & MAP_ANONYMOUS) { - memset(p, 0, n); -@@ -561,9 +579,9 @@ static void *map_library(int fd, struct - dyn = ph->p_vaddr; - } else if (ph->p_type == PT_TLS) { - tls_image = ph->p_vaddr; -- dso->tls_align = ph->p_align; -- dso->tls_len = ph->p_filesz; -- dso->tls_size = ph->p_memsz; -+ dso->tls.align = ph->p_align; -+ dso->tls.len = ph->p_filesz; -+ dso->tls.size = ph->p_memsz; - } else if (ph->p_type == PT_GNU_RELRO) { - dso->relro_start = ph->p_vaddr & -PAGE_SIZE; - dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE; -@@ -593,7 +611,7 @@ static void *map_library(int fd, struct - ((ph->p_flags&PF_W) ? PROT_WRITE: 0) | - ((ph->p_flags&PF_X) ? PROT_EXEC : 0)); - map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1), -- prot, (prot&PROT_WRITE) ? 
MAP_PRIVATE : MAP_SHARED, -+ prot, MAP_PRIVATE, - fd, ph->p_offset & -PAGE_SIZE); - if (map == MAP_FAILED) { - unmap_library(dso); -@@ -604,6 +622,19 @@ static void *map_library(int fd, struct - dso->loadmap->segs[i].p_vaddr = ph->p_vaddr; - dso->loadmap->segs[i].p_memsz = ph->p_memsz; - i++; -+ if (prot & PROT_WRITE) { -+ size_t brk = (ph->p_vaddr & PAGE_SIZE-1) -+ + ph->p_filesz; -+ size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE; -+ size_t pgend = brk + ph->p_memsz - ph->p_filesz -+ + PAGE_SIZE-1 & -PAGE_SIZE; -+ if (pgend > pgbrk && mmap_fixed(map+pgbrk, -+ pgend-pgbrk, prot, -+ MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -+ -1, off_start) == MAP_FAILED) -+ goto error; -+ memset(map + brk, 0, pgbrk-brk); -+ } - } - map = (void *)dso->loadmap->segs[0].addr; - map_len = 0; -@@ -618,7 +649,11 @@ static void *map_library(int fd, struct - * the length of the file. This is okay because we will not - * use the invalid part; we just need to reserve the right - * amount of virtual address space to map over later. */ -- map = mmap((void *)addr_min, map_len, prot, MAP_PRIVATE, fd, off_start); -+ map = DL_NOMMU_SUPPORT -+ ? mmap((void *)addr_min, map_len, PROT_READ|PROT_WRITE|PROT_EXEC, -+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) -+ : mmap((void *)addr_min, map_len, prot, -+ MAP_PRIVATE, fd, off_start); - if (map==MAP_FAILED) goto error; - dso->map = map; - dso->map_len = map_len; -@@ -643,7 +678,8 @@ static void *map_library(int fd, struct - dso->phentsize = eh->e_phentsize; - } - /* Reuse the existing mapping for the lowest-address LOAD */ -- if ((ph->p_vaddr & -PAGE_SIZE) == addr_min) continue; -+ if ((ph->p_vaddr & -PAGE_SIZE) == addr_min && !DL_NOMMU_SUPPORT) -+ continue; - this_min = ph->p_vaddr & -PAGE_SIZE; - this_max = ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE; - off_start = ph->p_offset & -PAGE_SIZE; -@@ -670,7 +706,7 @@ static void *map_library(int fd, struct - done_mapping: - dso->base = base; - dso->dynv = laddr(dso, dyn); -- if (dso->tls_size) dso->tls_image = laddr(dso, tls_image); -+ if (dso->tls.size) dso->tls.image = laddr(dso, tls_image); - if (!runtime) reclaim_gaps(dso); - free(allocated_buf); - return map; -@@ -987,8 +1023,8 @@ static struct dso *load_library(const ch - * extended DTV capable of storing an additional slot for - * the newly-loaded DSO. */ - alloc_size = sizeof *p + strlen(pathname) + 1; -- if (runtime && temp_dso.tls_image) { -- size_t per_th = temp_dso.tls_size + temp_dso.tls_align -+ if (runtime && temp_dso.tls.image) { -+ size_t per_th = temp_dso.tls.size + temp_dso.tls.align - + sizeof(void *) * (tls_cnt+3); - n_th = libc.threads_minus_1 + 1; - if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX; -@@ -1009,22 +1045,25 @@ static struct dso *load_library(const ch - strcpy(p->name, pathname); - /* Add a shortname only if name arg was not an explicit pathname. 
*/ - if (pathname != name) p->shortname = strrchr(p->name, '/')+1; -- if (p->tls_image) { -+ if (p->tls.image) { - p->tls_id = ++tls_cnt; -- tls_align = MAXP2(tls_align, p->tls_align); -+ tls_align = MAXP2(tls_align, p->tls.align); - #ifdef TLS_ABOVE_TP -- p->tls_offset = tls_offset + ( (tls_align-1) & -- -(tls_offset + (uintptr_t)p->tls_image) ); -- tls_offset += p->tls_size; -+ p->tls.offset = tls_offset + ( (tls_align-1) & -+ -(tls_offset + (uintptr_t)p->tls.image) ); -+ tls_offset += p->tls.size; - #else -- tls_offset += p->tls_size + p->tls_align - 1; -- tls_offset -= (tls_offset + (uintptr_t)p->tls_image) -- & (p->tls_align-1); -- p->tls_offset = tls_offset; -+ tls_offset += p->tls.size + p->tls.align - 1; -+ tls_offset -= (tls_offset + (uintptr_t)p->tls.image) -+ & (p->tls.align-1); -+ p->tls.offset = tls_offset; - #endif - p->new_dtv = (void *)(-sizeof(size_t) & - (uintptr_t)(p->name+strlen(p->name)+sizeof(size_t))); - p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1)); -+ if (tls_tail) tls_tail->next = &p->tls; -+ else libc.tls_head = &p->tls; -+ tls_tail = &p->tls; - } - - tail->next = p; -@@ -1151,7 +1190,7 @@ static void kernel_mapped_dso(struct dso - p->kernel_mapped = 1; - } - --static void do_fini() -+void __libc_exit_fini() - { - struct dso *p; - size_t dyn[DYN_CNT]; -@@ -1214,53 +1253,8 @@ static void dl_debug_state(void) - - weak_alias(dl_debug_state, _dl_debug_state); - --void __reset_tls() -+void __init_tls(size_t *auxv) - { -- pthread_t self = __pthread_self(); -- struct dso *p; -- for (p=head; p; p=p->next) { -- if (!p->tls_id || !self->dtv[p->tls_id]) continue; -- memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len); -- memset((char *)self->dtv[p->tls_id]+p->tls_len, 0, -- p->tls_size - p->tls_len); -- if (p->tls_id == (size_t)self->dtv[0]) break; -- } --} -- --void *__copy_tls(unsigned char *mem) --{ -- pthread_t td; -- struct dso *p; -- void **dtv; -- --#ifdef TLS_ABOVE_TP -- dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1); -- -- mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1); -- td = (pthread_t)mem; -- mem += sizeof(struct pthread); -- -- for (p=head; p; p=p->next) { -- if (!p->tls_id) continue; -- dtv[p->tls_id] = mem + p->tls_offset; -- memcpy(dtv[p->tls_id], p->tls_image, p->tls_len); -- } --#else -- dtv = (void **)mem; -- -- mem += libc.tls_size - sizeof(struct pthread); -- mem -= (uintptr_t)mem & (tls_align-1); -- td = (pthread_t)mem; -- -- for (p=head; p; p=p->next) { -- if (!p->tls_id) continue; -- dtv[p->tls_id] = mem - p->tls_offset; -- memcpy(dtv[p->tls_id], p->tls_image, p->tls_len); -- } --#endif -- dtv[0] = (void *)tls_cnt; -- td->dtv = td->dtv_copy = dtv; -- return td; - } - - __attribute__((__visibility__("hidden"))) -@@ -1286,7 +1280,7 @@ void *__tls_get_new(size_t *v) - /* Get new DTV space from new DSO if needed */ - if (v[0] > (size_t)self->dtv[0]) { - void **newdtv = p->new_dtv + -- (v[0]+1)*sizeof(void *)*a_fetch_add(&p->new_dtv_idx,1); -+ (v[0]+1)*a_fetch_add(&p->new_dtv_idx,1); - memcpy(newdtv, self->dtv, - ((size_t)self->dtv[0]+1) * sizeof(void *)); - newdtv[0] = (void *)v[0]; -@@ -1297,12 +1291,12 @@ void *__tls_get_new(size_t *v) - unsigned char *mem; - for (p=head; ; p=p->next) { - if (!p->tls_id || self->dtv[p->tls_id]) continue; -- mem = p->new_tls + (p->tls_size + p->tls_align) -+ mem = p->new_tls + (p->tls.size + p->tls.align) - * a_fetch_add(&p->new_tls_idx,1); -- mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) -- & (p->tls_align-1); -+ mem += ((uintptr_t)p->tls.image - (uintptr_t)mem) -+ & 
(p->tls.align-1); - self->dtv[p->tls_id] = mem; -- memcpy(mem, p->tls_image, p->tls_len); -+ memcpy(mem, p->tls.image, p->tls.len); - if (p->tls_id == v[0]) break; - } - __restore_sigs(&set); -@@ -1311,6 +1305,8 @@ void *__tls_get_new(size_t *v) - - static void update_tls_size() - { -+ libc.tls_cnt = tls_cnt; -+ libc.tls_align = tls_align; - libc.tls_size = ALIGN( - (1+tls_cnt) * sizeof(void *) + - tls_offset + -@@ -1421,6 +1417,7 @@ _Noreturn void __dls3(size_t *sp) - * use during dynamic linking. If possible it will also serve as the - * thread pointer at runtime. */ - libc.tls_size = sizeof builtin_tls; -+ libc.tls_align = tls_align; - if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) { - a_crash(); - } -@@ -1448,13 +1445,13 @@ _Noreturn void __dls3(size_t *sp) - interp_off = (size_t)phdr->p_vaddr; - else if (phdr->p_type == PT_TLS) { - tls_image = phdr->p_vaddr; -- app.tls_len = phdr->p_filesz; -- app.tls_size = phdr->p_memsz; -- app.tls_align = phdr->p_align; -+ app.tls.len = phdr->p_filesz; -+ app.tls.size = phdr->p_memsz; -+ app.tls.align = phdr->p_align; - } - } - if (DL_FDPIC) app.loadmap = app_loadmap; -- if (app.tls_size) app.tls_image = laddr(&app, tls_image); -+ if (app.tls.size) app.tls.image = laddr(&app, tls_image); - if (interp_off) ldso.name = laddr(&app, interp_off); - if ((aux[0] & (1UL<<AT_EXECFN)) - && strncmp((char *)aux[AT_EXECFN], "/proc/", 6)) -@@ -1523,19 +1520,20 @@ _Noreturn void __dls3(size_t *sp) - dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base); - } - } -- if (app.tls_size) { -+ if (app.tls.size) { -+ libc.tls_head = &app.tls; - app.tls_id = tls_cnt = 1; - #ifdef TLS_ABOVE_TP -- app.tls_offset = 0; -- tls_offset = app.tls_size -- + ( -((uintptr_t)app.tls_image + app.tls_size) -- & (app.tls_align-1) ); -+ app.tls.offset = 0; -+ tls_offset = app.tls.size -+ + ( -((uintptr_t)app.tls.image + app.tls.size) -+ & (app.tls.align-1) ); - #else -- tls_offset = app.tls_offset = app.tls_size -- + ( -((uintptr_t)app.tls_image + app.tls_size) -- & (app.tls_align-1) ); -+ tls_offset = app.tls.offset = app.tls.size -+ + ( -((uintptr_t)app.tls.image + app.tls.size) -+ & (app.tls.align-1) ); - #endif -- tls_align = MAXP2(tls_align, app.tls_align); -+ tls_align = MAXP2(tls_align, app.tls.align); - } - app.global = 1; - decode_dyn(&app); -@@ -1635,8 +1633,6 @@ _Noreturn void __dls3(size_t *sp) - debug.state = 0; - _dl_debug_state(); - -- __init_libc(envp, argv[0]); -- atexit(do_fini); - errno = 0; - - CRTJMP((void *)aux[AT_ENTRY], argv-1); -@@ -1646,6 +1642,7 @@ _Noreturn void __dls3(size_t *sp) - void *dlopen(const char *file, int mode) - { - struct dso *volatile p, *orig_tail, *next; -+ struct tls_module *orig_tls_tail; - size_t orig_tls_cnt, orig_tls_offset, orig_tls_align; - size_t i; - int cs; -@@ -1658,6 +1655,7 @@ void *dlopen(const char *file, int mode) - __inhibit_ptc(); - - p = 0; -+ orig_tls_tail = tls_tail; - orig_tls_cnt = tls_cnt; - orig_tls_offset = tls_offset; - orig_tls_align = tls_align; -@@ -1684,6 +1682,8 @@ void *dlopen(const char *file, int mode) - unmap_library(p); - free(p); - } -+ if (!orig_tls_tail) libc.tls_head = 0; -+ tls_tail = orig_tls_tail; - tls_cnt = orig_tls_cnt; - tls_offset = orig_tls_offset; - tls_align = orig_tls_align; -@@ -1900,7 +1900,7 @@ int dl_iterate_phdr(int(*callback)(struc - info.dlpi_adds = gencnt; - info.dlpi_subs = 0; - info.dlpi_tls_modid = current->tls_id; -- info.dlpi_tls_data = current->tls_image; -+ info.dlpi_tls_data = current->tls.image; - - ret = (callback)(&info, sizeof (info), data); - ---- a/src/locale/langinfo.c 
-+++ b/src/locale/langinfo.c -@@ -37,23 +37,23 @@ char *__nl_langinfo_l(nl_item item, loca - - switch (cat) { - case LC_NUMERIC: -- if (idx > 1) return NULL; -+ if (idx > 1) return ""; - str = c_numeric; - break; - case LC_TIME: -- if (idx > 0x31) return NULL; -+ if (idx > 0x31) return ""; - str = c_time; - break; - case LC_MONETARY: -- if (idx > 0) return NULL; -+ if (idx > 0) return ""; - str = ""; - break; - case LC_MESSAGES: -- if (idx > 3) return NULL; -+ if (idx > 3) return ""; - str = c_messages; - break; - default: -- return NULL; -+ return ""; - } - - for (; idx; idx--, str++) for (; *str; str++); ---- a/src/malloc/lite_malloc.c -+++ b/src/malloc/lite_malloc.c -@@ -8,7 +8,7 @@ - - void *__expand_heap(size_t *); - --void *__simple_malloc(size_t n) -+static void *__simple_malloc(size_t n) - { - static char *cur, *end; - static volatile int lock[2]; ---- a/src/math/__rem_pio2.c -+++ b/src/math/__rem_pio2.c -@@ -118,7 +118,7 @@ int __rem_pio2(double x, double *y) - if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ - medium: - /* rint(x/(pi/2)), Assume round-to-nearest. */ -- fn = x*invpio2 + toint - toint; -+ fn = (double_t)x*invpio2 + toint - toint; - n = (int32_t)fn; - r = x - fn*pio2_1; - w = fn*pio2_1t; /* 1st round, good to 85 bits */ ---- a/src/math/__rem_pio2f.c -+++ b/src/math/__rem_pio2f.c -@@ -51,7 +51,7 @@ int __rem_pio2f(float x, double *y) - /* 25+53 bit pi is good enough for medium size */ - if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ -- fn = x*invpio2 + toint - toint; -+ fn = (double_t)x*invpio2 + toint - toint; - n = (int32_t)fn; - *y = x - fn*pio2_1 - fn*pio2_1t; - return n; ---- /dev/null -+++ b/src/math/arm/fabs.c -@@ -0,0 +1,15 @@ -+#include <math.h> -+ -+#if __ARM_PCS_VFP -+ -+double fabs(double x) -+{ -+ __asm__ ("vabs.f64 %P0, %P1" : "=w"(x) : "w"(x)); -+ return x; -+} -+ -+#else -+ -+#include "../fabs.c" -+ -+#endif ---- /dev/null -+++ b/src/math/arm/fabsf.c -@@ -0,0 +1,15 @@ -+#include <math.h> -+ -+#if __ARM_PCS_VFP -+ -+float fabsf(float x) -+{ -+ __asm__ ("vabs.f32 %0, %1" : "=t"(x) : "t"(x)); -+ return x; -+} -+ -+#else -+ -+#include "../fabsf.c" -+ -+#endif ---- /dev/null -+++ b/src/math/arm/sqrt.c -@@ -0,0 +1,15 @@ -+#include <math.h> -+ -+#if __VFP_FP__ && !__SOFTFP__ -+ -+double sqrt(double x) -+{ -+ __asm__ ("vsqrt.f64 %P0, %P1" : "=w"(x) : "w"(x)); -+ return x; -+} -+ -+#else -+ -+#include "../sqrt.c" -+ -+#endif ---- /dev/null -+++ b/src/math/arm/sqrtf.c -@@ -0,0 +1,15 @@ -+#include <math.h> -+ -+#if __VFP_FP__ && !__SOFTFP__ -+ -+float sqrtf(float x) -+{ -+ __asm__ ("vsqrt.f32 %0, %1" : "=t"(x) : "t"(x)); -+ return x; -+} -+ -+#else -+ -+#include "../sqrtf.c" -+ -+#endif ---- a/src/math/armebhf/fabs.sub -+++ /dev/null -@@ -1 +0,0 @@ --../armhf/fabs.s ---- a/src/math/armebhf/fabsf.sub -+++ /dev/null -@@ -1 +0,0 @@ --../armhf/fabsf.s ---- a/src/math/armebhf/sqrt.sub -+++ /dev/null -@@ -1 +0,0 @@ --../armhf/sqrt.s ---- a/src/math/armebhf/sqrtf.sub -+++ /dev/null -@@ -1 +0,0 @@ --../armhf/sqrtf.s ---- a/src/math/armhf/fabs.s -+++ /dev/null -@@ -1,7 +0,0 @@ --.fpu vfp --.text --.global fabs --.type fabs,%function --fabs: -- vabs.f64 d0, d0 -- bx lr ---- a/src/math/armhf/fabs.sub -+++ /dev/null -@@ -1 +0,0 @@ --fabs.s ---- a/src/math/armhf/fabsf.s -+++ /dev/null -@@ -1,7 +0,0 @@ --.fpu vfp --.text --.global fabsf --.type fabsf,%function --fabsf: -- vabs.f32 s0, s0 -- bx lr ---- a/src/math/armhf/fabsf.sub -+++ /dev/null -@@ -1 +0,0 @@ --fabsf.s ---- 
a/src/math/armhf/sqrt.s -+++ /dev/null -@@ -1,7 +0,0 @@ --.fpu vfp --.text --.global sqrt --.type sqrt,%function --sqrt: -- vsqrt.f64 d0, d0 -- bx lr ---- a/src/math/armhf/sqrt.sub -+++ /dev/null -@@ -1 +0,0 @@ --sqrt.s ---- a/src/math/armhf/sqrtf.s -+++ /dev/null -@@ -1,7 +0,0 @@ --.fpu vfp --.text --.global sqrtf --.type sqrtf,%function --sqrtf: -- vsqrt.f32 s0, s0 -- bx lr ---- a/src/math/armhf/sqrtf.sub -+++ /dev/null -@@ -1 +0,0 @@ --sqrtf.s ---- a/src/math/hypot.c -+++ b/src/math/hypot.c -@@ -12,10 +12,10 @@ static void sq(double_t *hi, double_t *l - { - double_t xh, xl, xc; - -- xc = x*SPLIT; -+ xc = (double_t)x*SPLIT; - xh = x - xc + xc; - xl = x - xh; -- *hi = x*x; -+ *hi = (double_t)x*x; - *lo = xh*xh - *hi + 2*xh*xl + xl*xl; - } - ---- a/src/mman/mremap.c -+++ b/src/mman/mremap.c -@@ -1,17 +1,31 @@ -+#define _GNU_SOURCE - #include <unistd.h> - #include <sys/mman.h> -+#include <errno.h> -+#include <stdint.h> - #include <stdarg.h> - #include "syscall.h" - #include "libc.h" - -+static void dummy(void) { } -+weak_alias(dummy, __vm_wait); -+ - void *__mremap(void *old_addr, size_t old_len, size_t new_len, int flags, ...) - { - va_list ap; -- void *new_addr; -- -- va_start(ap, flags); -- new_addr = va_arg(ap, void *); -- va_end(ap); -+ void *new_addr = 0; -+ -+ if (new_len >= PTRDIFF_MAX) { -+ errno = ENOMEM; -+ return MAP_FAILED; -+ } -+ -+ if (flags & MREMAP_FIXED) { -+ __vm_wait(); -+ va_start(ap, flags); -+ new_addr = va_arg(ap, void *); -+ va_end(ap); -+ } - - return (void *)syscall(SYS_mremap, old_addr, old_len, new_len, flags, new_addr); - } ---- a/src/network/getifaddrs.c -+++ b/src/network/getifaddrs.c -@@ -162,13 +162,26 @@ static int netlink_msg_to_ifaddr(void *p - for (rta = NLMSG_RTA(h, sizeof(*ifa)); NLMSG_RTAOK(rta, h); rta = RTA_NEXT(rta)) { - switch (rta->rta_type) { - case IFA_ADDRESS: -- copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); -+ /* If ifa_addr is already set we, received an IFA_LOCAL before -+ * so treat this as destination address */ -+ if (ifs->ifa.ifa_addr) -+ copy_addr(&ifs->ifa.ifa_dstaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); -+ else -+ copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); - break; - case IFA_BROADCAST: -- /* For point-to-point links this is peer, but ifa_broadaddr -- * and ifa_dstaddr are union, so this works for both. */ - copy_addr(&ifs->ifa.ifa_broadaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); - break; -+ case IFA_LOCAL: -+ /* If ifa_addr is set and we get IFA_LOCAL, assume we have -+ * a point-to-point network. Move address to correct field. 
*/ -+ if (ifs->ifa.ifa_addr) { -+ ifs->ifu = ifs->addr; -+ ifs->ifa.ifa_dstaddr = &ifs->ifu.sa; -+ memset(&ifs->addr, 0, sizeof(ifs->addr)); -+ } -+ copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); -+ break; - case IFA_LABEL: - if (RTA_DATALEN(rta) < sizeof(ifs->name)) { - memcpy(ifs->name, RTA_DATA(rta), RTA_DATALEN(rta)); ---- a/src/network/getnameinfo.c -+++ b/src/network/getnameinfo.c -@@ -135,13 +135,13 @@ int getnameinfo(const struct sockaddr *r - switch (af) { - case AF_INET: - a = (void *)&((struct sockaddr_in *)sa)->sin_addr; -- if (sl != sizeof(struct sockaddr_in)) return EAI_FAMILY; -+ if (sl < sizeof(struct sockaddr_in)) return EAI_FAMILY; - mkptr4(ptr, a); - scopeid = 0; - break; - case AF_INET6: - a = (void *)&((struct sockaddr_in6 *)sa)->sin6_addr; -- if (sl != sizeof(struct sockaddr_in6)) return EAI_FAMILY; -+ if (sl < sizeof(struct sockaddr_in6)) return EAI_FAMILY; - if (memcmp(a, "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12)) - mkptr6(ptr, a); - else ---- a/src/network/if_nametoindex.c -+++ b/src/network/if_nametoindex.c -@@ -10,7 +10,7 @@ unsigned if_nametoindex(const char *name - struct ifreq ifr; - int fd, r; - -- if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return -1; -+ if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return 0; - strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); - r = ioctl(fd, SIOCGIFINDEX, &ifr); - __syscall(SYS_close, fd); ---- a/src/network/lookup_name.c -+++ b/src/network/lookup_name.c -@@ -9,6 +9,7 @@ - #include <fcntl.h> - #include <unistd.h> - #include <pthread.h> -+#include <errno.h> - #include "lookup.h" - #include "stdio_impl.h" - #include "syscall.h" -@@ -51,7 +52,14 @@ static int name_from_hosts(struct addres - int cnt = 0; - unsigned char _buf[1032]; - FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf); -- if (!f) return 0; -+ if (!f) switch (errno) { -+ case ENOENT: -+ case ENOTDIR: -+ case EACCES: -+ return 0; -+ default: -+ return EAI_SYSTEM; -+ } - while (fgets(line, sizeof line, f) && cnt < MAXADDRS) { - char *p, *z; - ---- a/src/network/lookup_serv.c -+++ b/src/network/lookup_serv.c -@@ -4,6 +4,7 @@ - #include <ctype.h> - #include <string.h> - #include <fcntl.h> -+#include <errno.h> - #include "lookup.h" - #include "stdio_impl.h" - -@@ -69,7 +70,14 @@ int __lookup_serv(struct service buf[sta - - unsigned char _buf[1032]; - FILE _f, *f = __fopen_rb_ca("/etc/services", &_f, _buf, sizeof _buf); -- if (!f) return EAI_SERVICE; -+ if (!f) switch (errno) { -+ case ENOENT: -+ case ENOTDIR: -+ case EACCES: -+ return EAI_SERVICE; -+ default: -+ return EAI_SYSTEM; -+ } - - while (fgets(line, sizeof line, f) && cnt < MAXSERVS) { - if ((p=strchr(line, '#'))) *p++='\n', *p=0; ---- a/src/network/proto.c -+++ b/src/network/proto.c -@@ -9,21 +9,36 @@ static const unsigned char protos[] = { - "\001icmp\0" - "\002igmp\0" - "\003ggp\0" -+ "\004ipencap\0" -+ "\005st\0" - "\006tcp\0" -+ "\008egp\0" - "\014pup\0" - "\021udp\0" -- "\026idp\0" -+ "\024hmp\0" -+ "\026xns-idp\0" -+ "\033rdp\0" -+ "\035iso-tp4\0" -+ "\044xtp\0" -+ "\045ddp\0" -+ "\046idpr-cmtp\0" - "\051ipv6\0" - "\053ipv6-route\0" - "\054ipv6-frag\0" -+ "\055idrp\0" -+ "\056rsvp\0" - "\057gre\0" - "\062esp\0" - "\063ah\0" -+ "\071skip\0" - "\072ipv6-icmp\0" - "\073ipv6-nonxt\0" - "\074ipv6-opts\0" -+ "\111rspf\0" -+ "\121vmtp\0" - "\131ospf\0" - "\136ipip\0" -+ "\142encap\0" - "\147pim\0" - "\377raw" - }; ---- a/src/network/res_msend.c -+++ b/src/network/res_msend.c -@@ -54,7 +54,15 @@ int 
__res_msend(int nqueries, const unsi - - /* Get nameservers from resolv.conf, fallback to localhost */ - f = __fopen_rb_ca("/etc/resolv.conf", &_f, _buf, sizeof _buf); -- if (f) for (nns=0; nns<3 && fgets(line, sizeof line, f); ) { -+ if (!f) switch (errno) { -+ case ENOENT: -+ case ENOTDIR: -+ case EACCES: -+ goto no_resolv_conf; -+ default: -+ return -1; -+ } -+ for (nns=0; nns<3 && fgets(line, sizeof line, f); ) { - if (!strncmp(line, "options", 7) && isspace(line[7])) { - unsigned long x; - char *p, *z; -@@ -92,7 +100,8 @@ int __res_msend(int nqueries, const unsi - } - } - } -- if (f) __fclose_ca(f); -+ __fclose_ca(f); -+no_resolv_conf: - if (!nns) { - ns[0].sin.sin_family = AF_INET; - ns[0].sin.sin_port = htons(53); ---- a/src/search/tsearch_avl.c -+++ b/src/search/tsearch_avl.c -@@ -77,38 +77,45 @@ static struct node *find(struct node *n, - return find(n->right, k, cmp); - } - --static struct node *insert(struct node **n, const void *k, -- int (*cmp)(const void *, const void *), int *new) -+static struct node *insert(struct node *n, const void *k, -+ int (*cmp)(const void *, const void *), struct node **found) - { -- struct node *r = *n; -+ struct node *r; - int c; - -- if (!r) { -- *n = r = malloc(sizeof **n); -- if (r) { -- r->key = k; -- r->left = r->right = 0; -- r->height = 1; -+ if (!n) { -+ n = malloc(sizeof *n); -+ if (n) { -+ n->key = k; -+ n->left = n->right = 0; -+ n->height = 1; - } -- *new = 1; -- return r; -+ *found = n; -+ return n; -+ } -+ c = cmp(k, n->key); -+ if (c == 0) { -+ *found = n; -+ return 0; -+ } -+ r = insert(c < 0 ? n->left : n->right, k, cmp, found); -+ if (r) { -+ if (c < 0) -+ n->left = r; -+ else -+ n->right = r; -+ r = balance(n); - } -- c = cmp(k, r->key); -- if (c == 0) -- return r; -- if (c < 0) -- r = insert(&r->left, k, cmp, new); -- else -- r = insert(&r->right, k, cmp, new); -- if (*new) -- *n = balance(*n); - return r; - } - --static struct node *movr(struct node *n, struct node *r) { -- if (!n) -- return r; -- n->right = movr(n->right, r); -+static struct node *remove_rightmost(struct node *n, struct node **rightmost) -+{ -+ if (!n->right) { -+ *rightmost = n; -+ return n->left; -+ } -+ n->right = remove_rightmost(n->right, rightmost); - return balance(n); - } - -@@ -122,7 +129,13 @@ static struct node *remove(struct node * - c = cmp(k, (*n)->key); - if (c == 0) { - struct node *r = *n; -- *n = movr(r->left, r->right); -+ if (r->left) { -+ r->left = remove_rightmost(r->left, n); -+ (*n)->left = r->left; -+ (*n)->right = r->right; -+ *n = balance(*n); -+ } else -+ *n = r->right; - free(r); - return parent; - } -@@ -138,6 +151,8 @@ static struct node *remove(struct node * - void *tdelete(const void *restrict key, void **restrict rootp, - int(*compar)(const void *, const void *)) - { -+ if (!rootp) -+ return 0; - struct node *n = *rootp; - struct node *ret; - /* last argument is arbitrary non-null pointer -@@ -150,17 +165,21 @@ void *tdelete(const void *restrict key, - void *tfind(const void *key, void *const *rootp, - int(*compar)(const void *, const void *)) - { -+ if (!rootp) -+ return 0; - return find(*rootp, key, compar); - } - - void *tsearch(const void *key, void **rootp, - int (*compar)(const void *, const void *)) - { -- int new = 0; -- struct node *n = *rootp; -+ struct node *update; - struct node *ret; -- ret = insert(&n, key, compar, &new); -- *rootp = n; -+ if (!rootp) -+ return 0; -+ update = insert(*rootp, key, compar, &ret); -+ if (update) -+ *rootp = update; - return ret; - } - ---- a/src/setjmp/arm/longjmp.s -+++ 
b/src/setjmp/arm/longjmp.s -@@ -1,3 +1,4 @@ -+.syntax unified - .global _longjmp - .global longjmp - .type _longjmp,%function -@@ -20,7 +21,11 @@ longjmp: - ldc p2, cr4, [ip], #48 - 2: tst r1,#0x40 - beq 2f -- .word 0xecbc8b10 /* vldmia ip!, {d8-d15} */ -+ .fpu vfp -+ vldmia ip!, {d8-d15} -+ .fpu softvfp -+ .eabi_attribute 10, 0 -+ .eabi_attribute 27, 0 - 2: tst r1,#0x200 - beq 3f - ldcl p1, cr10, [ip], #8 -@@ -29,9 +34,7 @@ longjmp: - ldcl p1, cr13, [ip], #8 - ldcl p1, cr14, [ip], #8 - ldcl p1, cr15, [ip], #8 --3: tst lr,#1 -- moveq pc,lr -- bx lr -+3: bx lr - - .hidden __hwcap - 1: .word __hwcap-1b ---- a/src/setjmp/arm/setjmp.s -+++ b/src/setjmp/arm/setjmp.s -@@ -1,3 +1,4 @@ -+.syntax unified - .global __setjmp - .global _setjmp - .global setjmp -@@ -22,7 +23,11 @@ setjmp: - stc p2, cr4, [ip], #48 - 2: tst r1,#0x40 - beq 2f -- .word 0xecac8b10 /* vstmia ip!, {d8-d15} */ -+ .fpu vfp -+ vstmia ip!, {d8-d15} -+ .fpu softvfp -+ .eabi_attribute 10, 0 -+ .eabi_attribute 27, 0 - 2: tst r1,#0x200 - beq 3f - stcl p1, cr10, [ip], #8 -@@ -31,9 +36,7 @@ setjmp: - stcl p1, cr13, [ip], #8 - stcl p1, cr14, [ip], #8 - stcl p1, cr15, [ip], #8 --3: tst lr,#1 -- moveq pc,lr -- bx lr -+3: bx lr - - .hidden __hwcap - 1: .word __hwcap-1b ---- a/src/setjmp/mips-sf/longjmp.s -+++ /dev/null -@@ -1,25 +0,0 @@ --.set noreorder -- --.global _longjmp --.global longjmp --.type _longjmp,@function --.type longjmp,@function --_longjmp: --longjmp: -- move $2, $5 -- bne $2, $0, 1f -- nop -- addu $2, $2, 1 --1: lw $ra, 0($4) -- lw $sp, 4($4) -- lw $16, 8($4) -- lw $17, 12($4) -- lw $18, 16($4) -- lw $19, 20($4) -- lw $20, 24($4) -- lw $21, 28($4) -- lw $22, 32($4) -- lw $23, 36($4) -- lw $30, 40($4) -- jr $ra -- lw $28, 44($4) ---- a/src/setjmp/mips-sf/longjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --longjmp.s ---- a/src/setjmp/mips-sf/setjmp.s -+++ /dev/null -@@ -1,25 +0,0 @@ --.set noreorder -- --.global __setjmp --.global _setjmp --.global setjmp --.type __setjmp,@function --.type _setjmp,@function --.type setjmp,@function --__setjmp: --_setjmp: --setjmp: -- sw $ra, 0($4) -- sw $sp, 4($4) -- sw $16, 8($4) -- sw $17, 12($4) -- sw $18, 16($4) -- sw $19, 20($4) -- sw $20, 24($4) -- sw $21, 28($4) -- sw $22, 32($4) -- sw $23, 36($4) -- sw $30, 40($4) -- sw $28, 44($4) -- jr $ra -- li $2, 0 ---- a/src/setjmp/mips-sf/setjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --setjmp.s ---- /dev/null -+++ b/src/setjmp/mips/longjmp.S -@@ -0,0 +1,40 @@ -+.set noreorder -+ -+.global _longjmp -+.global longjmp -+.type _longjmp,@function -+.type longjmp,@function -+_longjmp: -+longjmp: -+ move $2, $5 -+ bne $2, $0, 1f -+ nop -+ addu $2, $2, 1 -+1: -+#ifndef __mips_soft_float -+ lwc1 $20, 56($4) -+ lwc1 $21, 60($4) -+ lwc1 $22, 64($4) -+ lwc1 $23, 68($4) -+ lwc1 $24, 72($4) -+ lwc1 $25, 76($4) -+ lwc1 $26, 80($4) -+ lwc1 $27, 84($4) -+ lwc1 $28, 88($4) -+ lwc1 $29, 92($4) -+ lwc1 $30, 96($4) -+ lwc1 $31, 100($4) -+#endif -+ lw $ra, 0($4) -+ lw $sp, 4($4) -+ lw $16, 8($4) -+ lw $17, 12($4) -+ lw $18, 16($4) -+ lw $19, 20($4) -+ lw $20, 24($4) -+ lw $21, 28($4) -+ lw $22, 32($4) -+ lw $23, 36($4) -+ lw $30, 40($4) -+ jr $ra -+ lw $28, 44($4) ---- a/src/setjmp/mips/longjmp.s -+++ /dev/null -@@ -1,37 +0,0 @@ --.set noreorder -- --.global _longjmp --.global longjmp --.type _longjmp,@function --.type longjmp,@function --_longjmp: --longjmp: -- move $2, $5 -- bne $2, $0, 1f -- nop -- addu $2, $2, 1 --1: lwc1 $20, 56($4) -- lwc1 $21, 60($4) -- lwc1 $22, 64($4) -- lwc1 $23, 68($4) -- lwc1 $24, 72($4) -- lwc1 $25, 76($4) -- lwc1 $26, 80($4) -- lwc1 $27, 84($4) -- 
lwc1 $28, 88($4) -- lwc1 $29, 92($4) -- lwc1 $30, 96($4) -- lwc1 $31, 100($4) -- lw $ra, 0($4) -- lw $sp, 4($4) -- lw $16, 8($4) -- lw $17, 12($4) -- lw $18, 16($4) -- lw $19, 20($4) -- lw $20, 24($4) -- lw $21, 28($4) -- lw $22, 32($4) -- lw $23, 36($4) -- lw $30, 40($4) -- jr $ra -- lw $28, 44($4) ---- /dev/null -+++ b/src/setjmp/mips/setjmp.S -@@ -0,0 +1,39 @@ -+.set noreorder -+ -+.global __setjmp -+.global _setjmp -+.global setjmp -+.type __setjmp,@function -+.type _setjmp,@function -+.type setjmp,@function -+__setjmp: -+_setjmp: -+setjmp: -+ sw $ra, 0($4) -+ sw $sp, 4($4) -+ sw $16, 8($4) -+ sw $17, 12($4) -+ sw $18, 16($4) -+ sw $19, 20($4) -+ sw $20, 24($4) -+ sw $21, 28($4) -+ sw $22, 32($4) -+ sw $23, 36($4) -+ sw $30, 40($4) -+ sw $28, 44($4) -+#ifndef __mips_soft_float -+ swc1 $20, 56($4) -+ swc1 $21, 60($4) -+ swc1 $22, 64($4) -+ swc1 $23, 68($4) -+ swc1 $24, 72($4) -+ swc1 $25, 76($4) -+ swc1 $26, 80($4) -+ swc1 $27, 84($4) -+ swc1 $28, 88($4) -+ swc1 $29, 92($4) -+ swc1 $30, 96($4) -+ swc1 $31, 100($4) -+#endif -+ jr $ra -+ li $2, 0 ---- a/src/setjmp/mips/setjmp.s -+++ /dev/null -@@ -1,37 +0,0 @@ --.set noreorder -- --.global __setjmp --.global _setjmp --.global setjmp --.type __setjmp,@function --.type _setjmp,@function --.type setjmp,@function --__setjmp: --_setjmp: --setjmp: -- sw $ra, 0($4) -- sw $sp, 4($4) -- sw $16, 8($4) -- sw $17, 12($4) -- sw $18, 16($4) -- sw $19, 20($4) -- sw $20, 24($4) -- sw $21, 28($4) -- sw $22, 32($4) -- sw $23, 36($4) -- sw $30, 40($4) -- sw $28, 44($4) -- swc1 $20, 56($4) -- swc1 $21, 60($4) -- swc1 $22, 64($4) -- swc1 $23, 68($4) -- swc1 $24, 72($4) -- swc1 $25, 76($4) -- swc1 $26, 80($4) -- swc1 $27, 84($4) -- swc1 $28, 88($4) -- swc1 $29, 92($4) -- swc1 $30, 96($4) -- swc1 $31, 100($4) -- jr $ra -- li $2, 0 ---- a/src/setjmp/mipsel-sf/longjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --../mips-sf/longjmp.s ---- a/src/setjmp/mipsel-sf/setjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --../mips-sf/setjmp.s ---- a/src/setjmp/sh-nofpu/longjmp.s -+++ /dev/null -@@ -1,22 +0,0 @@ --.global _longjmp --.global longjmp --.type _longjmp, @function --.type longjmp, @function --_longjmp: --longjmp: -- mov.l @r4+, r8 -- mov.l @r4+, r9 -- mov.l @r4+, r10 -- mov.l @r4+, r11 -- mov.l @r4+, r12 -- mov.l @r4+, r13 -- mov.l @r4+, r14 -- mov.l @r4+, r15 -- lds.l @r4+, pr -- -- tst r5, r5 -- movt r0 -- add r5, r0 -- -- rts -- nop ---- a/src/setjmp/sh-nofpu/longjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --longjmp.s ---- a/src/setjmp/sh-nofpu/setjmp.s -+++ /dev/null -@@ -1,24 +0,0 @@ --.global ___setjmp --.hidden ___setjmp --.global __setjmp --.global _setjmp --.global setjmp --.type __setjmp, @function --.type _setjmp, @function --.type setjmp, @function --___setjmp: --__setjmp: --_setjmp: --setjmp: -- add #36, r4 -- sts.l pr, @-r4 -- mov.l r15 @-r4 -- mov.l r14, @-r4 -- mov.l r13, @-r4 -- mov.l r12, @-r4 -- mov.l r11, @-r4 -- mov.l r10, @-r4 -- mov.l r9, @-r4 -- mov.l r8, @-r4 -- rts -- mov #0, r0 ---- a/src/setjmp/sh-nofpu/setjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --setjmp.s ---- /dev/null -+++ b/src/setjmp/sh/longjmp.S -@@ -0,0 +1,28 @@ -+.global _longjmp -+.global longjmp -+.type _longjmp, @function -+.type longjmp, @function -+_longjmp: -+longjmp: -+ mov.l @r4+, r8 -+ mov.l @r4+, r9 -+ mov.l @r4+, r10 -+ mov.l @r4+, r11 -+ mov.l @r4+, r12 -+ mov.l @r4+, r13 -+ mov.l @r4+, r14 -+ mov.l @r4+, r15 -+ lds.l @r4+, pr -+#if __SH_FPU_ANY__ || __SH4__ -+ fmov.s @r4+, fr12 -+ fmov.s @r4+, fr13 -+ fmov.s @r4+, fr14 -+ fmov.s @r4+, fr15 -+#endif -+ -+ tst r5, r5 -+ movt r0 -+ add r5, r0 
-+ -+ rts -+ nop ---- a/src/setjmp/sh/longjmp.s -+++ /dev/null -@@ -1,26 +0,0 @@ --.global _longjmp --.global longjmp --.type _longjmp, @function --.type longjmp, @function --_longjmp: --longjmp: -- mov.l @r4+, r8 -- mov.l @r4+, r9 -- mov.l @r4+, r10 -- mov.l @r4+, r11 -- mov.l @r4+, r12 -- mov.l @r4+, r13 -- mov.l @r4+, r14 -- mov.l @r4+, r15 -- lds.l @r4+, pr -- fmov.s @r4+, fr12 -- fmov.s @r4+, fr13 -- fmov.s @r4+, fr14 -- fmov.s @r4+, fr15 -- -- tst r5, r5 -- movt r0 -- add r5, r0 -- -- rts -- nop ---- /dev/null -+++ b/src/setjmp/sh/setjmp.S -@@ -0,0 +1,32 @@ -+.global ___setjmp -+.hidden ___setjmp -+.global __setjmp -+.global _setjmp -+.global setjmp -+.type __setjmp, @function -+.type _setjmp, @function -+.type setjmp, @function -+___setjmp: -+__setjmp: -+_setjmp: -+setjmp: -+#if __SH_FPU_ANY__ || __SH4__ -+ add #52, r4 -+ fmov.s fr15, @-r4 -+ fmov.s fr14, @-r4 -+ fmov.s fr13, @-r4 -+ fmov.s fr12, @-r4 -+#else -+ add #36, r4 -+#endif -+ sts.l pr, @-r4 -+ mov.l r15, @-r4 -+ mov.l r14, @-r4 -+ mov.l r13, @-r4 -+ mov.l r12, @-r4 -+ mov.l r11, @-r4 -+ mov.l r10, @-r4 -+ mov.l r9, @-r4 -+ mov.l r8, @-r4 -+ rts -+ mov #0, r0 ---- a/src/setjmp/sh/setjmp.s -+++ /dev/null -@@ -1,28 +0,0 @@ --.global ___setjmp --.hidden ___setjmp --.global __setjmp --.global _setjmp --.global setjmp --.type __setjmp, @function --.type _setjmp, @function --.type setjmp, @function --___setjmp: --__setjmp: --_setjmp: --setjmp: -- add #52, r4 -- fmov.s fr15, @-r4 -- fmov.s fr14, @-r4 -- fmov.s fr13, @-r4 -- fmov.s fr12, @-r4 -- sts.l pr, @-r4 -- mov.l r15, @-r4 -- mov.l r14, @-r4 -- mov.l r13, @-r4 -- mov.l r12, @-r4 -- mov.l r11, @-r4 -- mov.l r10, @-r4 -- mov.l r9, @-r4 -- mov.l r8, @-r4 -- rts -- mov #0, r0 ---- a/src/setjmp/sheb-nofpu/longjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --../sh-nofpu/longjmp.s ---- a/src/setjmp/sheb-nofpu/setjmp.sub -+++ /dev/null -@@ -1 +0,0 @@ --../sh-nofpu/setjmp.s ---- a/src/signal/arm/restore.s -+++ b/src/signal/arm/restore.s -@@ -1,3 +1,5 @@ -+.syntax unified -+ - .global __restore - .type __restore,%function - __restore: ---- a/src/signal/arm/sigsetjmp.s -+++ b/src/signal/arm/sigsetjmp.s -@@ -1,3 +1,4 @@ -+.syntax unified - .global sigsetjmp - .global __sigsetjmp - .type sigsetjmp,%function ---- a/src/signal/sigaction.c -+++ b/src/signal/sigaction.c -@@ -17,10 +17,6 @@ void __get_handler_set(sigset_t *set) - int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old) - { - struct k_sigaction ksa, ksa_old; -- if (sig >= (unsigned)_NSIG) { -- errno = EINVAL; -- return -1; -- } - if (sa) { - if ((uintptr_t)sa->sa_handler > 1UL) { - a_or_l(handler_set+(sig-1)/(8*sizeof(long)), -@@ -57,7 +53,7 @@ int __libc_sigaction(int sig, const stru - - int __sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old) - { -- if (sig-32U < 3) { -+ if (sig-32U < 3 || sig-1U >= _NSIG-1) { - errno = EINVAL; - return -1; - } ---- a/src/signal/sigsetjmp_tail.c -+++ b/src/signal/sigsetjmp_tail.c -@@ -2,9 +2,7 @@ - #include <signal.h> - #include "syscall.h" - --#ifdef SHARED - __attribute__((__visibility__("hidden"))) --#endif - int __sigsetjmp_tail(sigjmp_buf jb, int ret) - { - void *p = jb->__ss; ---- a/src/stdio/getdelim.c -+++ b/src/stdio/getdelim.c -@@ -27,17 +27,18 @@ ssize_t getdelim(char **restrict s, size - for (;;) { - z = memchr(f->rpos, delim, f->rend - f->rpos); - k = z ? 
z - f->rpos + 1 : f->rend - f->rpos; -- if (i+k >= *n) { -+ if (i+k+1 >= *n) { - if (k >= SIZE_MAX/2-i) goto oom; -- *n = i+k+2; -- if (*n < SIZE_MAX/4) *n *= 2; -- tmp = realloc(*s, *n); -+ size_t m = i+k+2; -+ if (!z && m < SIZE_MAX/4) m += m/2; -+ tmp = realloc(*s, m); - if (!tmp) { -- *n = i+k+2; -- tmp = realloc(*s, *n); -+ m = i+k+2; -+ tmp = realloc(*s, m); - if (!tmp) goto oom; - } - *s = tmp; -+ *n = m; - } - memcpy(*s+i, f->rpos, k); - f->rpos += k; ---- /dev/null -+++ b/src/string/arm/__aeabi_memclr.c -@@ -0,0 +1,9 @@ -+#include <string.h> -+#include "libc.h" -+ -+void __aeabi_memclr(void *dest, size_t n) -+{ -+ memset(dest, 0, n); -+} -+weak_alias(__aeabi_memclr, __aeabi_memclr4); -+weak_alias(__aeabi_memclr, __aeabi_memclr8); ---- /dev/null -+++ b/src/string/arm/__aeabi_memcpy.c -@@ -0,0 +1,9 @@ -+#include <string.h> -+#include "libc.h" -+ -+void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n) -+{ -+ memcpy(dest, src, n); -+} -+weak_alias(__aeabi_memcpy, __aeabi_memcpy4); -+weak_alias(__aeabi_memcpy, __aeabi_memcpy8); ---- /dev/null -+++ b/src/string/arm/__aeabi_memmove.c -@@ -0,0 +1,9 @@ -+#include <string.h> -+#include "libc.h" -+ -+void __aeabi_memmove(void *dest, const void *src, size_t n) -+{ -+ memmove(dest, src, n); -+} -+weak_alias(__aeabi_memmove, __aeabi_memmove4); -+weak_alias(__aeabi_memmove, __aeabi_memmove8); ---- /dev/null -+++ b/src/string/arm/__aeabi_memset.c -@@ -0,0 +1,9 @@ -+#include <string.h> -+#include "libc.h" -+ -+void __aeabi_memset(void *dest, size_t n, int c) -+{ -+ memset(dest, c, n); -+} -+weak_alias(__aeabi_memset, __aeabi_memset4); -+weak_alias(__aeabi_memset, __aeabi_memset8); ---- /dev/null -+++ b/src/string/arm/memcpy.c -@@ -0,0 +1,3 @@ -+#if __ARMEB__ -+#include "../memcpy.c" -+#endif ---- /dev/null -+++ b/src/string/arm/memcpy_le.S -@@ -0,0 +1,383 @@ -+#ifndef __ARMEB__ -+ -+/* -+ * Copyright (C) 2008 The Android Open Source Project -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ */ -+ -+ -+/* -+ * Optimized memcpy() for ARM. -+ * -+ * note that memcpy() always returns the destination pointer, -+ * so we have to preserve R0. -+ */ -+ -+/* -+ * This file has been modified from the original for use in musl libc. 
-+ * The main changes are: addition of .type memcpy,%function to make the -+ * code safely callable from thumb mode, adjusting the return -+ * instructions to be compatible with pre-thumb ARM cpus, and removal -+ * of prefetch code that is not compatible with older cpus. -+ */ -+ -+.syntax unified -+ -+.global memcpy -+.type memcpy,%function -+memcpy: -+ /* The stack must always be 64-bits aligned to be compliant with the -+ * ARM ABI. Since we have to save R0, we might as well save R4 -+ * which we can use for better pipelining of the reads below -+ */ -+ .fnstart -+ .save {r0, r4, lr} -+ stmfd sp!, {r0, r4, lr} -+ /* Making room for r5-r11 which will be spilled later */ -+ .pad #28 -+ sub sp, sp, #28 -+ -+ /* it simplifies things to take care of len<4 early */ -+ cmp r2, #4 -+ blo copy_last_3_and_return -+ -+ /* compute the offset to align the source -+ * offset = (4-(src&3))&3 = -src & 3 -+ */ -+ rsb r3, r1, #0 -+ ands r3, r3, #3 -+ beq src_aligned -+ -+ /* align source to 32 bits. We need to insert 2 instructions between -+ * a ldr[b|h] and str[b|h] because byte and half-word instructions -+ * stall 2 cycles. -+ */ -+ movs r12, r3, lsl #31 -+ sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ -+ ldrbmi r3, [r1], #1 -+ ldrbcs r4, [r1], #1 -+ ldrbcs r12,[r1], #1 -+ strbmi r3, [r0], #1 -+ strbcs r4, [r0], #1 -+ strbcs r12,[r0], #1 -+ -+src_aligned: -+ -+ /* see if src and dst are aligned together (congruent) */ -+ eor r12, r0, r1 -+ tst r12, #3 -+ bne non_congruent -+ -+ /* Use post-incriment mode for stm to spill r5-r11 to reserved stack -+ * frame. Don't update sp. -+ */ -+ stmea sp, {r5-r11} -+ -+ /* align the destination to a cache-line */ -+ rsb r3, r0, #0 -+ ands r3, r3, #0x1C -+ beq congruent_aligned32 -+ cmp r3, r2 -+ andhi r3, r2, #0x1C -+ -+ /* conditionnaly copies 0 to 7 words (length in r3) */ -+ movs r12, r3, lsl #28 -+ ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ -+ ldmmi r1!, {r8, r9} /* 8 bytes */ -+ stmcs r0!, {r4, r5, r6, r7} -+ stmmi r0!, {r8, r9} -+ tst r3, #0x4 -+ ldrne r10,[r1], #4 /* 4 bytes */ -+ strne r10,[r0], #4 -+ sub r2, r2, r3 -+ -+congruent_aligned32: -+ /* -+ * here source is aligned to 32 bytes. -+ */ -+ -+cached_aligned32: -+ subs r2, r2, #32 -+ blo less_than_32_left -+ -+ /* -+ * We preload a cache-line up to 64 bytes ahead. On the 926, this will -+ * stall only until the requested world is fetched, but the linefill -+ * continues in the the background. -+ * While the linefill is going, we write our previous cache-line -+ * into the write-buffer (which should have some free space). -+ * When the linefill is done, the writebuffer will -+ * start dumping its content into memory -+ * -+ * While all this is going, we then load a full cache line into -+ * 8 registers, this cache line should be in the cache by now -+ * (or partly in the cache). -+ * -+ * This code should work well regardless of the source/dest alignment. -+ * -+ */ -+ -+ /* Align the preload register to a cache-line because the cpu does -+ * "critical word first" (the first word requested is loaded first). -+ */ -+ @ bic r12, r1, #0x1F -+ @ add r12, r12, #64 -+ -+1: ldmia r1!, { r4-r11 } -+ subs r2, r2, #32 -+ -+ /* -+ * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi -+ * for ARM9 preload will not be safely guarded by the preceding subs. -+ * When it is safely guarded the only possibility to have SIGSEGV here -+ * is because the caller overstates the length. 
-+ */ -+ @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */ -+ stmia r0!, { r4-r11 } -+ bhs 1b -+ -+ add r2, r2, #32 -+ -+less_than_32_left: -+ /* -+ * less than 32 bytes left at this point (length in r2) -+ */ -+ -+ /* skip all this if there is nothing to do, which should -+ * be a common case (if not executed the code below takes -+ * about 16 cycles) -+ */ -+ tst r2, #0x1F -+ beq 1f -+ -+ /* conditionnaly copies 0 to 31 bytes */ -+ movs r12, r2, lsl #28 -+ ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ -+ ldmmi r1!, {r8, r9} /* 8 bytes */ -+ stmcs r0!, {r4, r5, r6, r7} -+ stmmi r0!, {r8, r9} -+ movs r12, r2, lsl #30 -+ ldrcs r3, [r1], #4 /* 4 bytes */ -+ ldrhmi r4, [r1], #2 /* 2 bytes */ -+ strcs r3, [r0], #4 -+ strhmi r4, [r0], #2 -+ tst r2, #0x1 -+ ldrbne r3, [r1] /* last byte */ -+ strbne r3, [r0] -+ -+ /* we're done! restore everything and return */ -+1: ldmfd sp!, {r5-r11} -+ ldmfd sp!, {r0, r4, lr} -+ bx lr -+ -+ /********************************************************************/ -+ -+non_congruent: -+ /* -+ * here source is aligned to 4 bytes -+ * but destination is not. -+ * -+ * in the code below r2 is the number of bytes read -+ * (the number of bytes written is always smaller, because we have -+ * partial words in the shift queue) -+ */ -+ cmp r2, #4 -+ blo copy_last_3_and_return -+ -+ /* Use post-incriment mode for stm to spill r5-r11 to reserved stack -+ * frame. Don't update sp. -+ */ -+ stmea sp, {r5-r11} -+ -+ /* compute shifts needed to align src to dest */ -+ rsb r5, r0, #0 -+ and r5, r5, #3 /* r5 = # bytes in partial words */ -+ mov r12, r5, lsl #3 /* r12 = right */ -+ rsb lr, r12, #32 /* lr = left */ -+ -+ /* read the first word */ -+ ldr r3, [r1], #4 -+ sub r2, r2, #4 -+ -+ /* write a partial word (0 to 3 bytes), such that destination -+ * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) -+ */ -+ movs r5, r5, lsl #31 -+ strbmi r3, [r0], #1 -+ movmi r3, r3, lsr #8 -+ strbcs r3, [r0], #1 -+ movcs r3, r3, lsr #8 -+ strbcs r3, [r0], #1 -+ movcs r3, r3, lsr #8 -+ -+ cmp r2, #4 -+ blo partial_word_tail -+ -+ /* Align destination to 32 bytes (cache line boundary) */ -+1: tst r0, #0x1c -+ beq 2f -+ ldr r5, [r1], #4 -+ sub r2, r2, #4 -+ orr r4, r3, r5, lsl lr -+ mov r3, r5, lsr r12 -+ str r4, [r0], #4 -+ cmp r2, #4 -+ bhs 1b -+ blo partial_word_tail -+ -+ /* copy 32 bytes at a time */ -+2: subs r2, r2, #32 -+ blo less_than_thirtytwo -+ -+ /* Use immediate mode for the shifts, because there is an extra cycle -+ * for register shifts, which could account for up to 50% of -+ * performance hit. 
-+ */ -+ -+ cmp r12, #24 -+ beq loop24 -+ cmp r12, #8 -+ beq loop8 -+ -+loop16: -+ ldr r12, [r1], #4 -+1: mov r4, r12 -+ ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -+ subs r2, r2, #32 -+ ldrhs r12, [r1], #4 -+ orr r3, r3, r4, lsl #16 -+ mov r4, r4, lsr #16 -+ orr r4, r4, r5, lsl #16 -+ mov r5, r5, lsr #16 -+ orr r5, r5, r6, lsl #16 -+ mov r6, r6, lsr #16 -+ orr r6, r6, r7, lsl #16 -+ mov r7, r7, lsr #16 -+ orr r7, r7, r8, lsl #16 -+ mov r8, r8, lsr #16 -+ orr r8, r8, r9, lsl #16 -+ mov r9, r9, lsr #16 -+ orr r9, r9, r10, lsl #16 -+ mov r10, r10, lsr #16 -+ orr r10, r10, r11, lsl #16 -+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -+ mov r3, r11, lsr #16 -+ bhs 1b -+ b less_than_thirtytwo -+ -+loop8: -+ ldr r12, [r1], #4 -+1: mov r4, r12 -+ ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -+ subs r2, r2, #32 -+ ldrhs r12, [r1], #4 -+ orr r3, r3, r4, lsl #24 -+ mov r4, r4, lsr #8 -+ orr r4, r4, r5, lsl #24 -+ mov r5, r5, lsr #8 -+ orr r5, r5, r6, lsl #24 -+ mov r6, r6, lsr #8 -+ orr r6, r6, r7, lsl #24 -+ mov r7, r7, lsr #8 -+ orr r7, r7, r8, lsl #24 -+ mov r8, r8, lsr #8 -+ orr r8, r8, r9, lsl #24 -+ mov r9, r9, lsr #8 -+ orr r9, r9, r10, lsl #24 -+ mov r10, r10, lsr #8 -+ orr r10, r10, r11, lsl #24 -+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -+ mov r3, r11, lsr #8 -+ bhs 1b -+ b less_than_thirtytwo -+ -+loop24: -+ ldr r12, [r1], #4 -+1: mov r4, r12 -+ ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -+ subs r2, r2, #32 -+ ldrhs r12, [r1], #4 -+ orr r3, r3, r4, lsl #8 -+ mov r4, r4, lsr #24 -+ orr r4, r4, r5, lsl #8 -+ mov r5, r5, lsr #24 -+ orr r5, r5, r6, lsl #8 -+ mov r6, r6, lsr #24 -+ orr r6, r6, r7, lsl #8 -+ mov r7, r7, lsr #24 -+ orr r7, r7, r8, lsl #8 -+ mov r8, r8, lsr #24 -+ orr r8, r8, r9, lsl #8 -+ mov r9, r9, lsr #24 -+ orr r9, r9, r10, lsl #8 -+ mov r10, r10, lsr #24 -+ orr r10, r10, r11, lsl #8 -+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -+ mov r3, r11, lsr #24 -+ bhs 1b -+ -+less_than_thirtytwo: -+ /* copy the last 0 to 31 bytes of the source */ -+ rsb r12, lr, #32 /* we corrupted r12, recompute it */ -+ add r2, r2, #32 -+ cmp r2, #4 -+ blo partial_word_tail -+ -+1: ldr r5, [r1], #4 -+ sub r2, r2, #4 -+ orr r4, r3, r5, lsl lr -+ mov r3, r5, lsr r12 -+ str r4, [r0], #4 -+ cmp r2, #4 -+ bhs 1b -+ -+partial_word_tail: -+ /* we have a partial word in the input buffer */ -+ movs r5, lr, lsl #(31-3) -+ strbmi r3, [r0], #1 -+ movmi r3, r3, lsr #8 -+ strbcs r3, [r0], #1 -+ movcs r3, r3, lsr #8 -+ strbcs r3, [r0], #1 -+ -+ /* Refill spilled registers from the stack. Don't update sp. */ -+ ldmfd sp, {r5-r11} -+ -+copy_last_3_and_return: -+ movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ -+ ldrbmi r2, [r1], #1 -+ ldrbcs r3, [r1], #1 -+ ldrbcs r12,[r1] -+ strbmi r2, [r0], #1 -+ strbcs r3, [r0], #1 -+ strbcs r12,[r0] -+ -+ /* we're done! restore sp and spilled registers and return */ -+ add sp, sp, #28 -+ ldmfd sp!, {r0, r4, lr} -+ bx lr -+ -+#endif ---- a/src/string/armel/memcpy.s -+++ /dev/null -@@ -1,381 +0,0 @@ --/* -- * Copyright (C) 2008 The Android Open Source Project -- * All rights reserved. -- * -- * Redistribution and use in source and binary forms, with or without -- * modification, are permitted provided that the following conditions -- * are met: -- * * Redistributions of source code must retain the above copyright -- * notice, this list of conditions and the following disclaimer. 
-- * * Redistributions in binary form must reproduce the above copyright -- * notice, this list of conditions and the following disclaimer in -- * the documentation and/or other materials provided with the -- * distribution. -- * -- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -- * SUCH DAMAGE. -- */ -- -- --/* -- * Optimized memcpy() for ARM. -- * -- * note that memcpy() always returns the destination pointer, -- * so we have to preserve R0. -- */ -- --/* -- * This file has been modified from the original for use in musl libc. -- * The main changes are: addition of .type memcpy,%function to make the -- * code safely callable from thumb mode, adjusting the return -- * instructions to be compatible with pre-thumb ARM cpus, and removal -- * of prefetch code that is not compatible with older cpus. -- */ -- --.global memcpy --.type memcpy,%function --memcpy: -- /* The stack must always be 64-bits aligned to be compliant with the -- * ARM ABI. Since we have to save R0, we might as well save R4 -- * which we can use for better pipelining of the reads below -- */ -- .fnstart -- .save {r0, r4, lr} -- stmfd sp!, {r0, r4, lr} -- /* Making room for r5-r11 which will be spilled later */ -- .pad #28 -- sub sp, sp, #28 -- -- /* it simplifies things to take care of len<4 early */ -- cmp r2, #4 -- blo copy_last_3_and_return -- -- /* compute the offset to align the source -- * offset = (4-(src&3))&3 = -src & 3 -- */ -- rsb r3, r1, #0 -- ands r3, r3, #3 -- beq src_aligned -- -- /* align source to 32 bits. We need to insert 2 instructions between -- * a ldr[b|h] and str[b|h] because byte and half-word instructions -- * stall 2 cycles. -- */ -- movs r12, r3, lsl #31 -- sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ -- .word 0x44d13001 /* ldrbmi r3, [r1], #1 */ -- .word 0x24d14001 /* ldrbcs r4, [r1], #1 */ -- .word 0x24d1c001 /* ldrbcs r12,[r1], #1 */ -- .word 0x44c03001 /* strbmi r3, [r0], #1 */ -- .word 0x24c04001 /* strbcs r4, [r0], #1 */ -- .word 0x24c0c001 /* strbcs r12,[r0], #1 */ -- --src_aligned: -- -- /* see if src and dst are aligned together (congruent) */ -- eor r12, r0, r1 -- tst r12, #3 -- bne non_congruent -- -- /* Use post-incriment mode for stm to spill r5-r11 to reserved stack -- * frame. Don't update sp. 
-- */ -- stmea sp, {r5-r11} -- -- /* align the destination to a cache-line */ -- rsb r3, r0, #0 -- ands r3, r3, #0x1C -- beq congruent_aligned32 -- cmp r3, r2 -- andhi r3, r2, #0x1C -- -- /* conditionnaly copies 0 to 7 words (length in r3) */ -- movs r12, r3, lsl #28 -- ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ -- ldmmi r1!, {r8, r9} /* 8 bytes */ -- stmcs r0!, {r4, r5, r6, r7} -- stmmi r0!, {r8, r9} -- tst r3, #0x4 -- ldrne r10,[r1], #4 /* 4 bytes */ -- strne r10,[r0], #4 -- sub r2, r2, r3 -- --congruent_aligned32: -- /* -- * here source is aligned to 32 bytes. -- */ -- --cached_aligned32: -- subs r2, r2, #32 -- blo less_than_32_left -- -- /* -- * We preload a cache-line up to 64 bytes ahead. On the 926, this will -- * stall only until the requested world is fetched, but the linefill -- * continues in the the background. -- * While the linefill is going, we write our previous cache-line -- * into the write-buffer (which should have some free space). -- * When the linefill is done, the writebuffer will -- * start dumping its content into memory -- * -- * While all this is going, we then load a full cache line into -- * 8 registers, this cache line should be in the cache by now -- * (or partly in the cache). -- * -- * This code should work well regardless of the source/dest alignment. -- * -- */ -- -- /* Align the preload register to a cache-line because the cpu does -- * "critical word first" (the first word requested is loaded first). -- */ -- @ bic r12, r1, #0x1F -- @ add r12, r12, #64 -- --1: ldmia r1!, { r4-r11 } -- subs r2, r2, #32 -- -- /* -- * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi -- * for ARM9 preload will not be safely guarded by the preceding subs. -- * When it is safely guarded the only possibility to have SIGSEGV here -- * is because the caller overstates the length. -- */ -- @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */ -- stmia r0!, { r4-r11 } -- bhs 1b -- -- add r2, r2, #32 -- --less_than_32_left: -- /* -- * less than 32 bytes left at this point (length in r2) -- */ -- -- /* skip all this if there is nothing to do, which should -- * be a common case (if not executed the code below takes -- * about 16 cycles) -- */ -- tst r2, #0x1F -- beq 1f -- -- /* conditionnaly copies 0 to 31 bytes */ -- movs r12, r2, lsl #28 -- ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ -- ldmmi r1!, {r8, r9} /* 8 bytes */ -- stmcs r0!, {r4, r5, r6, r7} -- stmmi r0!, {r8, r9} -- movs r12, r2, lsl #30 -- ldrcs r3, [r1], #4 /* 4 bytes */ -- .word 0x40d140b2 /* ldrhmi r4, [r1], #2 */ /* 2 bytes */ -- strcs r3, [r0], #4 -- .word 0x40c040b2 /* strhmi r4, [r0], #2 */ -- tst r2, #0x1 -- .word 0x15d13000 /* ldrbne r3, [r1] */ /* last byte */ -- .word 0x15c03000 /* strbne r3, [r0] */ -- -- /* we're done! restore everything and return */ --1: ldmfd sp!, {r5-r11} -- ldmfd sp!, {r0, r4, lr} -- tst lr, #1 -- moveq pc, lr -- bx lr -- -- /********************************************************************/ -- --non_congruent: -- /* -- * here source is aligned to 4 bytes -- * but destination is not. -- * -- * in the code below r2 is the number of bytes read -- * (the number of bytes written is always smaller, because we have -- * partial words in the shift queue) -- */ -- cmp r2, #4 -- blo copy_last_3_and_return -- -- /* Use post-incriment mode for stm to spill r5-r11 to reserved stack -- * frame. Don't update sp. 
-- */ -- stmea sp, {r5-r11} -- -- /* compute shifts needed to align src to dest */ -- rsb r5, r0, #0 -- and r5, r5, #3 /* r5 = # bytes in partial words */ -- mov r12, r5, lsl #3 /* r12 = right */ -- rsb lr, r12, #32 /* lr = left */ -- -- /* read the first word */ -- ldr r3, [r1], #4 -- sub r2, r2, #4 -- -- /* write a partial word (0 to 3 bytes), such that destination -- * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) -- */ -- movs r5, r5, lsl #31 -- .word 0x44c03001 /* strbmi r3, [r0], #1 */ -- movmi r3, r3, lsr #8 -- .word 0x24c03001 /* strbcs r3, [r0], #1 */ -- movcs r3, r3, lsr #8 -- .word 0x24c03001 /* strbcs r3, [r0], #1 */ -- movcs r3, r3, lsr #8 -- -- cmp r2, #4 -- blo partial_word_tail -- -- /* Align destination to 32 bytes (cache line boundary) */ --1: tst r0, #0x1c -- beq 2f -- ldr r5, [r1], #4 -- sub r2, r2, #4 -- orr r4, r3, r5, lsl lr -- mov r3, r5, lsr r12 -- str r4, [r0], #4 -- cmp r2, #4 -- bhs 1b -- blo partial_word_tail -- -- /* copy 32 bytes at a time */ --2: subs r2, r2, #32 -- blo less_than_thirtytwo -- -- /* Use immediate mode for the shifts, because there is an extra cycle -- * for register shifts, which could account for up to 50% of -- * performance hit. -- */ -- -- cmp r12, #24 -- beq loop24 -- cmp r12, #8 -- beq loop8 -- --loop16: -- ldr r12, [r1], #4 --1: mov r4, r12 -- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -- subs r2, r2, #32 -- ldrhs r12, [r1], #4 -- orr r3, r3, r4, lsl #16 -- mov r4, r4, lsr #16 -- orr r4, r4, r5, lsl #16 -- mov r5, r5, lsr #16 -- orr r5, r5, r6, lsl #16 -- mov r6, r6, lsr #16 -- orr r6, r6, r7, lsl #16 -- mov r7, r7, lsr #16 -- orr r7, r7, r8, lsl #16 -- mov r8, r8, lsr #16 -- orr r8, r8, r9, lsl #16 -- mov r9, r9, lsr #16 -- orr r9, r9, r10, lsl #16 -- mov r10, r10, lsr #16 -- orr r10, r10, r11, lsl #16 -- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -- mov r3, r11, lsr #16 -- bhs 1b -- b less_than_thirtytwo -- --loop8: -- ldr r12, [r1], #4 --1: mov r4, r12 -- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -- subs r2, r2, #32 -- ldrhs r12, [r1], #4 -- orr r3, r3, r4, lsl #24 -- mov r4, r4, lsr #8 -- orr r4, r4, r5, lsl #24 -- mov r5, r5, lsr #8 -- orr r5, r5, r6, lsl #24 -- mov r6, r6, lsr #8 -- orr r6, r6, r7, lsl #24 -- mov r7, r7, lsr #8 -- orr r7, r7, r8, lsl #24 -- mov r8, r8, lsr #8 -- orr r8, r8, r9, lsl #24 -- mov r9, r9, lsr #8 -- orr r9, r9, r10, lsl #24 -- mov r10, r10, lsr #8 -- orr r10, r10, r11, lsl #24 -- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -- mov r3, r11, lsr #8 -- bhs 1b -- b less_than_thirtytwo -- --loop24: -- ldr r12, [r1], #4 --1: mov r4, r12 -- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -- subs r2, r2, #32 -- ldrhs r12, [r1], #4 -- orr r3, r3, r4, lsl #8 -- mov r4, r4, lsr #24 -- orr r4, r4, r5, lsl #8 -- mov r5, r5, lsr #24 -- orr r5, r5, r6, lsl #8 -- mov r6, r6, lsr #24 -- orr r6, r6, r7, lsl #8 -- mov r7, r7, lsr #24 -- orr r7, r7, r8, lsl #8 -- mov r8, r8, lsr #24 -- orr r8, r8, r9, lsl #8 -- mov r9, r9, lsr #24 -- orr r9, r9, r10, lsl #8 -- mov r10, r10, lsr #24 -- orr r10, r10, r11, lsl #8 -- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -- mov r3, r11, lsr #24 -- bhs 1b -- --less_than_thirtytwo: -- /* copy the last 0 to 31 bytes of the source */ -- rsb r12, lr, #32 /* we corrupted r12, recompute it */ -- add r2, r2, #32 -- cmp r2, #4 -- blo partial_word_tail -- --1: ldr r5, [r1], #4 -- sub r2, r2, #4 -- orr r4, r3, r5, lsl lr -- mov r3, r5, lsr r12 -- str r4, [r0], #4 -- cmp r2, #4 -- bhs 1b -- --partial_word_tail: -- /* we have a partial word in the input buffer */ -- movs r5, lr, lsl #(31-3) -- .word 0x44c03001 
/* strbmi r3, [r0], #1 */
-- movmi r3, r3, lsr #8
-- .word 0x24c03001 /* strbcs r3, [r0], #1 */
-- movcs r3, r3, lsr #8
-- .word 0x24c03001 /* strbcs r3, [r0], #1 */
--
-- /* Refill spilled registers from the stack. Don't update sp. */
-- ldmfd sp, {r5-r11}
--
--copy_last_3_and_return:
-- movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
-- .word 0x44d12001 /* ldrbmi r2, [r1], #1 */
-- .word 0x24d13001 /* ldrbcs r3, [r1], #1 */
-- .word 0x25d1c000 /* ldrbcs r12,[r1] */
-- .word 0x44c02001 /* strbmi r2, [r0], #1 */
-- .word 0x24c03001 /* strbcs r3, [r0], #1 */
-- .word 0x25c0c000 /* strbcs r12,[r0] */
--
-- /* we're done! restore sp and spilled registers and return */
-- add sp, sp, #28
-- ldmfd sp!, {r0, r4, lr}
-- tst lr, #1
-- moveq pc, lr
-- bx lr
---- a/src/string/armel/memcpy.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--memcpy.s
---- a/src/string/armhf/memcpy.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../armel/memcpy.s
---- a/src/thread/__syscall_cp.c
-+++ b/src/thread/__syscall_cp.c
-@@ -1,9 +1,7 @@
- #include "pthread_impl.h"
- #include "syscall.h"
-
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
- long __syscall_cp_c();
-
- static long sccp(syscall_arg_t nr,
---- a/src/thread/__tls_get_addr.c
-+++ b/src/thread/__tls_get_addr.c
-@@ -1,16 +1,16 @@
- #include <stddef.h>
- #include "pthread_impl.h"
-+#include "libc.h"
-+
-+__attribute__((__visibility__("hidden")))
-+void *__tls_get_new(size_t *);
-
- void *__tls_get_addr(size_t *v)
- {
- pthread_t self = __pthread_self();
--#ifdef SHARED
-- __attribute__((__visibility__("hidden")))
-- void *__tls_get_new(size_t *);
- if (v[0]<=(size_t)self->dtv[0])
- return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
- return __tls_get_new(v);
--#else
-- return (char *)self->dtv[1]+v[1]+DTP_OFFSET;
--#endif
- }
-+
-+weak_alias(__tls_get_addr, __tls_get_new);
---- a/src/thread/aarch64/syscall_cp.s
-+++ b/src/thread/aarch64/syscall_cp.s
-@@ -17,7 +17,7 @@
- __syscall_cp_asm:
- __cp_begin:
- ldr w0,[x0]
-- cbnz w0,1f
-+ cbnz w0,__cp_cancel
- mov x8,x1
- mov x0,x2
- mov x1,x3
-@@ -28,6 +28,5 @@ __cp_begin:
- svc 0
- __cp_end:
- ret
--
-- // cbnz might not be able to jump far enough
--1: b __cancel
-+__cp_cancel:
-+ b __cancel
---- /dev/null
-+++ b/src/thread/arm/__set_thread_area.c
-@@ -0,0 +1,49 @@
-+#include <stdint.h>
-+#include <elf.h>
-+#include "pthread_impl.h"
-+#include "libc.h"
-+
-+#define HWCAP_TLS (1 << 15)
-+
-+extern const unsigned char __attribute__((__visibility__("hidden")))
-+ __a_barrier_dummy[], __a_barrier_oldkuser[],
-+ __a_barrier_v6[], __a_barrier_v7[],
-+ __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
-+ __a_gettp_dummy[];
-+
-+#define __a_barrier_kuser 0xffff0fa0
-+#define __a_cas_kuser 0xffff0fc0
-+#define __a_gettp_kuser 0xffff0fe0
-+
-+extern uintptr_t __attribute__((__visibility__("hidden")))
-+ __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
-+
-+#define SET(op,ver) (__a_##op##_ptr = \
-+ (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
-+
-+int __set_thread_area(void *p)
-+{
-+#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
-+ if (__hwcap & HWCAP_TLS) {
-+ size_t *aux;
-+ SET(cas, v7);
-+ SET(barrier, v7);
-+ for (aux=libc.auxv; *aux; aux+=2) {
-+ if (*aux != AT_PLATFORM) continue;
-+ const char *s = (void *)aux[1];
-+ if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
-+ SET(cas, v6);
-+ SET(barrier, v6);
-+ break;
-+ }
-+ } else {
-+ int ver = *(int *)0xffff0ffc;
-+ SET(gettp, kuser);
-+ SET(cas, kuser);
-+ SET(barrier, kuser);
-+ if (ver < 2) a_crash();
-+ if (ver < 3) SET(barrier, oldkuser);
-+ }
-+#endif
-+ return __syscall(0xf0005, p);
-+}
---- a/src/thread/arm/__set_thread_area.s
-+++ /dev/null
-@@ -1 +0,0 @@
--/* Replaced by C code in arch/arm/src */
---- a/src/thread/arm/__unmapself.s
-+++ b/src/thread/arm/__unmapself.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .text
- .global __unmapself
- .type __unmapself,%function
---- /dev/null
-+++ b/src/thread/arm/atomics.s
-@@ -0,0 +1,111 @@
-+.syntax unified
-+.text
-+
-+.global __a_barrier
-+.hidden __a_barrier
-+.type __a_barrier,%function
-+__a_barrier:
-+ ldr ip,1f
-+ ldr ip,[pc,ip]
-+ add pc,pc,ip
-+1: .word __a_barrier_ptr-1b
-+.global __a_barrier_dummy
-+.hidden __a_barrier_dummy
-+__a_barrier_dummy:
-+ bx lr
-+.global __a_barrier_oldkuser
-+.hidden __a_barrier_oldkuser
-+__a_barrier_oldkuser:
-+ push {r0,r1,r2,r3,ip,lr}
-+ mov r1,r0
-+ mov r2,sp
-+ ldr ip,=0xffff0fc0
-+ mov lr,pc
-+ mov pc,ip
-+ pop {r0,r1,r2,r3,ip,lr}
-+ bx lr
-+.global __a_barrier_v6
-+.hidden __a_barrier_v6
-+__a_barrier_v6:
-+ mcr p15,0,r0,c7,c10,5
-+ bx lr
-+.global __a_barrier_v7
-+.hidden __a_barrier_v7
-+__a_barrier_v7:
-+ .word 0xf57ff05b /* dmb ish */
-+ bx lr
-+
-+.global __a_cas
-+.hidden __a_cas
-+.type __a_cas,%function
-+__a_cas:
-+ ldr ip,1f
-+ ldr ip,[pc,ip]
-+ add pc,pc,ip
-+1: .word __a_cas_ptr-1b
-+.global __a_cas_dummy
-+.hidden __a_cas_dummy
-+__a_cas_dummy:
-+ mov r3,r0
-+ ldr r0,[r2]
-+ subs r0,r3,r0
-+ streq r1,[r2]
-+ bx lr
-+.global __a_cas_v6
-+.hidden __a_cas_v6
-+__a_cas_v6:
-+ mov r3,r0
-+ mcr p15,0,r0,c7,c10,5
-+1: .word 0xe1920f9f /* ldrex r0,[r2] */
-+ subs r0,r3,r0
-+ .word 0x01820f91 /* strexeq r0,r1,[r2] */
-+ teqeq r0,#1
-+ beq 1b
-+ mcr p15,0,r0,c7,c10,5
-+ bx lr
-+.global __a_cas_v7
-+.hidden __a_cas_v7
-+__a_cas_v7:
-+ mov r3,r0
-+ .word 0xf57ff05b /* dmb ish */
-+1: .word 0xe1920f9f /* ldrex r0,[r2] */
-+ subs r0,r3,r0
-+ .word 0x01820f91 /* strexeq r0,r1,[r2] */
-+ teqeq r0,#1
-+ beq 1b
-+ .word 0xf57ff05b /* dmb ish */
-+ bx lr
-+
-+.global __aeabi_read_tp
-+.type __aeabi_read_tp,%function
-+__aeabi_read_tp:
-+
-+.global __a_gettp
-+.hidden __a_gettp
-+.type __a_gettp,%function
-+__a_gettp:
-+ ldr r0,1f
-+ ldr r0,[pc,r0]
-+ add pc,pc,r0
-+1: .word __a_gettp_ptr-1b
-+.global __a_gettp_dummy
-+.hidden __a_gettp_dummy
-+__a_gettp_dummy:
-+ mrc p15,0,r0,c13,c0,3
-+ bx lr
-+
-+.data
-+.global __a_barrier_ptr
-+.hidden __a_barrier_ptr
-+__a_barrier_ptr:
-+ .word 0
-+
-+.global __a_cas_ptr
-+.hidden __a_cas_ptr
-+__a_cas_ptr:
-+ .word 0
-+
-+.global __a_gettp_ptr
-+.hidden __a_gettp_ptr
-+__a_gettp_ptr:
-+ .word 0
---- a/src/thread/arm/clone.s
-+++ b/src/thread/arm/clone.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .text
- .global __clone
- .type __clone,%function
-@@ -15,8 +16,6 @@ __clone:
- tst r0,r0
- beq 1f
- ldmfd sp!,{r4,r5,r6,r7}
-- tst lr,#1
-- moveq pc,lr
- bx lr
-
- 1: mov r0,r6
---- a/src/thread/arm/syscall_cp.s
-+++ b/src/thread/arm/syscall_cp.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .global __cp_begin
- .hidden __cp_begin
- .global __cp_end
-@@ -22,8 +23,6 @@ __cp_begin:
- svc 0
- __cp_end:
- ldmfd sp!,{r4,r5,r6,r7,lr}
-- tst lr,#1
-- moveq pc,lr
- bx lr
- __cp_cancel:
- ldmfd sp!,{r4,r5,r6,r7,lr}
---- a/src/thread/microblaze/syscall_cp.s
-+++ b/src/thread/microblaze/syscall_cp.s
-@@ -11,7 +11,7 @@
- __syscall_cp_asm:
- __cp_begin:
- lwi r5, r5, 0
-- bnei r5, __cancel
-+ bnei r5, __cp_cancel
- addi r12, r6, 0
- add r5, r7, r0
- add r6, r8, r0
-@@ -23,3 +23,5 @@ __cp_begin:
- __cp_end:
- rtsd r15, 8
- nop
-+__cp_cancel:
-+ bri __cancel
---- a/src/thread/or1k/syscall_cp.s
-+++ b/src/thread/or1k/syscall_cp.s
-@@ -12,7 +12,7 @@ __syscall_cp_asm:
- __cp_begin:
- l.lwz r3, 0(r3)
- l.sfeqi r3, 0
-- l.bnf __cancel
-+ l.bnf __cp_cancel
- l.ori r11, r4, 0
- l.ori r3, r5, 0
- l.ori r4, r6, 0
-@@ -24,3 +24,6 @@ __cp_begin:
- __cp_end:
- l.jr r9
- l.nop
-+__cp_cancel:
-+ l.j __cancel
-+ l.nop
---- a/src/thread/powerpc/syscall_cp.s
-+++ b/src/thread/powerpc/syscall_cp.s
-@@ -38,7 +38,7 @@ __cp_begin:
- cmpwi cr7, 0, 0 #compare r0 with 0, store result in cr7.
- beq+ cr7, 1f #jump to label 1 if r0 was 0
-
-- b __cancel #else call cancel
-+ b __cp_cancel #else call cancel
- 1:
- #ok, the cancel flag was not set
- # syscall: number goes to r0, the rest 3-8
-@@ -55,3 +55,5 @@ __cp_end:
- #else negate result.
- neg 3, 3
- blr
-+__cp_cancel:
-+ b __cancel
---- a/src/thread/pthread_cancel.c
-+++ b/src/thread/pthread_cancel.c
-@@ -1,12 +1,11 @@
-+#define _GNU_SOURCE
- #include <string.h>
- #include "pthread_impl.h"
- #include "syscall.h"
- #include "libc.h"
-
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
--long __cancel(), __cp_cancel(), __syscall_cp_asm(), __syscall_cp_c();
-+long __cancel(), __syscall_cp_asm(), __syscall_cp_c();
-
- long __cancel()
- {
-@@ -17,12 +16,6 @@ long __cancel()
- return -ECANCELED;
- }
-
--/* If __syscall_cp_asm has adjusted the stack pointer, it must provide a
-- * definition of __cp_cancel to undo those adjustments and call __cancel.
-- * Otherwise, __cancel provides a definition for __cp_cancel. */
--
--weak_alias(__cancel, __cp_cancel);
--
- long __syscall_cp_asm(volatile void *, syscall_arg_t,
- syscall_arg_t, syscall_arg_t, syscall_arg_t,
- syscall_arg_t, syscall_arg_t, syscall_arg_t);
-@@ -52,24 +45,22 @@ static void _sigaddset(sigset_t *set, in
- set->__bits[s/8/sizeof *set->__bits] |= 1UL<<(s&8*sizeof *set->__bits-1);
- }
-
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
--extern const char __cp_begin[1], __cp_end[1];
-+extern const char __cp_begin[1], __cp_end[1], __cp_cancel[1];
-
- static void cancel_handler(int sig, siginfo_t *si, void *ctx)
- {
- pthread_t self = __pthread_self();
- ucontext_t *uc = ctx;
-- const char *ip = ((char **)&uc->uc_mcontext)[CANCEL_REG_IP];
-+ uintptr_t pc = uc->uc_mcontext.MC_PC;
-
- a_barrier();
- if (!self->cancel || self->canceldisable == PTHREAD_CANCEL_DISABLE) return;
-
- _sigaddset(&uc->uc_sigmask, SIGCANCEL);
-
-- if (self->cancelasync || ip >= __cp_begin && ip < __cp_end) {
-- ((char **)&uc->uc_mcontext)[CANCEL_REG_IP] = (char *)__cp_cancel;
-+ if (self->cancelasync || pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) {
-+ uc->uc_mcontext.MC_PC = (uintptr_t)__cp_cancel;
- return;
- }
-
---- /dev/null
-+++ b/src/thread/sh/__set_thread_area.c
-@@ -0,0 +1,40 @@
-+#include "pthread_impl.h"
-+#include "libc.h"
-+#include <elf.h>
-+
-+/* Also perform sh-specific init */
-+
-+#define CPU_HAS_LLSC 0x0040
-+#define CPU_HAS_CAS_L 0x0400
-+
-+__attribute__((__visibility__("hidden")))
-+extern const char __sh_cas_gusa[], __sh_cas_llsc[], __sh_cas_imask[], __sh_cas_cas_l[];
-+
-+__attribute__((__visibility__("hidden")))
-+const void *__sh_cas_ptr;
-+
-+__attribute__((__visibility__("hidden")))
-+unsigned __sh_nommu;
-+
-+int __set_thread_area(void *p)
-+{
-+ size_t *aux;
-+ __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
-+#ifndef __SH4A__
-+ __sh_cas_ptr = __sh_cas_gusa;
-+#if !defined(__SH3__) && !defined(__SH4__)
-+ for (aux=libc.auxv; *aux; aux+=2) {
-+ if (*aux != AT_PLATFORM) continue;
-+ const char *s = (void *)aux[1];
-+ if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
-+ __sh_cas_ptr = __sh_cas_imask;
-+ __sh_nommu = 1;
-+ }
-+#endif
-+ if (__hwcap & CPU_HAS_CAS_L)
-+ __sh_cas_ptr = __sh_cas_cas_l;
-+ else if (__hwcap & CPU_HAS_LLSC)
-+ __sh_cas_ptr = __sh_cas_llsc;
-+#endif
-+ return 0;
-+}
---- /dev/null
-+++ b/src/thread/sh/atomics.s
-@@ -0,0 +1,65 @@
-+/* Contract for all versions is same as cas.l r2,r3,@r0
-+ * pr and r1 are also clobbered (by jsr & r1 as temp).
-+ * r0,r2,r4-r15 must be preserved.
-+ * r3 contains result (==r2 iff cas succeeded). */
-+
-+ .align 2
-+.global __sh_cas_gusa
-+.hidden __sh_cas_gusa
-+__sh_cas_gusa:
-+ mov.l r5,@-r15
-+ mov.l r4,@-r15
-+ mov r0,r4
-+ mova 1f,r0
-+ mov r15,r1
-+ mov #(0f-1f),r15
-+0: mov.l @r4,r5
-+ cmp/eq r5,r2
-+ bf 1f
-+ mov.l r3,@r4
-+1: mov r1,r15
-+ mov r5,r3
-+ mov r4,r0
-+ mov.l @r15+,r4
-+ rts
-+ mov.l @r15+,r5
-+
-+.global __sh_cas_llsc
-+.hidden __sh_cas_llsc
-+__sh_cas_llsc:
-+ mov r0,r1
-+ synco
-+0: movli.l @r1,r0
-+ cmp/eq r0,r2
-+ bf 1f
-+ mov r3,r0
-+ movco.l r0,@r1
-+ bf 0b
-+ mov r2,r0
-+1: synco
-+ mov r0,r3
-+ rts
-+ mov r1,r0
-+
-+.global __sh_cas_imask
-+.hidden __sh_cas_imask
-+__sh_cas_imask:
-+ mov r0,r1
-+ stc sr,r0
-+ mov.l r0,@-r15
-+ or #0xf0,r0
-+ ldc r0,sr
-+ mov.l @r1,r0
-+ cmp/eq r0,r2
-+ bf 1f
-+ mov.l r3,@r1
-+1: ldc.l @r15+,sr
-+ mov r0,r3
-+ rts
-+ mov r1,r0
-+
-+.global __sh_cas_cas_l
-+.hidden __sh_cas_cas_l
-+__sh_cas_cas_l:
-+ rts
-+ .word 0x2323 /* cas.l r2,r3,@r0 */
---- a/src/thread/sh/syscall_cp.s
-+++ b/src/thread/sh/syscall_cp.s
-@@ -14,17 +14,8 @@ __syscall_cp_asm:
- __cp_begin:
- mov.l @r4, r4
- tst r4, r4
-- bt 2f
--
-- mov.l L1, r0
-- braf r0
-- nop
--1:
--
--.align 2
--L1: .long __cancel@PLT-(1b-.)
--
--2: mov r5, r3
-+ bf __cp_cancel
-+ mov r5, r3
- mov r6, r4
- mov r7, r5
- mov.l @r15, r6
-@@ -43,3 +34,12 @@ __cp_end:
-
- rts
- nop
-+
-+__cp_cancel:
-+ mov.l 2f, r0
-+ braf r0
-+ nop
-+1:
-+
-+.align 2
-+2: .long __cancel@PCREL-(1b-.)
---- a/src/thread/x32/syscall_cp.s
-+++ b/src/thread/x32/syscall_cp.s
-@@ -14,7 +14,7 @@ __syscall_cp_internal:
- __cp_begin:
- mov (%rdi),%eax
- test %eax,%eax
-- jnz __cancel
-+ jnz __cp_cancel
- mov %rdi,%r11
- mov %rsi,%rax
- mov %rdx,%rdi
-@@ -27,3 +27,5 @@ __cp_begin:
- syscall
- __cp_end:
- ret
-+__cp_cancel:
-+ jmp __cancel
---- a/src/thread/x86_64/syscall_cp.s
-+++ b/src/thread/x86_64/syscall_cp.s
-@@ -14,7 +14,7 @@ __syscall_cp_asm:
- __cp_begin:
- mov (%rdi),%eax
- test %eax,%eax
-- jnz __cancel
-+ jnz __cp_cancel
- mov %rdi,%r11
- mov %rsi,%rax
- mov %rdx,%rdi
-@@ -27,3 +27,5 @@ __cp_begin:
- syscall
- __cp_end:
- ret
-+__cp_cancel:
-+ jmp __cancel
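
The syscall_cp.s hunks above all make the same change: the conditional branch taken when the cancellation flag is set now targets a file-local __cp_cancel stub that jumps on to __cancel, instead of branching to __cancel directly. The removed aarch64 comment ("cbnz might not be able to jump far enough") names the motivation: short-range conditional branches cannot reliably reach a symbol in another object file. On the C side, cancel_handler stops indexing uc_mcontext as an array of pointers and reads the per-arch MC_PC field, diverting the interrupted context to __cp_cancel only when cancellation is asynchronous or the program counter lies inside the __cp_begin/__cp_end window. A minimal compilable sketch of that window test follows; the numeric addresses are made up and stand in for the hidden labels the assembly exports:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical addresses standing in for the hidden __cp_begin and
     * __cp_end labels that each syscall_cp.s places around the actual
     * syscall instruction. */
    #define CP_BEGIN 0x1000u
    #define CP_END   0x1040u

    /* Nonzero when the cancellation signal handler may redirect the
     * interrupted context to __cp_cancel. */
    static int in_cancel_window(uintptr_t pc)
    {
        /* Half-open interval: pc == CP_END means the syscall already
         * returned, so its result must be delivered, not discarded. */
        return pc >= CP_BEGIN && pc < CP_END;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               in_cancel_window(0x0fffu),  /* before __cp_begin -> 0 */
               in_cancel_window(0x1000u),  /* at __cp_begin     -> 1 */
               in_cancel_window(0x1040u)); /* at __cp_end       -> 0 */
        return 0;
    }

Because each __cp_cancel now lives in the same file as its window, pthread_cancel.c can drop the weak_alias(__cancel, __cp_cancel) fallback and simply declare all three labels as hidden externs.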
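The new arm/atomics.s and the sh __set_thread_area.c share one idea: probe the CPU once at startup (HWCAP bits, kuser helper version, AT_PLATFORM string) and point a hidden global at the best available compare-and-swap routine, whose contract per the sh/atomics.s comment is that the old value comes back and the swap succeeded iff it equals the expected one. On ARM the global holds a byte offset from __a_cas_dummy that the __a_cas thunk adds to the program counter, keeping the dispatch position-independent; on SH it holds the routine's address. A rough C rendering of the pattern, with hypothetical feature-bit and function names:

    #include <stdio.h>

    #define HWCAP_LLSC 0x0040  /* stand-in for CPU_HAS_LLSC and friends */

    /* Non-atomic stand-in for the dummy/imask/kuser fallbacks. */
    static int cas_fallback(volatile int *p, int expect, int new_val)
    {
        int old = *p;
        if (old == expect) *p = new_val;
        return old;
    }

    /* Models the ll/sc paths (ldrex/strex, movli/movco) via a builtin. */
    static int cas_native(volatile int *p, int expect, int new_val)
    {
        return __sync_val_compare_and_swap(p, expect, new_val);
    }

    static int (*cas_ptr)(volatile int *, int, int) = cas_fallback;

    /* Probe once at startup, as the new __set_thread_area code does. */
    static void init_atomics(unsigned hwcap)
    {
        if (hwcap & HWCAP_LLSC) cas_ptr = cas_native;
    }

    int main(void)
    {
        volatile int x = 1;
        init_atomics(HWCAP_LLSC);
        int old = cas_ptr(&x, 1, 2);
        printf("old=%d x=%d\n", old, x);  /* old=1 x=2: the CAS won */
        return 0;
    }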
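The __tls_get_addr rewrite is the same #ifdef SHARED cleanup in miniature: the hidden declaration of __tls_get_new moves to file scope, and the new weak_alias lets a static-linked program resolve __tls_get_new back to __tls_get_addr, where the fast path effectively always succeeds, while the dynamic linker supplies the real allocating definition. A toy model of the DTV fast path; the names, the dropped DTP_OFFSET, and the aborting slow path are all illustrative:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* dtv[0] counts the module slots currently allocated;
     * dtv[m] points at module m's TLS block. */
    static uintptr_t *dtv;

    static void *tls_get_addr(size_t v[2])
    {
        if (v[0] <= (size_t)dtv[0])      /* fast path: slot exists */
            return (char *)dtv[v[0]] + v[1];
        abort();  /* musl instead tail-calls the hidden __tls_get_new */
    }

    int main(void)
    {
        static char module1_tls[16] = "hello";
        dtv = (uintptr_t[]){ 1, (uintptr_t)module1_tls };
        printf("%s\n", (char *)tls_get_addr((size_t[]){ 1, 0 }));
        return 0;
    }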