--- a/.gitignore +++ b/.gitignore @@ -5,9 +5,6 @@ *.so.1 arch/*/bits/alltypes.h config.mak -include/bits -tools/musl-gcc -tools/musl-clang -tools/ld.musl-clang lib/musl-gcc.specs src/internal/version.h +/obj/ --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ # Do not make changes here. # +srcdir = . exec_prefix = /usr/local bindir = $(exec_prefix)/bin @@ -16,31 +17,38 @@ includedir = $(prefix)/include libdir = $(prefix)/lib syslibdir = /lib -SRCS = $(sort $(wildcard src/*/*.c arch/$(ARCH)/src/*.c)) -OBJS = $(SRCS:.c=.o) +BASE_SRCS = $(sort $(wildcard $(srcdir)/src/*/*.c $(srcdir)/arch/$(ARCH)/src/*.[csS])) +BASE_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(BASE_SRCS))) +ARCH_SRCS = $(wildcard $(srcdir)/src/*/$(ARCH)/*.[csS]) +ARCH_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(ARCH_SRCS))) +REPLACED_OBJS = $(sort $(subst /$(ARCH)/,/,$(ARCH_OBJS))) +OBJS = $(addprefix obj/, $(filter-out $(REPLACED_OBJS), $(sort $(BASE_OBJS) $(ARCH_OBJS)))) LOBJS = $(OBJS:.o=.lo) -GENH = include/bits/alltypes.h -GENH_INT = src/internal/version.h -IMPH = src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h +GENH = obj/include/bits/alltypes.h +GENH_INT = obj/src/internal/version.h +IMPH = $(addprefix $(srcdir)/, src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h) -LDFLAGS = +LDFLAGS = +LDFLAGS_AUTO = LIBCC = -lgcc CPPFLAGS = -CFLAGS = -Os -pipe +CFLAGS = +CFLAGS_AUTO = -Os -pipe CFLAGS_C99FSE = -std=c99 -ffreestanding -nostdinc CFLAGS_ALL = $(CFLAGS_C99FSE) -CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I./arch/$(ARCH) -I./src/internal -I./include -CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS) -CFLAGS_ALL_STATIC = $(CFLAGS_ALL) -CFLAGS_ALL_SHARED = $(CFLAGS_ALL) -fPIC -DSHARED +CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I$(srcdir)/arch/$(ARCH) -Iobj/src/internal -I$(srcdir)/src/internal -Iobj/include -I$(srcdir)/include +CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS_AUTO) $(CFLAGS) + +LDFLAGS_ALL = $(LDFLAGS_AUTO) $(LDFLAGS) AR = $(CROSS_COMPILE)ar RANLIB = $(CROSS_COMPILE)ranlib -INSTALL = ./tools/install.sh +INSTALL = $(srcdir)/tools/install.sh -ARCH_INCLUDES = $(wildcard arch/$(ARCH)/bits/*.h) -ALL_INCLUDES = $(sort $(wildcard include/*.h include/*/*.h) $(GENH) $(ARCH_INCLUDES:arch/$(ARCH)/%=include/%)) +ARCH_INCLUDES = $(wildcard $(srcdir)/arch/$(ARCH)/bits/*.h) +INCLUDES = $(wildcard $(srcdir)/include/*.h $(srcdir)/include/*/*.h) +ALL_INCLUDES = $(sort $(INCLUDES:$(srcdir)/%=%) $(GENH:obj/%=%) $(ARCH_INCLUDES:$(srcdir)/arch/$(ARCH)/%=include/%)) EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv dl EMPTY_LIBS = $(EMPTY_LIB_NAMES:%=lib/lib%.a) @@ -49,7 +57,7 @@ STATIC_LIBS = lib/libc.a SHARED_LIBS = lib/libc.so TOOL_LIBS = lib/musl-gcc.specs ALL_LIBS = $(CRT_LIBS) $(STATIC_LIBS) $(SHARED_LIBS) $(EMPTY_LIBS) $(TOOL_LIBS) -ALL_TOOLS = tools/musl-gcc +ALL_TOOLS = obj/musl-gcc WRAPCC_GCC = gcc WRAPCC_CLANG = clang @@ -58,95 +66,93 @@ LDSO_PATHNAME = $(syslibdir)/ld-musl-$(A -include config.mak +ifeq ($(ARCH),) +$(error Please set ARCH in config.mak before running make.) 
+endif + all: $(ALL_LIBS) $(ALL_TOOLS) +OBJ_DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_LIBS) $(ALL_TOOLS) $(OBJS) $(GENH) $(GENH_INT))) $(addprefix obj/, crt crt/$(ARCH) include)) + +$(ALL_LIBS) $(ALL_TOOLS) $(CRT_LIBS:lib/%=obj/crt/%) $(OBJS) $(LOBJS) $(GENH) $(GENH_INT): | $(OBJ_DIRS) + +$(OBJ_DIRS): + mkdir -p $@ + install: install-libs install-headers install-tools clean: - rm -f crt/*.o - rm -f $(OBJS) - rm -f $(LOBJS) - rm -f $(ALL_LIBS) lib/*.[ao] lib/*.so - rm -f $(ALL_TOOLS) - rm -f $(GENH) $(GENH_INT) - rm -f include/bits + rm -rf obj lib distclean: clean rm -f config.mak -include/bits: - @test "$(ARCH)" || { echo "Please set ARCH in config.mak before running make." ; exit 1 ; } - ln -sf ../arch/$(ARCH)/bits $@ +obj/include/bits/alltypes.h: $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in $(srcdir)/tools/mkalltypes.sed + sed -f $(srcdir)/tools/mkalltypes.sed $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in > $@ -include/bits/alltypes.h.in: include/bits +obj/src/internal/version.h: $(wildcard $(srcdir)/VERSION $(srcdir)/.git) + printf '#define VERSION "%s"\n' "$$(cd $(srcdir); sh tools/version.sh)" > $@ -include/bits/alltypes.h: include/bits/alltypes.h.in include/alltypes.h.in tools/mkalltypes.sed - sed -f tools/mkalltypes.sed include/bits/alltypes.h.in include/alltypes.h.in > $@ +obj/src/internal/version.o obj/src/internal/version.lo: obj/src/internal/version.h -src/internal/version.h: $(wildcard VERSION .git) - printf '#define VERSION "%s"\n' "$$(sh tools/version.sh)" > $@ +obj/crt/rcrt1.o obj/src/ldso/dlstart.lo obj/src/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h -src/internal/version.lo: src/internal/version.h +obj/crt/crt1.o obj/crt/scrt1.o obj/crt/rcrt1.o obj/src/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h -crt/rcrt1.o src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h +obj/crt/rcrt1.o: $(srcdir)/src/ldso/dlstart.c -crt/crt1.o crt/Scrt1.o crt/rcrt1.o src/ldso/dlstart.lo: $(wildcard arch/$(ARCH)/crt_arch.h) +obj/crt/Scrt1.o obj/crt/rcrt1.o: CFLAGS_ALL += -fPIC -crt/rcrt1.o: src/ldso/dlstart.c +obj/crt/$(ARCH)/crti.o: $(srcdir)/crt/$(ARCH)/crti.s -crt/Scrt1.o crt/rcrt1.o: CFLAGS += -fPIC +obj/crt/$(ARCH)/crtn.o: $(srcdir)/crt/$(ARCH)/crtn.s -OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=src/%)) -$(OPTIMIZE_SRCS:%.c=%.o) $(OPTIMIZE_SRCS:%.c=%.lo): CFLAGS += -O3 +OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=$(srcdir)/src/%)) +$(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.o) $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.lo): CFLAGS += -O3 MEMOPS_SRCS = src/string/memcpy.c src/string/memmove.c src/string/memcmp.c src/string/memset.c -$(MEMOPS_SRCS:%.c=%.o) $(MEMOPS_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_MEMOPS) +$(MEMOPS_SRCS:%.c=obj/%.o) $(MEMOPS_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_MEMOPS) NOSSP_SRCS = $(wildcard crt/*.c) \ src/env/__libc_start_main.c src/env/__init_tls.c \ src/thread/__set_thread_area.c src/env/__stack_chk_fail.c \ src/string/memset.c src/string/memcpy.c \ src/ldso/dlstart.c src/ldso/dynlink.c -$(NOSSP_SRCS:%.c=%.o) $(NOSSP_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_NOSSP) +$(NOSSP_SRCS:%.c=obj/%.o) $(NOSSP_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP) + +$(CRT_LIBS:lib/%=obj/crt/%): CFLAGS_ALL += -DCRT -$(CRT_LIBS:lib/%=crt/%): CFLAGS += -DCRT +$(LOBJS): CFLAGS_ALL += -fPIC -DSHARED -# This incantation ensures that changes to any subarch asm files will -# force the corresponding object file to be rebuilt, even if the implicit -# rule below goes indirectly 
through a .sub file. -define mkasmdep -$(dir $(patsubst %/,%,$(dir $(1))))$(notdir $(1:.s=.o)): $(1) -endef -$(foreach s,$(wildcard src/*/$(ARCH)*/*.s),$(eval $(call mkasmdep,$(s)))) +CC_CMD = $(CC) $(CFLAGS_ALL) -c -o $@ $< # Choose invocation of assembler to be used -# $(1) is input file, $(2) is output file, $(3) is assembler flags ifeq ($(ADD_CFI),yes) - AS_CMD = LC_ALL=C awk -f tools/add-cfi.common.awk -f tools/add-cfi.$(ARCH).awk $< | $(CC) -x assembler -c -o $@ - + AS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ - else - AS_CMD = $(CC) -c -o $@ $< + AS_CMD = $(CC_CMD) endif -%.o: $(ARCH)$(ASMSUBARCH)/%.sub - $(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $(dir $<)$(shell cat $<) +obj/%.o: $(srcdir)/%.s + $(AS_CMD) -%.o: $(ARCH)/%.s - $(AS_CMD) $(CFLAGS_ALL_STATIC) +obj/%.o: $(srcdir)/%.S + $(CC_CMD) -%.o: %.c $(GENH) $(IMPH) - $(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $< +obj/%.o: $(srcdir)/%.c $(GENH) $(IMPH) + $(CC_CMD) -%.lo: $(ARCH)$(ASMSUBARCH)/%.sub - $(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $(dir $<)$(shell cat $<) +obj/%.lo: $(srcdir)/%.s + $(AS_CMD) -%.lo: $(ARCH)/%.s - $(AS_CMD) $(CFLAGS_ALL_SHARED) +obj/%.lo: $(srcdir)/%.S + $(CC_CMD) -%.lo: %.c $(GENH) $(IMPH) - $(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $< +obj/%.lo: $(srcdir)/%.c $(GENH) $(IMPH) + $(CC_CMD) lib/libc.so: $(LOBJS) - $(CC) $(CFLAGS_ALL_SHARED) $(LDFLAGS) -nostdlib -shared \ + $(CC) $(CFLAGS_ALL) $(LDFLAGS_ALL) -nostdlib -shared \ -Wl,-e,_dlstart -Wl,-Bsymbolic-functions \ -o $@ $(LOBJS) $(LIBCC) @@ -159,21 +165,27 @@ $(EMPTY_LIBS): rm -f $@ $(AR) rc $@ -lib/%.o: crt/%.o +lib/%.o: obj/crt/%.o cp $< $@ -lib/musl-gcc.specs: tools/musl-gcc.specs.sh config.mak +lib/crti.o: obj/crt/$(ARCH)/crti.o + cp $< $@ + +lib/crtn.o: obj/crt/$(ARCH)/crtn.o + cp $< $@ + +lib/musl-gcc.specs: $(srcdir)/tools/musl-gcc.specs.sh config.mak sh $< "$(includedir)" "$(libdir)" "$(LDSO_PATHNAME)" > $@ -tools/musl-gcc: config.mak +obj/musl-gcc: config.mak printf '#!/bin/sh\nexec "$${REALGCC:-$(WRAPCC_GCC)}" "$$@" -specs "%s/musl-gcc.specs"\n' "$(libdir)" > $@ chmod +x $@ -tools/%-clang: tools/%-clang.in config.mak +obj/%-clang: $(srcdir)/tools/%-clang.in config.mak sed -e 's!@CC@!$(WRAPCC_CLANG)!g' -e 's!@PREFIX@!$(prefix)!g' -e 's!@INCDIR@!$(includedir)!g' -e 's!@LIBDIR@!$(libdir)!g' -e 's!@LDSO@!$(LDSO_PATHNAME)!g' $< > $@ chmod +x $@ -$(DESTDIR)$(bindir)/%: tools/% +$(DESTDIR)$(bindir)/%: obj/% $(INSTALL) -D $< $@ $(DESTDIR)$(libdir)/%.so: lib/%.so @@ -182,10 +194,13 @@ $(DESTDIR)$(libdir)/%.so: lib/%.so $(DESTDIR)$(libdir)/%: lib/% $(INSTALL) -D -m 644 $< $@ -$(DESTDIR)$(includedir)/bits/%: arch/$(ARCH)/bits/% +$(DESTDIR)$(includedir)/bits/%: $(srcdir)/arch/$(ARCH)/bits/% + $(INSTALL) -D -m 644 $< $@ + +$(DESTDIR)$(includedir)/bits/%: obj/include/bits/% $(INSTALL) -D -m 644 $< $@ -$(DESTDIR)$(includedir)/%: include/% +$(DESTDIR)$(includedir)/%: $(srcdir)/include/% $(INSTALL) -D -m 644 $< $@ $(DESTDIR)$(LDSO_PATHNAME): $(DESTDIR)$(libdir)/libc.so @@ -195,12 +210,12 @@ install-libs: $(ALL_LIBS:lib/%=$(DESTDIR install-headers: $(ALL_INCLUDES:include/%=$(DESTDIR)$(includedir)/%) -install-tools: $(ALL_TOOLS:tools/%=$(DESTDIR)$(bindir)/%) +install-tools: $(ALL_TOOLS:obj/%=$(DESTDIR)$(bindir)/%) musl-git-%.tar.gz: .git - git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@) + git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@) musl-%.tar.gz: 
.git - git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@) + git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@) .PHONY: all clean install install-libs install-headers install-tools --- a/arch/aarch64/atomic.h +++ /dev/null @@ -1,206 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_64(uint64_t x) -{ - __asm__( - " rbit %0, %1\n" - " clz %0, %0\n" - : "=r"(x) : "r"(x)); - return x; -} - -static inline int a_ctz_l(unsigned long x) -{ - return a_ctz_64(x); -} - -static inline void a_barrier() -{ - __asm__ __volatile__("dmb ish"); -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - void *old; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %0,%3\n" - " cmp %0,%1\n" - " b.ne 1f\n" - " stxr %w0,%2,%3\n" - " cbnz %w0,1b\n" - " mov %0,%1\n" - "1: dmb ish\n" - : "=&r"(old) - : "r"(t), "r"(s), "Q"(*(long*)p) - : "memory", "cc"); - return old; -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - int old; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %w0,%3\n" - " cmp %w0,%w1\n" - " b.ne 1f\n" - " stxr %w0,%w2,%3\n" - " cbnz %w0,1b\n" - " mov %w0,%w1\n" - "1: dmb ish\n" - : "=&r"(old) - : "r"(t), "r"(s), "Q"(*p) - : "memory", "cc"); - return old; -} - -static inline int a_swap(volatile int *x, int v) -{ - int old, tmp; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %w0,%3\n" - " stxr %w1,%w2,%3\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(old), "=&r"(tmp) - : "r"(v), "Q"(*x) - : "memory", "cc" ); - return old; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - int old, tmp; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %w0,%3\n" - " add %w0,%w0,%w2\n" - " stxr %w1,%w0,%3\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(old), "=&r"(tmp) - : "r"(v), "Q"(*x) - : "memory", "cc" ); - return old-v; -} - -static inline void a_inc(volatile int *x) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %w0,%2\n" - " add %w0,%w0,#1\n" - " stxr %w1,%w0,%2\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "Q"(*x) - : "memory", "cc" ); -} - -static inline void a_dec(volatile int *x) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %w0,%2\n" - " sub %w0,%w0,#1\n" - " stxr %w1,%w0,%2\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "Q"(*x) - : "memory", "cc" ); -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %0,%3\n" - " and %0,%0,%2\n" - " stxr %w1,%0,%3\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "r"(v), "Q"(*p) - : "memory", "cc" ); -} - -static inline void a_and(volatile int *p, int v) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %w0,%3\n" - " and %w0,%w0,%w2\n" - " stxr %w1,%w0,%3\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "r"(v), "Q"(*p) - : "memory", "cc" ); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldxr %0,%3\n" - " orr %0,%0,%2\n" - " stxr %w1,%0,%3\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "r"(v), "Q"(*p) - : "memory", "cc" ); -} - -static inline void a_or_l(volatile void *p, long v) -{ - return a_or_64(p, v); -} - -static inline void a_or(volatile int *p, int v) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb 
ish\n" - "1: ldxr %w0,%3\n" - " orr %w0,%w0,%w2\n" - " stxr %w1,%w0,%3\n" - " cbnz %w1,1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "r"(v), "Q"(*p) - : "memory", "cc" ); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__ __volatile__( - " dmb ish\n" - " str %w1,%0\n" - " dmb ish\n" - : "=m"(*p) - : "r"(x) - : "memory", "cc" ); -} - -#define a_spin a_barrier - -static inline void a_crash() -{ - *(volatile char *)0=0; -} - - -#endif --- /dev/null +++ b/arch/aarch64/atomic_arch.h @@ -0,0 +1,53 @@ +#define a_ll a_ll +static inline int a_ll(volatile int *p) +{ + int v; + __asm__ __volatile__ ("ldxr %0, %1" : "=r"(v) : "Q"(*p)); + return v; +} + +#define a_sc a_sc +static inline int a_sc(volatile int *p, int v) +{ + int r; + __asm__ __volatile__ ("stxr %w0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory"); + return !r; +} + +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__ ("dmb ish" : : : "memory"); +} + +#define a_pre_llsc a_barrier +#define a_post_llsc a_barrier + +#define a_cas_p a_cas_p +static inline void *a_cas_p(volatile void *p, void *t, void *s) +{ + void *old; + __asm__ __volatile__( + " dmb ish\n" + "1: ldxr %0,%3\n" + " cmp %0,%1\n" + " b.ne 1f\n" + " stxr %w0,%2,%3\n" + " cbnz %w0,1b\n" + " mov %0,%1\n" + "1: dmb ish\n" + : "=&r"(old) + : "r"(t), "r"(s), "Q"(*(void *volatile *)p) + : "memory", "cc"); + return old; +} + +#define a_ctz_64 a_ctz_64 +static inline int a_ctz_64(uint64_t x) +{ + __asm__( + " rbit %0, %1\n" + " clz %0, %0\n" + : "=r"(x) : "r"(x)); + return x; +} --- a/arch/aarch64/pthread_arch.h +++ b/arch/aarch64/pthread_arch.h @@ -8,4 +8,4 @@ static inline struct pthread *__pthread_ #define TLS_ABOVE_TP #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16) -#define CANCEL_REG_IP 33 +#define MC_PC pc --- a/arch/arm/atomic.h +++ /dev/null @@ -1,261 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_l(unsigned long x) -{ - static const char debruijn32[32] = { - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 - }; - return debruijn32[(x&-x)*0x076be629 >> 27]; -} - -static inline int a_ctz_64(uint64_t x) -{ - uint32_t y = x; - if (!y) { - y = x>>32; - return 32 + a_ctz_l(y); - } - return a_ctz_l(y); -} - -#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 - -static inline void a_barrier() -{ - __asm__ __volatile__("dmb ish"); -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - int old; - __asm__ __volatile__( - " dmb ish\n" - "1: ldrex %0,%3\n" - " cmp %0,%1\n" - " bne 1f\n" - " strex %0,%2,%3\n" - " cmp %0, #0\n" - " bne 1b\n" - " mov %0, %1\n" - "1: dmb ish\n" - : "=&r"(old) - : "r"(t), "r"(s), "Q"(*p) - : "memory", "cc" ); - return old; -} - -static inline int a_swap(volatile int *x, int v) -{ - int old, tmp; - __asm__ __volatile__( - " dmb ish\n" - "1: ldrex %0,%3\n" - " strex %1,%2,%3\n" - " cmp %1, #0\n" - " bne 1b\n" - " dmb ish\n" - : "=&r"(old), "=&r"(tmp) - : "r"(v), "Q"(*x) - : "memory", "cc" ); - return old; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - int old, tmp; - __asm__ __volatile__( - " dmb ish\n" - "1: ldrex %0,%3\n" - " add %0,%0,%2\n" - " strex %1,%0,%3\n" - " cmp %1, #0\n" - " bne 1b\n" - " dmb ish\n" - : "=&r"(old), "=&r"(tmp) - : "r"(v), "Q"(*x) - : "memory", "cc" ); - return old-v; -} - -static inline void a_inc(volatile int *x) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldrex %0,%2\n" - " add 
%0,%0,#1\n" - " strex %1,%0,%2\n" - " cmp %1, #0\n" - " bne 1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "Q"(*x) - : "memory", "cc" ); -} - -static inline void a_dec(volatile int *x) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldrex %0,%2\n" - " sub %0,%0,#1\n" - " strex %1,%0,%2\n" - " cmp %1, #0\n" - " bne 1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "Q"(*x) - : "memory", "cc" ); -} - -static inline void a_and(volatile int *x, int v) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldrex %0,%3\n" - " and %0,%0,%2\n" - " strex %1,%0,%3\n" - " cmp %1, #0\n" - " bne 1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "r"(v), "Q"(*x) - : "memory", "cc" ); -} - -static inline void a_or(volatile int *x, int v) -{ - int tmp, tmp2; - __asm__ __volatile__( - " dmb ish\n" - "1: ldrex %0,%3\n" - " orr %0,%0,%2\n" - " strex %1,%0,%3\n" - " cmp %1, #0\n" - " bne 1b\n" - " dmb ish\n" - : "=&r"(tmp), "=&r"(tmp2) - : "r"(v), "Q"(*x) - : "memory", "cc" ); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__ __volatile__( - " dmb ish\n" - " str %1,%0\n" - " dmb ish\n" - : "=m"(*p) - : "r"(x) - : "memory", "cc" ); -} - -#else - -int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden"))); -#define __k_cas __a_cas - -static inline void a_barrier() -{ - __asm__ __volatile__("bl __a_barrier" - : : : "memory", "cc", "ip", "lr" ); -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - int old; - for (;;) { - if (!__k_cas(t, s, p)) - return t; - if ((old=*p) != t) - return old; - } -} - -static inline int a_swap(volatile int *x, int v) -{ - int old; - do old = *x; - while (__k_cas(old, v, x)); - return old; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - int old; - do old = *x; - while (__k_cas(old, old+v, x)); - return old; -} - -static inline void a_inc(volatile int *x) -{ - a_fetch_add(x, 1); -} - -static inline void a_dec(volatile int *x) -{ - a_fetch_add(x, -1); -} - -static inline void a_store(volatile int *p, int x) -{ - a_barrier(); - *p = x; - a_barrier(); -} - -static inline void a_and(volatile int *p, int v) -{ - int old; - do old = *p; - while (__k_cas(old, old&v, p)); -} - -static inline void a_or(volatile int *p, int v) -{ - int old; - do old = *p; - while (__k_cas(old, old|v, p)); -} - -#endif - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - return (void *)a_cas(p, (int)t, (int)s); -} - -#define a_spin a_barrier - -static inline void a_crash() -{ - *(volatile char *)0=0; -} - -static inline void a_or_l(volatile void *p, long v) -{ - a_or(p, v); -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_and((int *)p, u.r[0]); - a_and((int *)p+1, u.r[1]); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_or((int *)p, u.r[0]); - a_or((int *)p+1, u.r[1]); -} - -#endif --- /dev/null +++ b/arch/arm/atomic_arch.h @@ -0,0 +1,64 @@ +__attribute__((__visibility__("hidden"))) +extern const void *__arm_atomics[3]; /* gettp, cas, barrier */ + +#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \ + || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 + +#define a_ll a_ll +static inline int a_ll(volatile int *p) +{ + int v; + __asm__ __volatile__ ("ldrex %0, %1" : "=r"(v) : "Q"(*p)); + return v; +} + +#define a_sc a_sc +static inline int a_sc(volatile int *p, int v) +{ + int r; + __asm__ __volatile__ 
("strex %0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory"); + return !r; +} + +#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 + +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__ ("dmb ish" : : : "memory"); +} + +#endif + +#define a_pre_llsc a_barrier +#define a_post_llsc a_barrier + +#else + +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + for (;;) { + register int r0 __asm__("r0") = t; + register int r1 __asm__("r1") = s; + register volatile int *r2 __asm__("r2") = p; + int old; + __asm__ __volatile__ ( + "bl __a_cas" + : "+r"(r0) : "r"(r1), "r"(r2) + : "memory", "r3", "lr", "ip", "cc" ); + if (!r0) return t; + if ((old=*p)!=t) return old; + } +} + +#endif + +#ifndef a_barrier +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__("bl __a_barrier" + : : : "memory", "cc", "ip", "lr" ); +} +#endif --- a/arch/arm/pthread_arch.h +++ b/arch/arm/pthread_arch.h @@ -27,4 +27,4 @@ static inline pthread_t __pthread_self() #define TLS_ABOVE_TP #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8) -#define CANCEL_REG_IP 18 +#define MC_PC arm_pc --- a/arch/arm/reloc.h +++ b/arch/arm/reloc.h @@ -6,10 +6,10 @@ #define ENDIAN_SUFFIX "" #endif -#if __SOFTFP__ -#define FP_SUFFIX "" -#else +#if __ARM_PCS_VFP #define FP_SUFFIX "hf" +#else +#define FP_SUFFIX "" #endif #define LDSO_ARCH "arm" ENDIAN_SUFFIX FP_SUFFIX @@ -28,10 +28,5 @@ #define REL_TPOFF R_ARM_TLS_TPOFF32 //#define REL_TLSDESC R_ARM_TLS_DESC -#ifdef __thumb__ #define CRTJMP(pc,sp) __asm__ __volatile__( \ "mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" ) -#else -#define CRTJMP(pc,sp) __asm__ __volatile__( \ - "mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" ) -#endif --- a/arch/arm/src/__aeabi_atexit.c +++ /dev/null @@ -1,6 +0,0 @@ -int __cxa_atexit(void (*func)(void *), void *arg, void *dso); - -int __aeabi_atexit (void *obj, void (*func) (void *), void *d) -{ - return __cxa_atexit (func, obj, d); -} --- a/arch/arm/src/__aeabi_memclr.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include "libc.h" - -void __aeabi_memclr(void *dest, size_t n) -{ - memset(dest, 0, n); -} -weak_alias(__aeabi_memclr, __aeabi_memclr4); -weak_alias(__aeabi_memclr, __aeabi_memclr8); --- a/arch/arm/src/__aeabi_memcpy.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include "libc.h" - -void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n) -{ - memcpy(dest, src, n); -} -weak_alias(__aeabi_memcpy, __aeabi_memcpy4); -weak_alias(__aeabi_memcpy, __aeabi_memcpy8); --- a/arch/arm/src/__aeabi_memmove.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include "libc.h" - -void __aeabi_memmove(void *dest, const void *src, size_t n) -{ - memmove(dest, src, n); -} -weak_alias(__aeabi_memmove, __aeabi_memmove4); -weak_alias(__aeabi_memmove, __aeabi_memmove8); --- a/arch/arm/src/__aeabi_memset.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include "libc.h" - -void __aeabi_memset(void *dest, size_t n, int c) -{ - memset(dest, c, n); -} -weak_alias(__aeabi_memset, __aeabi_memset4); -weak_alias(__aeabi_memset, __aeabi_memset8); --- a/arch/arm/src/__set_thread_area.c +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include -#include "pthread_impl.h" -#include "libc.h" - -#define HWCAP_TLS (1 << 15) - -extern const unsigned char __attribute__((__visibility__("hidden"))) - __a_barrier_dummy[], __a_barrier_oldkuser[], - __a_barrier_v6[], __a_barrier_v7[], - __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[], - __a_gettp_dummy[]; - -#define 
__a_barrier_kuser 0xffff0fa0 -#define __a_cas_kuser 0xffff0fc0 -#define __a_gettp_kuser 0xffff0fe0 - -extern uintptr_t __attribute__((__visibility__("hidden"))) - __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr; - -#define SET(op,ver) (__a_##op##_ptr = \ - (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy) - -int __set_thread_area(void *p) -{ -#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 - if (__hwcap & HWCAP_TLS) { - size_t *aux; - SET(cas, v7); - SET(barrier, v7); - for (aux=libc.auxv; *aux; aux+=2) { - if (*aux != AT_PLATFORM) continue; - const char *s = (void *)aux[1]; - if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break; - SET(cas, v6); - SET(barrier, v6); - break; - } - } else { - int ver = *(int *)0xffff0ffc; - SET(gettp, kuser); - SET(cas, kuser); - SET(barrier, kuser); - if (ver < 2) a_crash(); - if (ver < 3) SET(barrier, oldkuser); - } -#endif - return __syscall(0xf0005, p); -} --- a/arch/arm/src/arm/atomics.s +++ /dev/null @@ -1,116 +0,0 @@ -.text - -.global __a_barrier -.hidden __a_barrier -.type __a_barrier,%function -__a_barrier: - ldr ip,1f - ldr ip,[pc,ip] - add pc,pc,ip -1: .word __a_barrier_ptr-1b -.global __a_barrier_dummy -.hidden __a_barrier_dummy -__a_barrier_dummy: - tst lr,#1 - moveq pc,lr - bx lr -.global __a_barrier_oldkuser -.hidden __a_barrier_oldkuser -__a_barrier_oldkuser: - push {r0,r1,r2,r3,ip,lr} - mov r1,r0 - mov r2,sp - ldr ip,=0xffff0fc0 - mov lr,pc - mov pc,ip - pop {r0,r1,r2,r3,ip,lr} - tst lr,#1 - moveq pc,lr - bx lr -.global __a_barrier_v6 -.hidden __a_barrier_v6 -__a_barrier_v6: - mcr p15,0,r0,c7,c10,5 - bx lr -.global __a_barrier_v7 -.hidden __a_barrier_v7 -__a_barrier_v7: - .word 0xf57ff05b /* dmb ish */ - bx lr - -.global __a_cas -.hidden __a_cas -.type __a_cas,%function -__a_cas: - ldr ip,1f - ldr ip,[pc,ip] - add pc,pc,ip -1: .word __a_cas_ptr-1b -.global __a_cas_dummy -.hidden __a_cas_dummy -__a_cas_dummy: - mov r3,r0 - ldr r0,[r2] - subs r0,r3,r0 - streq r1,[r2] - tst lr,#1 - moveq pc,lr - bx lr -.global __a_cas_v6 -.hidden __a_cas_v6 -__a_cas_v6: - mov r3,r0 - mcr p15,0,r0,c7,c10,5 -1: .word 0xe1920f9f /* ldrex r0,[r2] */ - subs r0,r3,r0 - .word 0x01820f91 /* strexeq r0,r1,[r2] */ - teqeq r0,#1 - beq 1b - mcr p15,0,r0,c7,c10,5 - bx lr -.global __a_cas_v7 -.hidden __a_cas_v7 -__a_cas_v7: - mov r3,r0 - .word 0xf57ff05b /* dmb ish */ -1: .word 0xe1920f9f /* ldrex r0,[r2] */ - subs r0,r3,r0 - .word 0x01820f91 /* strexeq r0,r1,[r2] */ - teqeq r0,#1 - beq 1b - .word 0xf57ff05b /* dmb ish */ - bx lr - -.global __aeabi_read_tp -.type __aeabi_read_tp,%function -__aeabi_read_tp: - -.global __a_gettp -.hidden __a_gettp -.type __a_gettp,%function -__a_gettp: - ldr r0,1f - ldr r0,[pc,r0] - add pc,pc,r0 -1: .word __a_gettp_ptr-1b -.global __a_gettp_dummy -.hidden __a_gettp_dummy -__a_gettp_dummy: - mrc p15,0,r0,c13,c0,3 - bx lr - -.data -.global __a_barrier_ptr -.hidden __a_barrier_ptr -__a_barrier_ptr: - .word 0 - -.global __a_cas_ptr -.hidden __a_cas_ptr -__a_cas_ptr: - .word 0 - -.global __a_gettp_ptr -.hidden __a_gettp_ptr -__a_gettp_ptr: - .word 0 --- a/arch/arm/src/find_exidx.c +++ /dev/null @@ -1,42 +0,0 @@ -#define _GNU_SOURCE -#include -#include - -struct find_exidx_data { - uintptr_t pc, exidx_start; - int exidx_len; -}; - -static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr) -{ - struct find_exidx_data *data = ptr; - const ElfW(Phdr) *phdr = info->dlpi_phdr; - uintptr_t addr, exidx_start = 0; - int i, match = 0, exidx_len = 0; - - for (i = info->dlpi_phnum; i > 0; i--, phdr++) { - addr = info->dlpi_addr + 
phdr->p_vaddr; - switch (phdr->p_type) { - case PT_LOAD: - match |= data->pc >= addr && data->pc < addr + phdr->p_memsz; - break; - case PT_ARM_EXIDX: - exidx_start = addr; - exidx_len = phdr->p_memsz; - break; - } - } - data->exidx_start = exidx_start; - data->exidx_len = exidx_len; - return match; -} - -uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount) -{ - struct find_exidx_data data; - data.pc = pc; - if (dl_iterate_phdr(find_exidx, &data) <= 0) - return 0; - *pcount = data.exidx_len / 8; - return data.exidx_start; -} --- a/arch/i386/atomic.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_64(uint64_t x) -{ - int r; - __asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:" - : "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) ); - return r; -} - -static inline int a_ctz_l(unsigned long x) -{ - long r; - __asm__( "bsf %1,%0" : "=r"(r) : "r"(x) ); - return r; -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - __asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)" - : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - __asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)" - : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); -} - -static inline void a_or_l(volatile void *p, long v) -{ - __asm__( "lock ; orl %1, %0" - : "=m"(*(long *)p) : "r"(v) : "memory" ); -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - __asm__( "lock ; cmpxchg %3, %1" - : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); - return t; -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - __asm__( "lock ; cmpxchg %3, %1" - : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); - return t; -} - -static inline void a_or(volatile int *p, int v) -{ - __asm__( "lock ; orl %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_and(volatile int *p, int v) -{ - __asm__( "lock ; andl %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline int a_swap(volatile int *x, int v) -{ - __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); - return v; -} - -#define a_xchg a_swap - -static inline int a_fetch_add(volatile int *x, int v) -{ - __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); - return v; -} - -static inline void a_inc(volatile int *x) -{ - __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); -} - -static inline void a_dec(volatile int *x) -{ - __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" ); -} - -static inline void a_spin() -{ - __asm__ __volatile__( "pause" : : : "memory" ); -} - -static inline void a_barrier() -{ - __asm__ __volatile__( "" : : : "memory" ); -} - -static inline void a_crash() -{ - __asm__ __volatile__( "hlt" : : : "memory" ); -} - - -#endif --- /dev/null +++ b/arch/i386/atomic_arch.h @@ -0,0 +1,109 @@ +#define a_ctz_64 a_ctz_64 +static inline int a_ctz_64(uint64_t x) +{ + int r; + __asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:" + : "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) ); + return r; +} + +#define a_ctz_l a_ctz_l +static inline int a_ctz_l(unsigned long x) +{ + long r; + __asm__( "bsf %1,%0" : "=r"(r) : "r"(x) ); + return r; +} + +#define a_and_64 a_and_64 +static inline void a_and_64(volatile 
uint64_t *p, uint64_t v) +{ + __asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)" + : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); +} + +#define a_or_64 a_or_64 +static inline void a_or_64(volatile uint64_t *p, uint64_t v) +{ + __asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)" + : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" ); +} + +#define a_or_l a_or_l +static inline void a_or_l(volatile void *p, long v) +{ + __asm__( "lock ; orl %1, %0" + : "=m"(*(long *)p) : "r"(v) : "memory" ); +} + +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + __asm__( "lock ; cmpxchg %3, %1" + : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); + return t; +} + +#define a_or a_or +static inline void a_or(volatile int *p, int v) +{ + __asm__( "lock ; orl %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_and a_and +static inline void a_and(volatile int *p, int v) +{ + __asm__( "lock ; andl %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_swap a_swap +static inline int a_swap(volatile int *x, int v) +{ + __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); + return v; +} + +#define a_fetch_add a_fetch_add +static inline int a_fetch_add(volatile int *x, int v) +{ + __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); + return v; +} + +#define a_inc a_inc +static inline void a_inc(volatile int *x) +{ + __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); +} + +#define a_dec a_dec +static inline void a_dec(volatile int *x) +{ + __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); +} + +#define a_store a_store +static inline void a_store(volatile int *p, int x) +{ + __asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" ); +} + +#define a_spin a_spin +static inline void a_spin() +{ + __asm__ __volatile__( "pause" : : : "memory" ); +} + +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__( "" : : : "memory" ); +} + +#define a_crash a_crash +static inline void a_crash() +{ + __asm__ __volatile__( "hlt" : : : "memory" ); +} --- a/arch/i386/bits/alltypes.h.in +++ b/arch/i386/bits/alltypes.h.in @@ -26,10 +26,12 @@ TYPEDEF long double float_t; TYPEDEF long double double_t; #endif -#ifdef __cplusplus -TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t; -#else +#if !defined(__cplusplus) TYPEDEF struct { _Alignas(8) long long __ll; long double __ld; } max_align_t; +#elif defined(__GNUC__) +TYPEDEF struct { __attribute__((__aligned__(8))) long long __ll; long double __ld; } max_align_t; +#else +TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t; #endif TYPEDEF long time_t; --- a/arch/i386/pthread_arch.h +++ b/arch/i386/pthread_arch.h @@ -7,4 +7,4 @@ static inline struct pthread *__pthread_ #define TP_ADJ(p) (p) -#define CANCEL_REG_IP 14 +#define MC_PC gregs[REG_EIP] --- a/arch/microblaze/atomic.h +++ /dev/null @@ -1,143 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_l(unsigned long x) -{ - static const char debruijn32[32] = { - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 - }; - return debruijn32[(x&-x)*0x076be629 >> 27]; -} - -static inline int a_ctz_64(uint64_t x) -{ - uint32_t y = x; - if (!y) { - y = x>>32; - return 32 + a_ctz_l(y); - } - return a_ctz_l(y); -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - register 
int old, tmp; - __asm__ __volatile__ ( - " addi %0, r0, 0\n" - "1: lwx %0, %2, r0\n" - " rsubk %1, %0, %3\n" - " bnei %1, 1f\n" - " swx %4, %2, r0\n" - " addic %1, r0, 0\n" - " bnei %1, 1b\n" - "1: " - : "=&r"(old), "=&r"(tmp) - : "r"(p), "r"(t), "r"(s) - : "cc", "memory" ); - return old; -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - return (void *)a_cas(p, (int)t, (int)s); -} - -static inline int a_swap(volatile int *x, int v) -{ - register int old, tmp; - __asm__ __volatile__ ( - " addi %0, r0, 0\n" - "1: lwx %0, %2, r0\n" - " swx %3, %2, r0\n" - " addic %1, r0, 0\n" - " bnei %1, 1b\n" - "1: " - : "=&r"(old), "=&r"(tmp) - : "r"(x), "r"(v) - : "cc", "memory" ); - return old; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - register int new, tmp; - __asm__ __volatile__ ( - " addi %0, r0, 0\n" - "1: lwx %0, %2, r0\n" - " addk %0, %0, %3\n" - " swx %0, %2, r0\n" - " addic %1, r0, 0\n" - " bnei %1, 1b\n" - "1: " - : "=&r"(new), "=&r"(tmp) - : "r"(x), "r"(v) - : "cc", "memory" ); - return new-v; -} - -static inline void a_inc(volatile int *x) -{ - a_fetch_add(x, 1); -} - -static inline void a_dec(volatile int *x) -{ - a_fetch_add(x, -1); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__ __volatile__ ( - "swi %1, %0" - : "=m"(*p) : "r"(x) : "memory" ); -} - -#define a_spin a_barrier - -static inline void a_barrier() -{ - a_cas(&(int){0}, 0, 0); -} - -static inline void a_crash() -{ - *(volatile char *)0=0; -} - -static inline void a_and(volatile int *p, int v) -{ - int old; - do old = *p; - while (a_cas(p, old, old&v) != old); -} - -static inline void a_or(volatile int *p, int v) -{ - int old; - do old = *p; - while (a_cas(p, old, old|v) != old); -} - -static inline void a_or_l(volatile void *p, long v) -{ - a_or(p, v); -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_and((int *)p, u.r[0]); - a_and((int *)p+1, u.r[1]); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_or((int *)p, u.r[0]); - a_or((int *)p+1, u.r[1]); -} - -#endif --- /dev/null +++ b/arch/microblaze/atomic_arch.h @@ -0,0 +1,53 @@ +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + register int old, tmp; + __asm__ __volatile__ ( + " addi %0, r0, 0\n" + "1: lwx %0, %2, r0\n" + " rsubk %1, %0, %3\n" + " bnei %1, 1f\n" + " swx %4, %2, r0\n" + " addic %1, r0, 0\n" + " bnei %1, 1b\n" + "1: " + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(t), "r"(s) + : "cc", "memory" ); + return old; +} + +#define a_swap a_swap +static inline int a_swap(volatile int *x, int v) +{ + register int old, tmp; + __asm__ __volatile__ ( + " addi %0, r0, 0\n" + "1: lwx %0, %2, r0\n" + " swx %3, %2, r0\n" + " addic %1, r0, 0\n" + " bnei %1, 1b\n" + "1: " + : "=&r"(old), "=&r"(tmp) + : "r"(x), "r"(v) + : "cc", "memory" ); + return old; +} + +#define a_fetch_add a_fetch_add +static inline int a_fetch_add(volatile int *x, int v) +{ + register int new, tmp; + __asm__ __volatile__ ( + " addi %0, r0, 0\n" + "1: lwx %0, %2, r0\n" + " addk %0, %0, %3\n" + " swx %0, %2, r0\n" + " addic %1, r0, 0\n" + " bnei %1, 1b\n" + "1: " + : "=&r"(new), "=&r"(tmp) + : "r"(x), "r"(v) + : "cc", "memory" ); + return new-v; +} --- a/arch/microblaze/pthread_arch.h +++ b/arch/microblaze/pthread_arch.h @@ -7,4 +7,4 @@ static inline struct pthread *__pthread_ #define TP_ADJ(p) (p) -#define CANCEL_REG_IP 32 +#define MC_PC regs.pc --- a/arch/mips/atomic.h 
+++ /dev/null @@ -1,205 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_l(unsigned long x) -{ - static const char debruijn32[32] = { - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 - }; - return debruijn32[(x&-x)*0x076be629 >> 27]; -} - -static inline int a_ctz_64(uint64_t x) -{ - uint32_t y = x; - if (!y) { - y = x>>32; - return 32 + a_ctz_l(y); - } - return a_ctz_l(y); -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - int dummy; - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - "1: ll %0, %2\n" - " bne %0, %3, 1f\n" - " addu %1, %4, $0\n" - " sc %1, %2\n" - " beq %1, $0, 1b\n" - " nop\n" - " sync\n" - "1: \n" - ".set pop\n" - : "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" ); - return t; -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - return (void *)a_cas(p, (int)t, (int)s); -} - -static inline int a_swap(volatile int *x, int v) -{ - int old, dummy; - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - "1: ll %0, %2\n" - " addu %1, %3, $0\n" - " sc %1, %2\n" - " beq %1, $0, 1b\n" - " nop\n" - " sync\n" - ".set pop\n" - : "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" ); - return old; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - int old, dummy; - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - "1: ll %0, %2\n" - " addu %1, %0, %3\n" - " sc %1, %2\n" - " beq %1, $0, 1b\n" - " nop\n" - " sync\n" - ".set pop\n" - : "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" ); - return old; -} - -static inline void a_inc(volatile int *x) -{ - int dummy; - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - "1: ll %0, %1\n" - " addu %0, %0, 1\n" - " sc %0, %1\n" - " beq %0, $0, 1b\n" - " nop\n" - " sync\n" - ".set pop\n" - : "=&r"(dummy), "+m"(*x) : : "memory" ); -} - -static inline void a_dec(volatile int *x) -{ - int dummy; - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - "1: ll %0, %1\n" - " subu %0, %0, 1\n" - " sc %0, %1\n" - " beq %0, $0, 1b\n" - " nop\n" - " sync\n" - ".set pop\n" - : "=&r"(dummy), "+m"(*x) : : "memory" ); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - " sw %1, %0\n" - " sync\n" - ".set pop\n" - : "+m"(*p) : "r"(x) : "memory" ); -} - -#define a_spin a_barrier - -static inline void a_barrier() -{ - a_cas(&(int){0}, 0, 0); -} - -static inline void a_crash() -{ - *(volatile char *)0=0; -} - -static inline void a_and(volatile int *p, int v) -{ - int dummy; - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - "1: ll %0, %1\n" - " and %0, %0, %2\n" - " sc %0, %1\n" - " beq %0, $0, 1b\n" - " nop\n" - " sync\n" - ".set pop\n" - : "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_or(volatile int *p, int v) -{ - int dummy; - __asm__ __volatile__( - ".set push\n" - ".set mips2\n" - ".set noreorder\n" - " sync\n" - "1: ll %0, %1\n" - " or %0, %0, %2\n" - " sc %0, %1\n" - " beq %0, $0, 1b\n" - " nop\n" - " sync\n" - ".set pop\n" - : "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_or_l(volatile void *p, long v) -{ - a_or(p, v); -} - -static inline void a_and_64(volatile uint64_t *p, 
uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_and((int *)p, u.r[0]); - a_and((int *)p+1, u.r[1]); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_or((int *)p, u.r[0]); - a_or((int *)p+1, u.r[1]); -} - -#endif --- /dev/null +++ b/arch/mips/atomic_arch.h @@ -0,0 +1,39 @@ +#define a_ll a_ll +static inline int a_ll(volatile int *p) +{ + int v; + __asm__ __volatile__ ( + ".set push ; .set mips2\n\t" + "ll %0, %1" + "\n\t.set pop" + : "=r"(v) : "m"(*p)); + return v; +} + +#define a_sc a_sc +static inline int a_sc(volatile int *p, int v) +{ + int r; + __asm__ __volatile__ ( + ".set push ; .set mips2\n\t" + "sc %0, %1" + "\n\t.set pop" + : "=r"(r), "=m"(*p) : "0"(v) : "memory"); + return r; +} + +#define a_barrier a_barrier +static inline void a_barrier() +{ + /* mips2 sync, but using too many directives causes + * gcc not to inline it, so encode with .long instead. */ + __asm__ __volatile__ (".long 0xf" : : : "memory"); +#if 0 + __asm__ __volatile__ ( + ".set push ; .set mips2 ; sync ; .set pop" + : : : "memory"); +#endif +} + +#define a_pre_llsc a_barrier +#define a_post_llsc a_barrier --- a/arch/mips/crt_arch.h +++ b/arch/mips/crt_arch.h @@ -4,13 +4,16 @@ __asm__( ".text \n" ".global _" START "\n" ".global " START "\n" +".global " START "_data\n" ".type _" START ", @function\n" ".type " START ", @function\n" +".type " START "_data, @function\n" "_" START ":\n" "" START ":\n" " bal 1f \n" " move $fp, $0 \n" -"2: .gpword 2b \n" +"" START "_data: \n" +" .gpword " START "_data \n" " .gpword " START "_c \n" ".weak _DYNAMIC \n" ".hidden _DYNAMIC \n" --- a/arch/mips/pthread_arch.h +++ b/arch/mips/pthread_arch.h @@ -16,4 +16,4 @@ static inline struct pthread *__pthread_ #define DTP_OFFSET 0x8000 -#define CANCEL_REG_IP (3-(union {int __i; char __b;}){1}.__b) +#define MC_PC pc --- a/arch/mips/syscall_arch.h +++ b/arch/mips/syscall_arch.h @@ -3,9 +3,7 @@ ((union { long long ll; long l[2]; }){ .ll = x }).l[1] #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x)) -#ifdef SHARED __attribute__((visibility("hidden"))) -#endif long (__syscall)(long, ...); #define SYSCALL_RLIM_INFINITY (-1UL/2) --- a/arch/or1k/atomic.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_l(unsigned long x) -{ - static const char debruijn32[32] = { - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 - }; - return debruijn32[(x&-x)*0x076be629 >> 27]; -} - -static inline int a_ctz_64(uint64_t x) -{ - uint32_t y = x; - if (!y) { - y = x>>32; - return 32 + a_ctz_l(y); - } - return a_ctz_l(y); -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - __asm__("1: l.lwa %0, %1\n" - " l.sfeq %0, %2\n" - " l.bnf 1f\n" - " l.nop\n" - " l.swa %1, %3\n" - " l.bnf 1b\n" - " l.nop\n" - "1: \n" - : "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" ); - return t; -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - return (void *)a_cas(p, (int)t, (int)s); -} - -static inline int a_swap(volatile int *x, int v) -{ - int old; - do old = *x; - while (a_cas(x, old, v) != old); - return old; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - int old; - do old = *x; - while (a_cas(x, old, old+v) != old); - return old; -} - -static inline void a_inc(volatile int *x) -{ - a_fetch_add(x, 1); -} - -static inline void a_dec(volatile int *x) -{ - a_fetch_add(x, -1); -} - -static 
inline void a_store(volatile int *p, int x) -{ - a_swap(p, x); -} - -#define a_spin a_barrier - -static inline void a_barrier() -{ - a_cas(&(int){0}, 0, 0); -} - -static inline void a_crash() -{ - *(volatile char *)0=0; -} - -static inline void a_and(volatile int *p, int v) -{ - int old; - do old = *p; - while (a_cas(p, old, old&v) != old); -} - -static inline void a_or(volatile int *p, int v) -{ - int old; - do old = *p; - while (a_cas(p, old, old|v) != old); -} - -static inline void a_or_l(volatile void *p, long v) -{ - a_or(p, v); -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_and((int *)p, u.r[0]); - a_and((int *)p+1, u.r[1]); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_or((int *)p, u.r[0]); - a_or((int *)p+1, u.r[1]); -} - -#endif --- /dev/null +++ b/arch/or1k/atomic_arch.h @@ -0,0 +1,14 @@ +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + __asm__("1: l.lwa %0, %1\n" + " l.sfeq %0, %2\n" + " l.bnf 1f\n" + " l.nop\n" + " l.swa %1, %3\n" + " l.bnf 1b\n" + " l.nop\n" + "1: \n" + : "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" ); + return t; +} --- a/arch/or1k/pthread_arch.h +++ b/arch/or1k/pthread_arch.h @@ -14,5 +14,4 @@ static inline struct pthread *__pthread_ #define TLS_ABOVE_TP #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread)) -/* word-offset to 'pc' in mcontext_t */ -#define CANCEL_REG_IP 32 +#define MC_PC regs.pc --- a/arch/powerpc/atomic.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include -#include - -static inline int a_ctz_l(unsigned long x) -{ - static const char debruijn32[32] = { - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 - }; - return debruijn32[(x&-x)*0x076be629 >> 27]; -} - -static inline int a_ctz_64(uint64_t x) -{ - uint32_t y = x; - if (!y) { - y = x>>32; - return 32 + a_ctz_l(y); - } - return a_ctz_l(y); -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - __asm__("\n" - " sync\n" - "1: lwarx %0, 0, %4\n" - " cmpw %0, %2\n" - " bne 1f\n" - " stwcx. 
%3, 0, %4\n" - " bne- 1b\n" - " isync\n" - "1: \n" - : "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" ); - return t; -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - return (void *)a_cas(p, (int)t, (int)s); -} - -static inline int a_swap(volatile int *x, int v) -{ - int old; - do old = *x; - while (a_cas(x, old, v) != old); - return old; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - int old; - do old = *x; - while (a_cas(x, old, old+v) != old); - return old; -} - -static inline void a_inc(volatile int *x) -{ - a_fetch_add(x, 1); -} - -static inline void a_dec(volatile int *x) -{ - a_fetch_add(x, -1); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__ __volatile__ ("\n" - " sync\n" - " stw %1, %0\n" - " isync\n" - : "=m"(*p) : "r"(x) : "memory" ); -} - -#define a_spin a_barrier - -static inline void a_barrier() -{ - a_cas(&(int){0}, 0, 0); -} - -static inline void a_crash() -{ - *(volatile char *)0=0; -} - -static inline void a_and(volatile int *p, int v) -{ - int old; - do old = *p; - while (a_cas(p, old, old&v) != old); -} - -static inline void a_or(volatile int *p, int v) -{ - int old; - do old = *p; - while (a_cas(p, old, old|v) != old); -} - -static inline void a_or_l(volatile void *p, long v) -{ - a_or(p, v); -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_and((int *)p, u.r[0]); - a_and((int *)p+1, u.r[1]); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_or((int *)p, u.r[0]); - a_or((int *)p+1, u.r[1]); -} - -#endif --- /dev/null +++ b/arch/powerpc/atomic_arch.h @@ -0,0 +1,15 @@ +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + __asm__("\n" + " sync\n" + "1: lwarx %0, 0, %4\n" + " cmpw %0, %2\n" + " bne 1f\n" + " stwcx. %3, 0, %4\n" + " bne- 1b\n" + " isync\n" + "1: \n" + : "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" ); + return t; +} --- a/arch/powerpc/pthread_arch.h +++ b/arch/powerpc/pthread_arch.h @@ -15,9 +15,8 @@ static inline struct pthread *__pthread_ #define DTP_OFFSET 0x8000 -// offset of the PC register in mcontext_t, divided by the system wordsize // the kernel calls the ip "nip", it's the first saved value after the 32 // GPRs. 
-#define CANCEL_REG_IP 32 +#define MC_PC gregs[32] #define CANARY canary_at_end --- a/arch/sh/atomic.h +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_l(unsigned long x) -{ - static const char debruijn32[32] = { - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 - }; - return debruijn32[(x&-x)*0x076be629 >> 27]; -} - -static inline int a_ctz_64(uint64_t x) -{ - uint32_t y = x; - if (!y) { - y = x>>32; - return 32 + a_ctz_l(y); - } - return a_ctz_l(y); -} - -#define LLSC_CLOBBERS "r0", "t", "memory" -#define LLSC_START(mem) "synco\n" \ - "0: movli.l @" mem ", r0\n" -#define LLSC_END(mem) \ - "1: movco.l r0, @" mem "\n" \ - " bf 0b\n" \ - " synco\n" - -static inline int __sh_cas_llsc(volatile int *p, int t, int s) -{ - int old; - __asm__ __volatile__( - LLSC_START("%1") - " mov r0, %0\n" - " cmp/eq %0, %2\n" - " bf 1f\n" - " mov %3, r0\n" - LLSC_END("%1") - : "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS); - return old; -} - -static inline int __sh_swap_llsc(volatile int *x, int v) -{ - int old; - __asm__ __volatile__( - LLSC_START("%1") - " mov r0, %0\n" - " mov %2, r0\n" - LLSC_END("%1") - : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS); - return old; -} - -static inline int __sh_fetch_add_llsc(volatile int *x, int v) -{ - int old; - __asm__ __volatile__( - LLSC_START("%1") - " mov r0, %0\n" - " add %2, r0\n" - LLSC_END("%1") - : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS); - return old; -} - -static inline void __sh_store_llsc(volatile int *p, int x) -{ - __asm__ __volatile__( - " synco\n" - " mov.l %1, @%0\n" - " synco\n" - : : "r"(p), "r"(x) : "memory"); -} - -static inline void __sh_and_llsc(volatile int *x, int v) -{ - __asm__ __volatile__( - LLSC_START("%0") - " and %1, r0\n" - LLSC_END("%0") - : : "r"(x), "r"(v) : LLSC_CLOBBERS); -} - -static inline void __sh_or_llsc(volatile int *x, int v) -{ - __asm__ __volatile__( - LLSC_START("%0") - " or %1, r0\n" - LLSC_END("%0") - : : "r"(x), "r"(v) : LLSC_CLOBBERS); -} - -#ifdef __SH4A__ -#define a_cas(p,t,s) __sh_cas_llsc(p,t,s) -#define a_swap(x,v) __sh_swap_llsc(x,v) -#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v) -#define a_store(x,v) __sh_store_llsc(x, v) -#define a_and(x,v) __sh_and_llsc(x, v) -#define a_or(x,v) __sh_or_llsc(x, v) -#else - -int __sh_cas(volatile int *, int, int); -int __sh_swap(volatile int *, int); -int __sh_fetch_add(volatile int *, int); -void __sh_store(volatile int *, int); -void __sh_and(volatile int *, int); -void __sh_or(volatile int *, int); - -#define a_cas(p,t,s) __sh_cas(p,t,s) -#define a_swap(x,v) __sh_swap(x,v) -#define a_fetch_add(x,v) __sh_fetch_add(x, v) -#define a_store(x,v) __sh_store(x, v) -#define a_and(x,v) __sh_and(x, v) -#define a_or(x,v) __sh_or(x, v) -#endif - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - return (void *)a_cas(p, (int)t, (int)s); -} - -static inline void a_inc(volatile int *x) -{ - a_fetch_add(x, 1); -} - -static inline void a_dec(volatile int *x) -{ - a_fetch_add(x, -1); -} - -#define a_spin a_barrier - -static inline void a_barrier() -{ - a_cas(&(int){0}, 0, 0); -} - -static inline void a_crash() -{ - *(volatile char *)0=0; -} - -static inline void a_or_l(volatile void *p, long v) -{ - a_or(p, v); -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_and((int *)p, u.r[0]); - a_and((int *)p+1, u.r[1]); -} - -static inline void 
a_or_64(volatile uint64_t *p, uint64_t v) -{ - union { uint64_t v; uint32_t r[2]; } u = { v }; - a_or((int *)p, u.r[0]); - a_or((int *)p+1, u.r[1]); -} - -#endif --- /dev/null +++ b/arch/sh/atomic_arch.h @@ -0,0 +1,46 @@ +#if defined(__SH4A__) + +#define a_ll a_ll +static inline int a_ll(volatile int *p) +{ + int v; + __asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p)); + return v; +} + +#define a_sc a_sc +static inline int a_sc(volatile int *p, int v) +{ + int r; + __asm__ __volatile__ ( + "movco.l %2, @%3 ; movt %0" + : "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc"); + return r; +} + +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__ ("synco" : : "memory"); +} + +#define a_pre_llsc a_barrier +#define a_post_llsc a_barrier + +#else + +#define a_cas a_cas +__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr; +static inline int a_cas(volatile int *p, int t, int s) +{ + register int r1 __asm__("r1"); + register int r2 __asm__("r2") = t; + register int r3 __asm__("r3") = s; + __asm__ __volatile__ ( + "jsr @%4 ; nop" + : "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr) + : "memory", "pr", "cc"); + return r3; +} + +#endif --- a/arch/sh/crt_arch.h +++ b/arch/sh/crt_arch.h @@ -22,7 +22,8 @@ START ": \n" " mov.l 1f, r5 \n" " mov.l 1f+4, r6 \n" " add r0, r5 \n" -" bsr __fdpic_fixup \n" +" mov.l 4f, r1 \n" +"5: bsrf r1 \n" " add r0, r6 \n" " mov r0, r12 \n" #endif @@ -31,11 +32,16 @@ START ": \n" " mov.l r9, @-r15 \n" " mov.l r8, @-r15 \n" " mov #-16, r0 \n" -" bsr " START "_c \n" +" mov.l 2f, r1 \n" +"3: bsrf r1 \n" " and r0, r15 \n" ".align 2 \n" "1: .long __ROFIXUP_LIST__@PCREL \n" " .long __ROFIXUP_END__@PCREL + 4 \n" +"2: .long " START "_c@PCREL - (3b+4-.) \n" +#ifndef SHARED +"4: .long __fdpic_fixup@PCREL - (5b+4-.) \n" +#endif ); #ifndef SHARED @@ -53,13 +59,14 @@ START ": \n" " add r0, r5 \n" " mov r15, r4 \n" " mov #-16, r0 \n" -" and r0, r15 \n" -" bsr " START "_c \n" -" nop \n" +" mov.l 2f, r1 \n" +"3: bsrf r1 \n" +" and r0, r15 \n" ".align 2 \n" ".weak _DYNAMIC \n" ".hidden _DYNAMIC \n" "1: .long _DYNAMIC-. \n" +"2: .long " START "_c@PCREL - (3b+4-.) 
\n" ); #endif --- a/arch/sh/pthread_arch.h +++ b/arch/sh/pthread_arch.h @@ -8,4 +8,4 @@ static inline struct pthread *__pthread_ #define TLS_ABOVE_TP #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8) -#define CANCEL_REG_IP 17 +#define MC_PC sc_pc --- a/arch/sh/reloc.h +++ b/arch/sh/reloc.h @@ -32,6 +32,8 @@ #define REL_DTPOFF R_SH_TLS_DTPOFF32 #define REL_TPOFF R_SH_TLS_TPOFF32 +#define DL_NOMMU_SUPPORT 1 + #if __SH_FDPIC__ #define REL_FUNCDESC R_SH_FUNCDESC #define REL_FUNCDESC_VAL R_SH_FUNCDESC_VALUE --- a/arch/sh/src/__set_thread_area.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "pthread_impl.h" -#include "libc.h" -#include "sh_atomic.h" -#include - -/* Also perform sh-specific init */ - -#define CPU_HAS_LLSC 0x0040 - -__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu; - -int __set_thread_area(void *p) -{ - size_t *aux; - __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" ); -#ifndef __SH4A__ - if (__hwcap & CPU_HAS_LLSC) { - __sh_atomic_model = SH_A_LLSC; - return 0; - } -#if !defined(__SH3__) && !defined(__SH4__) - for (aux=libc.auxv; *aux; aux+=2) { - if (*aux != AT_PLATFORM) continue; - const char *s = (void *)aux[1]; - if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break; - __sh_atomic_model = SH_A_IMASK; - __sh_nommu = 1; - return 0; - } -#endif - /* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */ -#endif - return 0; -} --- a/arch/sh/src/atomic.c +++ /dev/null @@ -1,158 +0,0 @@ -#ifndef __SH4A__ - -#include "sh_atomic.h" -#include "atomic.h" -#include "libc.h" - -static inline unsigned mask() -{ - unsigned sr; - __asm__ __volatile__ ( "\n" - " stc sr,r0 \n" - " mov r0,%0 \n" - " or #0xf0,r0 \n" - " ldc r0,sr \n" - : "=&r"(sr) : : "memory", "r0" ); - return sr; -} - -static inline void unmask(unsigned sr) -{ - __asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" ); -} - -/* gusa is a hack in the kernel which lets you create a sequence of instructions - * which will be restarted if the process is preempted in the middle of the - * sequence. It will do for implementing atomics on non-smp systems. 
ABI is: - * r0 = address of first instruction after the atomic sequence - * r1 = original stack pointer - * r15 = -1 * length of atomic sequence in bytes - */ -#define GUSA_CLOBBERS "r0", "r1", "memory" -#define GUSA_START(mem,old,nop) \ - " .align 2\n" \ - " mova 1f, r0\n" \ - nop \ - " mov r15, r1\n" \ - " mov #(0f-1f), r15\n" \ - "0: mov.l @" mem ", " old "\n" -/* the target of mova must be 4 byte aligned, so we may need a nop */ -#define GUSA_START_ODD(mem,old) GUSA_START(mem,old,"") -#define GUSA_START_EVEN(mem,old) GUSA_START(mem,old,"\tnop\n") -#define GUSA_END(mem,new) \ - " mov.l " new ", @" mem "\n" \ - "1: mov r1, r15\n" - -int __sh_cas(volatile int *p, int t, int s) -{ - if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s); - - if (__sh_atomic_model == SH_A_IMASK) { - unsigned sr = mask(); - int old = *p; - if (old==t) *p = s; - unmask(sr); - return old; - } - - int old; - __asm__ __volatile__( - GUSA_START_EVEN("%1", "%0") - " cmp/eq %0, %2\n" - " bf 1f\n" - GUSA_END("%1", "%3") - : "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t"); - return old; -} - -int __sh_swap(volatile int *x, int v) -{ - if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v); - - if (__sh_atomic_model == SH_A_IMASK) { - unsigned sr = mask(); - int old = *x; - *x = v; - unmask(sr); - return old; - } - - int old; - __asm__ __volatile__( - GUSA_START_EVEN("%1", "%0") - GUSA_END("%1", "%2") - : "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS); - return old; -} - -int __sh_fetch_add(volatile int *x, int v) -{ - if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v); - - if (__sh_atomic_model == SH_A_IMASK) { - unsigned sr = mask(); - int old = *x; - *x = old + v; - unmask(sr); - return old; - } - - int old, dummy; - __asm__ __volatile__( - GUSA_START_EVEN("%2", "%0") - " mov %0, %1\n" - " add %3, %1\n" - GUSA_END("%2", "%1") - : "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); - return old; -} - -void __sh_store(volatile int *p, int x) -{ - if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x); - __asm__ __volatile__( - " mov.l %1, @%0\n" - : : "r"(p), "r"(x) : "memory"); -} - -void __sh_and(volatile int *x, int v) -{ - if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v); - - if (__sh_atomic_model == SH_A_IMASK) { - unsigned sr = mask(); - int old = *x; - *x = old & v; - unmask(sr); - return; - } - - int dummy; - __asm__ __volatile__( - GUSA_START_ODD("%1", "%0") - " and %2, %0\n" - GUSA_END("%1", "%0") - : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); -} - -void __sh_or(volatile int *x, int v) -{ - if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v); - - if (__sh_atomic_model == SH_A_IMASK) { - unsigned sr = mask(); - int old = *x; - *x = old | v; - unmask(sr); - return; - } - - int dummy; - __asm__ __volatile__( - GUSA_START_ODD("%1", "%0") - " or %2, %0\n" - GUSA_END("%1", "%0") - : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS); -} - -#endif --- a/arch/sh/src/sh_atomic.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _SH_ATOMIC_H -#define _SH_ATOMIC_H - -#define SH_A_GUSA 0 -#define SH_A_LLSC 1 -#define SH_A_CAS 2 -#if !defined(__SH3__) && !defined(__SH4__) -#define SH_A_IMASK 3 -#else -#define SH_A_IMASK -1LL /* unmatchable by unsigned int */ -#endif - -extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model; - -#endif --- a/arch/x32/atomic.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_64(uint64_t x) -{ - __asm__( "bsf %1,%0" 
: "=r"(x) : "r"(x) ); - return x; -} - -static inline int a_ctz_l(unsigned long x) -{ - __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); - return x; -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - __asm__( "lock ; and %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - __asm__( "lock ; or %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_or_l(volatile void *p, long v) -{ - __asm__( "lock ; or %1, %0" - : "=m"(*(long *)p) : "r"(v) : "memory" ); -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - __asm__( "lock ; cmpxchg %3, %1" - : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); - return t; -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - __asm__( "lock ; cmpxchg %3, %1" - : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); - return t; -} - -static inline void a_or(volatile int *p, int v) -{ - __asm__( "lock ; or %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_and(volatile int *p, int v) -{ - __asm__( "lock ; and %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline int a_swap(volatile int *x, int v) -{ - __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); - return v; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); - return v; -} - -static inline void a_inc(volatile int *x) -{ - __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); -} - -static inline void a_dec(volatile int *x) -{ - __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); -} - -static inline void a_spin() -{ - __asm__ __volatile__( "pause" : : : "memory" ); -} - -static inline void a_barrier() -{ - __asm__ __volatile__( "" : : : "memory" ); -} - -static inline void a_crash() -{ - __asm__ __volatile__( "hlt" : : : "memory" ); -} - - -#endif --- /dev/null +++ b/arch/x32/atomic_arch.h @@ -0,0 +1,106 @@ +#define a_ctz_64 a_ctz_64 +static inline int a_ctz_64(uint64_t x) +{ + __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); + return x; +} + +#define a_ctz_l a_ctz_l +static inline int a_ctz_l(unsigned long x) +{ + __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); + return x; +} + +#define a_and_64 a_and_64 +static inline void a_and_64(volatile uint64_t *p, uint64_t v) +{ + __asm__( "lock ; and %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_or_64 a_or_64 +static inline void a_or_64(volatile uint64_t *p, uint64_t v) +{ + __asm__( "lock ; or %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_or_l a_or_l +static inline void a_or_l(volatile void *p, long v) +{ + __asm__( "lock ; or %1, %0" + : "=m"(*(long *)p) : "r"(v) : "memory" ); +} + +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + __asm__( "lock ; cmpxchg %3, %1" + : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); + return t; +} + +#define a_or a_or +static inline void a_or(volatile int *p, int v) +{ + __asm__( "lock ; or %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_and a_and +static inline void a_and(volatile int *p, int v) +{ + __asm__( "lock ; and %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_swap a_swap +static inline int a_swap(volatile int *x, int v) +{ + __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); + return v; +} + +#define a_fetch_add a_fetch_add 
+static inline int a_fetch_add(volatile int *x, int v) +{ + __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); + return v; +} + +#define a_inc a_inc +static inline void a_inc(volatile int *x) +{ + __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); +} + +#define a_dec a_dec +static inline void a_dec(volatile int *x) +{ + __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); +} + +#define a_store a_store +static inline void a_store(volatile int *p, int x) +{ + __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); +} + +#define a_spin a_spin +static inline void a_spin() +{ + __asm__ __volatile__( "pause" : : : "memory" ); +} + +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__( "" : : : "memory" ); +} + +#define a_crash a_crash +static inline void a_crash() +{ + __asm__ __volatile__( "hlt" : : : "memory" ); +} --- a/arch/x32/pthread_arch.h +++ b/arch/x32/pthread_arch.h @@ -7,6 +7,6 @@ static inline struct pthread *__pthread_ #define TP_ADJ(p) (p) -#define CANCEL_REG_IP 32 +#define MC_PC gregs[REG_RIP] #define CANARY canary2 --- a/arch/x32/src/syscall_cp_fixup.c +++ b/arch/x32/src/syscall_cp_fixup.c @@ -1,8 +1,6 @@ #include -#ifdef SHARED __attribute__((__visibility__("hidden"))) -#endif long __syscall_cp_internal(volatile void*, long long, long long, long long, long long, long long, long long, long long); @@ -14,9 +12,7 @@ struct __timespec_kernel { long long tv_ ts->tv_nsec = __tsc(X)->tv_nsec; \ (X) = (unsigned long)ts; } } while(0) -#ifdef SHARED __attribute__((__visibility__("hidden"))) -#endif long __syscall_cp_asm (volatile void * foo, long long n, long long a1, long long a2, long long a3, long long a4, long long a5, long long a6) { --- a/arch/x86_64/atomic.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef _INTERNAL_ATOMIC_H -#define _INTERNAL_ATOMIC_H - -#include - -static inline int a_ctz_64(uint64_t x) -{ - __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); - return x; -} - -static inline int a_ctz_l(unsigned long x) -{ - __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); - return x; -} - -static inline void a_and_64(volatile uint64_t *p, uint64_t v) -{ - __asm__( "lock ; and %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_or_64(volatile uint64_t *p, uint64_t v) -{ - __asm__( "lock ; or %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_or_l(volatile void *p, long v) -{ - __asm__( "lock ; or %1, %0" - : "=m"(*(long *)p) : "r"(v) : "memory" ); -} - -static inline void *a_cas_p(volatile void *p, void *t, void *s) -{ - __asm__( "lock ; cmpxchg %3, %1" - : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); - return t; -} - -static inline int a_cas(volatile int *p, int t, int s) -{ - __asm__( "lock ; cmpxchg %3, %1" - : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); - return t; -} - -static inline void a_or(volatile int *p, int v) -{ - __asm__( "lock ; or %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline void a_and(volatile int *p, int v) -{ - __asm__( "lock ; and %1, %0" - : "=m"(*p) : "r"(v) : "memory" ); -} - -static inline int a_swap(volatile int *x, int v) -{ - __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); - return v; -} - -static inline int a_fetch_add(volatile int *x, int v) -{ - __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); - return v; -} - -static inline void a_inc(volatile int *x) -{ - __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); -} - -static inline void a_dec(volatile int *x) 
-{ - __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); -} - -static inline void a_store(volatile int *p, int x) -{ - __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); -} - -static inline void a_spin() -{ - __asm__ __volatile__( "pause" : : : "memory" ); -} - -static inline void a_barrier() -{ - __asm__ __volatile__( "" : : : "memory" ); -} - -static inline void a_crash() -{ - __asm__ __volatile__( "hlt" : : : "memory" ); -} - - -#endif --- /dev/null +++ b/arch/x86_64/atomic_arch.h @@ -0,0 +1,107 @@ +#define a_ctz_64 a_ctz_64 +static inline int a_ctz_64(uint64_t x) +{ + __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) ); + return x; +} + +#define a_and_64 a_and_64 +static inline void a_and_64(volatile uint64_t *p, uint64_t v) +{ + __asm__( "lock ; and %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_or_64 a_or_64 +static inline void a_or_64(volatile uint64_t *p, uint64_t v) +{ + __asm__( "lock ; or %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_or_l a_or_l +static inline void a_or_l(volatile void *p, long v) +{ + __asm__( "lock ; or %1, %0" + : "=m"(*(long *)p) : "r"(v) : "memory" ); +} + +#define a_cas_p a_cas_p +static inline void *a_cas_p(volatile void *p, void *t, void *s) +{ + __asm__( "lock ; cmpxchg %3, %1" + : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" ); + return t; +} + +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + __asm__( "lock ; cmpxchg %3, %1" + : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" ); + return t; +} + +#define a_or a_or +static inline void a_or(volatile int *p, int v) +{ + __asm__( "lock ; or %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_and a_and +static inline void a_and(volatile int *p, int v) +{ + __asm__( "lock ; and %1, %0" + : "=m"(*p) : "r"(v) : "memory" ); +} + +#define a_swap a_swap +static inline int a_swap(volatile int *x, int v) +{ + __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); + return v; +} + +#define a_fetch_add a_fetch_add +static inline int a_fetch_add(volatile int *x, int v) +{ + __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" ); + return v; +} + +#define a_inc a_inc +static inline void a_inc(volatile int *x) +{ + __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" ); +} + +#define a_dec a_dec +static inline void a_dec(volatile int *x) +{ + __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" ); +} + +#define a_store a_store +static inline void a_store(volatile int *p, int x) +{ + __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" ); +} + +#define a_spin a_spin +static inline void a_spin() +{ + __asm__ __volatile__( "pause" : : : "memory" ); +} + +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__( "" : : : "memory" ); +} + +#define a_crash a_crash +static inline void a_crash() +{ + __asm__ __volatile__( "hlt" : : : "memory" ); +} --- a/arch/x86_64/pthread_arch.h +++ b/arch/x86_64/pthread_arch.h @@ -7,4 +7,4 @@ static inline struct pthread *__pthread_ #define TP_ADJ(p) (p) -#define CANCEL_REG_IP 16 +#define MC_PC gregs[REG_RIP] --- a/configure +++ b/configure @@ -9,6 +9,9 @@ VAR=VALUE. See below for descriptions o Defaults for the options are specified in brackets. 
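The two x86 atomic_arch.h headers above express each primitive as a short lock-prefixed inline-asm pattern. The asm strings in this standalone sketch are copied from those headers; the test driver wrapped around them is illustrative only and assumes an x86_64 gcc or clang build:

/* Standalone demo of the lock-prefixed patterns; driver code is not
 * from the patch. */
#include <stdio.h>

static inline int a_fetch_add(volatile int *x, int v)
{
	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
	return v;
}

static inline int a_cas(volatile int *p, int t, int s)
{
	__asm__( "lock ; cmpxchg %3, %1"
		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
	return t;
}

int main(void)
{
	volatile int n = 40;
	int old  = a_fetch_add(&n, 2);   /* old == 40, n == 42 */
	int seen = a_cas(&n, 42, 100);   /* seen == 42, n == 100 */
	printf("%d %d %d\n", old, seen, n);
	return 0;
}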
+Configuration: + --srcdir=DIR source directory [detected] + Installation directories: --prefix=PREFIX main installation prefix [/usr/local/musl] --exec-prefix=EPREFIX installation prefix for executable files [PREFIX] @@ -117,6 +120,7 @@ CFLAGS_TRY= LDFLAGS_AUTO= LDFLAGS_TRY= OPTIMIZE_GLOBS= +srcdir= prefix=/usr/local/musl exec_prefix='$(prefix)' bindir='$(exec_prefix)/bin' @@ -139,6 +143,7 @@ clang_wrapper=no for arg ; do case "$arg" in --help) usage ;; +--srcdir=*) srcdir=${arg#*=} ;; --prefix=*) prefix=${arg#*=} ;; --exec-prefix=*) exec_prefix=${arg#*=} ;; --bindir=*) bindir=${arg#*=} ;; @@ -179,11 +184,23 @@ LIBCC=*) LIBCC=${arg#*=} ;; esac done -for i in prefix exec_prefix bindir libdir includedir syslibdir ; do +for i in srcdir prefix exec_prefix bindir libdir includedir syslibdir ; do stripdir $i done # +# Get the source dir for out-of-tree builds +# +if test -z "$srcdir" ; then +srcdir="${0%/configure}" +stripdir srcdir +fi +abs_builddir="$(pwd)" || fail "$0: cannot determine working directory" +abs_srcdir="$(cd $srcdir && pwd)" || fail "$0: invalid source directory $srcdir" +test "$abs_srcdir" = "$abs_builddir" && srcdir=. +test "$srcdir" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory" + +# # Get a temp filename we can use # i=0 @@ -263,11 +280,11 @@ fi fi if test "$gcc_wrapper" = yes ; then -tools="$tools tools/musl-gcc" +tools="$tools obj/musl-gcc" tool_libs="$tool_libs lib/musl-gcc.specs" fi if test "$clang_wrapper" = yes ; then -tools="$tools tools/musl-clang tools/ld.musl-clang" +tools="$tools obj/musl-clang obj/ld.musl-clang" fi # @@ -321,7 +338,7 @@ __attribute__((__may_alias__)) #endif x; EOF -if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \ +if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \ -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then printf "no\n" else @@ -330,6 +347,13 @@ CFLAGS_C99FSE="$CFLAGS_C99FSE -D__may_al fi # +# The GNU toolchain defaults to assuming unmarked files need an +# executable stack, potentially exposing vulnerabilities in programs +# linked with such object files. Fix this. +# +tryflag CFLAGS_C99FSE -Wa,--noexecstack + +# # Check for options to disable stack protector, which needs to be # disabled for a few early-bootstrap translation units. If not found, # this is not an error; we assume the toolchain does not do ssp. @@ -430,11 +454,15 @@ tryflag CFLAGS_AUTO -fno-unwind-tables tryflag CFLAGS_AUTO -fno-asynchronous-unwind-tables # -# The GNU toolchain defaults to assuming unmarked files need an -# executable stack, potentially exposing vulnerabilities in programs -# linked with such object files. Fix this. +# Attempt to put each function and each data object in its own +# section. This both allows additional size optimizations at link +# time and works around a dangerous class of compiler/assembler bugs +# whereby relative address expressions are constant-folded by the +# assembler even when one or more of the symbols involved is +# replaceable. See gas pr 18561 and gcc pr 66609, 68178, etc. 
# -tryflag CFLAGS_AUTO -Wa,--noexecstack +tryflag CFLAGS_AUTO -ffunction-sections +tryflag CFLAGS_AUTO -fdata-sections # # On x86, make sure we don't have incompatible instruction set @@ -489,7 +517,7 @@ int foo(void) { } int bar(void) { fp = foo; return foo(); } EOF if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS \ - -DSHARED -fPIC -I./src/internal -include vis.h \ + -DSHARED -fPIC -I$srcdir/src/internal -include vis.h \ -nostdlib -shared -Wl,-Bsymbolic-functions \ -o /dev/null "$tmpc" >/dev/null 2>&1 ; then visibility=yes @@ -504,6 +532,16 @@ CFLAGS_AUTO="$CFLAGS_AUTO -include vis.h CFLAGS_AUTO="${CFLAGS_AUTO# }" fi +# Reduce space lost to padding for alignment purposes by sorting data +# objects according to their alignment reqirements. This approximates +# optimal packing. +tryldflag LDFLAGS_AUTO -Wl,--sort-section,alignment +tryldflag LDFLAGS_AUTO -Wl,--sort-common + +# When linking shared library, drop dummy weak definitions that were +# replaced by strong definitions from other translation units. +tryldflag LDFLAGS_AUTO -Wl,--gc-sections + # Some patched GCC builds have these defaults messed up... tryldflag LDFLAGS_AUTO -Wl,--hash-style=both @@ -513,6 +551,11 @@ tryldflag LDFLAGS_AUTO -Wl,--hash-style= # runtime library; implementation error is also a possibility. tryldflag LDFLAGS_AUTO -Wl,--no-undefined +# Avoid exporting symbols from compiler runtime libraries. They +# should be hidden anyway, but some toolchains including old gcc +# versions built without shared library support and pcc are broken. +tryldflag LDFLAGS_AUTO -Wl,--exclude-libs=ALL + test "$shared" = "no" || { # Disable dynamic linking if ld is broken and can't do -Bsymbolic-functions LDFLAGS_DUMMY= @@ -599,7 +642,7 @@ echo '#include ' > "$tmpc" echo '#if LDBL_MANT_DIG == 53' >> "$tmpc" echo 'typedef char ldcheck[9-(int)sizeof(long double)];' >> "$tmpc" echo '#endif' >> "$tmpc" -if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \ +if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \ -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then printf "yes\n" else @@ -622,6 +665,7 @@ cat << EOF ARCH = $ARCH SUBARCH = $SUBARCH ASMSUBARCH = $ASMSUBARCH +srcdir = $srcdir prefix = $prefix exec_prefix = $exec_prefix bindir = $bindir @@ -629,12 +673,14 @@ libdir = $libdir includedir = $includedir syslibdir = $syslibdir CC = $CC -CFLAGS = $CFLAGS_AUTO $CFLAGS +CFLAGS = $CFLAGS +CFLAGS_AUTO = $CFLAGS_AUTO CFLAGS_C99FSE = $CFLAGS_C99FSE CFLAGS_MEMOPS = $CFLAGS_MEMOPS CFLAGS_NOSSP = $CFLAGS_NOSSP CPPFLAGS = $CPPFLAGS -LDFLAGS = $LDFLAGS_AUTO $LDFLAGS +LDFLAGS = $LDFLAGS +LDFLAGS_AUTO = $LDFLAGS_AUTO CROSS_COMPILE = $CROSS_COMPILE LIBCC = $LIBCC OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS @@ -648,4 +694,6 @@ test "x$cc_family" = xgcc && echo 'WRAPC test "x$cc_family" = xclang && echo 'WRAPCC_CLANG = $(CC)' exec 1>&3 3>&- +test "$srcdir" = "." || ln -sf $srcdir/Makefile . 
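Placing every function and data object in its own section lets --gc-sections discard whatever the link never references, in addition to working around the assembler constant-folding bugs cited above. A quick way to observe the effect outside musl; the file name and build commands below are illustrative, not from the patch:

/* gc-sections demo, illustrative only.
 * Typical commands:
 *   cc -Os -ffunction-sections -fdata-sections -c gcdemo.c
 *   cc -Wl,--gc-sections gcdemo.o -o gcdemo
 * With the flags, nm gcdemo typically no longer lists unused_helper,
 * because its private section is unreferenced and gets discarded. */
#include <stdio.h>

int unused_helper(int x)
{
	return x * 3;            /* never called from anywhere */
}

int main(void)
{
	puts("only referenced sections survive --gc-sections");
	return 0;
}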
+ printf "done\n" --- a/crt/arm/crti.s +++ b/crt/arm/crti.s @@ -1,3 +1,5 @@ +.syntax unified + .section .init .global _init .type _init,%function --- a/crt/arm/crtn.s +++ b/crt/arm/crtn.s @@ -1,11 +1,9 @@ +.syntax unified + .section .init pop {r0,lr} - tst lr,#1 - moveq pc,lr bx lr .section .fini pop {r0,lr} - tst lr,#1 - moveq pc,lr bx lr --- a/include/complex.h +++ b/include/complex.h @@ -116,7 +116,7 @@ long double creall(long double complex); #if __STDC_VERSION__ >= 201112L #if defined(_Imaginary_I) -#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y))) +#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y)) #elif defined(__clang__) #define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) }) #else --- a/include/netinet/tcp.h +++ b/include/netinet/tcp.h @@ -41,7 +41,20 @@ #define TCP_CLOSING 11 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_MAXSEG 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_SACK_PERMITTED 4 +#define TCPOPT_SACK 5 +#define TCPOPT_TIMESTAMP 8 +#define TCPOLEN_SACK_PERMITTED 2 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_MAXSEG 4 +#define TCPOLEN_TIMESTAMP 10 + #define SOL_TCP 6 + #include #include #include --- a/src/env/__init_tls.c +++ b/src/env/__init_tls.c @@ -8,9 +8,6 @@ #include "atomic.h" #include "syscall.h" -#ifndef SHARED -static -#endif int __init_tp(void *p) { pthread_t td = p; @@ -24,8 +21,6 @@ int __init_tp(void *p) return 0; } -#ifndef SHARED - static struct builtin_tls { char c; struct pthread pt; @@ -33,33 +28,40 @@ static struct builtin_tls { } builtin_tls[1]; #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt) -struct tls_image { - void *image; - size_t len, size, align; -} __static_tls; - -#define T __static_tls +static struct tls_module main_tls; void *__copy_tls(unsigned char *mem) { pthread_t td; - if (!T.image) return mem; - void **dtv = (void *)mem; - dtv[0] = (void *)1; + struct tls_module *p; + size_t i; + void **dtv; + #ifdef TLS_ABOVE_TP - mem += sizeof(void *) * 2; - mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1); + dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1); + + mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1); td = (pthread_t)mem; mem += sizeof(struct pthread); + + for (i=1, p=libc.tls_head; p; i++, p=p->next) { + dtv[i] = mem + p->offset; + memcpy(dtv[i], p->image, p->len); + } #else + dtv = (void **)mem; + mem += libc.tls_size - sizeof(struct pthread); - mem -= (uintptr_t)mem & (T.align-1); + mem -= (uintptr_t)mem & (libc.tls_align-1); td = (pthread_t)mem; - mem -= T.size; + + for (i=1, p=libc.tls_head; p; i++, p=p->next) { + dtv[i] = mem - p->offset; + memcpy(dtv[i], p->image, p->len); + } #endif + dtv[0] = (void *)libc.tls_cnt; td->dtv = td->dtv_copy = dtv; - dtv[1] = mem; - memcpy(mem, T.image, T.len); return td; } @@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr; typedef Elf64_Phdr Phdr; #endif -void __init_tls(size_t *aux) +static void static_init_tls(size_t *aux) { unsigned char *p; size_t n; @@ -86,16 +88,24 @@ void __init_tls(size_t *aux) } if (tls_phdr) { - T.image = (void *)(base + tls_phdr->p_vaddr); - T.len = tls_phdr->p_filesz; - T.size = tls_phdr->p_memsz; - T.align = tls_phdr->p_align; + main_tls.image = (void *)(base + tls_phdr->p_vaddr); + main_tls.len = tls_phdr->p_filesz; + main_tls.size = tls_phdr->p_memsz; + main_tls.align = tls_phdr->p_align; + libc.tls_cnt = 1; + libc.tls_head = &main_tls; } - T.size += (-T.size - (uintptr_t)T.image) & (T.align-1); - if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN; + 
main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image) + & (main_tls.align-1); + if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN; +#ifndef TLS_ABOVE_TP + main_tls.offset = main_tls.size; +#endif - libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread) + libc.tls_align = main_tls.align; + libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread) + + main_tls.size + main_tls.align + MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN; if (libc.tls_size > sizeof builtin_tls) { @@ -117,6 +127,5 @@ void __init_tls(size_t *aux) if (__init_tp(__copy_tls(mem)) < 0) a_crash(); } -#else -void __init_tls(size_t *auxv) { } -#endif + +weak_alias(static_init_tls, __init_tls); --- a/src/env/__libc_start_main.c +++ b/src/env/__libc_start_main.c @@ -8,21 +8,17 @@ void __init_tls(size_t *); -#ifndef SHARED -static void dummy() {} +static void dummy(void) {} weak_alias(dummy, _init); -extern void (*const __init_array_start)() __attribute__((weak)); -extern void (*const __init_array_end)() __attribute__((weak)); -#endif + +__attribute__((__weak__, __visibility__("hidden"))) +extern void (*const __init_array_start)(void), (*const __init_array_end)(void); static void dummy1(void *p) {} weak_alias(dummy1, __init_ssp); #define AUX_CNT 38 -#ifndef SHARED -static -#endif void __init_libc(char **envp, char *pn) { size_t i, *auxv, aux[AUX_CNT] = { 0 }; @@ -57,20 +53,22 @@ void __init_libc(char **envp, char *pn) libc.secure = 1; } -int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv) +static void libc_start_init(void) { - char **envp = argv+argc+1; - -#ifndef SHARED - __init_libc(envp, argv[0]); _init(); uintptr_t a = (uintptr_t)&__init_array_start; for (; a<(uintptr_t)&__init_array_end; a+=sizeof(void(*)())) (*(void (**)())a)(); -#else - void __libc_start_init(void); +} + +weak_alias(libc_start_init, __libc_start_init); + +int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv) +{ + char **envp = argv+argc+1; + + __init_libc(envp, argv[0]); __libc_start_init(); -#endif /* Pass control to the application */ exit(main(argc, argv, envp)); --- a/src/env/__reset_tls.c +++ b/src/env/__reset_tls.c @@ -1,21 +1,16 @@ -#ifndef SHARED - #include #include "pthread_impl.h" - -extern struct tls_image { - void *image; - size_t len, size, align; -} __static_tls; - -#define T __static_tls +#include "libc.h" void __reset_tls() { - if (!T.size) return; pthread_t self = __pthread_self(); - memcpy(self->dtv[1], T.image, T.len); - memset((char *)self->dtv[1]+T.len, 0, T.size-T.len); + struct tls_module *p; + size_t i, n = (size_t)self->dtv[0]; + if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) { + if (!self->dtv[i]) continue; + memcpy(self->dtv[i], p->image, p->len); + memset((char *)self->dtv[i]+p->len, 0, + p->size - p->len); + } } - -#endif --- a/src/env/__stack_chk_fail.c +++ b/src/env/__stack_chk_fail.c @@ -17,16 +17,7 @@ void __stack_chk_fail(void) a_crash(); } -#ifdef SHARED - __attribute__((__visibility__("hidden"))) -void __stack_chk_fail_local(void) -{ - a_crash(); -} - -#else +void __stack_chk_fail_local(void); weak_alias(__stack_chk_fail, __stack_chk_fail_local); - -#endif --- /dev/null +++ b/src/exit/arm/__aeabi_atexit.c @@ -0,0 +1,6 @@ +int __cxa_atexit(void (*func)(void *), void *arg, void *dso); + +int __aeabi_atexit (void *obj, void (*func) (void *), void *d) +{ + return __cxa_atexit (func, obj, d); +} --- a/src/exit/exit.c +++ b/src/exit/exit.c @@ -10,25 +10,25 @@ static void dummy() * as a consequence of linking either __toread.c or 
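The rewritten __copy_tls above positions the thread structure with mem += -((uintptr_t)mem + sizeof(struct pthread)) & (align-1), which advances mem just far enough that mem plus the structure size lands on an align boundary. A tiny standalone check of that idiom, with made-up numbers:

/* Adding -(addr + off) & (align-1) to addr makes addr + off a multiple
 * of align, provided align is a power of two.  Demo values only. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uintptr_t addr = 1003, off = 56, align = 16;   /* arbitrary sample values */
	addr += -(addr + off) & (align - 1);
	printf("(addr+off) %% align == %d\n", (int)((addr + off) % align));  /* 0 */
	return 0;
}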
__towrite.c. */ weak_alias(dummy, __funcs_on_exit); weak_alias(dummy, __stdio_exit); - -#ifndef SHARED weak_alias(dummy, _fini); -extern void (*const __fini_array_start)() __attribute__((weak)); -extern void (*const __fini_array_end)() __attribute__((weak)); -#endif -_Noreturn void exit(int code) -{ - __funcs_on_exit(); +__attribute__((__weak__, __visibility__("hidden"))) +extern void (*const __fini_array_start)(void), (*const __fini_array_end)(void); -#ifndef SHARED +static void libc_exit_fini(void) +{ uintptr_t a = (uintptr_t)&__fini_array_end; for (; a>(uintptr_t)&__fini_array_start; a-=sizeof(void(*)())) (*(void (**)())(a-sizeof(void(*)())))(); _fini(); -#endif +} - __stdio_exit(); +weak_alias(libc_exit_fini, __libc_exit_fini); +_Noreturn void exit(int code) +{ + __funcs_on_exit(); + __libc_exit_fini(); + __stdio_exit(); _Exit(code); } --- /dev/null +++ b/src/fenv/arm/fenv-hf.S @@ -0,0 +1,69 @@ +#if __ARM_PCS_VFP + +.syntax unified +.fpu vfp + +.global fegetround +.type fegetround,%function +fegetround: + fmrx r0, fpscr + and r0, r0, #0xc00000 + bx lr + +.global __fesetround +.type __fesetround,%function +__fesetround: + fmrx r3, fpscr + bic r3, r3, #0xc00000 + orr r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global fetestexcept +.type fetestexcept,%function +fetestexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + and r0, r0, r3 + bx lr + +.global feclearexcept +.type feclearexcept,%function +feclearexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + bic r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global feraiseexcept +.type feraiseexcept,%function +feraiseexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + orr r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global fegetenv +.type fegetenv,%function +fegetenv: + fmrx r3, fpscr + str r3, [r0] + mov r0, #0 + bx lr + +.global fesetenv +.type fesetenv,%function +fesetenv: + cmn r0, #1 + moveq r3, #0 + ldrne r3, [r0] + fmxr fpscr, r3 + mov r0, #0 + bx lr + +#endif --- /dev/null +++ b/src/fenv/arm/fenv.c @@ -0,0 +1,3 @@ +#if !__ARM_PCS_VFP +#include "../fenv.c" +#endif --- a/src/fenv/armebhf/fenv.sub +++ /dev/null @@ -1 +0,0 @@ -../armhf/fenv.s --- a/src/fenv/armhf/fenv.s +++ /dev/null @@ -1,64 +0,0 @@ -.fpu vfp - -.global fegetround -.type fegetround,%function -fegetround: - mrc p10, 7, r0, cr1, cr0, 0 - and r0, r0, #0xc00000 - bx lr - -.global __fesetround -.type __fesetround,%function -__fesetround: - mrc p10, 7, r3, cr1, cr0, 0 - bic r3, r3, #0xc00000 - orr r3, r3, r0 - mcr p10, 7, r3, cr1, cr0, 0 - mov r0, #0 - bx lr - -.global fetestexcept -.type fetestexcept,%function -fetestexcept: - and r0, r0, #0x1f - mrc p10, 7, r3, cr1, cr0, 0 - and r0, r0, r3 - bx lr - -.global feclearexcept -.type feclearexcept,%function -feclearexcept: - and r0, r0, #0x1f - mrc p10, 7, r3, cr1, cr0, 0 - bic r3, r3, r0 - mcr p10, 7, r3, cr1, cr0, 0 - mov r0, #0 - bx lr - -.global feraiseexcept -.type feraiseexcept,%function -feraiseexcept: - and r0, r0, #0x1f - mrc p10, 7, r3, cr1, cr0, 0 - orr r3, r3, r0 - mcr p10, 7, r3, cr1, cr0, 0 - mov r0, #0 - bx lr - -.global fegetenv -.type fegetenv,%function -fegetenv: - mrc p10, 7, r3, cr1, cr0, 0 - str r3, [r0] - mov r0, #0 - bx lr - -.global fesetenv -.type fesetenv,%function -fesetenv: - cmn r0, #1 - moveq r3, #0 - ldrne r3, [r0] - mcr p10, 7, r3, cr1, cr0, 0 - mov r0, #0 - bx lr --- a/src/fenv/armhf/fenv.sub +++ /dev/null @@ -1 +0,0 @@ -fenv.s --- a/src/fenv/mips-sf/fenv.sub +++ /dev/null @@ -1 +0,0 @@ -../fenv.c --- /dev/null +++ b/src/fenv/mips/fenv-sf.c @@ -0,0 +1,3 @@ +#ifdef __mips_soft_float 
+#include "../fenv.c" +#endif --- /dev/null +++ b/src/fenv/mips/fenv.S @@ -0,0 +1,71 @@ +#ifndef __mips_soft_float + +.set noreorder + +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + xor $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + and $4, $4, 0x7c + cfc1 $2, $31 + jr $ra + and $2, $2, $4 + +.global fegetround +.type fegetround,@function +fegetround: + cfc1 $2, $31 + jr $ra + andi $2, $2, 3 + +.global __fesetround +.type __fesetround,@function +__fesetround: + cfc1 $5, $31 + li $6, -4 + and $5, $5, $6 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fegetenv +.type fegetenv,@function +fegetenv: + cfc1 $5, $31 + sw $5, 0($4) + jr $ra + li $2, 0 + +.global fesetenv +.type fesetenv,@function +fesetenv: + addiu $5, $4, 1 + beq $5, $0, 1f + nop + lw $5, 0($4) +1: ctc1 $5, $31 + jr $ra + li $2, 0 + +#endif --- a/src/fenv/mips/fenv.s +++ /dev/null @@ -1,67 +0,0 @@ -.set noreorder - -.global feclearexcept -.type feclearexcept,@function -feclearexcept: - and $4, $4, 0x7c - cfc1 $5, $31 - or $5, $5, $4 - xor $5, $5, $4 - ctc1 $5, $31 - jr $ra - li $2, 0 - -.global feraiseexcept -.type feraiseexcept,@function -feraiseexcept: - and $4, $4, 0x7c - cfc1 $5, $31 - or $5, $5, $4 - ctc1 $5, $31 - jr $ra - li $2, 0 - -.global fetestexcept -.type fetestexcept,@function -fetestexcept: - and $4, $4, 0x7c - cfc1 $2, $31 - jr $ra - and $2, $2, $4 - -.global fegetround -.type fegetround,@function -fegetround: - cfc1 $2, $31 - jr $ra - andi $2, $2, 3 - -.global __fesetround -.type __fesetround,@function -__fesetround: - cfc1 $5, $31 - li $6, -4 - and $5, $5, $6 - or $5, $5, $4 - ctc1 $5, $31 - jr $ra - li $2, 0 - -.global fegetenv -.type fegetenv,@function -fegetenv: - cfc1 $5, $31 - sw $5, 0($4) - jr $ra - li $2, 0 - -.global fesetenv -.type fesetenv,@function -fesetenv: - addiu $5, $4, 1 - beq $5, $0, 1f - nop - lw $5, 0($4) -1: ctc1 $5, $31 - jr $ra - li $2, 0 --- a/src/fenv/mipsel-sf/fenv.sub +++ /dev/null @@ -1 +0,0 @@ -../fenv.c --- a/src/fenv/sh-nofpu/fenv.sub +++ /dev/null @@ -1 +0,0 @@ -../fenv.c --- /dev/null +++ b/src/fenv/sh/fenv-nofpu.c @@ -0,0 +1,3 @@ +#if !__SH_FPU_ANY__ && !__SH4__ +#include "../fenv.c" +#endif --- /dev/null +++ b/src/fenv/sh/fenv.S @@ -0,0 +1,78 @@ +#if __SH_FPU_ANY__ || __SH4__ + +.global fegetround +.type fegetround, @function +fegetround: + sts fpscr, r0 + rts + and #3, r0 + +.global __fesetround +.type __fesetround, @function +__fesetround: + sts fpscr, r0 + or r4, r0 + lds r0, fpscr + rts + mov #0, r0 + +.global fetestexcept +.type fetestexcept, @function +fetestexcept: + sts fpscr, r0 + and r4, r0 + rts + and #0x7c, r0 + +.global feclearexcept +.type feclearexcept, @function +feclearexcept: + mov r4, r0 + and #0x7c, r0 + not r0, r4 + sts fpscr, r0 + and r4, r0 + lds r0, fpscr + rts + mov #0, r0 + +.global feraiseexcept +.type feraiseexcept, @function +feraiseexcept: + mov r4, r0 + and #0x7c, r0 + sts fpscr, r4 + or r4, r0 + lds r0, fpscr + rts + mov #0, r0 + +.global fegetenv +.type fegetenv, @function +fegetenv: + sts fpscr, r0 + mov.l r0, @r4 + rts + mov #0, r0 + +.global fesetenv +.type fesetenv, @function +fesetenv: + mov r4, r0 + cmp/eq #-1, r0 + bf 1f + + ! the default environment is complicated by the fact that we need to + ! 
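All of these per-arch implementations (ARM VFP, MIPS, SH) sit behind the same standard <fenv.h> interface. A small portable exercise of that interface, independent of the patch; some platforms need -lm, and strictly conforming code would also enable FENV_ACCESS:

/* fenv.h demo only, not part of the patch. */
#include <fenv.h>
#include <stdio.h>

int main(void)
{
	fesetround(FE_DOWNWARD);
	printf("rounding set to FE_DOWNWARD: %d\n", fegetround() == FE_DOWNWARD);
	feraiseexcept(FE_INEXACT);
	printf("FE_INEXACT observed: %d\n", !!fetestexcept(FE_INEXACT));
	fesetround(FE_TONEAREST);
	return 0;
}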
preserve the current precision bit, which we do not know a priori + sts fpscr, r0 + mov #8, r1 + swap.w r1, r1 + bra 2f + and r1, r0 + +1: mov.l @r4, r0 ! non-default environment +2: lds r0, fpscr + rts + mov #0, r0 + +#endif --- a/src/fenv/sh/fenv.s +++ /dev/null @@ -1,74 +0,0 @@ -.global fegetround -.type fegetround, @function -fegetround: - sts fpscr, r0 - rts - and #3, r0 - -.global __fesetround -.type __fesetround, @function -__fesetround: - sts fpscr, r0 - or r4, r0 - lds r0, fpscr - rts - mov #0, r0 - -.global fetestexcept -.type fetestexcept, @function -fetestexcept: - sts fpscr, r0 - and r4, r0 - rts - and #0x7c, r0 - -.global feclearexcept -.type feclearexcept, @function -feclearexcept: - mov r4, r0 - and #0x7c, r0 - not r0, r4 - sts fpscr, r0 - and r4, r0 - lds r0, fpscr - rts - mov #0, r0 - -.global feraiseexcept -.type feraiseexcept, @function -feraiseexcept: - mov r4, r0 - and #0x7c, r0 - sts fpscr, r4 - or r4, r0 - lds r0, fpscr - rts - mov #0, r0 - -.global fegetenv -.type fegetenv, @function -fegetenv: - sts fpscr, r0 - mov.l r0, @r4 - rts - mov #0, r0 - -.global fesetenv -.type fesetenv, @function -fesetenv: - mov r4, r0 - cmp/eq #-1, r0 - bf 1f - - ! the default environment is complicated by the fact that we need to - ! preserve the current precision bit, which we do not know a priori - sts fpscr, r0 - mov #8, r1 - swap.w r1, r1 - bra 2f - and r1, r0 - -1: mov.l @r4, r0 ! non-default environment -2: lds r0, fpscr - rts - mov #0, r0 --- a/src/fenv/sheb-nofpu/fenv.sub +++ /dev/null @@ -1 +0,0 @@ -../fenv.c --- a/src/internal/arm/syscall.s +++ b/src/internal/arm/syscall.s @@ -1,3 +1,4 @@ +.syntax unified .global __syscall .hidden __syscall .type __syscall,%function @@ -11,6 +12,4 @@ __syscall: ldmfd ip,{r3,r4,r5,r6} svc 0 ldmfd sp!,{r4,r5,r6,r7} - tst lr,#1 - moveq pc,lr bx lr --- /dev/null +++ b/src/internal/atomic.h @@ -0,0 +1,275 @@ +#ifndef _ATOMIC_H +#define _ATOMIC_H + +#include + +#include "atomic_arch.h" + +#ifdef a_ll + +#ifndef a_pre_llsc +#define a_pre_llsc() +#endif + +#ifndef a_post_llsc +#define a_post_llsc() +#endif + +#ifndef a_cas +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + int old; + a_pre_llsc(); + do old = a_ll(p); + while (old==t && !a_sc(p, s)); + a_post_llsc(); + return old; +} +#endif + +#ifndef a_swap +#define a_swap a_swap +static inline int a_swap(volatile int *p, int v) +{ + int old; + a_pre_llsc(); + do old = a_ll(p); + while (!a_sc(p, v)); + a_post_llsc(); + return old; +} +#endif + +#ifndef a_fetch_add +#define a_fetch_add a_fetch_add +static inline int a_fetch_add(volatile int *p, int v) +{ + int old; + a_pre_llsc(); + do old = a_ll(p); + while (!a_sc(p, (unsigned)old + v)); + a_post_llsc(); + return old; +} +#endif + +#ifndef a_fetch_and +#define a_fetch_and a_fetch_and +static inline int a_fetch_and(volatile int *p, int v) +{ + int old; + a_pre_llsc(); + do old = a_ll(p); + while (!a_sc(p, old & v)); + a_post_llsc(); + return old; +} +#endif + +#ifndef a_fetch_or +#define a_fetch_or a_fetch_or +static inline int a_fetch_or(volatile int *p, int v) +{ + int old; + a_pre_llsc(); + do old = a_ll(p); + while (!a_sc(p, old | v)); + a_post_llsc(); + return old; +} +#endif + +#endif + +#ifndef a_cas +#error missing definition of a_cas +#endif + +#ifndef a_swap +#define a_swap a_swap +static inline int a_swap(volatile int *p, int v) +{ + int old; + do old = *p; + while (a_cas(p, old, v) != old); + return old; +} +#endif + +#ifndef a_fetch_add +#define a_fetch_add a_fetch_add +static inline int 
a_fetch_add(volatile int *p, int v) +{ + int old; + do old = *p; + while (a_cas(p, old, (unsigned)old+v) != old); + return old; +} +#endif + +#ifndef a_fetch_and +#define a_fetch_and a_fetch_and +static inline int a_fetch_and(volatile int *p, int v) +{ + int old; + do old = *p; + while (a_cas(p, old, old&v) != old); + return old; +} +#endif +#ifndef a_fetch_or +#define a_fetch_or a_fetch_or +static inline int a_fetch_or(volatile int *p, int v) +{ + int old; + do old = *p; + while (a_cas(p, old, old|v) != old); + return old; +} +#endif + +#ifndef a_and +#define a_and a_and +static inline void a_and(volatile int *p, int v) +{ + a_fetch_and(p, v); +} +#endif + +#ifndef a_or +#define a_or a_or +static inline void a_or(volatile int *p, int v) +{ + a_fetch_or(p, v); +} +#endif + +#ifndef a_inc +#define a_inc a_inc +static inline void a_inc(volatile int *p) +{ + a_fetch_add(p, 1); +} +#endif + +#ifndef a_dec +#define a_dec a_dec +static inline void a_dec(volatile int *p) +{ + a_fetch_add(p, -1); +} +#endif + +#ifndef a_store +#define a_store a_store +static inline void a_store(volatile int *p, int v) +{ +#ifdef a_barrier + a_barrier(); + *p = v; + a_barrier(); +#else + a_swap(p, v); +#endif +} +#endif + +#ifndef a_barrier +#define a_barrier a_barrier +static void a_barrier() +{ + volatile int tmp = 0; + a_cas(&tmp, 0, 0); +} +#endif + +#ifndef a_spin +#define a_spin a_barrier +#endif + +#ifndef a_and_64 +#define a_and_64 a_and_64 +static inline void a_and_64(volatile uint64_t *p, uint64_t v) +{ + union { uint64_t v; uint32_t r[2]; } u = { v }; + if (u.r[0]+1) a_and((int *)p, u.r[0]); + if (u.r[1]+1) a_and((int *)p+1, u.r[1]); +} +#endif + +#ifndef a_or_64 +#define a_or_64 a_or_64 +static inline void a_or_64(volatile uint64_t *p, uint64_t v) +{ + union { uint64_t v; uint32_t r[2]; } u = { v }; + if (u.r[0]) a_or((int *)p, u.r[0]); + if (u.r[1]) a_or((int *)p+1, u.r[1]); +} +#endif + +#ifndef a_cas_p +#define a_cas_p a_cas_p +static inline void *a_cas_p(volatile void *p, void *t, void *s) +{ + return (void *)a_cas((volatile int *)p, (int)t, (int)s); +} +#endif + +#ifndef a_or_l +#define a_or_l a_or_l +static inline void a_or_l(volatile void *p, long v) +{ + if (sizeof(long) == sizeof(int)) a_or(p, v); + else a_or_64(p, v); +} +#endif + +#ifndef a_crash +#define a_crash a_crash +static inline void a_crash() +{ + *(volatile char *)0=0; +} +#endif + +#ifndef a_ctz_64 +#define a_ctz_64 a_ctz_64 +static inline int a_ctz_64(uint64_t x) +{ + static const char debruijn64[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 + }; + static const char debruijn32[32] = { + 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, + 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 + }; + if (sizeof(long) < 8) { + uint32_t y = x; + if (!y) { + y = x>>32; + return 32 + debruijn32[(y&-y)*0x076be629 >> 27]; + } + return debruijn32[(y&-y)*0x076be629 >> 27]; + } + return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58]; +} +#endif + +#ifndef a_ctz_l +#define a_ctz_l a_ctz_l +static inline int a_ctz_l(unsigned long x) +{ + static const char debruijn32[32] = { + 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13, + 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14 + }; + if (sizeof(long) == 8) return a_ctz_64(x); + return debruijn32[(x&-x)*0x076be629 >> 27]; +} +#endif + +#endif --- 
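To see how the new fallback layer in atomic.h composes, here is a reduced standalone version in which a_cas is supplied by a compiler builtin purely for demonstration; the retry loops mirror the ones above:

/* Reduced illustration: given only a_cas, the other operations are
 * retry loops around it.  The builtin-based a_cas is demo scaffolding,
 * not how musl defines it. */
#include <stdio.h>

static inline int a_cas(volatile int *p, int t, int s)
{
	return __sync_val_compare_and_swap(p, t, s);
}

static inline int a_fetch_add(volatile int *p, int v)
{
	int old;
	do old = *p;
	while (a_cas(p, old, (unsigned)old + v) != old);
	return old;
}

static inline int a_swap(volatile int *p, int v)
{
	int old;
	do old = *p;
	while (a_cas(p, old, v) != old);
	return old;
}

int main(void)
{
	volatile int x = 5;
	printf("%d ", a_fetch_add(&x, 10));  /* 5  */
	printf("%d ", a_swap(&x, 1));        /* 15 */
	printf("%d\n", x);                   /* 1  */
	return 0;
}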
a/src/internal/dynlink.h +++ b/src/internal/dynlink.h @@ -64,6 +64,10 @@ struct fdpic_dummy_loadmap { #define DL_FDPIC 0 #endif +#ifndef DL_NOMMU_SUPPORT +#define DL_NOMMU_SUPPORT 0 +#endif + #if !DL_FDPIC #define IS_RELATIVE(x,s) ( \ (R_TYPE(x) == REL_RELATIVE) || \ --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -11,13 +11,20 @@ struct __locale_struct { const struct __locale_map *volatile cat[6]; }; +struct tls_module { + struct tls_module *next; + void *image; + size_t len, size, align, offset; +}; + struct __libc { int can_do_threads; int threaded; int secure; volatile int threads_minus_1; size_t *auxv; - size_t tls_size; + struct tls_module *tls_head; + size_t tls_size, tls_align, tls_cnt; size_t page_size; struct __locale_struct global_locale; }; --- a/src/internal/syscall.h +++ b/src/internal/syscall.h @@ -17,9 +17,7 @@ typedef long syscall_arg_t; #endif -#ifdef SHARED __attribute__((visibility("hidden"))) -#endif long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...), __syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t); --- a/src/internal/version.c +++ b/src/internal/version.c @@ -1,12 +1,9 @@ -#ifdef SHARED - #include "version.h" static const char version[] = VERSION; +__attribute__((__visibility__("hidden"))) const char *__libc_get_version() { return version; } - -#endif --- a/src/internal/vis.h +++ b/src/internal/vis.h @@ -4,10 +4,9 @@ * override default visibilities to reduce the size and performance costs * of position-independent code. */ -#ifndef CRT -#ifdef SHARED +#if !defined(CRT) && !defined(__ASSEMBLER__) -/* For shared libc.so, all symbols should be protected, but some toolchains +/* Conceptually, all symbols should be protected, but some toolchains * fail to support copy relocations for protected data, so exclude all * exported data symbols. */ @@ -25,16 +24,4 @@ extern char *optarg, **environ, **__envi #pragma GCC visibility push(protected) -#elif defined(__PIC__) - -/* If building static libc.a as position-independent code, try to make - * everything hidden except possibly-undefined weak references. 
*/ - -__attribute__((__visibility__("default"))) -extern void (*const __init_array_start)(), (*const __init_array_end)(), - (*const __fini_array_start)(), (*const __fini_array_end)(); - -#pragma GCC visibility push(hidden) - -#endif #endif --- a/src/ldso/arm/dlsym.s +++ b/src/ldso/arm/dlsym.s @@ -1,3 +1,4 @@ +.syntax unified .text .global dlsym .hidden __dlsym --- /dev/null +++ b/src/ldso/arm/find_exidx.c @@ -0,0 +1,42 @@ +#define _GNU_SOURCE +#include +#include + +struct find_exidx_data { + uintptr_t pc, exidx_start; + int exidx_len; +}; + +static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr) +{ + struct find_exidx_data *data = ptr; + const ElfW(Phdr) *phdr = info->dlpi_phdr; + uintptr_t addr, exidx_start = 0; + int i, match = 0, exidx_len = 0; + + for (i = info->dlpi_phnum; i > 0; i--, phdr++) { + addr = info->dlpi_addr + phdr->p_vaddr; + switch (phdr->p_type) { + case PT_LOAD: + match |= data->pc >= addr && data->pc < addr + phdr->p_memsz; + break; + case PT_ARM_EXIDX: + exidx_start = addr; + exidx_len = phdr->p_memsz; + break; + } + } + data->exidx_start = exidx_start; + data->exidx_len = exidx_len; + return match; +} + +uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount) +{ + struct find_exidx_data data; + data.pc = pc; + if (dl_iterate_phdr(find_exidx, &data) <= 0) + return 0; + *pcount = data.exidx_len / 8; + return data.exidx_start; +} --- a/src/ldso/dynlink.c +++ b/src/ldso/dynlink.c @@ -70,8 +70,8 @@ struct dso { char kernel_mapped; struct dso **deps, *needed_by; char *rpath_orig, *rpath; - void *tls_image; - size_t tls_len, tls_size, tls_align, tls_id, tls_offset; + struct tls_module tls; + size_t tls_id; size_t relro_start, relro_end; void **new_dtv; unsigned char *new_tls; @@ -99,7 +99,9 @@ struct symdef { int __init_tp(void *); void __init_libc(char **, char *); +void *__copy_tls(unsigned char *); +__attribute__((__visibility__("hidden"))) const char *__libc_get_version(void); static struct builtin_tls { @@ -123,6 +125,7 @@ static int noload; static jmp_buf *rtld_fail; static pthread_rwlock_t lock; static struct debug debug; +static struct tls_module *tls_tail; static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN; static size_t static_tls_cnt; static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE }; @@ -131,6 +134,15 @@ static struct fdpic_dummy_loadmap app_du struct debug *_dl_debug_addr = &debug; +__attribute__((__visibility__("hidden"))) +void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0; + +__attribute__((__visibility__("hidden"))) +extern void (*const __init_array_end)(void), (*const __fini_array_end)(void); + +weak_alias(__init_array_start, __init_array_end); +weak_alias(__fini_array_start, __fini_array_end); + static int dl_strcmp(const char *l, const char *r) { for (; *l==*r && *l; l++, r++); @@ -397,14 +409,14 @@ static void do_relocs(struct dso *dso, s break; #ifdef TLS_ABOVE_TP case REL_TPOFF: - *reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend; + *reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend; break; #else case REL_TPOFF: - *reloc_addr = tls_val - def.dso->tls_offset + addend; + *reloc_addr = tls_val - def.dso->tls.offset + addend; break; case REL_TPOFF_NEG: - *reloc_addr = def.dso->tls_offset - tls_val + addend; + *reloc_addr = def.dso->tls.offset - tls_val + addend; break; #endif case REL_TLSDESC: @@ -426,10 +438,10 @@ static void do_relocs(struct dso *dso, s } else { reloc_addr[0] = (size_t)__tlsdesc_static; #ifdef TLS_ABOVE_TP - reloc_addr[1] = 
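The new find_exidx.c above relies on the dl_iterate_phdr callback protocol: the callback runs once per loaded object and iteration stops as soon as it returns nonzero. A small demo of that protocol, not part of the patch, that simply counts PT_LOAD segments per object:

/* dl_iterate_phdr demo only. */
#define _GNU_SOURCE
#include <link.h>
#include <stdio.h>

static int count_loads(struct dl_phdr_info *info, size_t size, void *data)
{
	int i, loads = 0;
	for (i = 0; i < info->dlpi_phnum; i++)
		if (info->dlpi_phdr[i].p_type == PT_LOAD) loads++;
	printf("%s: %d PT_LOAD segment(s)\n",
	       info->dlpi_name[0] ? info->dlpi_name : "(main program)", loads);
	(void)size; (void)data;
	return 0;                /* zero keeps the iteration going */
}

int main(void)
{
	dl_iterate_phdr(count_loads, 0);
	return 0;
}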
tls_val + def.dso->tls_offset + reloc_addr[1] = tls_val + def.dso->tls.offset + TPOFF_K + addend; #else - reloc_addr[1] = tls_val - def.dso->tls_offset + reloc_addr[1] = tls_val - def.dso->tls.offset + addend; #endif } @@ -482,8 +494,14 @@ static void reclaim_gaps(struct dso *dso static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off) { - char *q = mmap(p, n, prot, flags, fd, off); - if (q != MAP_FAILED || errno != EINVAL) return q; + static int no_map_fixed; + char *q; + if (!no_map_fixed) { + q = mmap(p, n, prot, flags|MAP_FIXED, fd, off); + if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL) + return q; + no_map_fixed = 1; + } /* Fallbacks for MAP_FIXED failure on NOMMU kernels. */ if (flags & MAP_ANONYMOUS) { memset(p, 0, n); @@ -561,9 +579,9 @@ static void *map_library(int fd, struct dyn = ph->p_vaddr; } else if (ph->p_type == PT_TLS) { tls_image = ph->p_vaddr; - dso->tls_align = ph->p_align; - dso->tls_len = ph->p_filesz; - dso->tls_size = ph->p_memsz; + dso->tls.align = ph->p_align; + dso->tls.len = ph->p_filesz; + dso->tls.size = ph->p_memsz; } else if (ph->p_type == PT_GNU_RELRO) { dso->relro_start = ph->p_vaddr & -PAGE_SIZE; dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE; @@ -593,7 +611,7 @@ static void *map_library(int fd, struct ((ph->p_flags&PF_W) ? PROT_WRITE: 0) | ((ph->p_flags&PF_X) ? PROT_EXEC : 0)); map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1), - prot, (prot&PROT_WRITE) ? MAP_PRIVATE : MAP_SHARED, + prot, MAP_PRIVATE, fd, ph->p_offset & -PAGE_SIZE); if (map == MAP_FAILED) { unmap_library(dso); @@ -604,6 +622,19 @@ static void *map_library(int fd, struct dso->loadmap->segs[i].p_vaddr = ph->p_vaddr; dso->loadmap->segs[i].p_memsz = ph->p_memsz; i++; + if (prot & PROT_WRITE) { + size_t brk = (ph->p_vaddr & PAGE_SIZE-1) + + ph->p_filesz; + size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE; + size_t pgend = brk + ph->p_memsz - ph->p_filesz + + PAGE_SIZE-1 & -PAGE_SIZE; + if (pgend > pgbrk && mmap_fixed(map+pgbrk, + pgend-pgbrk, prot, + MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, + -1, off_start) == MAP_FAILED) + goto error; + memset(map + brk, 0, pgbrk-brk); + } } map = (void *)dso->loadmap->segs[0].addr; map_len = 0; @@ -618,7 +649,11 @@ static void *map_library(int fd, struct * the length of the file. This is okay because we will not * use the invalid part; we just need to reserve the right * amount of virtual address space to map over later. */ - map = mmap((void *)addr_min, map_len, prot, MAP_PRIVATE, fd, off_start); + map = DL_NOMMU_SUPPORT + ? 
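The FDPIC branch above zero-fills the tail of each writable segment: memset covers the partial page past p_filesz, and anonymous pages cover the remaining whole pages up to p_memsz. A standalone rehearsal of that page-rounding arithmetic, with made-up numbers:

/* vaddr_off stands for (p_vaddr & PAGE_SIZE-1); [brk,pgbrk) would be
 * cleared with memset and [pgbrk,pgend) mapped as anonymous zero pages.
 * Sample values are hypothetical. */
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	size_t vaddr_off = 0x234, filesz = 0x1a00, memsz = 0x5000;
	size_t brk   = vaddr_off + filesz;
	size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE;
	size_t pgend = brk + memsz - filesz + PAGE_SIZE-1 & -PAGE_SIZE;
	printf("memset range:   [%#zx, %#zx)\n", brk, pgbrk);
	printf("anon map range: [%#zx, %#zx)\n", pgbrk, pgend);
	return 0;
}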
mmap((void *)addr_min, map_len, PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) + : mmap((void *)addr_min, map_len, prot, + MAP_PRIVATE, fd, off_start); if (map==MAP_FAILED) goto error; dso->map = map; dso->map_len = map_len; @@ -643,7 +678,8 @@ static void *map_library(int fd, struct dso->phentsize = eh->e_phentsize; } /* Reuse the existing mapping for the lowest-address LOAD */ - if ((ph->p_vaddr & -PAGE_SIZE) == addr_min) continue; + if ((ph->p_vaddr & -PAGE_SIZE) == addr_min && !DL_NOMMU_SUPPORT) + continue; this_min = ph->p_vaddr & -PAGE_SIZE; this_max = ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE; off_start = ph->p_offset & -PAGE_SIZE; @@ -670,7 +706,7 @@ static void *map_library(int fd, struct done_mapping: dso->base = base; dso->dynv = laddr(dso, dyn); - if (dso->tls_size) dso->tls_image = laddr(dso, tls_image); + if (dso->tls.size) dso->tls.image = laddr(dso, tls_image); if (!runtime) reclaim_gaps(dso); free(allocated_buf); return map; @@ -987,8 +1023,8 @@ static struct dso *load_library(const ch * extended DTV capable of storing an additional slot for * the newly-loaded DSO. */ alloc_size = sizeof *p + strlen(pathname) + 1; - if (runtime && temp_dso.tls_image) { - size_t per_th = temp_dso.tls_size + temp_dso.tls_align + if (runtime && temp_dso.tls.image) { + size_t per_th = temp_dso.tls.size + temp_dso.tls.align + sizeof(void *) * (tls_cnt+3); n_th = libc.threads_minus_1 + 1; if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX; @@ -1009,22 +1045,25 @@ static struct dso *load_library(const ch strcpy(p->name, pathname); /* Add a shortname only if name arg was not an explicit pathname. */ if (pathname != name) p->shortname = strrchr(p->name, '/')+1; - if (p->tls_image) { + if (p->tls.image) { p->tls_id = ++tls_cnt; - tls_align = MAXP2(tls_align, p->tls_align); + tls_align = MAXP2(tls_align, p->tls.align); #ifdef TLS_ABOVE_TP - p->tls_offset = tls_offset + ( (tls_align-1) & - -(tls_offset + (uintptr_t)p->tls_image) ); - tls_offset += p->tls_size; + p->tls.offset = tls_offset + ( (tls_align-1) & + -(tls_offset + (uintptr_t)p->tls.image) ); + tls_offset += p->tls.size; #else - tls_offset += p->tls_size + p->tls_align - 1; - tls_offset -= (tls_offset + (uintptr_t)p->tls_image) - & (p->tls_align-1); - p->tls_offset = tls_offset; + tls_offset += p->tls.size + p->tls.align - 1; + tls_offset -= (tls_offset + (uintptr_t)p->tls.image) + & (p->tls.align-1); + p->tls.offset = tls_offset; #endif p->new_dtv = (void *)(-sizeof(size_t) & (uintptr_t)(p->name+strlen(p->name)+sizeof(size_t))); p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1)); + if (tls_tail) tls_tail->next = &p->tls; + else libc.tls_head = &p->tls; + tls_tail = &p->tls; } tail->next = p; @@ -1151,7 +1190,7 @@ static void kernel_mapped_dso(struct dso p->kernel_mapped = 1; } -static void do_fini() +void __libc_exit_fini() { struct dso *p; size_t dyn[DYN_CNT]; @@ -1214,53 +1253,8 @@ static void dl_debug_state(void) weak_alias(dl_debug_state, _dl_debug_state); -void __reset_tls() +void __init_tls(size_t *auxv) { - pthread_t self = __pthread_self(); - struct dso *p; - for (p=head; p; p=p->next) { - if (!p->tls_id || !self->dtv[p->tls_id]) continue; - memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len); - memset((char *)self->dtv[p->tls_id]+p->tls_len, 0, - p->tls_size - p->tls_len); - if (p->tls_id == (size_t)self->dtv[0]) break; - } -} - -void *__copy_tls(unsigned char *mem) -{ - pthread_t td; - struct dso *p; - void **dtv; - -#ifdef TLS_ABOVE_TP - dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1); - - 
mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1); - td = (pthread_t)mem; - mem += sizeof(struct pthread); - - for (p=head; p; p=p->next) { - if (!p->tls_id) continue; - dtv[p->tls_id] = mem + p->tls_offset; - memcpy(dtv[p->tls_id], p->tls_image, p->tls_len); - } -#else - dtv = (void **)mem; - - mem += libc.tls_size - sizeof(struct pthread); - mem -= (uintptr_t)mem & (tls_align-1); - td = (pthread_t)mem; - - for (p=head; p; p=p->next) { - if (!p->tls_id) continue; - dtv[p->tls_id] = mem - p->tls_offset; - memcpy(dtv[p->tls_id], p->tls_image, p->tls_len); - } -#endif - dtv[0] = (void *)tls_cnt; - td->dtv = td->dtv_copy = dtv; - return td; } __attribute__((__visibility__("hidden"))) @@ -1286,7 +1280,7 @@ void *__tls_get_new(size_t *v) /* Get new DTV space from new DSO if needed */ if (v[0] > (size_t)self->dtv[0]) { void **newdtv = p->new_dtv + - (v[0]+1)*sizeof(void *)*a_fetch_add(&p->new_dtv_idx,1); + (v[0]+1)*a_fetch_add(&p->new_dtv_idx,1); memcpy(newdtv, self->dtv, ((size_t)self->dtv[0]+1) * sizeof(void *)); newdtv[0] = (void *)v[0]; @@ -1297,12 +1291,12 @@ void *__tls_get_new(size_t *v) unsigned char *mem; for (p=head; ; p=p->next) { if (!p->tls_id || self->dtv[p->tls_id]) continue; - mem = p->new_tls + (p->tls_size + p->tls_align) + mem = p->new_tls + (p->tls.size + p->tls.align) * a_fetch_add(&p->new_tls_idx,1); - mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) - & (p->tls_align-1); + mem += ((uintptr_t)p->tls.image - (uintptr_t)mem) + & (p->tls.align-1); self->dtv[p->tls_id] = mem; - memcpy(mem, p->tls_image, p->tls_len); + memcpy(mem, p->tls.image, p->tls.len); if (p->tls_id == v[0]) break; } __restore_sigs(&set); @@ -1311,6 +1305,8 @@ void *__tls_get_new(size_t *v) static void update_tls_size() { + libc.tls_cnt = tls_cnt; + libc.tls_align = tls_align; libc.tls_size = ALIGN( (1+tls_cnt) * sizeof(void *) + tls_offset + @@ -1421,6 +1417,7 @@ _Noreturn void __dls3(size_t *sp) * use during dynamic linking. If possible it will also serve as the * thread pointer at runtime. 
*/ libc.tls_size = sizeof builtin_tls; + libc.tls_align = tls_align; if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) { a_crash(); } @@ -1448,13 +1445,13 @@ _Noreturn void __dls3(size_t *sp) interp_off = (size_t)phdr->p_vaddr; else if (phdr->p_type == PT_TLS) { tls_image = phdr->p_vaddr; - app.tls_len = phdr->p_filesz; - app.tls_size = phdr->p_memsz; - app.tls_align = phdr->p_align; + app.tls.len = phdr->p_filesz; + app.tls.size = phdr->p_memsz; + app.tls.align = phdr->p_align; } } if (DL_FDPIC) app.loadmap = app_loadmap; - if (app.tls_size) app.tls_image = laddr(&app, tls_image); + if (app.tls.size) app.tls.image = laddr(&app, tls_image); if (interp_off) ldso.name = laddr(&app, interp_off); if ((aux[0] & (1UL<tls_id; - info.dlpi_tls_data = current->tls_image; + info.dlpi_tls_data = current->tls.image; ret = (callback)(&info, sizeof (info), data); --- a/src/locale/langinfo.c +++ b/src/locale/langinfo.c @@ -37,23 +37,23 @@ char *__nl_langinfo_l(nl_item item, loca switch (cat) { case LC_NUMERIC: - if (idx > 1) return NULL; + if (idx > 1) return ""; str = c_numeric; break; case LC_TIME: - if (idx > 0x31) return NULL; + if (idx > 0x31) return ""; str = c_time; break; case LC_MONETARY: - if (idx > 0) return NULL; + if (idx > 0) return ""; str = ""; break; case LC_MESSAGES: - if (idx > 3) return NULL; + if (idx > 3) return ""; str = c_messages; break; default: - return NULL; + return ""; } for (; idx; idx--, str++) for (; *str; str++); --- a/src/malloc/lite_malloc.c +++ b/src/malloc/lite_malloc.c @@ -8,7 +8,7 @@ void *__expand_heap(size_t *); -void *__simple_malloc(size_t n) +static void *__simple_malloc(size_t n) { static char *cur, *end; static volatile int lock[2]; --- a/src/math/__rem_pio2.c +++ b/src/math/__rem_pio2.c @@ -118,7 +118,7 @@ int __rem_pio2(double x, double *y) if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ medium: /* rint(x/(pi/2)), Assume round-to-nearest. */ - fn = x*invpio2 + toint - toint; + fn = (double_t)x*invpio2 + toint - toint; n = (int32_t)fn; r = x - fn*pio2_1; w = fn*pio2_1t; /* 1st round, good to 85 bits */ --- a/src/math/__rem_pio2f.c +++ b/src/math/__rem_pio2f.c @@ -51,7 +51,7 @@ int __rem_pio2f(float x, double *y) /* 25+53 bit pi is good enough for medium size */ if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. 
*/ - fn = x*invpio2 + toint - toint; + fn = (double_t)x*invpio2 + toint - toint; n = (int32_t)fn; *y = x - fn*pio2_1 - fn*pio2_1t; return n; --- /dev/null +++ b/src/math/arm/fabs.c @@ -0,0 +1,15 @@ +#include <math.h> + +#if __ARM_PCS_VFP + +double fabs(double x) +{ + __asm__ ("vabs.f64 %P0, %P1" : "=w"(x) : "w"(x)); + return x; +} + +#else + +#include "../fabs.c" + +#endif --- /dev/null +++ b/src/math/arm/fabsf.c @@ -0,0 +1,15 @@ +#include <math.h> + +#if __ARM_PCS_VFP + +float fabsf(float x) +{ + __asm__ ("vabs.f32 %0, %1" : "=t"(x) : "t"(x)); + return x; +} + +#else + +#include "../fabsf.c" + +#endif --- /dev/null +++ b/src/math/arm/sqrt.c @@ -0,0 +1,15 @@ +#include <math.h> + +#if __VFP_FP__ && !__SOFTFP__ + +double sqrt(double x) +{ + __asm__ ("vsqrt.f64 %P0, %P1" : "=w"(x) : "w"(x)); + return x; +} + +#else + +#include "../sqrt.c" + +#endif --- /dev/null +++ b/src/math/arm/sqrtf.c @@ -0,0 +1,15 @@ +#include <math.h> + +#if __VFP_FP__ && !__SOFTFP__ + +float sqrtf(float x) +{ + __asm__ ("vsqrt.f32 %0, %1" : "=t"(x) : "t"(x)); + return x; +} + +#else + +#include "../sqrtf.c" + +#endif --- a/src/math/armebhf/fabs.sub +++ /dev/null @@ -1 +0,0 @@ -../armhf/fabs.s --- a/src/math/armebhf/fabsf.sub +++ /dev/null @@ -1 +0,0 @@ -../armhf/fabsf.s --- a/src/math/armebhf/sqrt.sub +++ /dev/null @@ -1 +0,0 @@ -../armhf/sqrt.s --- a/src/math/armebhf/sqrtf.sub +++ /dev/null @@ -1 +0,0 @@ -../armhf/sqrtf.s --- a/src/math/armhf/fabs.s +++ /dev/null @@ -1,7 +0,0 @@ -.fpu vfp -.text -.global fabs -.type fabs,%function -fabs: - vabs.f64 d0, d0 - bx lr --- a/src/math/armhf/fabs.sub +++ /dev/null @@ -1 +0,0 @@ -fabs.s --- a/src/math/armhf/fabsf.s +++ /dev/null @@ -1,7 +0,0 @@ -.fpu vfp -.text -.global fabsf -.type fabsf,%function -fabsf: - vabs.f32 s0, s0 - bx lr --- a/src/math/armhf/fabsf.sub +++ /dev/null @@ -1 +0,0 @@ -fabsf.s --- a/src/math/armhf/sqrt.s +++ /dev/null @@ -1,7 +0,0 @@ -.fpu vfp -.text -.global sqrt -.type sqrt,%function -sqrt: - vsqrt.f64 d0, d0 - bx lr --- a/src/math/armhf/sqrt.sub +++ /dev/null @@ -1 +0,0 @@ -sqrt.s --- a/src/math/armhf/sqrtf.s +++ /dev/null @@ -1,7 +0,0 @@ -.fpu vfp -.text -.global sqrtf -.type sqrtf,%function -sqrtf: - vsqrt.f32 s0, s0 - bx lr --- a/src/math/armhf/sqrtf.sub +++ /dev/null @@ -1 +0,0 @@ -sqrtf.s --- a/src/math/hypot.c +++ b/src/math/hypot.c @@ -12,10 +12,10 @@ static void sq(double_t *hi, double_t *l { double_t xh, xl, xc; - xc = x*SPLIT; + xc = (double_t)x*SPLIT; xh = x - xc + xc; xl = x - xh; - *hi = x*x; + *hi = (double_t)x*x; *lo = xh*xh - *hi + 2*xh*xl + xl*xl; } --- a/src/mman/mremap.c +++ b/src/mman/mremap.c @@ -1,17 +1,31 @@ +#define _GNU_SOURCE #include <unistd.h> #include <sys/mman.h> +#include <errno.h> +#include <stdint.h> #include <stdarg.h> #include "syscall.h" #include "libc.h" +static void dummy(void) { } +weak_alias(dummy, __vm_wait); + void *__mremap(void *old_addr, size_t old_len, size_t new_len, int flags, ...)
{ va_list ap; - void *new_addr; - - va_start(ap, flags); - new_addr = va_arg(ap, void *); - va_end(ap); + void *new_addr = 0; + + if (new_len >= PTRDIFF_MAX) { + errno = ENOMEM; + return MAP_FAILED; + } + + if (flags & MREMAP_FIXED) { + __vm_wait(); + va_start(ap, flags); + new_addr = va_arg(ap, void *); + va_end(ap); + } return (void *)syscall(SYS_mremap, old_addr, old_len, new_len, flags, new_addr); } --- a/src/network/getifaddrs.c +++ b/src/network/getifaddrs.c @@ -162,13 +162,26 @@ static int netlink_msg_to_ifaddr(void *p for (rta = NLMSG_RTA(h, sizeof(*ifa)); NLMSG_RTAOK(rta, h); rta = RTA_NEXT(rta)) { switch (rta->rta_type) { case IFA_ADDRESS: - copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); + /* If ifa_addr is already set we, received an IFA_LOCAL before + * so treat this as destination address */ + if (ifs->ifa.ifa_addr) + copy_addr(&ifs->ifa.ifa_dstaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); + else + copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); break; case IFA_BROADCAST: - /* For point-to-point links this is peer, but ifa_broadaddr - * and ifa_dstaddr are union, so this works for both. */ copy_addr(&ifs->ifa.ifa_broadaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); break; + case IFA_LOCAL: + /* If ifa_addr is set and we get IFA_LOCAL, assume we have + * a point-to-point network. Move address to correct field. */ + if (ifs->ifa.ifa_addr) { + ifs->ifu = ifs->addr; + ifs->ifa.ifa_dstaddr = &ifs->ifu.sa; + memset(&ifs->addr, 0, sizeof(ifs->addr)); + } + copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index); + break; case IFA_LABEL: if (RTA_DATALEN(rta) < sizeof(ifs->name)) { memcpy(ifs->name, RTA_DATA(rta), RTA_DATALEN(rta)); --- a/src/network/getnameinfo.c +++ b/src/network/getnameinfo.c @@ -135,13 +135,13 @@ int getnameinfo(const struct sockaddr *r switch (af) { case AF_INET: a = (void *)&((struct sockaddr_in *)sa)->sin_addr; - if (sl != sizeof(struct sockaddr_in)) return EAI_FAMILY; + if (sl < sizeof(struct sockaddr_in)) return EAI_FAMILY; mkptr4(ptr, a); scopeid = 0; break; case AF_INET6: a = (void *)&((struct sockaddr_in6 *)sa)->sin6_addr; - if (sl != sizeof(struct sockaddr_in6)) return EAI_FAMILY; + if (sl < sizeof(struct sockaddr_in6)) return EAI_FAMILY; if (memcmp(a, "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12)) mkptr6(ptr, a); else --- a/src/network/if_nametoindex.c +++ b/src/network/if_nametoindex.c @@ -10,7 +10,7 @@ unsigned if_nametoindex(const char *name struct ifreq ifr; int fd, r; - if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return -1; + if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return 0; strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); r = ioctl(fd, SIOCGIFINDEX, &ifr); __syscall(SYS_close, fd); --- a/src/network/lookup_name.c +++ b/src/network/lookup_name.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "lookup.h" #include "stdio_impl.h" #include "syscall.h" @@ -51,7 +52,14 @@ static int name_from_hosts(struct addres int cnt = 0; unsigned char _buf[1032]; FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf); - if (!f) return 0; + if (!f) switch (errno) { + case ENOENT: + case ENOTDIR: + case EACCES: + return 0; + default: + return EAI_SYSTEM; + } while (fgets(line, sizeof line, f) && cnt < MAXADDRS) { char *p, *z; --- a/src/network/lookup_serv.c +++ 
b/src/network/lookup_serv.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "lookup.h" #include "stdio_impl.h" @@ -69,7 +70,14 @@ int __lookup_serv(struct service buf[sta unsigned char _buf[1032]; FILE _f, *f = __fopen_rb_ca("/etc/services", &_f, _buf, sizeof _buf); - if (!f) return EAI_SERVICE; + if (!f) switch (errno) { + case ENOENT: + case ENOTDIR: + case EACCES: + return EAI_SERVICE; + default: + return EAI_SYSTEM; + } while (fgets(line, sizeof line, f) && cnt < MAXSERVS) { if ((p=strchr(line, '#'))) *p++='\n', *p=0; --- a/src/network/proto.c +++ b/src/network/proto.c @@ -9,21 +9,36 @@ static const unsigned char protos[] = { "\001icmp\0" "\002igmp\0" "\003ggp\0" + "\004ipencap\0" + "\005st\0" "\006tcp\0" + "\008egp\0" "\014pup\0" "\021udp\0" - "\026idp\0" + "\024hmp\0" + "\026xns-idp\0" + "\033rdp\0" + "\035iso-tp4\0" + "\044xtp\0" + "\045ddp\0" + "\046idpr-cmtp\0" "\051ipv6\0" "\053ipv6-route\0" "\054ipv6-frag\0" + "\055idrp\0" + "\056rsvp\0" "\057gre\0" "\062esp\0" "\063ah\0" + "\071skip\0" "\072ipv6-icmp\0" "\073ipv6-nonxt\0" "\074ipv6-opts\0" + "\111rspf\0" + "\121vmtp\0" "\131ospf\0" "\136ipip\0" + "\142encap\0" "\147pim\0" "\377raw" }; --- a/src/network/res_msend.c +++ b/src/network/res_msend.c @@ -54,7 +54,15 @@ int __res_msend(int nqueries, const unsi /* Get nameservers from resolv.conf, fallback to localhost */ f = __fopen_rb_ca("/etc/resolv.conf", &_f, _buf, sizeof _buf); - if (f) for (nns=0; nns<3 && fgets(line, sizeof line, f); ) { + if (!f) switch (errno) { + case ENOENT: + case ENOTDIR: + case EACCES: + goto no_resolv_conf; + default: + return -1; + } + for (nns=0; nns<3 && fgets(line, sizeof line, f); ) { if (!strncmp(line, "options", 7) && isspace(line[7])) { unsigned long x; char *p, *z; @@ -92,7 +100,8 @@ int __res_msend(int nqueries, const unsi } } } - if (f) __fclose_ca(f); + __fclose_ca(f); +no_resolv_conf: if (!nns) { ns[0].sin.sin_family = AF_INET; ns[0].sin.sin_port = htons(53); --- a/src/search/tsearch_avl.c +++ b/src/search/tsearch_avl.c @@ -77,38 +77,45 @@ static struct node *find(struct node *n, return find(n->right, k, cmp); } -static struct node *insert(struct node **n, const void *k, - int (*cmp)(const void *, const void *), int *new) +static struct node *insert(struct node *n, const void *k, + int (*cmp)(const void *, const void *), struct node **found) { - struct node *r = *n; + struct node *r; int c; - if (!r) { - *n = r = malloc(sizeof **n); - if (r) { - r->key = k; - r->left = r->right = 0; - r->height = 1; + if (!n) { + n = malloc(sizeof *n); + if (n) { + n->key = k; + n->left = n->right = 0; + n->height = 1; } - *new = 1; - return r; + *found = n; + return n; + } + c = cmp(k, n->key); + if (c == 0) { + *found = n; + return 0; + } + r = insert(c < 0 ? 
n->left : n->right, k, cmp, found); + if (r) { + if (c < 0) + n->left = r; + else + n->right = r; + r = balance(n); } - c = cmp(k, r->key); - if (c == 0) - return r; - if (c < 0) - r = insert(&r->left, k, cmp, new); - else - r = insert(&r->right, k, cmp, new); - if (*new) - *n = balance(*n); return r; } -static struct node *movr(struct node *n, struct node *r) { - if (!n) - return r; - n->right = movr(n->right, r); +static struct node *remove_rightmost(struct node *n, struct node **rightmost) +{ + if (!n->right) { + *rightmost = n; + return n->left; + } + n->right = remove_rightmost(n->right, rightmost); return balance(n); } @@ -122,7 +129,13 @@ static struct node *remove(struct node * c = cmp(k, (*n)->key); if (c == 0) { struct node *r = *n; - *n = movr(r->left, r->right); + if (r->left) { + r->left = remove_rightmost(r->left, n); + (*n)->left = r->left; + (*n)->right = r->right; + *n = balance(*n); + } else + *n = r->right; free(r); return parent; } @@ -138,6 +151,8 @@ static struct node *remove(struct node * void *tdelete(const void *restrict key, void **restrict rootp, int(*compar)(const void *, const void *)) { + if (!rootp) + return 0; struct node *n = *rootp; struct node *ret; /* last argument is arbitrary non-null pointer @@ -150,17 +165,21 @@ void *tdelete(const void *restrict key, void *tfind(const void *key, void *const *rootp, int(*compar)(const void *, const void *)) { + if (!rootp) + return 0; return find(*rootp, key, compar); } void *tsearch(const void *key, void **rootp, int (*compar)(const void *, const void *)) { - int new = 0; - struct node *n = *rootp; + struct node *update; struct node *ret; - ret = insert(&n, key, compar, &new); - *rootp = n; + if (!rootp) + return 0; + update = insert(*rootp, key, compar, &ret); + if (update) + *rootp = update; return ret; } --- a/src/setjmp/arm/longjmp.s +++ b/src/setjmp/arm/longjmp.s @@ -1,3 +1,4 @@ +.syntax unified .global _longjmp .global longjmp .type _longjmp,%function @@ -20,7 +21,11 @@ longjmp: ldc p2, cr4, [ip], #48 2: tst r1,#0x40 beq 2f - .word 0xecbc8b10 /* vldmia ip!, {d8-d15} */ + .fpu vfp + vldmia ip!, {d8-d15} + .fpu softvfp + .eabi_attribute 10, 0 + .eabi_attribute 27, 0 2: tst r1,#0x200 beq 3f ldcl p1, cr10, [ip], #8 @@ -29,9 +34,7 @@ longjmp: ldcl p1, cr13, [ip], #8 ldcl p1, cr14, [ip], #8 ldcl p1, cr15, [ip], #8 -3: tst lr,#1 - moveq pc,lr - bx lr +3: bx lr .hidden __hwcap 1: .word __hwcap-1b --- a/src/setjmp/arm/setjmp.s +++ b/src/setjmp/arm/setjmp.s @@ -1,3 +1,4 @@ +.syntax unified .global __setjmp .global _setjmp .global setjmp @@ -22,7 +23,11 @@ setjmp: stc p2, cr4, [ip], #48 2: tst r1,#0x40 beq 2f - .word 0xecac8b10 /* vstmia ip!, {d8-d15} */ + .fpu vfp + vstmia ip!, {d8-d15} + .fpu softvfp + .eabi_attribute 10, 0 + .eabi_attribute 27, 0 2: tst r1,#0x200 beq 3f stcl p1, cr10, [ip], #8 @@ -31,9 +36,7 @@ setjmp: stcl p1, cr13, [ip], #8 stcl p1, cr14, [ip], #8 stcl p1, cr15, [ip], #8 -3: tst lr,#1 - moveq pc,lr - bx lr +3: bx lr .hidden __hwcap 1: .word __hwcap-1b --- a/src/setjmp/mips-sf/longjmp.s +++ /dev/null @@ -1,25 +0,0 @@ -.set noreorder - -.global _longjmp -.global longjmp -.type _longjmp,@function -.type longjmp,@function -_longjmp: -longjmp: - move $2, $5 - bne $2, $0, 1f - nop - addu $2, $2, 1 -1: lw $ra, 0($4) - lw $sp, 4($4) - lw $16, 8($4) - lw $17, 12($4) - lw $18, 16($4) - lw $19, 20($4) - lw $20, 24($4) - lw $21, 28($4) - lw $22, 32($4) - lw $23, 36($4) - lw $30, 40($4) - jr $ra - lw $28, 44($4) --- a/src/setjmp/mips-sf/longjmp.sub +++ /dev/null @@ -1 +0,0 @@ -longjmp.s --- 
a/src/setjmp/mips-sf/setjmp.s +++ /dev/null @@ -1,25 +0,0 @@ -.set noreorder - -.global __setjmp -.global _setjmp -.global setjmp -.type __setjmp,@function -.type _setjmp,@function -.type setjmp,@function -__setjmp: -_setjmp: -setjmp: - sw $ra, 0($4) - sw $sp, 4($4) - sw $16, 8($4) - sw $17, 12($4) - sw $18, 16($4) - sw $19, 20($4) - sw $20, 24($4) - sw $21, 28($4) - sw $22, 32($4) - sw $23, 36($4) - sw $30, 40($4) - sw $28, 44($4) - jr $ra - li $2, 0 --- a/src/setjmp/mips-sf/setjmp.sub +++ /dev/null @@ -1 +0,0 @@ -setjmp.s --- /dev/null +++ b/src/setjmp/mips/longjmp.S @@ -0,0 +1,40 @@ +.set noreorder + +.global _longjmp +.global longjmp +.type _longjmp,@function +.type longjmp,@function +_longjmp: +longjmp: + move $2, $5 + bne $2, $0, 1f + nop + addu $2, $2, 1 +1: +#ifndef __mips_soft_float + lwc1 $20, 56($4) + lwc1 $21, 60($4) + lwc1 $22, 64($4) + lwc1 $23, 68($4) + lwc1 $24, 72($4) + lwc1 $25, 76($4) + lwc1 $26, 80($4) + lwc1 $27, 84($4) + lwc1 $28, 88($4) + lwc1 $29, 92($4) + lwc1 $30, 96($4) + lwc1 $31, 100($4) +#endif + lw $ra, 0($4) + lw $sp, 4($4) + lw $16, 8($4) + lw $17, 12($4) + lw $18, 16($4) + lw $19, 20($4) + lw $20, 24($4) + lw $21, 28($4) + lw $22, 32($4) + lw $23, 36($4) + lw $30, 40($4) + jr $ra + lw $28, 44($4) --- a/src/setjmp/mips/longjmp.s +++ /dev/null @@ -1,37 +0,0 @@ -.set noreorder - -.global _longjmp -.global longjmp -.type _longjmp,@function -.type longjmp,@function -_longjmp: -longjmp: - move $2, $5 - bne $2, $0, 1f - nop - addu $2, $2, 1 -1: lwc1 $20, 56($4) - lwc1 $21, 60($4) - lwc1 $22, 64($4) - lwc1 $23, 68($4) - lwc1 $24, 72($4) - lwc1 $25, 76($4) - lwc1 $26, 80($4) - lwc1 $27, 84($4) - lwc1 $28, 88($4) - lwc1 $29, 92($4) - lwc1 $30, 96($4) - lwc1 $31, 100($4) - lw $ra, 0($4) - lw $sp, 4($4) - lw $16, 8($4) - lw $17, 12($4) - lw $18, 16($4) - lw $19, 20($4) - lw $20, 24($4) - lw $21, 28($4) - lw $22, 32($4) - lw $23, 36($4) - lw $30, 40($4) - jr $ra - lw $28, 44($4) --- /dev/null +++ b/src/setjmp/mips/setjmp.S @@ -0,0 +1,39 @@ +.set noreorder + +.global __setjmp +.global _setjmp +.global setjmp +.type __setjmp,@function +.type _setjmp,@function +.type setjmp,@function +__setjmp: +_setjmp: +setjmp: + sw $ra, 0($4) + sw $sp, 4($4) + sw $16, 8($4) + sw $17, 12($4) + sw $18, 16($4) + sw $19, 20($4) + sw $20, 24($4) + sw $21, 28($4) + sw $22, 32($4) + sw $23, 36($4) + sw $30, 40($4) + sw $28, 44($4) +#ifndef __mips_soft_float + swc1 $20, 56($4) + swc1 $21, 60($4) + swc1 $22, 64($4) + swc1 $23, 68($4) + swc1 $24, 72($4) + swc1 $25, 76($4) + swc1 $26, 80($4) + swc1 $27, 84($4) + swc1 $28, 88($4) + swc1 $29, 92($4) + swc1 $30, 96($4) + swc1 $31, 100($4) +#endif + jr $ra + li $2, 0 --- a/src/setjmp/mips/setjmp.s +++ /dev/null @@ -1,37 +0,0 @@ -.set noreorder - -.global __setjmp -.global _setjmp -.global setjmp -.type __setjmp,@function -.type _setjmp,@function -.type setjmp,@function -__setjmp: -_setjmp: -setjmp: - sw $ra, 0($4) - sw $sp, 4($4) - sw $16, 8($4) - sw $17, 12($4) - sw $18, 16($4) - sw $19, 20($4) - sw $20, 24($4) - sw $21, 28($4) - sw $22, 32($4) - sw $23, 36($4) - sw $30, 40($4) - sw $28, 44($4) - swc1 $20, 56($4) - swc1 $21, 60($4) - swc1 $22, 64($4) - swc1 $23, 68($4) - swc1 $24, 72($4) - swc1 $25, 76($4) - swc1 $26, 80($4) - swc1 $27, 84($4) - swc1 $28, 88($4) - swc1 $29, 92($4) - swc1 $30, 96($4) - swc1 $31, 100($4) - jr $ra - li $2, 0 --- a/src/setjmp/mipsel-sf/longjmp.sub +++ /dev/null @@ -1 +0,0 @@ -../mips-sf/longjmp.s --- a/src/setjmp/mipsel-sf/setjmp.sub +++ /dev/null @@ -1 +0,0 @@ -../mips-sf/setjmp.s --- a/src/setjmp/sh-nofpu/longjmp.s 
+++ /dev/null @@ -1,22 +0,0 @@ -.global _longjmp -.global longjmp -.type _longjmp, @function -.type longjmp, @function -_longjmp: -longjmp: - mov.l @r4+, r8 - mov.l @r4+, r9 - mov.l @r4+, r10 - mov.l @r4+, r11 - mov.l @r4+, r12 - mov.l @r4+, r13 - mov.l @r4+, r14 - mov.l @r4+, r15 - lds.l @r4+, pr - - tst r5, r5 - movt r0 - add r5, r0 - - rts - nop --- a/src/setjmp/sh-nofpu/longjmp.sub +++ /dev/null @@ -1 +0,0 @@ -longjmp.s --- a/src/setjmp/sh-nofpu/setjmp.s +++ /dev/null @@ -1,24 +0,0 @@ -.global ___setjmp -.hidden ___setjmp -.global __setjmp -.global _setjmp -.global setjmp -.type __setjmp, @function -.type _setjmp, @function -.type setjmp, @function -___setjmp: -__setjmp: -_setjmp: -setjmp: - add #36, r4 - sts.l pr, @-r4 - mov.l r15 @-r4 - mov.l r14, @-r4 - mov.l r13, @-r4 - mov.l r12, @-r4 - mov.l r11, @-r4 - mov.l r10, @-r4 - mov.l r9, @-r4 - mov.l r8, @-r4 - rts - mov #0, r0 --- a/src/setjmp/sh-nofpu/setjmp.sub +++ /dev/null @@ -1 +0,0 @@ -setjmp.s --- /dev/null +++ b/src/setjmp/sh/longjmp.S @@ -0,0 +1,28 @@ +.global _longjmp +.global longjmp +.type _longjmp, @function +.type longjmp, @function +_longjmp: +longjmp: + mov.l @r4+, r8 + mov.l @r4+, r9 + mov.l @r4+, r10 + mov.l @r4+, r11 + mov.l @r4+, r12 + mov.l @r4+, r13 + mov.l @r4+, r14 + mov.l @r4+, r15 + lds.l @r4+, pr +#if __SH_FPU_ANY__ || __SH4__ + fmov.s @r4+, fr12 + fmov.s @r4+, fr13 + fmov.s @r4+, fr14 + fmov.s @r4+, fr15 +#endif + + tst r5, r5 + movt r0 + add r5, r0 + + rts + nop --- a/src/setjmp/sh/longjmp.s +++ /dev/null @@ -1,26 +0,0 @@ -.global _longjmp -.global longjmp -.type _longjmp, @function -.type longjmp, @function -_longjmp: -longjmp: - mov.l @r4+, r8 - mov.l @r4+, r9 - mov.l @r4+, r10 - mov.l @r4+, r11 - mov.l @r4+, r12 - mov.l @r4+, r13 - mov.l @r4+, r14 - mov.l @r4+, r15 - lds.l @r4+, pr - fmov.s @r4+, fr12 - fmov.s @r4+, fr13 - fmov.s @r4+, fr14 - fmov.s @r4+, fr15 - - tst r5, r5 - movt r0 - add r5, r0 - - rts - nop --- /dev/null +++ b/src/setjmp/sh/setjmp.S @@ -0,0 +1,32 @@ +.global ___setjmp +.hidden ___setjmp +.global __setjmp +.global _setjmp +.global setjmp +.type __setjmp, @function +.type _setjmp, @function +.type setjmp, @function +___setjmp: +__setjmp: +_setjmp: +setjmp: +#if __SH_FPU_ANY__ || __SH4__ + add #52, r4 + fmov.s fr15, @-r4 + fmov.s fr14, @-r4 + fmov.s fr13, @-r4 + fmov.s fr12, @-r4 +#else + add #36, r4 +#endif + sts.l pr, @-r4 + mov.l r15, @-r4 + mov.l r14, @-r4 + mov.l r13, @-r4 + mov.l r12, @-r4 + mov.l r11, @-r4 + mov.l r10, @-r4 + mov.l r9, @-r4 + mov.l r8, @-r4 + rts + mov #0, r0 --- a/src/setjmp/sh/setjmp.s +++ /dev/null @@ -1,28 +0,0 @@ -.global ___setjmp -.hidden ___setjmp -.global __setjmp -.global _setjmp -.global setjmp -.type __setjmp, @function -.type _setjmp, @function -.type setjmp, @function -___setjmp: -__setjmp: -_setjmp: -setjmp: - add #52, r4 - fmov.s fr15, @-r4 - fmov.s fr14, @-r4 - fmov.s fr13, @-r4 - fmov.s fr12, @-r4 - sts.l pr, @-r4 - mov.l r15, @-r4 - mov.l r14, @-r4 - mov.l r13, @-r4 - mov.l r12, @-r4 - mov.l r11, @-r4 - mov.l r10, @-r4 - mov.l r9, @-r4 - mov.l r8, @-r4 - rts - mov #0, r0 --- a/src/setjmp/sheb-nofpu/longjmp.sub +++ /dev/null @@ -1 +0,0 @@ -../sh-nofpu/longjmp.s --- a/src/setjmp/sheb-nofpu/setjmp.sub +++ /dev/null @@ -1 +0,0 @@ -../sh-nofpu/setjmp.s --- a/src/signal/arm/restore.s +++ b/src/signal/arm/restore.s @@ -1,3 +1,5 @@ +.syntax unified + .global __restore .type __restore,%function __restore: --- a/src/signal/arm/sigsetjmp.s +++ b/src/signal/arm/sigsetjmp.s @@ -1,3 +1,4 @@ +.syntax unified .global sigsetjmp .global __sigsetjmp .type 
sigsetjmp,%function --- a/src/signal/sigaction.c +++ b/src/signal/sigaction.c @@ -17,10 +17,6 @@ void __get_handler_set(sigset_t *set) int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old) { struct k_sigaction ksa, ksa_old; - if (sig >= (unsigned)_NSIG) { - errno = EINVAL; - return -1; - } if (sa) { if ((uintptr_t)sa->sa_handler > 1UL) { a_or_l(handler_set+(sig-1)/(8*sizeof(long)), @@ -57,7 +53,7 @@ int __libc_sigaction(int sig, const stru int __sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old) { - if (sig-32U < 3) { + if (sig-32U < 3 || sig-1U >= _NSIG-1) { errno = EINVAL; return -1; } --- a/src/signal/sigsetjmp_tail.c +++ b/src/signal/sigsetjmp_tail.c @@ -2,9 +2,7 @@ #include #include "syscall.h" -#ifdef SHARED __attribute__((__visibility__("hidden"))) -#endif int __sigsetjmp_tail(sigjmp_buf jb, int ret) { void *p = jb->__ss; --- a/src/stdio/getdelim.c +++ b/src/stdio/getdelim.c @@ -27,17 +27,18 @@ ssize_t getdelim(char **restrict s, size for (;;) { z = memchr(f->rpos, delim, f->rend - f->rpos); k = z ? z - f->rpos + 1 : f->rend - f->rpos; - if (i+k >= *n) { + if (i+k+1 >= *n) { if (k >= SIZE_MAX/2-i) goto oom; - *n = i+k+2; - if (*n < SIZE_MAX/4) *n *= 2; - tmp = realloc(*s, *n); + size_t m = i+k+2; + if (!z && m < SIZE_MAX/4) m += m/2; + tmp = realloc(*s, m); if (!tmp) { - *n = i+k+2; - tmp = realloc(*s, *n); + m = i+k+2; + tmp = realloc(*s, m); if (!tmp) goto oom; } *s = tmp; + *n = m; } memcpy(*s+i, f->rpos, k); f->rpos += k; --- /dev/null +++ b/src/string/arm/__aeabi_memclr.c @@ -0,0 +1,9 @@ +#include +#include "libc.h" + +void __aeabi_memclr(void *dest, size_t n) +{ + memset(dest, 0, n); +} +weak_alias(__aeabi_memclr, __aeabi_memclr4); +weak_alias(__aeabi_memclr, __aeabi_memclr8); --- /dev/null +++ b/src/string/arm/__aeabi_memcpy.c @@ -0,0 +1,9 @@ +#include +#include "libc.h" + +void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n) +{ + memcpy(dest, src, n); +} +weak_alias(__aeabi_memcpy, __aeabi_memcpy4); +weak_alias(__aeabi_memcpy, __aeabi_memcpy8); --- /dev/null +++ b/src/string/arm/__aeabi_memmove.c @@ -0,0 +1,9 @@ +#include +#include "libc.h" + +void __aeabi_memmove(void *dest, const void *src, size_t n) +{ + memmove(dest, src, n); +} +weak_alias(__aeabi_memmove, __aeabi_memmove4); +weak_alias(__aeabi_memmove, __aeabi_memmove8); --- /dev/null +++ b/src/string/arm/__aeabi_memset.c @@ -0,0 +1,9 @@ +#include +#include "libc.h" + +void __aeabi_memset(void *dest, size_t n, int c) +{ + memset(dest, c, n); +} +weak_alias(__aeabi_memset, __aeabi_memset4); +weak_alias(__aeabi_memset, __aeabi_memset8); --- /dev/null +++ b/src/string/arm/memcpy.c @@ -0,0 +1,3 @@ +#if __ARMEB__ +#include "../memcpy.c" +#endif --- /dev/null +++ b/src/string/arm/memcpy_le.S @@ -0,0 +1,383 @@ +#ifndef __ARMEB__ + +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +/* + * Optimized memcpy() for ARM. + * + * note that memcpy() always returns the destination pointer, + * so we have to preserve R0. + */ + +/* + * This file has been modified from the original for use in musl libc. + * The main changes are: addition of .type memcpy,%function to make the + * code safely callable from thumb mode, adjusting the return + * instructions to be compatible with pre-thumb ARM cpus, and removal + * of prefetch code that is not compatible with older cpus. + */ + +.syntax unified + +.global memcpy +.type memcpy,%function +memcpy: + /* The stack must always be 64-bits aligned to be compliant with the + * ARM ABI. Since we have to save R0, we might as well save R4 + * which we can use for better pipelining of the reads below + */ + .fnstart + .save {r0, r4, lr} + stmfd sp!, {r0, r4, lr} + /* Making room for r5-r11 which will be spilled later */ + .pad #28 + sub sp, sp, #28 + + /* it simplifies things to take care of len<4 early */ + cmp r2, #4 + blo copy_last_3_and_return + + /* compute the offset to align the source + * offset = (4-(src&3))&3 = -src & 3 + */ + rsb r3, r1, #0 + ands r3, r3, #3 + beq src_aligned + + /* align source to 32 bits. We need to insert 2 instructions between + * a ldr[b|h] and str[b|h] because byte and half-word instructions + * stall 2 cycles. + */ + movs r12, r3, lsl #31 + sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ + ldrbmi r3, [r1], #1 + ldrbcs r4, [r1], #1 + ldrbcs r12,[r1], #1 + strbmi r3, [r0], #1 + strbcs r4, [r0], #1 + strbcs r12,[r0], #1 + +src_aligned: + + /* see if src and dst are aligned together (congruent) */ + eor r12, r0, r1 + tst r12, #3 + bne non_congruent + + /* Use post-incriment mode for stm to spill r5-r11 to reserved stack + * frame. Don't update sp. + */ + stmea sp, {r5-r11} + + /* align the destination to a cache-line */ + rsb r3, r0, #0 + ands r3, r3, #0x1C + beq congruent_aligned32 + cmp r3, r2 + andhi r3, r2, #0x1C + + /* conditionnaly copies 0 to 7 words (length in r3) */ + movs r12, r3, lsl #28 + ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ + ldmmi r1!, {r8, r9} /* 8 bytes */ + stmcs r0!, {r4, r5, r6, r7} + stmmi r0!, {r8, r9} + tst r3, #0x4 + ldrne r10,[r1], #4 /* 4 bytes */ + strne r10,[r0], #4 + sub r2, r2, r3 + +congruent_aligned32: + /* + * here source is aligned to 32 bytes. + */ + +cached_aligned32: + subs r2, r2, #32 + blo less_than_32_left + + /* + * We preload a cache-line up to 64 bytes ahead. On the 926, this will + * stall only until the requested world is fetched, but the linefill + * continues in the the background. + * While the linefill is going, we write our previous cache-line + * into the write-buffer (which should have some free space). 
+ * When the linefill is done, the writebuffer will + * start dumping its content into memory + * + * While all this is going, we then load a full cache line into + * 8 registers, this cache line should be in the cache by now + * (or partly in the cache). + * + * This code should work well regardless of the source/dest alignment. + * + */ + + /* Align the preload register to a cache-line because the cpu does + * "critical word first" (the first word requested is loaded first). + */ + @ bic r12, r1, #0x1F + @ add r12, r12, #64 + +1: ldmia r1!, { r4-r11 } + subs r2, r2, #32 + + /* + * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi + * for ARM9 preload will not be safely guarded by the preceding subs. + * When it is safely guarded the only possibility to have SIGSEGV here + * is because the caller overstates the length. + */ + @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */ + stmia r0!, { r4-r11 } + bhs 1b + + add r2, r2, #32 + +less_than_32_left: + /* + * less than 32 bytes left at this point (length in r2) + */ + + /* skip all this if there is nothing to do, which should + * be a common case (if not executed the code below takes + * about 16 cycles) + */ + tst r2, #0x1F + beq 1f + + /* conditionnaly copies 0 to 31 bytes */ + movs r12, r2, lsl #28 + ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ + ldmmi r1!, {r8, r9} /* 8 bytes */ + stmcs r0!, {r4, r5, r6, r7} + stmmi r0!, {r8, r9} + movs r12, r2, lsl #30 + ldrcs r3, [r1], #4 /* 4 bytes */ + ldrhmi r4, [r1], #2 /* 2 bytes */ + strcs r3, [r0], #4 + strhmi r4, [r0], #2 + tst r2, #0x1 + ldrbne r3, [r1] /* last byte */ + strbne r3, [r0] + + /* we're done! restore everything and return */ +1: ldmfd sp!, {r5-r11} + ldmfd sp!, {r0, r4, lr} + bx lr + + /********************************************************************/ + +non_congruent: + /* + * here source is aligned to 4 bytes + * but destination is not. + * + * in the code below r2 is the number of bytes read + * (the number of bytes written is always smaller, because we have + * partial words in the shift queue) + */ + cmp r2, #4 + blo copy_last_3_and_return + + /* Use post-incriment mode for stm to spill r5-r11 to reserved stack + * frame. Don't update sp. + */ + stmea sp, {r5-r11} + + /* compute shifts needed to align src to dest */ + rsb r5, r0, #0 + and r5, r5, #3 /* r5 = # bytes in partial words */ + mov r12, r5, lsl #3 /* r12 = right */ + rsb lr, r12, #32 /* lr = left */ + + /* read the first word */ + ldr r3, [r1], #4 + sub r2, r2, #4 + + /* write a partial word (0 to 3 bytes), such that destination + * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) + */ + movs r5, r5, lsl #31 + strbmi r3, [r0], #1 + movmi r3, r3, lsr #8 + strbcs r3, [r0], #1 + movcs r3, r3, lsr #8 + strbcs r3, [r0], #1 + movcs r3, r3, lsr #8 + + cmp r2, #4 + blo partial_word_tail + + /* Align destination to 32 bytes (cache line boundary) */ +1: tst r0, #0x1c + beq 2f + ldr r5, [r1], #4 + sub r2, r2, #4 + orr r4, r3, r5, lsl lr + mov r3, r5, lsr r12 + str r4, [r0], #4 + cmp r2, #4 + bhs 1b + blo partial_word_tail + + /* copy 32 bytes at a time */ +2: subs r2, r2, #32 + blo less_than_thirtytwo + + /* Use immediate mode for the shifts, because there is an extra cycle + * for register shifts, which could account for up to 50% of + * performance hit. 
+ */ + + cmp r12, #24 + beq loop24 + cmp r12, #8 + beq loop8 + +loop16: + ldr r12, [r1], #4 +1: mov r4, r12 + ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} + subs r2, r2, #32 + ldrhs r12, [r1], #4 + orr r3, r3, r4, lsl #16 + mov r4, r4, lsr #16 + orr r4, r4, r5, lsl #16 + mov r5, r5, lsr #16 + orr r5, r5, r6, lsl #16 + mov r6, r6, lsr #16 + orr r6, r6, r7, lsl #16 + mov r7, r7, lsr #16 + orr r7, r7, r8, lsl #16 + mov r8, r8, lsr #16 + orr r8, r8, r9, lsl #16 + mov r9, r9, lsr #16 + orr r9, r9, r10, lsl #16 + mov r10, r10, lsr #16 + orr r10, r10, r11, lsl #16 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsr #16 + bhs 1b + b less_than_thirtytwo + +loop8: + ldr r12, [r1], #4 +1: mov r4, r12 + ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} + subs r2, r2, #32 + ldrhs r12, [r1], #4 + orr r3, r3, r4, lsl #24 + mov r4, r4, lsr #8 + orr r4, r4, r5, lsl #24 + mov r5, r5, lsr #8 + orr r5, r5, r6, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r7, lsl #24 + mov r7, r7, lsr #8 + orr r7, r7, r8, lsl #24 + mov r8, r8, lsr #8 + orr r8, r8, r9, lsl #24 + mov r9, r9, lsr #8 + orr r9, r9, r10, lsl #24 + mov r10, r10, lsr #8 + orr r10, r10, r11, lsl #24 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsr #8 + bhs 1b + b less_than_thirtytwo + +loop24: + ldr r12, [r1], #4 +1: mov r4, r12 + ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} + subs r2, r2, #32 + ldrhs r12, [r1], #4 + orr r3, r3, r4, lsl #8 + mov r4, r4, lsr #24 + orr r4, r4, r5, lsl #8 + mov r5, r5, lsr #24 + orr r5, r5, r6, lsl #8 + mov r6, r6, lsr #24 + orr r6, r6, r7, lsl #8 + mov r7, r7, lsr #24 + orr r7, r7, r8, lsl #8 + mov r8, r8, lsr #24 + orr r8, r8, r9, lsl #8 + mov r9, r9, lsr #24 + orr r9, r9, r10, lsl #8 + mov r10, r10, lsr #24 + orr r10, r10, r11, lsl #8 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsr #24 + bhs 1b + +less_than_thirtytwo: + /* copy the last 0 to 31 bytes of the source */ + rsb r12, lr, #32 /* we corrupted r12, recompute it */ + add r2, r2, #32 + cmp r2, #4 + blo partial_word_tail + +1: ldr r5, [r1], #4 + sub r2, r2, #4 + orr r4, r3, r5, lsl lr + mov r3, r5, lsr r12 + str r4, [r0], #4 + cmp r2, #4 + bhs 1b + +partial_word_tail: + /* we have a partial word in the input buffer */ + movs r5, lr, lsl #(31-3) + strbmi r3, [r0], #1 + movmi r3, r3, lsr #8 + strbcs r3, [r0], #1 + movcs r3, r3, lsr #8 + strbcs r3, [r0], #1 + + /* Refill spilled registers from the stack. Don't update sp. */ + ldmfd sp, {r5-r11} + +copy_last_3_and_return: + movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ + ldrbmi r2, [r1], #1 + ldrbcs r3, [r1], #1 + ldrbcs r12,[r1] + strbmi r2, [r0], #1 + strbcs r3, [r0], #1 + strbcs r12,[r0] + + /* we're done! restore sp and spilled registers and return */ + add sp, sp, #28 + ldmfd sp!, {r0, r4, lr} + bx lr + +#endif --- a/src/string/armel/memcpy.s +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -/* - * Optimized memcpy() for ARM. - * - * note that memcpy() always returns the destination pointer, - * so we have to preserve R0. - */ - -/* - * This file has been modified from the original for use in musl libc. - * The main changes are: addition of .type memcpy,%function to make the - * code safely callable from thumb mode, adjusting the return - * instructions to be compatible with pre-thumb ARM cpus, and removal - * of prefetch code that is not compatible with older cpus. - */ - -.global memcpy -.type memcpy,%function -memcpy: - /* The stack must always be 64-bits aligned to be compliant with the - * ARM ABI. Since we have to save R0, we might as well save R4 - * which we can use for better pipelining of the reads below - */ - .fnstart - .save {r0, r4, lr} - stmfd sp!, {r0, r4, lr} - /* Making room for r5-r11 which will be spilled later */ - .pad #28 - sub sp, sp, #28 - - /* it simplifies things to take care of len<4 early */ - cmp r2, #4 - blo copy_last_3_and_return - - /* compute the offset to align the source - * offset = (4-(src&3))&3 = -src & 3 - */ - rsb r3, r1, #0 - ands r3, r3, #3 - beq src_aligned - - /* align source to 32 bits. We need to insert 2 instructions between - * a ldr[b|h] and str[b|h] because byte and half-word instructions - * stall 2 cycles. - */ - movs r12, r3, lsl #31 - sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ - .word 0x44d13001 /* ldrbmi r3, [r1], #1 */ - .word 0x24d14001 /* ldrbcs r4, [r1], #1 */ - .word 0x24d1c001 /* ldrbcs r12,[r1], #1 */ - .word 0x44c03001 /* strbmi r3, [r0], #1 */ - .word 0x24c04001 /* strbcs r4, [r0], #1 */ - .word 0x24c0c001 /* strbcs r12,[r0], #1 */ - -src_aligned: - - /* see if src and dst are aligned together (congruent) */ - eor r12, r0, r1 - tst r12, #3 - bne non_congruent - - /* Use post-incriment mode for stm to spill r5-r11 to reserved stack - * frame. Don't update sp. - */ - stmea sp, {r5-r11} - - /* align the destination to a cache-line */ - rsb r3, r0, #0 - ands r3, r3, #0x1C - beq congruent_aligned32 - cmp r3, r2 - andhi r3, r2, #0x1C - - /* conditionnaly copies 0 to 7 words (length in r3) */ - movs r12, r3, lsl #28 - ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ - ldmmi r1!, {r8, r9} /* 8 bytes */ - stmcs r0!, {r4, r5, r6, r7} - stmmi r0!, {r8, r9} - tst r3, #0x4 - ldrne r10,[r1], #4 /* 4 bytes */ - strne r10,[r0], #4 - sub r2, r2, r3 - -congruent_aligned32: - /* - * here source is aligned to 32 bytes. - */ - -cached_aligned32: - subs r2, r2, #32 - blo less_than_32_left - - /* - * We preload a cache-line up to 64 bytes ahead. On the 926, this will - * stall only until the requested world is fetched, but the linefill - * continues in the the background. 
- * While the linefill is going, we write our previous cache-line - * into the write-buffer (which should have some free space). - * When the linefill is done, the writebuffer will - * start dumping its content into memory - * - * While all this is going, we then load a full cache line into - * 8 registers, this cache line should be in the cache by now - * (or partly in the cache). - * - * This code should work well regardless of the source/dest alignment. - * - */ - - /* Align the preload register to a cache-line because the cpu does - * "critical word first" (the first word requested is loaded first). - */ - @ bic r12, r1, #0x1F - @ add r12, r12, #64 - -1: ldmia r1!, { r4-r11 } - subs r2, r2, #32 - - /* - * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi - * for ARM9 preload will not be safely guarded by the preceding subs. - * When it is safely guarded the only possibility to have SIGSEGV here - * is because the caller overstates the length. - */ - @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */ - stmia r0!, { r4-r11 } - bhs 1b - - add r2, r2, #32 - -less_than_32_left: - /* - * less than 32 bytes left at this point (length in r2) - */ - - /* skip all this if there is nothing to do, which should - * be a common case (if not executed the code below takes - * about 16 cycles) - */ - tst r2, #0x1F - beq 1f - - /* conditionnaly copies 0 to 31 bytes */ - movs r12, r2, lsl #28 - ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ - ldmmi r1!, {r8, r9} /* 8 bytes */ - stmcs r0!, {r4, r5, r6, r7} - stmmi r0!, {r8, r9} - movs r12, r2, lsl #30 - ldrcs r3, [r1], #4 /* 4 bytes */ - .word 0x40d140b2 /* ldrhmi r4, [r1], #2 */ /* 2 bytes */ - strcs r3, [r0], #4 - .word 0x40c040b2 /* strhmi r4, [r0], #2 */ - tst r2, #0x1 - .word 0x15d13000 /* ldrbne r3, [r1] */ /* last byte */ - .word 0x15c03000 /* strbne r3, [r0] */ - - /* we're done! restore everything and return */ -1: ldmfd sp!, {r5-r11} - ldmfd sp!, {r0, r4, lr} - tst lr, #1 - moveq pc, lr - bx lr - - /********************************************************************/ - -non_congruent: - /* - * here source is aligned to 4 bytes - * but destination is not. - * - * in the code below r2 is the number of bytes read - * (the number of bytes written is always smaller, because we have - * partial words in the shift queue) - */ - cmp r2, #4 - blo copy_last_3_and_return - - /* Use post-incriment mode for stm to spill r5-r11 to reserved stack - * frame. Don't update sp. 
- */ - stmea sp, {r5-r11} - - /* compute shifts needed to align src to dest */ - rsb r5, r0, #0 - and r5, r5, #3 /* r5 = # bytes in partial words */ - mov r12, r5, lsl #3 /* r12 = right */ - rsb lr, r12, #32 /* lr = left */ - - /* read the first word */ - ldr r3, [r1], #4 - sub r2, r2, #4 - - /* write a partial word (0 to 3 bytes), such that destination - * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) - */ - movs r5, r5, lsl #31 - .word 0x44c03001 /* strbmi r3, [r0], #1 */ - movmi r3, r3, lsr #8 - .word 0x24c03001 /* strbcs r3, [r0], #1 */ - movcs r3, r3, lsr #8 - .word 0x24c03001 /* strbcs r3, [r0], #1 */ - movcs r3, r3, lsr #8 - - cmp r2, #4 - blo partial_word_tail - - /* Align destination to 32 bytes (cache line boundary) */ -1: tst r0, #0x1c - beq 2f - ldr r5, [r1], #4 - sub r2, r2, #4 - orr r4, r3, r5, lsl lr - mov r3, r5, lsr r12 - str r4, [r0], #4 - cmp r2, #4 - bhs 1b - blo partial_word_tail - - /* copy 32 bytes at a time */ -2: subs r2, r2, #32 - blo less_than_thirtytwo - - /* Use immediate mode for the shifts, because there is an extra cycle - * for register shifts, which could account for up to 50% of - * performance hit. - */ - - cmp r12, #24 - beq loop24 - cmp r12, #8 - beq loop8 - -loop16: - ldr r12, [r1], #4 -1: mov r4, r12 - ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} - subs r2, r2, #32 - ldrhs r12, [r1], #4 - orr r3, r3, r4, lsl #16 - mov r4, r4, lsr #16 - orr r4, r4, r5, lsl #16 - mov r5, r5, lsr #16 - orr r5, r5, r6, lsl #16 - mov r6, r6, lsr #16 - orr r6, r6, r7, lsl #16 - mov r7, r7, lsr #16 - orr r7, r7, r8, lsl #16 - mov r8, r8, lsr #16 - orr r8, r8, r9, lsl #16 - mov r9, r9, lsr #16 - orr r9, r9, r10, lsl #16 - mov r10, r10, lsr #16 - orr r10, r10, r11, lsl #16 - stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} - mov r3, r11, lsr #16 - bhs 1b - b less_than_thirtytwo - -loop8: - ldr r12, [r1], #4 -1: mov r4, r12 - ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} - subs r2, r2, #32 - ldrhs r12, [r1], #4 - orr r3, r3, r4, lsl #24 - mov r4, r4, lsr #8 - orr r4, r4, r5, lsl #24 - mov r5, r5, lsr #8 - orr r5, r5, r6, lsl #24 - mov r6, r6, lsr #8 - orr r6, r6, r7, lsl #24 - mov r7, r7, lsr #8 - orr r7, r7, r8, lsl #24 - mov r8, r8, lsr #8 - orr r8, r8, r9, lsl #24 - mov r9, r9, lsr #8 - orr r9, r9, r10, lsl #24 - mov r10, r10, lsr #8 - orr r10, r10, r11, lsl #24 - stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} - mov r3, r11, lsr #8 - bhs 1b - b less_than_thirtytwo - -loop24: - ldr r12, [r1], #4 -1: mov r4, r12 - ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} - subs r2, r2, #32 - ldrhs r12, [r1], #4 - orr r3, r3, r4, lsl #8 - mov r4, r4, lsr #24 - orr r4, r4, r5, lsl #8 - mov r5, r5, lsr #24 - orr r5, r5, r6, lsl #8 - mov r6, r6, lsr #24 - orr r6, r6, r7, lsl #8 - mov r7, r7, lsr #24 - orr r7, r7, r8, lsl #8 - mov r8, r8, lsr #24 - orr r8, r8, r9, lsl #8 - mov r9, r9, lsr #24 - orr r9, r9, r10, lsl #8 - mov r10, r10, lsr #24 - orr r10, r10, r11, lsl #8 - stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} - mov r3, r11, lsr #24 - bhs 1b - -less_than_thirtytwo: - /* copy the last 0 to 31 bytes of the source */ - rsb r12, lr, #32 /* we corrupted r12, recompute it */ - add r2, r2, #32 - cmp r2, #4 - blo partial_word_tail - -1: ldr r5, [r1], #4 - sub r2, r2, #4 - orr r4, r3, r5, lsl lr - mov r3, r5, lsr r12 - str r4, [r0], #4 - cmp r2, #4 - bhs 1b - -partial_word_tail: - /* we have a partial word in the input buffer */ - movs r5, lr, lsl #(31-3) - .word 0x44c03001 /* strbmi r3, [r0], #1 */ - movmi r3, r3, lsr #8 - .word 0x24c03001 /* strbcs r3, [r0], #1 */ - movcs r3, r3, lsr #8 - .word 0x24c03001 /* strbcs r3, 
[r0], #1 */ - - /* Refill spilled registers from the stack. Don't update sp. */ - ldmfd sp, {r5-r11} - -copy_last_3_and_return: - movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ - .word 0x44d12001 /* ldrbmi r2, [r1], #1 */ - .word 0x24d13001 /* ldrbcs r3, [r1], #1 */ - .word 0x25d1c000 /* ldrbcs r12,[r1] */ - .word 0x44c02001 /* strbmi r2, [r0], #1 */ - .word 0x24c03001 /* strbcs r3, [r0], #1 */ - .word 0x25c0c000 /* strbcs r12,[r0] */ - - /* we're done! restore sp and spilled registers and return */ - add sp, sp, #28 - ldmfd sp!, {r0, r4, lr} - tst lr, #1 - moveq pc, lr - bx lr --- a/src/string/armel/memcpy.sub +++ /dev/null @@ -1 +0,0 @@ -memcpy.s --- a/src/string/armhf/memcpy.sub +++ /dev/null @@ -1 +0,0 @@ -../armel/memcpy.s --- a/src/thread/__syscall_cp.c +++ b/src/thread/__syscall_cp.c @@ -1,9 +1,7 @@ #include "pthread_impl.h" #include "syscall.h" -#ifdef SHARED __attribute__((__visibility__("hidden"))) -#endif long __syscall_cp_c(); static long sccp(syscall_arg_t nr, --- a/src/thread/__tls_get_addr.c +++ b/src/thread/__tls_get_addr.c @@ -1,16 +1,16 @@ #include #include "pthread_impl.h" +#include "libc.h" + +__attribute__((__visibility__("hidden"))) +void *__tls_get_new(size_t *); void *__tls_get_addr(size_t *v) { pthread_t self = __pthread_self(); -#ifdef SHARED - __attribute__((__visibility__("hidden"))) - void *__tls_get_new(size_t *); if (v[0]<=(size_t)self->dtv[0]) return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET; return __tls_get_new(v); -#else - return (char *)self->dtv[1]+v[1]+DTP_OFFSET; -#endif } + +weak_alias(__tls_get_addr, __tls_get_new); --- a/src/thread/aarch64/syscall_cp.s +++ b/src/thread/aarch64/syscall_cp.s @@ -17,7 +17,7 @@ __syscall_cp_asm: __cp_begin: ldr w0,[x0] - cbnz w0,1f + cbnz w0,__cp_cancel mov x8,x1 mov x0,x2 mov x1,x3 @@ -28,6 +28,5 @@ __cp_begin: svc 0 __cp_end: ret - - // cbnz might not be able to jump far enough -1: b __cancel +__cp_cancel: + b __cancel --- /dev/null +++ b/src/thread/arm/__set_thread_area.c @@ -0,0 +1,49 @@ +#include +#include +#include "pthread_impl.h" +#include "libc.h" + +#define HWCAP_TLS (1 << 15) + +extern const unsigned char __attribute__((__visibility__("hidden"))) + __a_barrier_dummy[], __a_barrier_oldkuser[], + __a_barrier_v6[], __a_barrier_v7[], + __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[], + __a_gettp_dummy[]; + +#define __a_barrier_kuser 0xffff0fa0 +#define __a_cas_kuser 0xffff0fc0 +#define __a_gettp_kuser 0xffff0fe0 + +extern uintptr_t __attribute__((__visibility__("hidden"))) + __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr; + +#define SET(op,ver) (__a_##op##_ptr = \ + (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy) + +int __set_thread_area(void *p) +{ +#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 + if (__hwcap & HWCAP_TLS) { + size_t *aux; + SET(cas, v7); + SET(barrier, v7); + for (aux=libc.auxv; *aux; aux+=2) { + if (*aux != AT_PLATFORM) continue; + const char *s = (void *)aux[1]; + if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break; + SET(cas, v6); + SET(barrier, v6); + break; + } + } else { + int ver = *(int *)0xffff0ffc; + SET(gettp, kuser); + SET(cas, kuser); + SET(barrier, kuser); + if (ver < 2) a_crash(); + if (ver < 3) SET(barrier, oldkuser); + } +#endif + return __syscall(0xf0005, p); +} --- a/src/thread/arm/__set_thread_area.s +++ /dev/null @@ -1 +0,0 @@ -/* Replaced by C code in arch/arm/src */ --- a/src/thread/arm/__unmapself.s +++ b/src/thread/arm/__unmapself.s @@ -1,3 +1,4 @@ +.syntax unified .text .global __unmapself .type __unmapself,%function --- /dev/null +++ 
b/src/thread/arm/atomics.s @@ -0,0 +1,111 @@ +.syntax unified +.text + +.global __a_barrier +.hidden __a_barrier +.type __a_barrier,%function +__a_barrier: + ldr ip,1f + ldr ip,[pc,ip] + add pc,pc,ip +1: .word __a_barrier_ptr-1b +.global __a_barrier_dummy +.hidden __a_barrier_dummy +__a_barrier_dummy: + bx lr +.global __a_barrier_oldkuser +.hidden __a_barrier_oldkuser +__a_barrier_oldkuser: + push {r0,r1,r2,r3,ip,lr} + mov r1,r0 + mov r2,sp + ldr ip,=0xffff0fc0 + mov lr,pc + mov pc,ip + pop {r0,r1,r2,r3,ip,lr} + bx lr +.global __a_barrier_v6 +.hidden __a_barrier_v6 +__a_barrier_v6: + mcr p15,0,r0,c7,c10,5 + bx lr +.global __a_barrier_v7 +.hidden __a_barrier_v7 +__a_barrier_v7: + .word 0xf57ff05b /* dmb ish */ + bx lr + +.global __a_cas +.hidden __a_cas +.type __a_cas,%function +__a_cas: + ldr ip,1f + ldr ip,[pc,ip] + add pc,pc,ip +1: .word __a_cas_ptr-1b +.global __a_cas_dummy +.hidden __a_cas_dummy +__a_cas_dummy: + mov r3,r0 + ldr r0,[r2] + subs r0,r3,r0 + streq r1,[r2] + bx lr +.global __a_cas_v6 +.hidden __a_cas_v6 +__a_cas_v6: + mov r3,r0 + mcr p15,0,r0,c7,c10,5 +1: .word 0xe1920f9f /* ldrex r0,[r2] */ + subs r0,r3,r0 + .word 0x01820f91 /* strexeq r0,r1,[r2] */ + teqeq r0,#1 + beq 1b + mcr p15,0,r0,c7,c10,5 + bx lr +.global __a_cas_v7 +.hidden __a_cas_v7 +__a_cas_v7: + mov r3,r0 + .word 0xf57ff05b /* dmb ish */ +1: .word 0xe1920f9f /* ldrex r0,[r2] */ + subs r0,r3,r0 + .word 0x01820f91 /* strexeq r0,r1,[r2] */ + teqeq r0,#1 + beq 1b + .word 0xf57ff05b /* dmb ish */ + bx lr + +.global __aeabi_read_tp +.type __aeabi_read_tp,%function +__aeabi_read_tp: + +.global __a_gettp +.hidden __a_gettp +.type __a_gettp,%function +__a_gettp: + ldr r0,1f + ldr r0,[pc,r0] + add pc,pc,r0 +1: .word __a_gettp_ptr-1b +.global __a_gettp_dummy +.hidden __a_gettp_dummy +__a_gettp_dummy: + mrc p15,0,r0,c13,c0,3 + bx lr + +.data +.global __a_barrier_ptr +.hidden __a_barrier_ptr +__a_barrier_ptr: + .word 0 + +.global __a_cas_ptr +.hidden __a_cas_ptr +__a_cas_ptr: + .word 0 + +.global __a_gettp_ptr +.hidden __a_gettp_ptr +__a_gettp_ptr: + .word 0 --- a/src/thread/arm/clone.s +++ b/src/thread/arm/clone.s @@ -1,3 +1,4 @@ +.syntax unified .text .global __clone .type __clone,%function @@ -15,8 +16,6 @@ __clone: tst r0,r0 beq 1f ldmfd sp!,{r4,r5,r6,r7} - tst lr,#1 - moveq pc,lr bx lr 1: mov r0,r6 --- a/src/thread/arm/syscall_cp.s +++ b/src/thread/arm/syscall_cp.s @@ -1,3 +1,4 @@ +.syntax unified .global __cp_begin .hidden __cp_begin .global __cp_end @@ -22,8 +23,6 @@ __cp_begin: svc 0 __cp_end: ldmfd sp!,{r4,r5,r6,r7,lr} - tst lr,#1 - moveq pc,lr bx lr __cp_cancel: ldmfd sp!,{r4,r5,r6,r7,lr} --- a/src/thread/microblaze/syscall_cp.s +++ b/src/thread/microblaze/syscall_cp.s @@ -11,7 +11,7 @@ __syscall_cp_asm: __cp_begin: lwi r5, r5, 0 - bnei r5, __cancel + bnei r5, __cp_cancel addi r12, r6, 0 add r5, r7, r0 add r6, r8, r0 @@ -23,3 +23,5 @@ __cp_begin: __cp_end: rtsd r15, 8 nop +__cp_cancel: + bri __cancel --- a/src/thread/or1k/syscall_cp.s +++ b/src/thread/or1k/syscall_cp.s @@ -12,7 +12,7 @@ __syscall_cp_asm: __cp_begin: l.lwz r3, 0(r3) l.sfeqi r3, 0 - l.bnf __cancel + l.bnf __cp_cancel l.ori r11, r4, 0 l.ori r3, r5, 0 l.ori r4, r6, 0 @@ -24,3 +24,6 @@ __cp_begin: __cp_end: l.jr r9 l.nop +__cp_cancel: + l.j __cancel + l.nop --- a/src/thread/powerpc/syscall_cp.s +++ b/src/thread/powerpc/syscall_cp.s @@ -38,7 +38,7 @@ __cp_begin: cmpwi cr7, 0, 0 #compare r0 with 0, store result in cr7. 
beq+ cr7, 1f #jump to label 1 if r0 was 0 - b __cancel #else call cancel + b __cp_cancel #else call cancel 1: #ok, the cancel flag was not set # syscall: number goes to r0, the rest 3-8 @@ -55,3 +55,5 @@ __cp_end: #else negate result. neg 3, 3 blr +__cp_cancel: + b __cancel --- a/src/thread/pthread_cancel.c +++ b/src/thread/pthread_cancel.c @@ -1,12 +1,11 @@ +#define _GNU_SOURCE #include #include "pthread_impl.h" #include "syscall.h" #include "libc.h" -#ifdef SHARED __attribute__((__visibility__("hidden"))) -#endif -long __cancel(), __cp_cancel(), __syscall_cp_asm(), __syscall_cp_c(); +long __cancel(), __syscall_cp_asm(), __syscall_cp_c(); long __cancel() { @@ -17,12 +16,6 @@ long __cancel() return -ECANCELED; } -/* If __syscall_cp_asm has adjusted the stack pointer, it must provide a - * definition of __cp_cancel to undo those adjustments and call __cancel. - * Otherwise, __cancel provides a definition for __cp_cancel. */ - -weak_alias(__cancel, __cp_cancel); - long __syscall_cp_asm(volatile void *, syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t); @@ -52,24 +45,22 @@ static void _sigaddset(sigset_t *set, in set->__bits[s/8/sizeof *set->__bits] |= 1UL<<(s&8*sizeof *set->__bits-1); } -#ifdef SHARED __attribute__((__visibility__("hidden"))) -#endif -extern const char __cp_begin[1], __cp_end[1]; +extern const char __cp_begin[1], __cp_end[1], __cp_cancel[1]; static void cancel_handler(int sig, siginfo_t *si, void *ctx) { pthread_t self = __pthread_self(); ucontext_t *uc = ctx; - const char *ip = ((char **)&uc->uc_mcontext)[CANCEL_REG_IP]; + uintptr_t pc = uc->uc_mcontext.MC_PC; a_barrier(); if (!self->cancel || self->canceldisable == PTHREAD_CANCEL_DISABLE) return; _sigaddset(&uc->uc_sigmask, SIGCANCEL); - if (self->cancelasync || ip >= __cp_begin && ip < __cp_end) { - ((char **)&uc->uc_mcontext)[CANCEL_REG_IP] = (char *)__cp_cancel; + if (self->cancelasync || pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) { + uc->uc_mcontext.MC_PC = (uintptr_t)__cp_cancel; return; } --- /dev/null +++ b/src/thread/sh/__set_thread_area.c @@ -0,0 +1,40 @@ +#include "pthread_impl.h" +#include "libc.h" +#include + +/* Also perform sh-specific init */ + +#define CPU_HAS_LLSC 0x0040 +#define CPU_HAS_CAS_L 0x0400 + +__attribute__((__visibility__("hidden"))) +extern const char __sh_cas_gusa[], __sh_cas_llsc[], __sh_cas_imask[], __sh_cas_cas_l[]; + +__attribute__((__visibility__("hidden"))) +const void *__sh_cas_ptr; + +__attribute__((__visibility__("hidden"))) +unsigned __sh_nommu; + +int __set_thread_area(void *p) +{ + size_t *aux; + __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" ); +#ifndef __SH4A__ + __sh_cas_ptr = __sh_cas_gusa; +#if !defined(__SH3__) && !defined(__SH4__) + for (aux=libc.auxv; *aux; aux+=2) { + if (*aux != AT_PLATFORM) continue; + const char *s = (void *)aux[1]; + if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break; + __sh_cas_ptr = __sh_cas_imask; + __sh_nommu = 1; + } +#endif + if (__hwcap & CPU_HAS_CAS_L) + __sh_cas_ptr = __sh_cas_cas_l; + else if (__hwcap & CPU_HAS_LLSC) + __sh_cas_ptr = __sh_cas_llsc; +#endif + return 0; +} --- /dev/null +++ b/src/thread/sh/atomics.s @@ -0,0 +1,65 @@ +/* Contract for all versions is same as cas.l r2,r3,@r0 + * pr and r1 are also clobbered (by jsr & r1 as temp). + * r0,r2,r4-r15 must be preserved. + * r3 contains result (==r2 iff cas succeeded). 
*/ + + .align 2 +.global __sh_cas_gusa +.hidden __sh_cas_gusa +__sh_cas_gusa: + mov.l r5,@-r15 + mov.l r4,@-r15 + mov r0,r4 + mova 1f,r0 + mov r15,r1 + mov #(0f-1f),r15 +0: mov.l @r4,r5 + cmp/eq r5,r2 + bf 1f + mov.l r3,@r4 +1: mov r1,r15 + mov r5,r3 + mov r4,r0 + mov.l @r15+,r4 + rts + mov.l @r15+,r5 + +.global __sh_cas_llsc +.hidden __sh_cas_llsc +__sh_cas_llsc: + mov r0,r1 + synco +0: movli.l @r1,r0 + cmp/eq r0,r2 + bf 1f + mov r3,r0 + movco.l r0,@r1 + bf 0b + mov r2,r0 +1: synco + mov r0,r3 + rts + mov r1,r0 + +.global __sh_cas_imask +.hidden __sh_cas_imask +__sh_cas_imask: + mov r0,r1 + stc sr,r0 + mov.l r0,@-r15 + or #0xf0,r0 + ldc r0,sr + mov.l @r1,r0 + cmp/eq r0,r2 + bf 1f + mov.l r3,@r1 +1: ldc.l @r15+,sr + mov r0,r3 + rts + mov r1,r0 + +.global __sh_cas_cas_l +.hidden __sh_cas_cas_l +__sh_cas_cas_l: + rts + .word 0x2323 /* cas.l r2,r3,@r0 */ --- a/src/thread/sh/syscall_cp.s +++ b/src/thread/sh/syscall_cp.s @@ -14,17 +14,8 @@ __syscall_cp_asm: __cp_begin: mov.l @r4, r4 tst r4, r4 - bt 2f - - mov.l L1, r0 - braf r0 - nop -1: - -.align 2 -L1: .long __cancel@PLT-(1b-.) - -2: mov r5, r3 + bf __cp_cancel + mov r5, r3 mov r6, r4 mov r7, r5 mov.l @r15, r6 @@ -43,3 +34,12 @@ __cp_end: rts nop + +__cp_cancel: + mov.l 2f, r0 + braf r0 + nop +1: + +.align 2 +2: .long __cancel@PCREL-(1b-.) --- a/src/thread/x32/syscall_cp.s +++ b/src/thread/x32/syscall_cp.s @@ -14,7 +14,7 @@ __syscall_cp_internal: __cp_begin: mov (%rdi),%eax test %eax,%eax - jnz __cancel + jnz __cp_cancel mov %rdi,%r11 mov %rsi,%rax mov %rdx,%rdi @@ -27,3 +27,5 @@ __cp_begin: syscall __cp_end: ret +__cp_cancel: + jmp __cancel --- a/src/thread/x86_64/syscall_cp.s +++ b/src/thread/x86_64/syscall_cp.s @@ -14,7 +14,7 @@ __syscall_cp_asm: __cp_begin: mov (%rdi),%eax test %eax,%eax - jnz __cancel + jnz __cp_cancel mov %rdi,%r11 mov %rsi,%rax mov %rdx,%rdi @@ -27,3 +27,5 @@ __cp_begin: syscall __cp_end: ret +__cp_cancel: + jmp __cancel
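A note on the last group of hunks, not part of the patch itself: the aarch64, arm, microblaze, or1k, powerpc, sh, x32 and x86_64 syscall_cp changes, together with the pthread_cancel.c hunk, all make the same move. The cancellable-syscall assembly no longer branches straight to __cancel; it branches to a local __cp_cancel label that can undo any stack or register adjustments before reaching __cancel, and cancel_handler now compares the saved program counter against __cp_begin/__cp_end and rewrites it to __cp_cancel. The sketch below only illustrates that control-flow shape in plain C under stated assumptions; cancel_flag, cancel_path, syscall_cp_sketch, do_operation and fake_read are stand-in names invented for the illustration and do not exist in musl.

    #include <stdio.h>
    #include <errno.h>

    static volatile int cancel_flag;        /* stand-in for self->cancel */

    static long cancel_path(void)           /* stand-in for __cp_cancel -> __cancel */
    {
        return -ECANCELED;
    }

    static long syscall_cp_sketch(long (*do_operation)(void))
    {
        /* __cp_begin: if cancellation is already pending, take the
         * cancel path instead of entering the system call. */
        if (cancel_flag) return cancel_path();
        /* body: the actual syscall instruction sits here in the asm */
        long ret = do_operation();
        /* __cp_end: past this point the result is returned normally */
        return ret;
    }

    static long fake_read(void) { return 42; }

    int main(void)
    {
        printf("%ld\n", syscall_cp_sketch(fake_read));   /* prints 42 */
        cancel_flag = 1;
        printf("%ld\n", syscall_cp_sketch(fake_read));   /* prints -ECANCELED */
        return 0;
    }

In the real code the window between __cp_begin and __cp_end is what cancel_handler inspects, which is why each architecture must provide its own __cp_cancel rather than relying on the old weak alias to __cancel.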