diff options
author | root <root@artemis.panaceas.org> | 2015-12-25 04:40:36 +0000 |
---|---|---|
committer | root <root@artemis.panaceas.org> | 2015-12-25 04:40:36 +0000 |
commit | 849369d6c66d3054688672f97d31fceb8e8230fb (patch) | |
tree | 6135abc790ca67dedbe07c39806591e70eda81ce /arch/um/os-Linux | |
download | linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.gz linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.bz2 linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.zip |
initial_commit
Diffstat (limited to 'arch/um/os-Linux')
41 files changed, 6871 insertions, 0 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile new file mode 100644 index 00000000..d66f0388 --- /dev/null +++ b/arch/um/os-Linux/Makefile @@ -0,0 +1,20 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = aio.o elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ + registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \ + umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/ + +USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \ + main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ + tty.o tls.o uaccess.o umid.o util.o + +CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) + +HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \ + echo -DHAVE_AIO_ABI ) +CFLAGS_aio.o += $(HAVE_AIO_ABI) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c new file mode 100644 index 00000000..57e3d46c --- /dev/null +++ b/arch/um/os-Linux/aio.c @@ -0,0 +1,393 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <unistd.h> +#include <sched.h> +#include <signal.h> +#include <errno.h> +#include <sys/time.h> +#include <asm/unistd.h> +#include "aio.h" +#include "init.h" +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "user.h" + +struct aio_thread_req { + enum aio_type type; + int io_fd; + unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; +}; + +#if defined(HAVE_AIO_ABI) +#include <linux/aio_abi.h> + +/* + * If we have the headers, we are going to build with AIO enabled. + * If we don't have aio in libc, we define the necessary stubs here. + */ + +#if !defined(HAVE_AIO_LIBC) + +static long io_setup(int n, aio_context_t *ctxp) +{ + return syscall(__NR_io_setup, n, ctxp); +} + +static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) +{ + return syscall(__NR_io_submit, ctx, nr, iocbpp); +} + +static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, + struct io_event *events, struct timespec *timeout) +{ + return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); +} + +#endif + +/* + * The AIO_MMAP cases force the mmapped page into memory here + * rather than in whatever place first touches the data. I used + * to do this by touching the page, but that's delicate because + * gcc is prone to optimizing that away. So, what's done here + * is we read from the descriptor from which the page was + * mapped. The caller is required to pass an offset which is + * inside the page that was mapped. Thus, when the read + * returns, we know that the page is in the page cache, and + * that it now backs the mmapped area. + */ + +static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, + int len, unsigned long long offset, struct aio_context *aio) +{ + struct iocb *iocbp = & ((struct iocb) { + .aio_data = (unsigned long) aio, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset + }); + char c; + + switch (type) { + case AIO_READ: + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; + break; + case AIO_WRITE: + iocbp->aio_lio_opcode = IOCB_CMD_PWRITE; + break; + case AIO_MMAP: + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; + iocbp->aio_buf = (unsigned long) &c; + iocbp->aio_nbytes = sizeof(c); + break; + default: + printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type); + return -EINVAL; + } + + return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno; +} + +/* Initialized in an initcall and unchanged thereafter */ +static aio_context_t ctx = 0; + +static int aio_thread(void *arg) +{ + struct aio_thread_reply reply; + struct io_event event; + int err, n, reply_fd; + + signal(SIGWINCH, SIG_IGN); + + while (1) { + n = io_getevents(ctx, 1, 1, &event, NULL); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "aio_thread - io_getevents failed, " + "errno = %d\n", errno); + } + else { + reply = ((struct aio_thread_reply) + { .data = (void *) (long) event.data, + .err = event.res }); + reply_fd = ((struct aio_context *) reply.data)->reply_fd; + err = write(reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) + printk(UM_KERN_ERR "aio_thread - write failed, " + "fd = %d, err = %d\n", reply_fd, errno); + } + } + return 0; +} + +#endif + +static int do_not_aio(struct aio_thread_req *req) +{ + char c; + unsigned long long actual; + int n; + + actual = lseek64(req->io_fd, req->offset, SEEK_SET); + if (actual != req->offset) + return -errno; + + switch (req->type) { + case AIO_READ: + n = read(req->io_fd, req->buf, req->len); + break; + case AIO_WRITE: + n = write(req->io_fd, req->buf, req->len); + break; + case AIO_MMAP: + n = read(req->io_fd, &c, sizeof(c)); + break; + default: + printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n", + req->type); + return -EINVAL; + } + + if (n < 0) + return -errno; + return 0; +} + +/* These are initialized in initcalls and not changed */ +static int aio_req_fd_r = -1; +static int aio_req_fd_w = -1; +static int aio_pid = -1; +static unsigned long aio_stack; + +static int not_aio_thread(void *arg) +{ + struct aio_thread_req req; + struct aio_thread_reply reply; + int err; + + signal(SIGWINCH, SIG_IGN); + while (1) { + err = read(aio_req_fd_r, &req, sizeof(req)); + if (err != sizeof(req)) { + if (err < 0) + printk(UM_KERN_ERR "not_aio_thread - " + "read failed, fd = %d, err = %d\n", + aio_req_fd_r, + errno); + else { + printk(UM_KERN_ERR "not_aio_thread - short " + "read, fd = %d, length = %d\n", + aio_req_fd_r, err); + } + continue; + } + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = write(req.aio->reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) + printk(UM_KERN_ERR "not_aio_thread - write failed, " + "fd = %d, err = %d\n", req.aio->reply_fd, errno); + } + + return 0; +} + +static int init_aio_24(void) +{ + int fds[2], err; + + err = os_pipe(fds, 1, 1); + if (err) + goto out; + + aio_req_fd_w = fds[0]; + aio_req_fd_r = fds[1]; + + err = os_set_fd_block(aio_req_fd_w, 0); + if (err) + goto out_close_pipe; + + err = run_helper_thread(not_aio_thread, NULL, + CLONE_FILES | CLONE_VM, &aio_stack); + if (err < 0) + goto out_close_pipe; + + aio_pid = err; + goto out; + +out_close_pipe: + close(fds[0]); + close(fds[1]); + aio_req_fd_w = -1; + aio_req_fd_r = -1; +out: +#ifndef HAVE_AIO_ABI + printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during " + "build\n"); +#endif + printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to " + "I/O thread\n"); + return 0; +} + +#ifdef HAVE_AIO_ABI +#define DEFAULT_24_AIO 0 +static int init_aio_26(void) +{ + int err; + + if (io_setup(256, &ctx)) { + err = -errno; + printk(UM_KERN_ERR "aio_thread failed to initialize context, " + "err = %d\n", errno); + return err; + } + + err = run_helper_thread(aio_thread, NULL, + CLONE_FILES | CLONE_VM, &aio_stack); + if (err < 0) + return err; + + aio_pid = err; + + printk(UM_KERN_INFO "Using 2.6 host AIO\n"); + return 0; +} + +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; + + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if (err) { + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = write(aio->reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) { + err = -errno; + printk(UM_KERN_ERR "submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + } + else err = 0; + } + + return err; +} + +#else +#define DEFAULT_24_AIO 1 +static int init_aio_26(void) +{ + return -ENOSYS; +} + +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + return -ENOSYS; +} +#endif + +/* Initialized in an initcall and unchanged thereafter */ +static int aio_24 = DEFAULT_24_AIO; + +static int __init set_aio_24(char *name, int *add) +{ + aio_24 = 1; + return 0; +} + +__uml_setup("aio=2.4", set_aio_24, +"aio=2.4\n" +" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n" +" available. 2.4 AIO is a single thread that handles one request at a\n" +" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n" +" interface to handle an arbitrary number of pending requests. 2.6 AIO \n" +" is not available in tt mode, on 2.4 hosts, or when UML is built with\n" +" /usr/include/linux/aio_abi.h not available. Many distributions don't\n" +" include aio_abi.h, so you will need to copy it from a kernel tree to\n" +" your /usr/include/linux in order to build an AIO-capable UML\n\n" +); + +static int init_aio(void) +{ + int err; + + if (!aio_24) { + err = init_aio_26(); + if (err && (errno == ENOSYS)) { + printk(UM_KERN_INFO "2.6 AIO not supported on the " + "host - reverting to 2.4 AIO\n"); + aio_24 = 1; + } + else return err; + } + + if (aio_24) + return init_aio_24(); + + return 0; +} + +/* + * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio + * needs to be called when the kernel is running because it calls run_helper, + * which needs get_free_page. exit_aio is a __uml_exitcall because the generic + * kernel does not run __exitcalls on shutdown, and can't because many of them + * break when called outside of module unloading. + */ +__initcall(init_aio); + +static void exit_aio(void) +{ + if (aio_pid != -1) { + os_kill_process(aio_pid, 1); + free_stack(aio_stack, 0); + } +} + +__uml_exitcall(exit_aio); + +static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = write(aio_req_fd_w, &req, sizeof(req)); + if (err == sizeof(req)) + err = 0; + else err = -errno; + + return err; +} + +int submit_aio(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio) +{ + aio->reply_fd = reply_fd; + if (aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else + return submit_aio_26(type, io_fd, buf, len, offset, aio); +} diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile new file mode 100644 index 00000000..6c546dc9 --- /dev/null +++ b/arch/um/os-Linux/drivers/Makefile @@ -0,0 +1,13 @@ +# +# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +ethertap-objs := ethertap_kern.o ethertap_user.o +tuntap-objs := tuntap_kern.o tuntap_user.o + +obj-y = +obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o +obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h new file mode 100644 index 00000000..ddffd41c --- /dev/null +++ b/arch/um/os-Linux/drivers/etap.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __DRIVERS_ETAP_H +#define __DRIVERS_ETAP_H + +#include "net_user.h" + +struct ethertap_data { + char *dev_name; + char *gate_addr; + int data_fd; + int control_fd; + void *dev; +}; + +extern const struct net_user_info ethertap_user_info; + +#endif diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c new file mode 100644 index 00000000..7f6f9a71 --- /dev/null +++ b/arch/um/os-Linux/drivers/ethertap_kern.c @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include "etap.h" +#include "net_kern.h" + +struct ethertap_init { + char *dev_name; + char *gate_addr; +}; + +static void etap_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct ethertap_data *epri; + struct ethertap_init *init = data; + + pri = netdev_priv(dev); + epri = (struct ethertap_data *) pri->user; + epri->dev_name = init->dev_name; + epri->gate_addr = init->gate_addr; + epri->data_fd = -1; + epri->control_fd = -1; + epri->dev = dev; + + printk(KERN_INFO "ethertap backend - %s", epri->dev_name); + if (epri->gate_addr != NULL) + printk(KERN_CONT ", IP = %s", epri->gate_addr); + printk(KERN_CONT "\n"); +} + +static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + int len; + + len = net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP); + if (len <= 0) + return(len); + + skb_pull(skb, 2); + len -= 2; + return len; +} + +static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + skb_push(skb, 2); + return net_send(fd, skb->data, skb->len); +} + +const struct net_kern_info ethertap_kern_info = { + .init = etap_init, + .protocol = eth_protocol, + .read = etap_read, + .write = etap_write, +}; + +int ethertap_setup(char *str, char **mac_out, void *data) +{ + struct ethertap_init *init = data; + + *init = ((struct ethertap_init) + { .dev_name = NULL, + .gate_addr = NULL }); + if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out, + &init->gate_addr)) + return 0; + if (init->dev_name == NULL) { + printk(KERN_ERR "ethertap_setup : Missing tap device name\n"); + return 0; + } + + return 1; +} + +static struct transport ethertap_transport = { + .list = LIST_HEAD_INIT(ethertap_transport.list), + .name = "ethertap", + .setup = ethertap_setup, + .user = ðertap_user_info, + .kern = ðertap_kern_info, + .private_size = sizeof(struct ethertap_data), + .setup_size = sizeof(struct ethertap_init), +}; + +static int register_ethertap(void) +{ + register_transport(ðertap_transport); + return 0; +} + +late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c new file mode 100644 index 00000000..cc72cb2c --- /dev/null +++ b/arch/um/os-Linux/drivers/ethertap_user.c @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include "etap.h" +#include "kern_constants.h" +#include "os.h" +#include "net_user.h" +#include "um_malloc.h" +#include "user.h" + +#define MAX_PACKET ETH_MAX_PACKET + +static int etap_user_init(void *data, void *dev) +{ + struct ethertap_data *pri = data; + + pri->dev = dev; + return 0; +} + +struct addr_change { + enum { ADD_ADDR, DEL_ADDR } what; + unsigned char addr[4]; + unsigned char netmask[4]; +}; + +static void etap_change(int op, unsigned char *addr, unsigned char *netmask, + int fd) +{ + struct addr_change change; + char *output; + int n; + + change.what = op; + memcpy(change.addr, addr, sizeof(change.addr)); + memcpy(change.netmask, netmask, sizeof(change.netmask)); + CATCH_EINTR(n = write(fd, &change, sizeof(change))); + if (n != sizeof(change)) { + printk(UM_KERN_ERR "etap_change - request failed, err = %d\n", + errno); + return; + } + + output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL); + if (output == NULL) + printk(UM_KERN_ERR "etap_change : Failed to allocate output " + "buffer\n"); + read_output(fd, output, UM_KERN_PAGE_SIZE); + if (output != NULL) { + printk("%s", output); + kfree(output); + } +} + +static void etap_open_addr(unsigned char *addr, unsigned char *netmask, + void *arg) +{ + etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); +} + +static void etap_close_addr(unsigned char *addr, unsigned char *netmask, + void *arg) +{ + etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); +} + +struct etap_pre_exec_data { + int control_remote; + int control_me; + int data_me; +}; + +static void etap_pre_exec(void *arg) +{ + struct etap_pre_exec_data *data = arg; + + dup2(data->control_remote, 1); + close(data->data_me); + close(data->control_me); +} + +static int etap_tramp(char *dev, char *gate, int control_me, + int control_remote, int data_me, int data_remote) +{ + struct etap_pre_exec_data pe_data; + int pid, err, n; + char version_buf[sizeof("nnnnn\0")]; + char data_fd_buf[sizeof("nnnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, + data_fd_buf, gate_buf, NULL }; + char *nosetup_args[] = { "uml_net", version_buf, "ethertap", + dev, data_fd_buf, NULL }; + char **args, c; + + sprintf(data_fd_buf, "%d", data_remote); + sprintf(version_buf, "%d", UML_NET_VERSION); + if (gate != NULL) { + strcpy(gate_buf, gate); + args = setup_args; + } + else args = nosetup_args; + + err = 0; + pe_data.control_remote = control_remote; + pe_data.control_me = control_me; + pe_data.data_me = data_me; + pid = run_helper(etap_pre_exec, &pe_data, args); + + if (pid < 0) + err = pid; + close(data_remote); + close(control_remote); + CATCH_EINTR(n = read(control_me, &c, sizeof(c))); + if (n != sizeof(c)) { + err = -errno; + printk(UM_KERN_ERR "etap_tramp : read of status failed, " + "err = %d\n", -err); + return err; + } + if (c != 1) { + printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); + err = helper_wait(pid); + } + return err; +} + +static int etap_open(void *data) +{ + struct ethertap_data *pri = data; + char *output; + int data_fds[2], control_fds[2], err, output_len; + + err = tap_open_common(pri->dev, pri->gate_addr); + if (err) + return err; + + err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - data socketpair failed - " + "err = %d\n", errno); + return err; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - control socketpair failed - " + "err = %d\n", errno); + goto out_close_data; + } + + err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], + control_fds[1], data_fds[0], data_fds[1]); + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); + read_output(control_fds[0], output, output_len); + + if (output == NULL) + printk(UM_KERN_ERR "etap_open : failed to allocate output " + "buffer\n"); + else { + printk("%s", output); + kfree(output); + } + + if (err < 0) { + printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err); + goto out_close_control; + } + + pri->data_fd = data_fds[0]; + pri->control_fd = control_fds[0]; + iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); + return data_fds[0]; + +out_close_control: + close(control_fds[0]); + close(control_fds[1]); +out_close_data: + close(data_fds[0]); + close(data_fds[1]); + return err; +} + +static void etap_close(int fd, void *data) +{ + struct ethertap_data *pri = data; + + iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); + close(fd); + + if (shutdown(pri->data_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown data socket failed, " + "errno = %d\n", errno); + + if (shutdown(pri->control_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown control socket " + "failed, errno = %d\n", errno); + + close(pri->data_fd); + pri->data_fd = -1; + close(pri->control_fd); + pri->control_fd = -1; +} + +static void etap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + tap_check_ips(pri->gate_addr, addr); + if (pri->control_fd == -1) + return; + etap_open_addr(addr, netmask, &pri->control_fd); +} + +static void etap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + if (pri->control_fd == -1) + return; + + etap_close_addr(addr, netmask, &pri->control_fd); +} + +const struct net_user_info ethertap_user_info = { + .init = etap_user_init, + .open = etap_open, + .close = etap_close, + .remove = NULL, + .add_address = etap_add_addr, + .delete_address = etap_del_addr, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP, +}; diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h new file mode 100644 index 00000000..f17c3158 --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_TUNTAP_H +#define __UM_TUNTAP_H + +#include "net_user.h" + +struct tuntap_data { + char *dev_name; + int fixed_config; + char *gate_addr; + int fd; + void *dev; +}; + +extern const struct net_user_info tuntap_user_info; + +#endif diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c new file mode 100644 index 00000000..4048800e --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap_kern.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/netdevice.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <asm/errno.h> +#include "net_kern.h" +#include "tuntap.h" + +struct tuntap_init { + char *dev_name; + char *gate_addr; +}; + +static void tuntap_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct tuntap_data *tpri; + struct tuntap_init *init = data; + + pri = netdev_priv(dev); + tpri = (struct tuntap_data *) pri->user; + tpri->dev_name = init->dev_name; + tpri->fixed_config = (init->dev_name != NULL); + tpri->gate_addr = init->gate_addr; + tpri->fd = -1; + tpri->dev = dev; + + printk(KERN_INFO "TUN/TAP backend - "); + if (tpri->gate_addr != NULL) + printk(KERN_CONT "IP = %s", tpri->gate_addr); + printk(KERN_CONT "\n"); +} + +static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_read(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); +} + +static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_write(fd, skb->data, skb->len); +} + +const struct net_kern_info tuntap_kern_info = { + .init = tuntap_init, + .protocol = eth_protocol, + .read = tuntap_read, + .write = tuntap_write, +}; + +int tuntap_setup(char *str, char **mac_out, void *data) +{ + struct tuntap_init *init = data; + + *init = ((struct tuntap_init) + { .dev_name = NULL, + .gate_addr = NULL }); + if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out, + &init->gate_addr)) + return 0; + + return 1; +} + +static struct transport tuntap_transport = { + .list = LIST_HEAD_INIT(tuntap_transport.list), + .name = "tuntap", + .setup = tuntap_setup, + .user = &tuntap_user_info, + .kern = &tuntap_kern_info, + .private_size = sizeof(struct tuntap_data), + .setup_size = sizeof(struct tuntap_init), +}; + +static int register_tuntap(void) +{ + register_transport(&tuntap_transport); + return 0; +} + +late_initcall(register_tuntap); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c new file mode 100644 index 00000000..2448be03 --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap_user.c @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "tuntap.h" +#include "user.h" + +static int tuntap_user_init(void *data, void *dev) +{ + struct tuntap_data *pri = data; + + pri->dev = dev; + return 0; +} + +static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + tap_check_ips(pri->gate_addr, addr); + if ((pri->fd == -1) || pri->fixed_config) + return; + open_addr(addr, netmask, pri->dev_name); +} + +static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + if ((pri->fd == -1) || pri->fixed_config) + return; + close_addr(addr, netmask, pri->dev_name); +} + +struct tuntap_pre_exec_data { + int stdout; + int close_me; +}; + +static void tuntap_pre_exec(void *arg) +{ + struct tuntap_pre_exec_data *data = arg; + + dup2(data->stdout, 1); + close(data->close_me); +} + +static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, + char *buffer, int buffer_len, int *used_out) +{ + struct tuntap_pre_exec_data data; + char version_buf[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, + NULL }; + char buf[CMSG_SPACE(sizeof(*fd_out))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + int pid, n, err; + + sprintf(version_buf, "%d", UML_NET_VERSION); + + data.stdout = remote; + data.close_me = me; + + pid = run_helper(tuntap_pre_exec, &data, argv); + + if (pid < 0) + return -pid; + + close(remote); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + if (buffer != NULL) { + iov = ((struct iovec) { buffer, buffer_len }); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + } + else { + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + } + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + msg.msg_flags = 0; + n = recvmsg(me, &msg, 0); + *used_out = n; + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - " + "errno = %d\n", errno); + return err; + } + helper_wait(pid); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL) { + printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "message\n"); + return -EINVAL; + } + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "descriptor\n"); + return -EINVAL; + } + *fd_out = ((int *) CMSG_DATA(cmsg))[0]; + os_set_exec_close(*fd_out); + return 0; +} + +static int tuntap_open(void *data) +{ + struct ifreq ifr; + struct tuntap_data *pri = data; + char *output, *buffer; + int err, fds[2], len, used; + + err = tap_open_common(pri->dev, pri->gate_addr); + if (err < 0) + return err; + + if (pri->fixed_config) { + pri->fd = os_open_file("/dev/net/tun", + of_cloexec(of_rdwr(OPENFLAGS())), 0); + if (pri->fd < 0) { + printk(UM_KERN_ERR "Failed to open /dev/net/tun, " + "err = %d\n", -pri->fd); + return pri->fd; + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); + if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { + err = -errno; + printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", + errno); + close(pri->fd); + return err; + } + } + else { + err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "tuntap_open : socketpair failed - " + "errno = %d\n", errno); + return err; + } + + buffer = get_output_buffer(&len); + if (buffer != NULL) + len--; + used = 0; + + err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], + fds[1], buffer, len, &used); + + output = buffer; + if (err < 0) { + printk("%s", output); + free_output_buffer(buffer); + printk(UM_KERN_ERR "tuntap_open_tramp failed - " + "err = %d\n", -err); + return err; + } + + pri->dev_name = uml_strdup(buffer); + output += IFNAMSIZ; + printk("%s", output); + free_output_buffer(buffer); + + close(fds[0]); + iter_addresses(pri->dev, open_addr, pri->dev_name); + } + + return pri->fd; +} + +static void tuntap_close(int fd, void *data) +{ + struct tuntap_data *pri = data; + + if (!pri->fixed_config) + iter_addresses(pri->dev, close_addr, pri->dev_name); + close(fd); + pri->fd = -1; +} + +const struct net_user_info tuntap_user_info = { + .init = tuntap_user_init, + .open = tuntap_open, + .close = tuntap_close, + .remove = NULL, + .add_address = tuntap_add_addr, + .delete_address = tuntap_del_addr, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c new file mode 100644 index 00000000..608784d4 --- /dev/null +++ b/arch/um/os-Linux/elf_aux.c @@ -0,0 +1,79 @@ +/* + * arch/um/kernel/elf_aux.c + * + * Scan the Elf auxiliary vector provided by the host to extract + * information about vsyscall-page, etc. + * + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser (bodo.stroesser@fujitsu-siemens.com) + */ +#include <elf.h> +#include <stddef.h> +#include "init.h" +#include "elf_user.h" +#include "mem_user.h" +#include <kern_constants.h> + +/* Use the one from the kernel - the host may miss it, if having old headers. */ +#if UM_ELF_CLASS == UM_ELFCLASS32 +typedef Elf32_auxv_t elf_auxv_t; +#else +typedef Elf64_auxv_t elf_auxv_t; +#endif + +/* These are initialized very early in boot and never changed */ +char * elf_aux_platform; +long elf_aux_hwcap; +unsigned long vsyscall_ehdr; +unsigned long vsyscall_end; +unsigned long __kernel_vsyscall; + +__init void scan_elf_aux( char **envp) +{ + long page_size = 0; + elf_auxv_t * auxv; + + while ( *envp++ != NULL) ; + + for ( auxv = (elf_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) { + switch ( auxv->a_type ) { + case AT_SYSINFO: + __kernel_vsyscall = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (__kernel_vsyscall < (unsigned long) envp) + __kernel_vsyscall = 0; + break; + case AT_SYSINFO_EHDR: + vsyscall_ehdr = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (vsyscall_ehdr < (unsigned long) envp) + vsyscall_ehdr = 0; + break; + case AT_HWCAP: + elf_aux_hwcap = auxv->a_un.a_val; + break; + case AT_PLATFORM: + /* elf.h removed the pointer elements from + * a_un, so we have to use a_val, which is + * all that's left. + */ + elf_aux_platform = + (char *) (long) auxv->a_un.a_val; + break; + case AT_PAGESZ: + page_size = auxv->a_un.a_val; + break; + } + } + if ( ! __kernel_vsyscall || ! vsyscall_ehdr || + ! elf_aux_hwcap || ! elf_aux_platform || + ! page_size || (vsyscall_ehdr % page_size) ) { + __kernel_vsyscall = 0; + vsyscall_ehdr = 0; + elf_aux_hwcap = 0; + elf_aux_platform = "i586"; + } + else { + vsyscall_end = vsyscall_ehdr + page_size; + } +} diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c new file mode 100644 index 00000000..66e583a4 --- /dev/null +++ b/arch/um/os-Linux/execvp.c @@ -0,0 +1,149 @@ +/* Copyright (C) 2006 by Paolo Giarrusso - modified from glibc' execvp.c. + Original copyright notice follows: + + Copyright (C) 1991,92,1995-99,2002,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ +#include <unistd.h> + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <limits.h> + +#ifndef TEST +#include "um_malloc.h" +#else +#include <stdio.h> +#define um_kmalloc malloc +#endif +#include "os.h" + +/* Execute FILE, searching in the `PATH' environment variable if it contains + no slashes, with arguments ARGV and environment from `environ'. */ +int execvp_noalloc(char *buf, const char *file, char *const argv[]) +{ + if (*file == '\0') { + return -ENOENT; + } + + if (strchr (file, '/') != NULL) { + /* Don't search when it contains a slash. */ + execv(file, argv); + } else { + int got_eacces; + size_t len, pathlen; + char *name, *p; + char *path = getenv("PATH"); + if (path == NULL) + path = ":/bin:/usr/bin"; + + len = strlen(file) + 1; + pathlen = strlen(path); + /* Copy the file name at the top. */ + name = memcpy(buf + pathlen + 1, file, len); + /* And add the slash. */ + *--name = '/'; + + got_eacces = 0; + p = path; + do { + char *startp; + + path = p; + //Let's avoid this GNU extension. + //p = strchrnul (path, ':'); + p = strchr(path, ':'); + if (!p) + p = strchr(path, '\0'); + + if (p == path) + /* Two adjacent colons, or a colon at the beginning or the end + of `PATH' means to search the current directory. */ + startp = name + 1; + else + startp = memcpy(name - (p - path), path, p - path); + + /* Try to execute this name. If it works, execv will not return. */ + execv(startp, argv); + + /* + if (errno == ENOEXEC) { + } + */ + + switch (errno) { + case EACCES: + /* Record the we got a `Permission denied' error. If we end + up finding no executable we can use, we want to diagnose + that we did find one but were denied access. */ + got_eacces = 1; + case ENOENT: + case ESTALE: + case ENOTDIR: + /* Those errors indicate the file is missing or not executable + by us, in which case we want to just try the next path + directory. */ + case ENODEV: + case ETIMEDOUT: + /* Some strange filesystems like AFS return even + stranger error numbers. They cannot reasonably mean + anything else so ignore those, too. */ + case ENOEXEC: + /* We won't go searching for the shell + * if it is not executable - the Linux + * kernel already handles this enough, + * for us. */ + break; + + default: + /* Some other error means we found an executable file, but + something went wrong executing it; return the error to our + caller. */ + return -errno; + } + } while (*p++ != '\0'); + + /* We tried every element and none of them worked. */ + if (got_eacces) + /* At least one failure was due to permissions, so report that + error. */ + return -EACCES; + } + + /* Return the error from the last attempt (probably ENOENT). */ + return -errno; +} +#ifdef TEST +int main(int argc, char**argv) +{ + char buf[PATH_MAX]; + int ret; + argc--; + if (!argc) { + fprintf(stderr, "Not enough arguments\n"); + return 1; + } + argv++; + if (ret = execvp_noalloc(buf, argv[0], argv)) { + errno = -ret; + perror("execvp_noalloc"); + } + return 0; +} +#endif diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c new file mode 100644 index 00000000..140e587b --- /dev/null +++ b/arch/um/os-Linux/file.c @@ -0,0 +1,578 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/un.h> +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +static void copy_stat(struct uml_stat *dst, const struct stat64 *src) +{ + *dst = ((struct uml_stat) { + .ust_dev = src->st_dev, /* device */ + .ust_ino = src->st_ino, /* inode */ + .ust_mode = src->st_mode, /* protection */ + .ust_nlink = src->st_nlink, /* number of hard links */ + .ust_uid = src->st_uid, /* user ID of owner */ + .ust_gid = src->st_gid, /* group ID of owner */ + .ust_size = src->st_size, /* total size, in bytes */ + .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ + .ust_blocks = src->st_blocks, /* number of blocks allocated */ + .ust_atime = src->st_atime, /* time of last access */ + .ust_mtime = src->st_mtime, /* time of last modification */ + .ust_ctime = src->st_ctime, /* time of last change */ + }); +} + +int os_stat_fd(const int fd, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + CATCH_EINTR(err = fstat64(fd, &sbuf)); + if (err < 0) + return -errno; + + if (ubuf != NULL) + copy_stat(ubuf, &sbuf); + return err; +} + +int os_stat_file(const char *file_name, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + CATCH_EINTR(err = stat64(file_name, &sbuf)); + if (err < 0) + return -errno; + + if (ubuf != NULL) + copy_stat(ubuf, &sbuf); + return err; +} + +int os_access(const char *file, int mode) +{ + int amode, err; + + amode = (mode & OS_ACC_R_OK ? R_OK : 0) | + (mode & OS_ACC_W_OK ? W_OK : 0) | + (mode & OS_ACC_X_OK ? X_OK : 0) | + (mode & OS_ACC_F_OK ? F_OK : 0); + + err = access(file, amode); + if (err < 0) + return -errno; + + return 0; +} + +/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ +int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) +{ + int err; + + err = ioctl(fd, cmd, arg); + if (err < 0) + return -errno; + + return err; +} + +/* FIXME: ensure namebuf in os_get_if_name is big enough */ +int os_get_ifname(int fd, char* namebuf) +{ + if (ioctl(fd, SIOCGIFNAME, namebuf) < 0) + return -errno; + + return 0; +} + +int os_set_slip(int fd) +{ + int disc, sencap; + + disc = N_SLIP; + if (ioctl(fd, TIOCSETD, &disc) < 0) + return -errno; + + sencap = 0; + if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) + return -errno; + + return 0; +} + +int os_mode_fd(int fd, int mode) +{ + int err; + + CATCH_EINTR(err = fchmod(fd, mode)); + if (err < 0) + return -errno; + + return 0; +} + +int os_file_type(char *file) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) + return err; + + if (S_ISDIR(buf.ust_mode)) + return OS_TYPE_DIR; + else if (S_ISLNK(buf.ust_mode)) + return OS_TYPE_SYMLINK; + else if (S_ISCHR(buf.ust_mode)) + return OS_TYPE_CHARDEV; + else if (S_ISBLK(buf.ust_mode)) + return OS_TYPE_BLOCKDEV; + else if (S_ISFIFO(buf.ust_mode)) + return OS_TYPE_FIFO; + else if (S_ISSOCK(buf.ust_mode)) + return OS_TYPE_SOCK; + else return OS_TYPE_FILE; +} + +int os_file_mode(const char *file, struct openflags *mode_out) +{ + int err; + + *mode_out = OPENFLAGS(); + + err = access(file, W_OK); + if (err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_write(*mode_out); + + err = access(file, R_OK); + if (err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_read(*mode_out); + + return err; +} + +int os_open_file(const char *file, struct openflags flags, int mode) +{ + int fd, err, f = 0; + + if (flags.r && flags.w) + f = O_RDWR; + else if (flags.r) + f = O_RDONLY; + else if (flags.w) + f = O_WRONLY; + else f = 0; + + if (flags.s) + f |= O_SYNC; + if (flags.c) + f |= O_CREAT; + if (flags.t) + f |= O_TRUNC; + if (flags.e) + f |= O_EXCL; + if (flags.a) + f |= O_APPEND; + + fd = open64(file, f, mode); + if (fd < 0) + return -errno; + + if (flags.cl && fcntl(fd, F_SETFD, 1)) { + err = -errno; + close(fd); + return err; + } + + return fd; +} + +int os_connect_socket(const char *name) +{ + struct sockaddr_un sock; + int fd, err; + + sock.sun_family = AF_UNIX; + snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + err = -errno; + goto out; + } + + err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); + if (err) { + err = -errno; + goto out_close; + } + + return fd; + +out_close: + close(fd); +out: + return err; +} + +void os_close_file(int fd) +{ + close(fd); +} + +int os_seek_file(int fd, unsigned long long offset) +{ + unsigned long long actual; + + actual = lseek64(fd, offset, SEEK_SET); + if (actual != offset) + return -errno; + return 0; +} + +int os_read_file(int fd, void *buf, int len) +{ + int n = read(fd, buf, len); + + if (n < 0) + return -errno; + return n; +} + +int os_write_file(int fd, const void *buf, int len) +{ + int n = write(fd, (void *) buf, len); + + if (n < 0) + return -errno; + return n; +} + +int os_file_size(const char *file, unsigned long long *size_out) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; + } + + if (S_ISBLK(buf.ust_mode)) { + int fd; + long blocks; + + fd = open(file, O_RDONLY, 0); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "Couldn't open \"%s\", " + "errno = %d\n", file, errno); + return err; + } + if (ioctl(fd, BLKGETSIZE, &blocks) < 0) { + err = -errno; + printk(UM_KERN_ERR "Couldn't get the block size of " + "\"%s\", errno = %d\n", file, errno); + close(fd); + return err; + } + *size_out = ((long long) blocks) * 512; + close(fd); + } + else *size_out = buf.ust_size; + + return 0; +} + +int os_file_modtime(const char *file, unsigned long *modtime) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; + } + + *modtime = buf.ust_mtime; + return 0; +} + +int os_set_exec_close(int fd) +{ + int err; + + CATCH_EINTR(err = fcntl(fd, F_SETFD, FD_CLOEXEC)); + + if (err < 0) + return -errno; + return err; +} + +int os_pipe(int *fds, int stream, int close_on_exec) +{ + int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; + + err = socketpair(AF_UNIX, type, 0, fds); + if (err < 0) + return -errno; + + if (!close_on_exec) + return 0; + + err = os_set_exec_close(fds[0]); + if (err < 0) + goto error; + + err = os_set_exec_close(fds[1]); + if (err < 0) + goto error; + + return 0; + + error: + printk(UM_KERN_ERR "os_pipe : Setting FD_CLOEXEC failed, err = %d\n", + -err); + close(fds[1]); + close(fds[0]); + return err; +} + +int os_set_fd_async(int fd) +{ + int err, flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + flags |= O_ASYNC | O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { + err = -errno; + printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC " + "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); + return err; + } + + if ((fcntl(fd, F_SETSIG, SIGIO) < 0) || + (fcntl(fd, F_SETOWN, os_getpid()) < 0)) { + err = -errno; + printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN " + "(or F_SETSIG) fd %d, errno = %d\n", fd, errno); + return err; + } + + return 0; +} + +int os_clear_fd_async(int fd) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + flags &= ~(O_ASYNC | O_NONBLOCK); + if (fcntl(fd, F_SETFL, flags) < 0) + return -errno; + return 0; +} + +int os_set_fd_block(int fd, int blocking) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + if (blocking) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; + + if (fcntl(fd, F_SETFL, flags) < 0) + return -errno; + + return 0; +} + +int os_accept_connection(int fd) +{ + int new; + + new = accept(fd, NULL, 0); + if (new < 0) + return -errno; + return new; +} + +#ifndef SHUT_RD +#define SHUT_RD 0 +#endif + +#ifndef SHUT_WR +#define SHUT_WR 1 +#endif + +#ifndef SHUT_RDWR +#define SHUT_RDWR 2 +#endif + +int os_shutdown_socket(int fd, int r, int w) +{ + int what, err; + + if (r && w) + what = SHUT_RDWR; + else if (r) + what = SHUT_RD; + else if (w) + what = SHUT_WR; + else + return -EINVAL; + + err = shutdown(fd, what); + if (err < 0) + return -errno; + return 0; +} + +int os_rcv_fd(int fd, int *helper_pid_out) +{ + int new, n; + char buf[CMSG_SPACE(sizeof(new))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + iov = ((struct iovec) { .iov_base = helper_pid_out, + .iov_len = sizeof(*helper_pid_out) }); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + msg.msg_flags = 0; + + n = recvmsg(fd, &msg, 0); + if (n < 0) + return -errno; + else if (n != iov.iov_len) + *helper_pid_out = -1; + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL) { + printk(UM_KERN_ERR "rcv_fd didn't receive anything, " + "error = %d\n", errno); + return -1; + } + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n"); + return -1; + } + + new = ((int *) CMSG_DATA(cmsg))[0]; + return new; +} + +int os_create_unix_socket(const char *file, int len, int close_on_exec) +{ + struct sockaddr_un addr; + int sock, err; + + sock = socket(PF_UNIX, SOCK_DGRAM, 0); + if (sock < 0) + return -errno; + + if (close_on_exec) { + err = os_set_exec_close(sock); + if (err < 0) + printk(UM_KERN_ERR "create_unix_socket : " + "close_on_exec failed, err = %d", -err); + } + + addr.sun_family = AF_UNIX; + + snprintf(addr.sun_path, len, "%s", file); + + err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); + if (err < 0) + return -errno; + + return sock; +} + +void os_flush_stdout(void) +{ + fflush(stdout); +} + +int os_lock_file(int fd, int excl) +{ + int type = excl ? F_WRLCK : F_RDLCK; + struct flock lock = ((struct flock) { .l_type = type, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 } ); + int err, save; + + err = fcntl(fd, F_SETLK, &lock); + if (!err) + goto out; + + save = -errno; + err = fcntl(fd, F_GETLK, &lock); + if (err) { + err = -errno; + goto out; + } + + printk(UM_KERN_ERR "F_SETLK failed, file already locked by pid %d\n", + lock.l_pid); + err = save; + out: + return err; +} + +unsigned os_major(unsigned long long dev) +{ + return major(dev); +} + +unsigned os_minor(unsigned long long dev) +{ + return minor(dev); +} + +unsigned long long os_makedev(unsigned major, unsigned minor) +{ + return makedev(major, minor); +} diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c new file mode 100644 index 00000000..b6b10961 --- /dev/null +++ b/arch/um/os-Linux/helper.c @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <linux/limits.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" + +struct helper_data { + void (*pre_exec)(void*); + void *pre_data; + char **argv; + int fd; + char *buf; +}; + +static int helper_child(void *arg) +{ + struct helper_data *data = arg; + char **argv = data->argv; + int err; + + if (data->pre_exec != NULL) + (*data->pre_exec)(data->pre_data); + err = execvp_noalloc(data->buf, argv[0], argv); + + /* If the exec succeeds, we don't get here */ + write(data->fd, &err, sizeof(err)); + + return 0; +} + +/* Returns either the pid of the child process we run or -E* on failure. */ +int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) +{ + struct helper_data data; + unsigned long stack, sp; + int pid, fds[2], ret, n; + + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; + + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds); + if (ret < 0) { + ret = -errno; + printk(UM_KERN_ERR "run_helper : pipe failed, errno = %d\n", + errno); + goto out_free; + } + + ret = os_set_exec_close(fds[1]); + if (ret < 0) { + printk(UM_KERN_ERR "run_helper : setting FD_CLOEXEC failed, " + "ret = %d\n", -ret); + goto out_close; + } + + sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); + data.pre_exec = pre_exec; + data.pre_data = pre_data; + data.argv = argv; + data.fd = fds[1]; + data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : + uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + pid = clone(helper_child, (void *) sp, CLONE_VM, &data); + if (pid < 0) { + ret = -errno; + printk(UM_KERN_ERR "run_helper : clone failed, errno = %d\n", + errno); + goto out_free2; + } + + close(fds[1]); + fds[1] = -1; + + /* + * Read the errno value from the child, if the exec failed, or get 0 if + * the exec succeeded because the pipe fd was set as close-on-exec. + */ + n = read(fds[0], &ret, sizeof(ret)); + if (n == 0) { + ret = pid; + } else { + if (n < 0) { + n = -errno; + printk(UM_KERN_ERR "run_helper : read on pipe failed, " + "ret = %d\n", -n); + ret = n; + } + CATCH_EINTR(waitpid(pid, NULL, __WCLONE)); + } + +out_free2: + kfree(data.buf); +out_close: + if (fds[1] != -1) + close(fds[1]); + close(fds[0]); +out_free: + free_stack(stack, 0); + return ret; +} + +int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, + unsigned long *stack_out) +{ + unsigned long stack, sp; + int pid, status, err; + + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; + + sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); + pid = clone(proc, (void *) sp, flags, arg); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "run_helper_thread : clone failed, " + "errno = %d\n", errno); + return err; + } + if (stack_out == NULL) { + CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE)); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "run_helper_thread - wait failed, " + "errno = %d\n", errno); + pid = err; + } + if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) + printk(UM_KERN_ERR "run_helper_thread - thread " + "returned status 0x%x\n", status); + free_stack(stack, 0); + } else + *stack_out = stack; + return pid; +} + +int helper_wait(int pid) +{ + int ret, status; + int wflags = __WCLONE; + + CATCH_EINTR(ret = waitpid(pid, &status, wflags)); + if (ret < 0) { + printk(UM_KERN_ERR "helper_wait : waitpid process %d failed, " + "errno = %d\n", pid, errno); + return -errno; + } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + printk(UM_KERN_ERR "helper_wait : process %d exited with " + "status 0x%x\n", pid, status); + return -ECHILD; + } else + return 0; +} diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c new file mode 100644 index 00000000..0348b975 --- /dev/null +++ b/arch/um/os-Linux/irq.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <errno.h> +#include <poll.h> +#include <signal.h> +#include <string.h> +#include "irq_user.h" +#include "kern_constants.h" +#include "os.h" +#include "process.h" +#include "um_malloc.h" +#include "user.h" + +/* + * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd + * and os_free_irq_by_cb, which are called under irq_lock. + */ +static struct pollfd *pollfds = NULL; +static int pollfds_num = 0; +static int pollfds_size = 0; + +int os_waiting_for_events(struct irq_fd *active_fds) +{ + struct irq_fd *irq_fd; + int i, n, err; + + n = poll(pollfds, pollfds_num, 0); + if (n < 0) { + err = -errno; + if (errno != EINTR) + printk(UM_KERN_ERR "os_waiting_for_events:" + " poll returned %d, errno = %d\n", n, errno); + return err; + } + + if (n == 0) + return 0; + + irq_fd = active_fds; + + for (i = 0; i < pollfds_num; i++) { + if (pollfds[i].revents != 0) { + irq_fd->current_events = pollfds[i].revents; + pollfds[i].fd = -1; + } + irq_fd = irq_fd->next; + } + return n; +} + +int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds) +{ + if (pollfds_num == pollfds_size) { + if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) { + /* return min size needed for new pollfds area */ + return (pollfds_size + 1) * sizeof(pollfds[0]); + } + + if (pollfds != NULL) { + memcpy(tmp_pfd, pollfds, + sizeof(pollfds[0]) * pollfds_size); + /* remove old pollfds */ + kfree(pollfds); + } + pollfds = tmp_pfd; + pollfds_size++; + } else + kfree(tmp_pfd); /* remove not used tmp_pfd */ + + pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, + .events = events, + .revents = 0 }); + pollfds_num++; + + return 0; +} + +void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, + struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) +{ + struct irq_fd **prev; + int i = 0; + + prev = &active_fds; + while (*prev != NULL) { + if ((*test)(*prev, arg)) { + struct irq_fd *old_fd = *prev; + if ((pollfds[i].fd != -1) && + (pollfds[i].fd != (*prev)->fd)) { + printk(UM_KERN_ERR "os_free_irq_by_cb - " + "mismatch between active_fds and " + "pollfds, fd %d vs %d\n", + (*prev)->fd, pollfds[i].fd); + goto out; + } + + pollfds_num--; + + /* + * This moves the *whole* array after pollfds[i] + * (though it doesn't spot as such)! + */ + memmove(&pollfds[i], &pollfds[i + 1], + (pollfds_num - i) * sizeof(pollfds[0])); + if (*last_irq_ptr2 == &old_fd->next) + *last_irq_ptr2 = prev; + + *prev = (*prev)->next; + if (old_fd->type == IRQ_WRITE) + ignore_sigio_fd(old_fd->fd); + kfree(old_fd); + continue; + } + prev = &(*prev)->next; + i++; + } + out: + return; +} + +int os_get_pollfd(int i) +{ + return pollfds[i].fd; +} + +void os_set_pollfd(int i, int fd) +{ + pollfds[i].fd = fd; +} + +void os_set_ioignore(void) +{ + signal(SIGIO, SIG_IGN); +} diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c new file mode 100644 index 00000000..fb2a97a7 --- /dev/null +++ b/arch/um/os-Linux/main.c @@ -0,0 +1,260 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <sys/resource.h> +#include "as-layout.h" +#include "init.h" +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "um_malloc.h" + +#define PGD_BOUND (4 * 1024 * 1024) +#define STACKSIZE (8 * 1024 * 1024) +#define THREAD_NAME_LEN (256) + +static void set_stklim(void) +{ + struct rlimit lim; + + if (getrlimit(RLIMIT_STACK, &lim) < 0) { + perror("getrlimit"); + exit(1); + } + if ((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)) { + lim.rlim_cur = STACKSIZE; + if (setrlimit(RLIMIT_STACK, &lim) < 0) { + perror("setrlimit"); + exit(1); + } + } +} + +static __init void do_uml_initcalls(void) +{ + initcall_t *call; + + call = &__uml_initcall_start; + while (call < &__uml_initcall_end) { + (*call)(); + call++; + } +} + +static void last_ditch_exit(int sig) +{ + uml_cleanup(); + exit(1); +} + +static void install_fatal_handler(int sig) +{ + struct sigaction action; + + /* All signals are enabled in this handler ... */ + sigemptyset(&action.sa_mask); + + /* + * ... including the signal being handled, plus we want the + * handler reset to the default behavior, so that if an exit + * handler is hanging for some reason, the UML will just die + * after this signal is sent a second time. + */ + action.sa_flags = SA_RESETHAND | SA_NODEFER; + action.sa_restorer = NULL; + action.sa_handler = last_ditch_exit; + if (sigaction(sig, &action, NULL) < 0) { + printf("failed to install handler for signal %d - errno = %d\n", + sig, errno); + exit(1); + } +} + +#define UML_LIB_PATH ":" OS_LIB_PATH "/uml" + +static void setup_env_path(void) +{ + char *new_path = NULL; + char *old_path = NULL; + int path_len = 0; + + old_path = getenv("PATH"); + /* + * if no PATH variable is set or it has an empty value + * just use the default + /usr/lib/uml + */ + if (!old_path || (path_len = strlen(old_path)) == 0) { + if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH)) + perror("couldn't putenv"); + return; + } + + /* append /usr/lib/uml to the existing path */ + path_len += strlen("PATH=" UML_LIB_PATH) + 1; + new_path = malloc(path_len); + if (!new_path) { + perror("couldn't malloc to set a new PATH"); + return; + } + snprintf(new_path, path_len, "PATH=%s" UML_LIB_PATH, old_path); + if (putenv(new_path)) { + perror("couldn't putenv to set a new PATH"); + free(new_path); + } +} + +extern void scan_elf_aux( char **envp); + +int __init main(int argc, char **argv, char **envp) +{ + char **new_argv; + int ret, i, err; + + set_stklim(); + + setup_env_path(); + + new_argv = malloc((argc + 1) * sizeof(char *)); + if (new_argv == NULL) { + perror("Mallocing argv"); + exit(1); + } + for (i = 0; i < argc; i++) { + new_argv[i] = strdup(argv[i]); + if (new_argv[i] == NULL) { + perror("Mallocing an arg"); + exit(1); + } + } + new_argv[argc] = NULL; + + /* + * Allow these signals to bring down a UML if all other + * methods of control fail. + */ + install_fatal_handler(SIGINT); + install_fatal_handler(SIGTERM); + + scan_elf_aux(envp); + + do_uml_initcalls(); + ret = linux_main(argc, argv); + + /* + * Disable SIGPROF - I have no idea why libc doesn't do this or turn + * off the profiling time, but UML dies with a SIGPROF just before + * exiting when profiling is active. + */ + change_sig(SIGPROF, 0); + + /* + * This signal stuff used to be in the reboot case. However, + * sometimes a SIGVTALRM can come in when we're halting (reproducably + * when writing out gcov information, presumably because that takes + * some time) and cause a segfault. + */ + + /* stop timers and set SIGVTALRM to be ignored */ + disable_timer(); + + /* disable SIGIO for the fds and set SIGIO to be ignored */ + err = deactivate_all_fds(); + if (err) + printf("deactivate_all_fds failed, errno = %d\n", -err); + + /* + * Let any pending signals fire now. This ensures + * that they won't be delivered after the exec, when + * they are definitely not expected. + */ + unblock_signals(); + + /* Reboot */ + if (ret) { + printf("\n"); + execvp(new_argv[0], new_argv); + perror("Failed to exec kernel"); + ret = 1; + } + printf("\n"); + return uml_exitcode; +} + +extern void *__real_malloc(int); + +void *__wrap_malloc(int size) +{ + void *ret; + + if (!kmalloc_ok) + return __real_malloc(size); + else if (size <= UM_KERN_PAGE_SIZE) + /* finding contiguous pages can be hard*/ + ret = uml_kmalloc(size, UM_GFP_KERNEL); + else ret = vmalloc(size); + + /* + * glibc people insist that if malloc fails, errno should be + * set by malloc as well. So we do. + */ + if (ret == NULL) + errno = ENOMEM; + + return ret; +} + +void *__wrap_calloc(int n, int size) +{ + void *ptr = __wrap_malloc(n * size); + + if (ptr == NULL) + return NULL; + memset(ptr, 0, n * size); + return ptr; +} + +extern void __real_free(void *); + +extern unsigned long high_physmem; + +void __wrap_free(void *ptr) +{ + unsigned long addr = (unsigned long) ptr; + + /* + * We need to know how the allocation happened, so it can be correctly + * freed. This is done by seeing what region of memory the pointer is + * in - + * physical memory - kmalloc/kfree + * kernel virtual memory - vmalloc/vfree + * anywhere else - malloc/free + * If kmalloc is not yet possible, then either high_physmem and/or + * end_vm are still 0 (as at startup), in which case we call free, or + * we have set them, but anyway addr has not been allocated from those + * areas. So, in both cases __real_free is called. + * + * CAN_KMALLOC is checked because it would be bad to free a buffer + * with kmalloc/vmalloc after they have been turned off during + * shutdown. + * XXX: However, we sometimes shutdown CAN_KMALLOC temporarily, so + * there is a possibility for memory leaks. + */ + + if ((addr >= uml_physmem) && (addr < high_physmem)) { + if (kmalloc_ok) + kfree(ptr); + } + else if ((addr >= start_vm) && (addr < end_vm)) { + if (kmalloc_ok) + vfree(ptr); + } + else __real_free(ptr); +} diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c new file mode 100644 index 00000000..e696144d --- /dev/null +++ b/arch/um/os-Linux/mem.c @@ -0,0 +1,279 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/param.h> +#include "init.h" +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +/* Modified by which_tmpdir, which is called during early boot */ +static char *default_tmpdir = "/tmp"; + +/* + * Modified when creating the physical memory file and when checking + * the tmp filesystem for usability, both happening during early boot. + */ +static char *tempdir = NULL; + +static void __init find_tempdir(void) +{ + const char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; + int i; + char *dir = NULL; + + if (tempdir != NULL) + /* We've already been called */ + return; + for (i = 0; dirs[i]; i++) { + dir = getenv(dirs[i]); + if ((dir != NULL) && (*dir != '\0')) + break; + } + if ((dir == NULL) || (*dir == '\0')) + dir = default_tmpdir; + + tempdir = malloc(strlen(dir) + 2); + if (tempdir == NULL) { + fprintf(stderr, "Failed to malloc tempdir, " + "errno = %d\n", errno); + return; + } + strcpy(tempdir, dir); + strcat(tempdir, "/"); +} + +/* + * This will return 1, with the first character in buf being the + * character following the next instance of c in the file. This will + * read the file as needed. If there's an error, -errno is returned; + * if the end of the file is reached, 0 is returned. + */ +static int next(int fd, char *buf, size_t size, char c) +{ + ssize_t n; + size_t len; + char *ptr; + + while ((ptr = strchr(buf, c)) == NULL) { + n = read(fd, buf, size - 1); + if (n == 0) + return 0; + else if (n < 0) + return -errno; + + buf[n] = '\0'; + } + + ptr++; + len = strlen(ptr); + memmove(buf, ptr, len + 1); + + /* + * Refill the buffer so that if there's a partial string that we care + * about, it will be completed, and we can recognize it. + */ + n = read(fd, &buf[len], size - len - 1); + if (n < 0) + return -errno; + + buf[len + n] = '\0'; + return 1; +} + +/* which_tmpdir is called only during early boot */ +static int checked_tmpdir = 0; + +/* + * Look for a tmpfs mounted at /dev/shm. I couldn't find a cleaner + * way to do this than to parse /proc/mounts. statfs will return the + * same filesystem magic number and fs id for both /dev and /dev/shm + * when they are both tmpfs, so you can't tell if they are different + * filesystems. Also, there seems to be no other way of finding the + * mount point of a filesystem from within it. + * + * If a /dev/shm tmpfs entry is found, then we switch to using it. + * Otherwise, we stay with the default /tmp. + */ +static void which_tmpdir(void) +{ + int fd, found; + char buf[128] = { '\0' }; + + if (checked_tmpdir) + return; + + checked_tmpdir = 1; + + printf("Checking for tmpfs mount on /dev/shm..."); + + fd = open("/proc/mounts", O_RDONLY); + if (fd < 0) { + printf("failed to open /proc/mounts, errno = %d\n", errno); + return; + } + + while (1) { + found = next(fd, buf, ARRAY_SIZE(buf), ' '); + if (found != 1) + break; + + if (!strncmp(buf, "/dev/shm", strlen("/dev/shm"))) + goto found; + + found = next(fd, buf, ARRAY_SIZE(buf), '\n'); + if (found != 1) + break; + } + +err: + if (found == 0) + printf("nothing mounted on /dev/shm\n"); + else if (found < 0) + printf("read returned errno %d\n", -found); + +out: + close(fd); + + return; + +found: + found = next(fd, buf, ARRAY_SIZE(buf), ' '); + if (found != 1) + goto err; + + if (strncmp(buf, "tmpfs", strlen("tmpfs"))) { + printf("not tmpfs\n"); + goto out; + } + + printf("OK\n"); + default_tmpdir = "/dev/shm"; + goto out; +} + +static int __init make_tempfile(const char *template, char **out_tempname, + int do_unlink) +{ + char *tempname; + int fd; + + which_tmpdir(); + tempname = malloc(MAXPATHLEN); + if (tempname == NULL) + return -1; + + find_tempdir(); + if ((tempdir == NULL) || (strlen(tempdir) >= MAXPATHLEN)) + return -1; + + if (template[0] != '/') + strcpy(tempname, tempdir); + else + tempname[0] = '\0'; + strncat(tempname, template, MAXPATHLEN-1-strlen(tempname)); + fd = mkstemp(tempname); + if (fd < 0) { + fprintf(stderr, "open - cannot create %s: %s\n", tempname, + strerror(errno)); + goto out; + } + if (do_unlink && (unlink(tempname) < 0)) { + perror("unlink"); + goto out; + } + if (out_tempname) { + *out_tempname = tempname; + } else + free(tempname); + return fd; +out: + free(tempname); + return -1; +} + +#define TEMPNAME_TEMPLATE "vm_file-XXXXXX" + +static int __init create_tmp_file(unsigned long long len) +{ + int fd, err; + char zero; + + fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); + if (fd < 0) + exit(1); + + err = fchmod(fd, 0777); + if (err < 0) { + perror("fchmod"); + exit(1); + } + + /* + * Seek to len - 1 because writing a character there will + * increase the file size by one byte, to the desired length. + */ + if (lseek64(fd, len - 1, SEEK_SET) < 0) { + perror("lseek64"); + exit(1); + } + + zero = 0; + + err = write(fd, &zero, 1); + if (err != 1) { + perror("write"); + exit(1); + } + + return fd; +} + +int __init create_mem_file(unsigned long long len) +{ + int err, fd; + + fd = create_tmp_file(len); + + err = os_set_exec_close(fd); + if (err < 0) { + errno = -err; + perror("exec_close"); + } + return fd; +} + + +void __init check_tmpexec(void) +{ + void *addr; + int err, fd = create_tmp_file(UM_KERN_PAGE_SIZE); + + addr = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0); + printf("Checking PROT_EXEC mmap in %s...",tempdir); + fflush(stdout); + if (addr == MAP_FAILED) { + err = errno; + perror("failed"); + close(fd); + if (err == EPERM) + printf("%s must be not mounted noexec\n",tempdir); + exit(1); + } + printf("OK\n"); + munmap(addr, UM_KERN_PAGE_SIZE); + + close(fd); +} diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c new file mode 100644 index 00000000..0c45dc8e --- /dev/null +++ b/arch/um/os-Linux/process.c @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include "init.h" +#include "kern_constants.h" +#include "longjmp.h" +#include "os.h" +#include "process.h" +#include "skas_ptrace.h" +#include "user.h" + +#define ARBITRARY_ADDR -1 +#define FAILURE_PID -1 + +#define STAT_PATH_LEN sizeof("/proc/#######/stat\0") +#define COMM_SCANF "%*[^)])" + +unsigned long os_process_pc(int pid) +{ + char proc_stat[STAT_PATH_LEN], buf[256]; + unsigned long pc = ARBITRARY_ADDR; + int fd, err; + + sprintf(proc_stat, "/proc/%d/stat", pid); + fd = open(proc_stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', " + "errno = %d\n", proc_stat, errno); + goto out; + } + CATCH_EINTR(err = read(fd, buf, sizeof(buf))); + if (err < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', " + "err = %d\n", proc_stat, errno); + goto out_close; + } + os_close_file(fd); + pc = ARBITRARY_ADDR; + if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %lu", &pc) != 1) + printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n", + buf); + out_close: + close(fd); + out: + return pc; +} + +int os_process_parent(int pid) +{ + char stat[STAT_PATH_LEN]; + char data[256]; + int parent = FAILURE_PID, n, fd; + + if (pid == -1) + return parent; + + snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); + fd = open(stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat, + errno); + return parent; + } + + CATCH_EINTR(n = read(fd, data, sizeof(data))); + close(fd); + + if (n < 0) { + printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat, + errno); + return parent; + } + + parent = FAILURE_PID; + n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); + if (n != 1) + printk(UM_KERN_ERR "Failed to scan '%s'\n", data); + + return parent; +} + +void os_stop_process(int pid) +{ + kill(pid, SIGSTOP); +} + +void os_kill_process(int pid, int reap_child) +{ + kill(pid, SIGKILL); + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); +} + +/* This is here uniquely to have access to the userspace errno, i.e. the one + * used by ptrace in case of error. + */ + +long os_ptrace_ldt(long pid, long addr, long data) +{ + int ret; + + ret = ptrace(PTRACE_LDT, pid, addr, data); + + if (ret < 0) + return -errno; + return ret; +} + +/* Kill off a ptraced child by all means available. kill it normally first, + * then PTRACE_KILL it, then PTRACE_CONT it in case it's in a run state from + * which it can't exit directly. + */ + +void os_kill_ptraced_process(int pid, int reap_child) +{ + kill(pid, SIGKILL); + ptrace(PTRACE_KILL, pid); + ptrace(PTRACE_CONT, pid); + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); +} + +/* Don't use the glibc version, which caches the result in TLS. It misses some + * syscalls, and also breaks with clone(), which does not unshare the TLS. + */ + +int os_getpid(void) +{ + return syscall(__NR_getpid); +} + +int os_getpgrp(void) +{ + return getpgrp(); +} + +int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, + int r, int w, int x) +{ + void *loc; + int prot; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, + fd, off); + if (loc == MAP_FAILED) + return -errno; + return 0; +} + +int os_protect_memory(void *addr, unsigned long len, int r, int w, int x) +{ + int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0)); + + if (mprotect(addr, len, prot) < 0) + return -errno; + + return 0; +} + +int os_unmap_memory(void *addr, int len) +{ + int err; + + err = munmap(addr, len); + if (err < 0) + return -errno; + return 0; +} + +#ifndef MADV_REMOVE +#define MADV_REMOVE KERNEL_MADV_REMOVE +#endif + +int os_drop_memory(void *addr, int length) +{ + int err; + + err = madvise(addr, length, MADV_REMOVE); + if (err < 0) + err = -errno; + return err; +} + +int __init can_drop_memory(void) +{ + void *addr; + int fd, ok = 0; + + printk(UM_KERN_INFO "Checking host MADV_REMOVE support..."); + fd = create_mem_file(UM_KERN_PAGE_SIZE); + if (fd < 0) { + printk(UM_KERN_ERR "Creating test memory file failed, " + "err = %d\n", -fd); + goto out; + } + + addr = mmap64(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "Mapping test memory file failed, " + "err = %d\n", -errno); + goto out_close; + } + + if (madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0) { + printk(UM_KERN_ERR "MADV_REMOVE failed, err = %d\n", -errno); + goto out_unmap; + } + + printk(UM_KERN_CONT "OK\n"); + ok = 1; + +out_unmap: + munmap(addr, UM_KERN_PAGE_SIZE); +out_close: + close(fd); +out: + return ok; +} + +void init_new_thread_signals(void) +{ + set_handler(SIGSEGV, (__sighandler_t) sig_handler, SA_ONSTACK, + SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); + set_handler(SIGTRAP, (__sighandler_t) sig_handler, SA_ONSTACK, + SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); + set_handler(SIGFPE, (__sighandler_t) sig_handler, SA_ONSTACK, + SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); + set_handler(SIGILL, (__sighandler_t) sig_handler, SA_ONSTACK, + SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); + set_handler(SIGBUS, (__sighandler_t) sig_handler, SA_ONSTACK, + SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); + signal(SIGHUP, SIG_IGN); + + set_handler(SIGIO, (__sighandler_t) sig_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, + SIGVTALRM, -1); + signal(SIGWINCH, SIG_IGN); + signal(SIGTERM, SIG_DFL); +} + +int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr) +{ + jmp_buf buf; + int n; + + *jmp_ptr = &buf; + n = UML_SETJMP(&buf); + if (n != 0) + return n; + (*fn)(arg); + return 0; +} diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c new file mode 100644 index 00000000..830fe6a1 --- /dev/null +++ b/arch/um/os-Linux/registers.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <string.h> +#include <sys/ptrace.h> +#include "sysdep/ptrace.h" + +int save_registers(int pid, struct uml_pt_regs *regs) +{ + int err; + + err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp); + if (err < 0) + return -errno; + return 0; +} + +int restore_registers(int pid, struct uml_pt_regs *regs) +{ + int err; + + err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp); + if (err < 0) + return -errno; + return 0; +} + +/* This is set once at boot time and not changed thereafter */ + +static unsigned long exec_regs[MAX_REG_NR]; + +int init_registers(int pid) +{ + int err; + + err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs); + if (err < 0) + return -errno; + + arch_init_registers(pid); + return 0; +} + +void get_safe_registers(unsigned long *regs) +{ + memcpy(regs, exec_regs, sizeof(exec_regs)); +} diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c new file mode 100644 index 00000000..63d299df --- /dev/null +++ b/arch/um/os-Linux/sigio.c @@ -0,0 +1,549 @@ +/* + * Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <pty.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "init.h" +#include "os.h" +#include "process.h" +#include "sigio.h" +#include "um_malloc.h" +#include "user.h" + +/* + * Protected by sigio_lock(), also used by sigio_cleanup, which is an + * exitcall. + */ +static int write_sigio_pid = -1; +static unsigned long write_sigio_stack; + +/* + * These arrays are initialized before the sigio thread is started, and + * the descriptors closed after it is killed. So, it can't see them change. + * On the UML side, they are changed under the sigio_lock. + */ +#define SIGIO_FDS_INIT {-1, -1} + +static int write_sigio_fds[2] = SIGIO_FDS_INIT; +static int sigio_private[2] = SIGIO_FDS_INIT; + +struct pollfds { + struct pollfd *poll; + int size; + int used; +}; + +/* + * Protected by sigio_lock(). Used by the sigio thread, but the UML thread + * synchronizes with it. + */ +static struct pollfds current_poll; +static struct pollfds next_poll; +static struct pollfds all_sigio_fds; + +static int write_sigio_thread(void *unused) +{ + struct pollfds *fds, tmp; + struct pollfd *p; + int i, n, respond_fd; + char c; + + signal(SIGWINCH, SIG_IGN); + fds = ¤t_poll; + while (1) { + n = poll(fds->poll, fds->used, -1); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "write_sigio_thread : poll returned " + "%d, errno = %d\n", n, errno); + } + for (i = 0; i < fds->used; i++) { + p = &fds->poll[i]; + if (p->revents == 0) + continue; + if (p->fd == sigio_private[1]) { + CATCH_EINTR(n = read(sigio_private[1], &c, + sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR + "write_sigio_thread : " + "read on socket failed, " + "err = %d\n", errno); + tmp = current_poll; + current_poll = next_poll; + next_poll = tmp; + respond_fd = sigio_private[1]; + } + else { + respond_fd = write_sigio_fds[1]; + fds->used--; + memmove(&fds->poll[i], &fds->poll[i + 1], + (fds->used - i) * sizeof(*fds->poll)); + } + + CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR "write_sigio_thread : " + "write on socket failed, err = %d\n", + errno); + } + } + + return 0; +} + +static int need_poll(struct pollfds *polls, int n) +{ + struct pollfd *new; + + if (n <= polls->size) + return 0; + + new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); + if (new == NULL) { + printk(UM_KERN_ERR "need_poll : failed to allocate new " + "pollfds\n"); + return -ENOMEM; + } + + memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); + kfree(polls->poll); + + polls->poll = new; + polls->size = n; + return 0; +} + +/* + * Must be called with sigio_lock held, because it's needed by the marked + * critical section. + */ +static void update_thread(void) +{ + unsigned long flags; + int n; + char c; + + flags = set_signals(0); + CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", + errno); + goto fail; + } + + CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", + errno); + goto fail; + } + + set_signals(flags); + return; + fail: + /* Critical section start */ + if (write_sigio_pid != -1) { + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + } + write_sigio_pid = -1; + close(sigio_private[0]); + close(sigio_private[1]); + close(write_sigio_fds[0]); + close(write_sigio_fds[1]); + /* Critical section end */ + set_signals(flags); +} + +int add_sigio_fd(int fd) +{ + struct pollfd *p; + int err = 0, i, n; + + sigio_lock(); + for (i = 0; i < all_sigio_fds.used; i++) { + if (all_sigio_fds.poll[i].fd == fd) + break; + } + if (i == all_sigio_fds.used) + goto out; + + p = &all_sigio_fds.poll[i]; + + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + goto out; + } + + n = current_poll.used; + err = need_poll(&next_poll, n + 1); + if (err) + goto out; + + memcpy(next_poll.poll, current_poll.poll, + current_poll.used * sizeof(struct pollfd)); + next_poll.poll[n] = *p; + next_poll.used = n + 1; + update_thread(); + out: + sigio_unlock(); + return err; +} + +int ignore_sigio_fd(int fd) +{ + struct pollfd *p; + int err = 0, i, n = 0; + + /* + * This is called from exitcalls elsewhere in UML - if + * sigio_cleanup has already run, then update_thread will hang + * or fail because the thread is no longer running. + */ + if (write_sigio_pid == -1) + return -EIO; + + sigio_lock(); + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + break; + } + if (i == current_poll.used) + goto out; + + err = need_poll(&next_poll, current_poll.used - 1); + if (err) + goto out; + + for (i = 0; i < current_poll.used; i++) { + p = ¤t_poll.poll[i]; + if (p->fd != fd) + next_poll.poll[n++] = *p; + } + next_poll.used = current_poll.used - 1; + + update_thread(); + out: + sigio_unlock(); + return err; +} + +static struct pollfd *setup_initial_poll(int fd) +{ + struct pollfd *p; + + p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); + if (p == NULL) { + printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " + "poll\n"); + return NULL; + } + *p = ((struct pollfd) { .fd = fd, + .events = POLLIN, + .revents = 0 }); + return p; +} + +static void write_sigio_workaround(void) +{ + struct pollfd *p; + int err; + int l_write_sigio_fds[2]; + int l_sigio_private[2]; + int l_write_sigio_pid; + + /* We call this *tons* of times - and most ones we must just fail. */ + sigio_lock(); + l_write_sigio_pid = write_sigio_pid; + sigio_unlock(); + + if (l_write_sigio_pid != -1) + return; + + err = os_pipe(l_write_sigio_fds, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " + "err = %d\n", -err); + return; + } + err = os_pipe(l_sigio_private, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " + "err = %d\n", -err); + goto out_close1; + } + + p = setup_initial_poll(l_sigio_private[1]); + if (!p) + goto out_close2; + + sigio_lock(); + + /* + * Did we race? Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. + */ + if (write_sigio_pid != -1) + goto out_free; + + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); + + if (write_sigio_irq(l_write_sigio_fds[0])) + goto out_clear_poll; + + memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); + memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); + + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + CLONE_FILES | CLONE_VM, + &write_sigio_stack); + + if (write_sigio_pid < 0) + goto out_clear; + + sigio_unlock(); + return; + +out_clear: + write_sigio_pid = -1; + write_sigio_fds[0] = -1; + write_sigio_fds[1] = -1; + sigio_private[0] = -1; + sigio_private[1] = -1; +out_clear_poll: + current_poll = ((struct pollfds) { .poll = NULL, + .size = 0, + .used = 0 }); +out_free: + sigio_unlock(); + kfree(p); +out_close2: + close(l_sigio_private[0]); + close(l_sigio_private[1]); +out_close1: + close(l_write_sigio_fds[0]); + close(l_write_sigio_fds[1]); +} + +void sigio_broken(int fd, int read) +{ + int err; + + write_sigio_workaround(); + + sigio_lock(); + err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); + if (err) { + printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " + "for descriptor %d\n", fd); + goto out; + } + + all_sigio_fds.poll[all_sigio_fds.used++] = + ((struct pollfd) { .fd = fd, + .events = read ? POLLIN : POLLOUT, + .revents = 0 }); +out: + sigio_unlock(); +} + +/* Changed during early boot */ +static int pty_output_sigio; +static int pty_close_sigio; + +void maybe_sigio_broken(int fd, int read) +{ + if (!isatty(fd)) + return; + + if ((read || pty_output_sigio) && (!read || pty_close_sigio)) + return; + + sigio_broken(fd, read); +} + +static void sigio_cleanup(void) +{ + if (write_sigio_pid == -1) + return; + + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + write_sigio_pid = -1; +} + +__uml_exitcall(sigio_cleanup); + +/* Used as a flag during SIGIO testing early in boot */ +static int got_sigio; + +static void __init handler(int sig) +{ + got_sigio = 1; +} + +struct openpty_arg { + int master; + int slave; + int err; +}; + +static void openpty_cb(void *arg) +{ + struct openpty_arg *info = arg; + + info->err = 0; + if (openpty(&info->master, &info->slave, NULL, NULL, NULL)) + info->err = -errno; +} + +static int async_pty(int master, int slave) +{ + int flags; + + flags = fcntl(master, F_GETFL); + if (flags < 0) + return -errno; + + if ((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || + (fcntl(master, F_SETOWN, os_getpid()) < 0)) + return -errno; + + if ((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) + return -errno; + + return 0; +} + +static void __init check_one_sigio(void (*proc)(int, int)) +{ + struct sigaction old, new; + struct openpty_arg pty = { .master = -1, .slave = -1 }; + int master, slave, err; + + initial_thread_cb(openpty_cb, &pty); + if (pty.err) { + printk(UM_KERN_ERR "check_one_sigio failed, errno = %d\n", + -pty.err); + return; + } + + master = pty.master; + slave = pty.slave; + + if ((master == -1) || (slave == -1)) { + printk(UM_KERN_ERR "check_one_sigio failed to allocate a " + "pty\n"); + return; + } + + /* Not now, but complain so we now where we failed. */ + err = raw(master); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : raw failed, errno = %d\n", + -err); + return; + } + + err = async_pty(master, slave); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigio_async failed, " + "err = %d\n", -err); + return; + } + + if (sigaction(SIGIO, NULL, &old) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 1 failed, " + "errno = %d\n", errno); + return; + } + + new = old; + new.sa_handler = handler; + if (sigaction(SIGIO, &new, NULL) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 2 failed, " + "errno = %d\n", errno); + return; + } + + got_sigio = 0; + (*proc)(master, slave); + + close(master); + close(slave); + + if (sigaction(SIGIO, &old, NULL) < 0) + printk(UM_KERN_ERR "check_one_sigio : sigaction 3 failed, " + "errno = %d\n", errno); +} + +static void tty_output(int master, int slave) +{ + int n; + char buf[512]; + + printk(UM_KERN_INFO "Checking that host ptys support output SIGIO..."); + + memset(buf, 0, sizeof(buf)); + + while (write(master, buf, sizeof(buf)) > 0) ; + if (errno != EAGAIN) + printk(UM_KERN_ERR "tty_output : write failed, errno = %d\n", + errno); + while (((n = read(slave, buf, sizeof(buf))) > 0) && + !({ barrier(); got_sigio; })) + ; + + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); + pty_output_sigio = 1; + } else if (n == -EAGAIN) + printk(UM_KERN_CONT "No, enabling workaround\n"); + else + printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n); +} + +static void tty_close(int master, int slave) +{ + printk(UM_KERN_INFO "Checking that host ptys support SIGIO on " + "close..."); + + close(slave); + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); + pty_close_sigio = 1; + } else + printk(UM_KERN_CONT "No, enabling workaround\n"); +} + +static void __init check_sigio(void) +{ + if ((access("/dev/ptmx", R_OK) < 0) && + (access("/dev/ptyp0", R_OK) < 0)) { + printk(UM_KERN_WARNING "No pseudo-terminals available - " + "skipping pty SIGIO check\n"); + return; + } + check_one_sigio(tty_output); + check_one_sigio(tty_close); +} + +/* Here because it only does the SIGIO testing for now */ +void __init os_check_bugs(void) +{ + check_sigio(); +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c new file mode 100644 index 00000000..6ae18070 --- /dev/null +++ b/arch/um/os-Linux/signal.c @@ -0,0 +1,298 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <stdarg.h> +#include <errno.h> +#include <signal.h> +#include <strings.h> +#include "as-layout.h" +#include "kern_util.h" +#include "os.h" +#include "process.h" +#include "sysdep/barrier.h" +#include "sysdep/sigcontext.h" +#include "user.h" + +/* Copied from linux/compiler-gcc.h since we can't include it directly */ +#define barrier() __asm__ __volatile__("": : :"memory") + +void (*sig_info[NSIG])(int, struct uml_pt_regs *) = { + [SIGTRAP] = relay_signal, + [SIGFPE] = relay_signal, + [SIGILL] = relay_signal, + [SIGWINCH] = winch, + [SIGBUS] = bus_handler, + [SIGSEGV] = segv_handler, + [SIGIO] = sigio_handler, + [SIGVTALRM] = timer_handler }; + +static void sig_handler_common(int sig, struct sigcontext *sc) +{ + struct uml_pt_regs r; + int save_errno = errno; + + r.is_user = 0; + if (sig == SIGSEGV) { + /* For segfaults, we want the data from the sigcontext. */ + copy_sc(&r, sc); + GET_FAULTINFO_FROM_SC(r.faultinfo, sc); + } + + /* enable signals if sig isn't IRQ signal */ + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) + unblock_signals(); + + (*sig_info[sig])(sig, &r); + + errno = save_errno; +} + +/* + * These are the asynchronous signals. SIGPROF is excluded because we want to + * be able to profile all of UML, not just the non-critical sections. If + * profiling is not thread-safe, then that is not my problem. We can disable + * profiling when SMP is enabled in that case. + */ +#define SIGIO_BIT 0 +#define SIGIO_MASK (1 << SIGIO_BIT) + +#define SIGVTALRM_BIT 1 +#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) + +static int signals_enabled; +static unsigned int signals_pending; + +void sig_handler(int sig, struct sigcontext *sc) +{ + int enabled; + + enabled = signals_enabled; + if (!enabled && (sig == SIGIO)) { + signals_pending |= SIGIO_MASK; + return; + } + + block_signals(); + + sig_handler_common(sig, sc); + + set_signals(enabled); +} + +static void real_alarm_handler(struct sigcontext *sc) +{ + struct uml_pt_regs regs; + + if (sc != NULL) + copy_sc(®s, sc); + regs.is_user = 0; + unblock_signals(); + timer_handler(SIGVTALRM, ®s); +} + +void alarm_handler(int sig, struct sigcontext *sc) +{ + int enabled; + + enabled = signals_enabled; + if (!signals_enabled) { + signals_pending |= SIGVTALRM_MASK; + return; + } + + block_signals(); + + real_alarm_handler(sc); + set_signals(enabled); +} + +void timer_init(void) +{ + set_handler(SIGVTALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, -1); +} + +void set_sigstack(void *sig_stack, int size) +{ + stack_t stack = ((stack_t) { .ss_flags = 0, + .ss_sp = (__ptr_t) sig_stack, + .ss_size = size - sizeof(void *) }); + + if (sigaltstack(&stack, NULL) != 0) + panic("enabling signal stack failed, errno = %d\n", errno); +} + +static void (*handlers[_NSIG])(int sig, struct sigcontext *sc); + +void handle_signal(int sig, struct sigcontext *sc) +{ + unsigned long pending = 1UL << sig; + + do { + int nested, bail; + + /* + * pending comes back with one bit set for each + * interrupt that arrived while setting up the stack, + * plus a bit for this interrupt, plus the zero bit is + * set if this is a nested interrupt. + * If bail is true, then we interrupted another + * handler setting up the stack. In this case, we + * have to return, and the upper handler will deal + * with this interrupt. + */ + bail = to_irq_stack(&pending); + if (bail) + return; + + nested = pending & 1; + pending &= ~1; + + while ((sig = ffs(pending)) != 0){ + sig--; + pending &= ~(1 << sig); + (*handlers[sig])(sig, sc); + } + + /* + * Again, pending comes back with a mask of signals + * that arrived while tearing down the stack. If this + * is non-zero, we just go back, set up the stack + * again, and handle the new interrupts. + */ + if (!nested) + pending = from_irq_stack(nested); + } while (pending); +} + +extern void hard_handler(int sig); + +void set_handler(int sig, void (*handler)(int), int flags, ...) +{ + struct sigaction action; + va_list ap; + sigset_t sig_mask; + int mask; + + handlers[sig] = (void (*)(int, struct sigcontext *)) handler; + action.sa_handler = hard_handler; + + sigemptyset(&action.sa_mask); + + va_start(ap, flags); + while ((mask = va_arg(ap, int)) != -1) + sigaddset(&action.sa_mask, mask); + va_end(ap); + + if (sig == SIGSEGV) + flags |= SA_NODEFER; + + action.sa_flags = flags; + action.sa_restorer = NULL; + if (sigaction(sig, &action, NULL) < 0) + panic("sigaction failed - errno = %d\n", errno); + + sigemptyset(&sig_mask); + sigaddset(&sig_mask, sig); + if (sigprocmask(SIG_UNBLOCK, &sig_mask, NULL) < 0) + panic("sigprocmask failed - errno = %d\n", errno); +} + +int change_sig(int signal, int on) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, signal); + if (sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + return -errno; + + return 0; +} + +void block_signals(void) +{ + signals_enabled = 0; + /* + * This must return with signals disabled, so this barrier + * ensures that writes are flushed out before the return. + * This might matter if gcc figures out how to inline this and + * decides to shuffle this code into the caller. + */ + barrier(); +} + +void unblock_signals(void) +{ + int save_pending; + + if (signals_enabled == 1) + return; + + /* + * We loop because the IRQ handler returns with interrupts off. So, + * interrupts may have arrived and we need to re-enable them and + * recheck signals_pending. + */ + while (1) { + /* + * Save and reset save_pending after enabling signals. This + * way, signals_pending won't be changed while we're reading it. + */ + signals_enabled = 1; + + /* + * Setting signals_enabled and reading signals_pending must + * happen in this order. + */ + barrier(); + + save_pending = signals_pending; + if (save_pending == 0) + return; + + signals_pending = 0; + + /* + * We have pending interrupts, so disable signals, as the + * handlers expect them off when they are called. They will + * be enabled again above. + */ + + signals_enabled = 0; + + /* + * Deal with SIGIO first because the alarm handler might + * schedule, leaving the pending SIGIO stranded until we come + * back here. + */ + if (save_pending & SIGIO_MASK) + sig_handler_common(SIGIO, NULL); + + if (save_pending & SIGVTALRM_MASK) + real_alarm_handler(NULL); + } +} + +int get_signals(void) +{ + return signals_enabled; +} + +int set_signals(int enable) +{ + int ret; + if (signals_enabled == enable) + return enable; + + ret = signals_enabled; + if (enable) + unblock_signals(); + else block_signals(); + + return ret; +} diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile new file mode 100644 index 00000000..d2ea3409 --- /dev/null +++ b/arch/um/os-Linux/skas/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) +# Licensed under the GPL +# + +obj-y := mem.o process.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c new file mode 100644 index 00000000..d261f170 --- /dev/null +++ b/arch/um/os-Linux/skas/mem.c @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include "init.h" +#include "kern_constants.h" +#include "as-layout.h" +#include "mm_id.h" +#include "os.h" +#include "proc_mm.h" +#include "ptrace_user.h" +#include "registers.h" +#include "skas.h" +#include "user.h" +#include "sysdep/ptrace.h" +#include "sysdep/stub.h" + +extern unsigned long batch_syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid); + +static inline unsigned long *check_init_stack(struct mm_id * mm_idp, + unsigned long *stack) +{ + if (stack == NULL) { + stack = (unsigned long *) mm_idp->stack + 2; + *stack = 0; + } + return stack; +} + +static unsigned long syscall_regs[MAX_REG_NR]; + +static int __init init_syscall_regs(void) +{ + get_safe_registers(syscall_regs); + syscall_regs[REGS_IP_INDEX] = STUB_CODE + + ((unsigned long) &batch_syscall_stub - + (unsigned long) &__syscall_stub_start); + return 0; +} + +__initcall(init_syscall_regs); + +extern int proc_mm; + +int single_count = 0; +int multi_count = 0; +int multi_op_count = 0; + +static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) +{ + int n, i; + long ret, offset; + unsigned long * data; + unsigned long * syscall; + int err, pid = mm_idp->u.pid; + + if (proc_mm) + /* FIXME: Need to look up userspace_pid by cpu */ + pid = userspace_pid[0]; + + multi_count++; + + n = ptrace_setregs(pid, syscall_regs); + if (n < 0) { + printk(UM_KERN_ERR "Registers - \n"); + for (i = 0; i < MAX_REG_NR; i++) + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); + panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n", + -n); + } + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) + panic("Failed to continue stub, pid = %d, errno = %d\n", pid, + errno); + + wait_stub_done(pid); + + /* + * When the stub stops, we find the following values on the + * beginning of the stack: + * (long )return_value + * (long )offset to failed sycall-data (0, if no error) + */ + ret = *((unsigned long *) mm_idp->stack); + offset = *((unsigned long *) mm_idp->stack + 1); + if (offset) { + data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA); + printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, offset = %ld, " + "data = %p\n", ret, offset, data); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, " + "return value = 0x%lx, expected return value = 0x%lx\n", + syscall[0], ret, syscall[7]); + printk(UM_KERN_ERR " syscall parameters: " + "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for (n = 1; n < data[0]/sizeof(long); n++) { + if (n == 1) + printk(UM_KERN_ERR " additional syscall " + "data:"); + if (n % 4 == 1) + printk("\n" UM_KERN_ERR " "); + printk(" 0x%lx", data[n]); + } + if (n > 1) + printk("\n"); + } + else ret = 0; + + *addr = check_init_stack(mm_idp, NULL); + + return ret; +} + +long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, long expected, void **addr, + int done) +{ + unsigned long *stack = check_init_stack(mm_idp, *addr); + + if (done && *addr == NULL) + single_count++; + + *stack += sizeof(long); + stack += *stack / sizeof(long); + + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack++ = expected; + *stack = 0; + multi_op_count++; + + if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) < + UM_KERN_PAGE_SIZE - 10 * sizeof(long))) { + *addr = stack; + return 0; + } + + return do_syscall_stub(mm_idp, addr); +} + +long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr) +{ + unsigned long *stack; + int ret = 0; + + /* + * If *addr still is uninitialized, it *must* contain NULL. + * Thus in this case do_syscall_stub correctly won't be called. + */ + if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >= + UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) { + ret = do_syscall_stub(mm_idp, addr); + /* in case of error, don't overwrite data on stack */ + if (ret) + return ret; + } + + stack = check_init_stack(mm_idp, *addr); + *addr = stack; + + *stack = data_count * sizeof(long); + + memcpy(stack + 1, data, data_count * sizeof(long)); + + *stub_addr = (void *)(((unsigned long)(stack + 1) & + ~UM_KERN_PAGE_MASK) + STUB_DATA); + + return 0; +} + +int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset, int done, void **data) +{ + int ret; + + if (proc_mm) { + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + CATCH_EINTR(ret = write(fd, &map, sizeof(map))); + if (ret != sizeof(map)) { + ret = -errno; + printk(UM_KERN_ERR "map : /proc/mm map failed, " + "err = %d\n", -ret); + } + else ret = 0; + } + else { + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, + data, done); + } + + return ret; +} + +int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int done, void **data) +{ + int ret; + + if (proc_mm) { + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + CATCH_EINTR(ret = write(fd, &unmap, sizeof(unmap))); + if (ret != sizeof(unmap)) { + ret = -errno; + printk(UM_KERN_ERR "unmap - proc_mm write returned " + "%d\n", ret); + } + else ret = 0; + } + else { + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, + data, done); + } + + return ret; +} + +int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + unsigned int prot, int done, void **data) +{ + struct proc_mm_op protect; + int ret; + + if (proc_mm) { + int fd = mm_idp->u.mm_fd; + + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + CATCH_EINTR(ret = write(fd, &protect, sizeof(protect))); + if (ret != sizeof(protect)) { + ret = -errno; + printk(UM_KERN_ERR "protect failed, err = %d", -ret); + } + else ret = 0; + } + else { + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, + data, done); + } + + return ret; +} diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c new file mode 100644 index 00000000..d6e0a223 --- /dev/null +++ b/arch/um/os-Linux/skas/process.c @@ -0,0 +1,728 @@ +/* + * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> +#include <sched.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include "as-layout.h" +#include "chan_user.h" +#include "kern_constants.h" +#include "kern_util.h" +#include "mem.h" +#include "os.h" +#include "process.h" +#include "proc_mm.h" +#include "ptrace_user.h" +#include "registers.h" +#include "skas.h" +#include "skas_ptrace.h" +#include "user.h" +#include "sysdep/stub.h" + +int is_skas_winch(int pid, int fd, void *data) +{ + if (pid != getpgrp()) + return 0; + + register_winch_irq(-1, fd, -1, data, 0); + return 1; +} + +static int ptrace_dump_regs(int pid) +{ + unsigned long regs[MAX_REG_NR]; + int i; + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + return -errno; + + printk(UM_KERN_ERR "Stub registers -\n"); + for (i = 0; i < ARRAY_SIZE(regs); i++) + printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]); + + return 0; +} + +/* + * Signals that are OK to receive in the stub - we'll just continue it. + * SIGWINCH will happen when UML is inside a detached screen. + */ +#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) + +/* Signals that the stub will finish with - anything else is an error */ +#define STUB_DONE_MASK (1 << SIGTRAP) + +void wait_stub_done(int pid) +{ + int n, status, err; + + while (1) { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((n < 0) || !WIFSTOPPED(status)) + goto bad_wait; + + if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) + break; + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + printk(UM_KERN_ERR "wait_stub_done : continue failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + } + + if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) + return; + +bad_wait: + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers from stub, " + "errno = %d\n", -err); + printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, + status); + fatal_sigsegv(); +} + +extern unsigned long current_stub_stack(void); + +static void get_skas_faultinfo(int pid, struct faultinfo *fi) +{ + int err; + + if (ptrace_faultinfo) { + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if (err) { + printk(UM_KERN_ERR "get_skas_faultinfo - " + "PTRACE_FAULTINFO failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + unsigned long fpregs[FP_SIZE]; + + err = get_fp_registers(pid, fpregs); + if (err < 0) { + printk(UM_KERN_ERR "save_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } + err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); + if (err) { + printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " + "errno = %d\n", pid, errno); + fatal_sigsegv(); + } + wait_stub_done(pid); + + /* + * faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. + */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + + err = put_fp_registers(pid, fpregs); + if (err < 0) { + printk(UM_KERN_ERR "put_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } + } +} + +static void handle_segv(int pid, struct uml_pt_regs * regs) +{ + get_skas_faultinfo(pid, ®s->faultinfo); + segv(regs->faultinfo, 0, 1, NULL); +} + +/* + * To use the same value of using_sysemu as the caller, ask it that value + * (in local_using_sysemu + */ +static void handle_trap(int pid, struct uml_pt_regs *regs, + int local_using_sysemu) +{ + int err, status; + + if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) + fatal_sigsegv(); + + /* Mark this as a syscall */ + UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); + + if (!local_using_sysemu) + { + err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, + __NR_getpid); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - nullifying syscall " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + err = ptrace(PTRACE_SYSCALL, pid, 0, 0); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - continuing to end of " + "syscall failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((err < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGTRAP + 0x80)) { + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers " + "from process, errno = %d\n", -err); + printk(UM_KERN_ERR "handle_trap - failed to wait at " + "end of syscall, errno = %d, status = %d\n", + errno, status); + fatal_sigsegv(); + } + } + + handle_syscall(regs); +} + +extern int __syscall_stub_start; + +static int userspace_tramp(void *stack) +{ + void *addr; + int err; + + ptrace(PTRACE_TRACEME, 0, 0, 0); + + signal(SIGTERM, SIG_DFL); + signal(SIGWINCH, SIG_IGN); + err = set_interval(); + if (err) { + printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " + "errno = %d\n", err); + exit(1); + } + + if (!proc_mm) { + /* + * This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + int fd; + unsigned long long offset; + fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, " + "errno = %d\n", STUB_CODE, errno); + exit(1); + } + + if (stack != NULL) { + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) STUB_DATA, + UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping segfault stack " + "at 0x%lx failed, errno = %d\n", + STUB_DATA, errno); + exit(1); + } + } + } + if (!ptrace_faultinfo && (stack != NULL)) { + struct sigaction sa; + + unsigned long v = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK | SA_NODEFER; + sa.sa_handler = (void *) v; + sa.sa_restorer = NULL; + if (sigaction(SIGSEGV, &sa, NULL) < 0) { + printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV " + "handler failed - errno = %d\n", errno); + exit(1); + } + } + + kill(os_getpid(), SIGSTOP); + return 0; +} + +/* Each element set once, and only accessed by a single processor anyway */ +#undef NR_CPUS +#define NR_CPUS 1 +int userspace_pid[NR_CPUS]; + +int start_userspace(unsigned long stub_stack) +{ + void *stack; + unsigned long sp; + int pid, status, n, flags, err; + + stack = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : mmap failed, " + "errno = %d\n", errno); + return err; + } + + sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); + + flags = CLONE_FILES; + if (proc_mm) + flags |= CLONE_VM; + else + flags |= SIGCHLD; + + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : clone failed, " + "errno = %d\n", errno); + return err; + } + + do { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : wait failed, " + "errno = %d\n", errno); + goto out_kill; + } + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); + + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got " + "status = %d\n", status); + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *) PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } + + if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : munmap failed, " + "errno = %d\n", errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +void userspace(struct uml_pt_regs *regs) +{ + struct itimerval timer; + unsigned long long nsecs, now; + int err, status, op, pid = userspace_pid[0]; + /* To prevent races if using_sysemu changes under us.*/ + int local_using_sysemu; + + if (getitimer(ITIMER_VIRTUAL, &timer)) + printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); + nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + + timer.it_value.tv_usec * UM_NSEC_PER_USEC; + nsecs += os_nsecs(); + + while (1) { + /* + * This can legitimately fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. + */ + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) + fatal_sigsegv(); + + /* Now we set local_using_sysemu to be used for one loop */ + local_using_sysemu = get_using_sysemu(); + + op = SELECT_PTRACE_OPERATION(local_using_sysemu, + singlestepping(NULL)); + + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "userspace - ptrace continue " + "failed, op = %d, errno = %d\n", op, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if (err < 0) { + printk(UM_KERN_ERR "userspace - wait failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + + regs->is_user = 1; + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + + if (WIFSTOPPED(status)) { + int sig = WSTOPSIG(status); + switch (sig) { + case SIGSEGV: + if (PTRACE_FULL_FAULTINFO || + !ptrace_faultinfo) { + get_skas_faultinfo(pid, + ®s->faultinfo); + (*sig_info[SIGSEGV])(SIGSEGV, regs); + } + else handle_segv(pid, regs); + break; + case SIGTRAP + 0x80: + handle_trap(pid, regs, local_using_sysemu); + break; + case SIGTRAP: + relay_signal(SIGTRAP, regs); + break; + case SIGVTALRM: + now = os_nsecs(); + if (now < nsecs) + break; + block_signals(); + (*sig_info[sig])(sig, regs); + unblock_signals(); + nsecs = timer.it_value.tv_sec * + UM_NSEC_PER_SEC + + timer.it_value.tv_usec * + UM_NSEC_PER_USEC; + nsecs += os_nsecs(); + break; + case SIGIO: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + block_signals(); + (*sig_info[sig])(sig, regs); + unblock_signals(); + break; + default: + printk(UM_KERN_ERR "userspace - child stopped " + "with signal %d\n", sig); + fatal_sigsegv(); + } + pid = userspace_pid[0]; + interrupt_end(); + + /* Avoid -ERESTARTSYS handling in host */ + if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) + PT_SYSCALL_NR(regs->gp) = -1; + } + } +} + +static unsigned long thread_regs[MAX_REG_NR]; + +static int __init init_thread_regs(void) +{ + get_safe_registers(thread_regs); + /* Set parent's instruction pointer to start of clone-stub */ + thread_regs[REGS_IP_INDEX] = STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) &__syscall_stub_start; + thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - + sizeof(void *); +#ifdef __SIGNAL_FRAMESIZE + thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; +#endif + return 0; +} + +__initcall(init_thread_regs); + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; + int err; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + unsigned long long new_offset; + int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + + /* + * prepare offset and fd of child's stack as argument for parent's + * and child's mmap2 calls + */ + *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .timer = ((struct itimerval) + { .it_value = tv, + .it_interval = tv }) }); + + err = ptrace_setregs(pid, thread_regs); + if (err < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS " + "failed, pid = %d, errno = %d\n", pid, -err); + return err; + } + + /* set a well known return code for detection of child write failure */ + child_data->err = 12345678; + + /* + * Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. + */ + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to continue new process, pid = %d, " + "errno = %d\n", pid, errno); + return err; + } + + wait_stub_done(pid); + + pid = data->err; + if (pid < 0) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports " + "error %d\n", -pid); + return pid; + } + + /* + * Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid); + if (child_data->err != STUB_DATA) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports " + "error %ld\n", child_data->err); + err = child_data->err; + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +/* + * This is used only, if stub pages are needed, while proc_mm is + * available. Opening /proc/mm creates a new mm_context, which lacks + * the stub-pages. Thus, we map them using /proc/mm-fd + */ +int map_stub_pages(int fd, unsigned long code, unsigned long data, + unsigned long stack) +{ + struct proc_mm_op mmop; + int n; + unsigned long long code_offset; + int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start), + &code_offset); + + mmop = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = code, + .len = UM_KERN_PAGE_SIZE, + .prot = PROT_EXEC, + .flags = MAP_FIXED | MAP_PRIVATE, + .fd = code_fd, + .offset = code_offset + } } }); + CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); + if (n != sizeof(mmop)) { + n = errno; + printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, " + "offset = %llx\n", code, code_fd, + (unsigned long long) code_offset); + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for code " + "failed, err = %d\n", n); + return -n; + } + + if (stack) { + unsigned long long map_offset; + int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); + mmop = ((struct proc_mm_op) + { .op = MM_MMAP, + .u = + { .mmap = + { .addr = data, + .len = UM_KERN_PAGE_SIZE, + .prot = PROT_READ | PROT_WRITE, + .flags = MAP_FIXED | MAP_SHARED, + .fd = map_fd, + .offset = map_offset + } } }); + CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); + if (n != sizeof(mmop)) { + n = errno; + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for " + "data failed, err = %d\n", n); + return -n; + } + } + + return 0; +} + +void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) +{ + (*buf)[0].JB_IP = (unsigned long) handler; + (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - + sizeof(void *); +} + +#define INIT_JMP_NEW_THREAD 0 +#define INIT_JMP_CALLBACK 1 +#define INIT_JMP_HALT 2 +#define INIT_JMP_REBOOT 3 + +void switch_threads(jmp_buf *me, jmp_buf *you) +{ + if (UML_SETJMP(me) == 0) + UML_LONGJMP(you, 1); +} + +static jmp_buf initial_jmpbuf; + +/* XXX Make these percpu */ +static void (*cb_proc)(void *arg); +static void *cb_arg; +static jmp_buf *cb_back; + +int start_idle_thread(void *stack, jmp_buf *switch_buf) +{ + int n; + + set_handler(SIGWINCH, (__sighandler_t) sig_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGVTALRM, -1); + + /* + * Can't use UML_SETJMP or UML_LONGJMP here because they save + * and restore signals, with the possible side-effect of + * trying to handle any signals which came when they were + * blocked, which can't be done on this stack. + * Signals must be blocked when jumping back here and restored + * after returning to the jumper. + */ + n = setjmp(initial_jmpbuf); + switch (n) { + case INIT_JMP_NEW_THREAD: + (*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler; + (*switch_buf)[0].JB_SP = (unsigned long) stack + + UM_THREAD_SIZE - sizeof(void *); + break; + case INIT_JMP_CALLBACK: + (*cb_proc)(cb_arg); + longjmp(*cb_back, 1); + break; + case INIT_JMP_HALT: + kmalloc_ok = 0; + return 0; + case INIT_JMP_REBOOT: + kmalloc_ok = 0; + return 1; + default: + printk(UM_KERN_ERR "Bad sigsetjmp return in " + "start_idle_thread - %d\n", n); + fatal_sigsegv(); + } + longjmp(*switch_buf, 1); +} + +void initial_thread_cb_skas(void (*proc)(void *), void *arg) +{ + jmp_buf here; + + cb_proc = proc; + cb_arg = arg; + cb_back = &here; + + block_signals(); + if (UML_SETJMP(&here) == 0) + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); + unblock_signals(); + + cb_proc = NULL; + cb_arg = NULL; + cb_back = NULL; +} + +void halt_skas(void) +{ + block_signals(); + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); +} + +void reboot_skas(void) +{ + block_signals(); + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT); +} + +void __switch_mm(struct mm_id *mm_idp) +{ + int err; + + /* FIXME: need cpu pid in __switch_mm */ + if (proc_mm) { + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if (err) { + printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } + } + else userspace_pid[0] = mm_idp->u.pid; +} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c new file mode 100644 index 00000000..02ee9adf --- /dev/null +++ b/arch/um/os-Linux/start_up.c @@ -0,0 +1,542 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include "init.h" +#include "kern_constants.h" +#include "os.h" +#include "mem_user.h" +#include "ptrace_user.h" +#include "registers.h" +#include "skas.h" +#include "skas_ptrace.h" + +static void ptrace_child(void) +{ + int ret; + /* Calling os_getpid because some libcs cached getpid incorrectly */ + int pid = os_getpid(), ppid = getppid(); + int sc_result; + + if (change_sig(SIGWINCH, 0) < 0 || + ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) { + perror("ptrace"); + kill(pid, SIGKILL); + } + kill(pid, SIGSTOP); + + /* + * This syscall will be intercepted by the parent. Don't call more than + * once, please. + */ + sc_result = os_getpid(); + + if (sc_result == pid) + /* Nothing modified by the parent, we are running normally. */ + ret = 1; + else if (sc_result == ppid) + /* + * Expected in check_ptrace and check_sysemu when they succeed + * in modifying the stack frame + */ + ret = 0; + else + /* Serious trouble! This could be caused by a bug in host 2.6 + * SKAS3/2.6 patch before release -V6, together with a bug in + * the UML code itself. + */ + ret = 2; + + exit(ret); +} + +static void fatal_perror(const char *str) +{ + perror(str); + exit(1); +} + +static void fatal(char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); + + exit(1); +} + +static void non_fatal(char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); +} + +static int start_ptraced_child(void) +{ + int pid, n, status; + + pid = fork(); + if (pid == 0) + ptrace_child(); + else if (pid < 0) + fatal_perror("start_ptraced_child : fork failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_ptrace : waitpid failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) + fatal("check_ptrace : expected SIGSTOP, got status = %d", + status); + + return pid; +} + +/* When testing for SYSEMU support, if it is one of the broken versions, we + * must just avoid using sysemu, not panic, but only if SYSEMU features are + * broken. + * So only for SYSEMU features we test mustpanic, while normal host features + * must work anyway! + */ +static int stop_ptraced_child(int pid, int exitcode, int mustexit) +{ + int status, n, ret = 0; + + if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) { + perror("stop_ptraced_child : ptrace failed"); + return -1; + } + CATCH_EINTR(n = waitpid(pid, &status, 0)); + if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { + int exit_with = WEXITSTATUS(status); + if (exit_with == 2) + non_fatal("check_ptrace : child exited with status 2. " + "\nDisabling SYSEMU support.\n"); + non_fatal("check_ptrace : child exited with exitcode %d, while " + "expecting %d; status 0x%x\n", exit_with, + exitcode, status); + if (mustexit) + exit(1); + ret = -1; + } + + return ret; +} + +/* Changed only during early boot */ +int ptrace_faultinfo; +static int disable_ptrace_faultinfo; + +int ptrace_ldt; +static int disable_ptrace_ldt; + +int proc_mm; +static int disable_proc_mm; + +int have_switch_mm; +static int disable_switch_mm; + +int skas_needs_stub; + +static int __init skas0_cmd_param(char *str, int* add) +{ + disable_ptrace_faultinfo = 1; + disable_ptrace_ldt = 1; + disable_proc_mm = 1; + disable_switch_mm = 1; + + return 0; +} + +/* The two __uml_setup would conflict, without this stupid alias. */ + +static int __init mode_skas0_cmd_param(char *str, int* add) + __attribute__((alias("skas0_cmd_param"))); + +__uml_setup("skas0", skas0_cmd_param, +"skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used\n\n"); + +__uml_setup("mode=skas0", mode_skas0_cmd_param, +"mode=skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used.\n\n"); + +/* Changed only during early boot */ +static int force_sysemu_disabled = 0; + +static int __init nosysemu_cmd_param(char *str, int* add) +{ + force_sysemu_disabled = 1; + return 0; +} + +__uml_setup("nosysemu", nosysemu_cmd_param, +"nosysemu\n" +" Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" +" SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n" +" behaviour of ptrace() and helps reducing host context switch rate.\n" +" To make it working, you need a kernel patch for your host, too.\n" +" See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n" +" information.\n\n"); + +static void __init check_sysemu(void) +{ + unsigned long regs[MAX_REG_NR]; + int pid, n, status, count=0; + + non_fatal("Checking syscall emulation patch for ptrace..."); + sysemu_supported = 0; + pid = start_ptraced_child(); + + if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) + goto fail; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_sysemu : wait failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + fatal("check_sysemu : expected SIGTRAP, got status = %d\n", + status); + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + fatal_perror("check_sysemu : PTRACE_GETREGS failed"); + if (PT_SYSCALL_NR(regs) != __NR_getpid) { + non_fatal("check_sysemu got system call number %d, " + "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid); + goto fail; + } + + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, os_getpid()); + if (n < 0) { + non_fatal("check_sysemu : failed to modify system call " + "return"); + goto fail; + } + + if (stop_ptraced_child(pid, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 1; + non_fatal("OK\n"); + set_using_sysemu(!force_sysemu_disabled); + + non_fatal("Checking advanced syscall emulation patch for ptrace..."); + pid = start_ptraced_child(); + + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed"); + + while (1) { + count++; + if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) + goto fail; + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_sysemu: wait failed"); + + if (WIFSTOPPED(status) && + (WSTOPSIG(status) == (SIGTRAP|0x80))) { + if (!count) { + non_fatal("check_sysemu: SYSEMU_SINGLESTEP " + "doesn't singlestep"); + goto fail; + } + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, + os_getpid()); + if (n < 0) + fatal_perror("check_sysemu : failed to modify " + "system call return"); + break; + } + else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) + count++; + else { + non_fatal("check_sysemu: expected SIGTRAP or " + "(SIGTRAP | 0x80), got status = %d\n", + status); + goto fail; + } + } + if (stop_ptraced_child(pid, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 2; + non_fatal("OK\n"); + + if (!force_sysemu_disabled) + set_using_sysemu(sysemu_supported); + return; + +fail: + stop_ptraced_child(pid, 1, 0); +fail_stopped: + non_fatal("missing\n"); +} + +static void __init check_ptrace(void) +{ + int pid, syscall, n, status; + + non_fatal("Checking that ptrace can change system call numbers..."); + pid = start_ptraced_child(); + + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed"); + + while (1) { + if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + fatal_perror("check_ptrace : ptrace failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_ptrace : wait failed"); + + if (!WIFSTOPPED(status) || + (WSTOPSIG(status) != (SIGTRAP | 0x80))) + fatal("check_ptrace : expected (SIGTRAP|0x80), " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, + 0); + if (syscall == __NR_getpid) { + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, + __NR_getppid); + if (n < 0) + fatal_perror("check_ptrace : failed to modify " + "system call"); + break; + } + } + stop_ptraced_child(pid, 0, 1); + non_fatal("OK\n"); + check_sysemu(); +} + +extern void check_tmpexec(void); + +static void __init check_coredump_limit(void) +{ + struct rlimit lim; + int err = getrlimit(RLIMIT_CORE, &lim); + + if (err) { + perror("Getting core dump limit"); + return; + } + + printf("Core dump limits :\n\tsoft - "); + if (lim.rlim_cur == RLIM_INFINITY) + printf("NONE\n"); + else printf("%lu\n", lim.rlim_cur); + + printf("\thard - "); + if (lim.rlim_max == RLIM_INFINITY) + printf("NONE\n"); + else printf("%lu\n", lim.rlim_max); +} + +void __init os_early_checks(void) +{ + int pid; + + /* Print out the core dump limits early */ + check_coredump_limit(); + + check_ptrace(); + + /* Need to check this early because mmapping happens before the + * kernel is running. + */ + check_tmpexec(); + + pid = start_ptraced_child(); + if (init_registers(pid)) + fatal("Failed to initialize default registers"); + stop_ptraced_child(pid, 1, 1); +} + +static int __init noprocmm_cmd_param(char *str, int* add) +{ + disable_proc_mm = 1; + return 0; +} + +__uml_setup("noprocmm", noprocmm_cmd_param, +"noprocmm\n" +" Turns off usage of /proc/mm, even if host supports it.\n" +" To support /proc/mm, the host needs to be patched using\n" +" the current skas3 patch.\n\n"); + +static int __init noptracefaultinfo_cmd_param(char *str, int* add) +{ + disable_ptrace_faultinfo = 1; + return 0; +} + +__uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param, +"noptracefaultinfo\n" +" Turns off usage of PTRACE_FAULTINFO, even if host supports\n" +" it. To support PTRACE_FAULTINFO, the host needs to be patched\n" +" using the current skas3 patch.\n\n"); + +static int __init noptraceldt_cmd_param(char *str, int* add) +{ + disable_ptrace_ldt = 1; + return 0; +} + +__uml_setup("noptraceldt", noptraceldt_cmd_param, +"noptraceldt\n" +" Turns off usage of PTRACE_LDT, even if host supports it.\n" +" To support PTRACE_LDT, the host needs to be patched using\n" +" the current skas3 patch.\n\n"); + +static inline void check_skas3_ptrace_faultinfo(void) +{ + struct ptrace_faultinfo fi; + int pid, n; + + non_fatal(" - PTRACE_FAULTINFO..."); + pid = start_ptraced_child(); + + n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); + if (n < 0) { + if (errno == EIO) + non_fatal("not found\n"); + else + perror("not found"); + } else if (disable_ptrace_faultinfo) + non_fatal("found but disabled on command line\n"); + else { + ptrace_faultinfo = 1; + non_fatal("found\n"); + } + + stop_ptraced_child(pid, 1, 1); +} + +static inline void check_skas3_ptrace_ldt(void) +{ +#ifdef PTRACE_LDT + int pid, n; + unsigned char ldtbuf[40]; + struct ptrace_ldt ldt_op = (struct ptrace_ldt) { + .func = 2, /* read default ldt */ + .ptr = ldtbuf, + .bytecount = sizeof(ldtbuf)}; + + non_fatal(" - PTRACE_LDT..."); + pid = start_ptraced_child(); + + n = ptrace(PTRACE_LDT, pid, 0, (unsigned long) &ldt_op); + if (n < 0) { + if (errno == EIO) + non_fatal("not found\n"); + else + perror("not found"); + } else if (disable_ptrace_ldt) + non_fatal("found, but use is disabled\n"); + else { + ptrace_ldt = 1; + non_fatal("found\n"); + } + + stop_ptraced_child(pid, 1, 1); +#endif +} + +static inline void check_skas3_proc_mm(void) +{ + non_fatal(" - /proc/mm..."); + if (access("/proc/mm", W_OK) < 0) + perror("not found"); + else if (disable_proc_mm) + non_fatal("found but disabled on command line\n"); + else { + proc_mm = 1; + non_fatal("found\n"); + } +} + +void can_do_skas(void) +{ + non_fatal("Checking for the skas3 patch in the host:\n"); + + check_skas3_proc_mm(); + check_skas3_ptrace_faultinfo(); + check_skas3_ptrace_ldt(); + + if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt) + skas_needs_stub = 1; +} + +int __init parse_iomem(char *str, int *add) +{ + struct iomem_region *new; + struct stat64 buf; + char *file, *driver; + int fd, size; + + driver = str; + file = strchr(str,','); + if (file == NULL) { + fprintf(stderr, "parse_iomem : failed to parse iomem\n"); + goto out; + } + *file = '\0'; + file++; + fd = open(file, O_RDWR, 0); + if (fd < 0) { + perror("parse_iomem - Couldn't open io file"); + goto out; + } + + if (fstat64(fd, &buf) < 0) { + perror("parse_iomem - cannot stat_fd file"); + goto out_close; + } + + new = malloc(sizeof(*new)); + if (new == NULL) { + perror("Couldn't allocate iomem_region struct"); + goto out_close; + } + + size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); + + *new = ((struct iomem_region) { .next = iomem_regions, + .driver = driver, + .fd = fd, + .size = size, + .phys = 0, + .virt = 0 }); + iomem_regions = new; + iomem_size += new->size + UM_KERN_PAGE_SIZE; + + return 0; + out_close: + close(fd); + out: + return 1; +} diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile new file mode 100644 index 00000000..b4bc6ac4 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = registers.o signal.o task_size.o tls.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c new file mode 100644 index 00000000..229f7a53 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <sys/ptrace.h> +#include <sys/user.h> +#include "kern_constants.h" +#include "longjmp.h" +#include "user.h" +#include "sysdep/ptrace_user.h" + +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int save_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +unsigned long get_thread_reg(int reg, jmp_buf *buf) +{ + switch (reg) { + case EIP: + return buf[0]->__eip; + case UESP: + return buf[0]->__esp; + case EBP: + return buf[0]->__ebp; + default: + printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n", + reg); + return 0; + } +} + +int have_fpx_regs = 1; + +int get_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return save_fpx_registers(pid, regs); + else + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return restore_fpx_registers(pid, regs); + else + return restore_fp_registers(pid, regs); +} + +void arch_init_registers(int pid) +{ + struct user_fpxregs_struct fpx_regs; + int err; + + err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs); + if (!err) + return; + + if (errno != EIO) + panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", + errno); + + have_fpx_regs = 0; +} diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c new file mode 100644 index 00000000..f311609f --- /dev/null +++ b/arch/um/os-Linux/sys-i386/signal.c @@ -0,0 +1,13 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void handle_signal(int sig, struct sigcontext *sc); + +void hard_handler(int sig) +{ + handle_signal(sig, (struct sigcontext *) (&sig + 1)); +} diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c new file mode 100644 index 00000000..be04c1e1 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/task_size.c @@ -0,0 +1,139 @@ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/mman.h> +#include "longjmp.h" +#include "kern_constants.h" + +static jmp_buf buf; + +static void segfault(int sig) +{ + longjmp(buf, 1); +} + +static int page_ok(unsigned long page) +{ + unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); + unsigned long n = ~0UL; + void *mapped = NULL; + int ok = 0; + + /* + * First see if the page is readable. If it is, it may still + * be a VDSO, so we go on to see if it's writable. If not + * then try mapping memory there. If that fails, then we're + * still in the kernel area. As a sanity check, we'll fail if + * the mmap succeeds, but gives us an address different from + * what we wanted. + */ + if (setjmp(buf) == 0) + n = *address; + else { + mapped = mmap(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mapped == MAP_FAILED) + return 0; + if (mapped != address) + goto out; + } + + /* + * Now, is it writeable? If so, then we're in user address + * space. If not, then try mprotecting it and try the write + * again. + */ + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + goto out; + } else if (mprotect(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE) != 0) + goto out; + + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + } + + out: + if (mapped != NULL) + munmap(mapped, UM_KERN_PAGE_SIZE); + return ok; +} + +unsigned long os_get_top_address(void) +{ + struct sigaction sa, old; + unsigned long bottom = 0; + /* + * A 32-bit UML on a 64-bit host gets confused about the VDSO at + * 0xffffe000. It is mapped, is readable, can be reprotected writeable + * and written. However, exec discovers later that it can't be + * unmapped. So, just set the highest address to be checked to just + * below it. This might waste some address space on 4G/4G 32-bit + * hosts, but shouldn't hurt otherwise. + */ + unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; + unsigned long test, original; + + printf("Locating the bottom of the address space ... "); + fflush(stdout); + + /* + * We're going to be longjmping out of the signal handler, so + * SA_DEFER needs to be set. + */ + sa.sa_handler = segfault; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_NODEFER; + if (sigaction(SIGSEGV, &sa, &old)) { + perror("os_get_top_address"); + exit(1); + } + + /* Manually scan the address space, bottom-up, until we find + * the first valid page (or run out of them). + */ + for (bottom = 0; bottom < top; bottom++) { + if (page_ok(bottom)) + break; + } + + /* If we've got this far, we ran out of pages. */ + if (bottom == top) { + fprintf(stderr, "Unable to determine bottom of address " + "space.\n"); + exit(1); + } + + printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT); + printf("Locating the top of the address space ... "); + fflush(stdout); + + original = bottom; + + /* This could happen with a 4G/4G split */ + if (page_ok(top)) + goto out; + + do { + test = bottom + (top - bottom) / 2; + if (page_ok(test)) + bottom = test; + else + top = test; + } while (top - bottom > 1); + +out: + /* Restore the old SIGSEGV handling */ + if (sigaction(SIGSEGV, &old, NULL)) { + perror("os_get_top_address"); + exit(1); + } + top <<= UM_KERN_PAGE_SHIFT; + printf("0x%x\n", top); + + return top; +} diff --git a/arch/um/os-Linux/sys-i386/tls.c b/arch/um/os-Linux/sys-i386/tls.c new file mode 100644 index 00000000..32ed41ec --- /dev/null +++ b/arch/um/os-Linux/sys-i386/tls.c @@ -0,0 +1,36 @@ +#include <errno.h> +#include <linux/unistd.h> + +#include <sys/syscall.h> +#include <unistd.h> + +#include "sysdep/tls.h" +#include "user.h" + +/* Checks whether host supports TLS, and sets *tls_min according to the value + * valid on the host. + * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */ +void check_host_supports_tls(int *supports_tls, int *tls_min) { + /* Values for x86 and x86_64.*/ + int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64}; + int i; + + for (i = 0; i < ARRAY_SIZE(val); i++) { + user_desc_t info; + info.entry_number = val[i]; + + if (syscall(__NR_get_thread_area, &info) == 0) { + *tls_min = val[i]; + *supports_tls = 1; + return; + } else { + if (errno == EINVAL) + continue; + else if (errno == ENOSYS) + *supports_tls = 0; + return; + } + } + + *supports_tls = 0; +} diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile new file mode 100644 index 00000000..a44a47f8 --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = registers.o prctl.o signal.o task_size.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/sys-x86_64/prctl.c b/arch/um/os-Linux/sys-x86_64/prctl.c new file mode 100644 index 00000000..9d34eddb --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/prctl.c @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com}) + * Licensed under the GPL + */ + +#include <sys/ptrace.h> +#include <linux/ptrace.h> + +int os_arch_prctl(int pid, int code, unsigned long *addr) +{ + return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); +} diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c new file mode 100644 index 00000000..594d97ad --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2006 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <sys/ptrace.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include "kern_constants.h" +#include "longjmp.h" +#include "user.h" + +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +unsigned long get_thread_reg(int reg, jmp_buf *buf) +{ + switch (reg) { + case RIP: + return buf[0]->__rip; + case RSP: + return buf[0]->__rsp; + case RBP: + return buf[0]->__rbp; + default: + printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n", + reg); + return 0; + } +} + +int get_fp_registers(int pid, unsigned long *regs) +{ + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + return restore_fp_registers(pid, regs); +} diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c new file mode 100644 index 00000000..82a38882 --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/signal.c @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void handle_signal(int sig, struct sigcontext *sc); + +void hard_handler(int sig) +{ + struct ucontext *uc; + asm("movq %%rdx, %0" : "=r" (uc)); + + handle_signal(sig, (struct sigcontext *) &uc->uc_mcontext); +} diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c new file mode 100644 index 00000000..26a0dd1f --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/task_size.c @@ -0,0 +1,5 @@ +unsigned long os_get_top_address(unsigned long shift) +{ + /* The old value of CONFIG_TOP_ADDR */ + return 0x7fc0000000; +} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c new file mode 100644 index 00000000..6e3359d6 --- /dev/null +++ b/arch/um/os-Linux/time.c @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <errno.h> +#include <signal.h> +#include <time.h> +#include <sys/time.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "process.h" +#include "user.h" + +int set_interval(void) +{ + int usec = UM_USEC_PER_SEC / UM_HZ; + struct itimerval interval = ((struct itimerval) { { 0, usec }, + { 0, usec } }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} + +int timer_one_shot(int ticks) +{ + unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; + unsigned long sec = usec / UM_USEC_PER_SEC; + struct itimerval interval; + + usec %= UM_USEC_PER_SEC; + interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} + +/** + * timeval_to_ns - Convert timeval to nanoseconds + * @ts: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * parameter. + * + * Ripped from linux/time.h because it's a kernel header, and thus + * unusable from here. + */ +static inline long long timeval_to_ns(const struct timeval *tv) +{ + return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + + tv->tv_usec * UM_NSEC_PER_USEC; +} + +long long disable_timer(void) +{ + struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); + long long remain, max = UM_NSEC_PER_SEC / UM_HZ; + + if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) + printk(UM_KERN_ERR "disable_timer - setitimer failed, " + "errno = %d\n", errno); + + remain = timeval_to_ns(&time.it_value); + if (remain > max) + remain = max; + + return remain; +} + +long long os_nsecs(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return timeval_to_ns(&tv); +} + +#ifdef UML_CONFIG_NO_HZ +static int after_sleep_interval(struct timespec *ts) +{ + return 0; +} + +static void deliver_alarm(void) +{ + alarm_handler(SIGVTALRM, NULL); +} + +static unsigned long long sleep_time(unsigned long long nsecs) +{ + return nsecs; +} + +#else +unsigned long long last_tick; +unsigned long long skew; + +static void deliver_alarm(void) +{ + unsigned long long this_tick = os_nsecs(); + int one_tick = UM_NSEC_PER_SEC / UM_HZ; + + /* Protection against the host's time going backwards */ + if ((last_tick != 0) && (this_tick < last_tick)) + this_tick = last_tick; + + if (last_tick == 0) + last_tick = this_tick - one_tick; + + skew += this_tick - last_tick; + + while (skew >= one_tick) { + alarm_handler(SIGVTALRM, NULL); + skew -= one_tick; + } + + last_tick = this_tick; +} + +static unsigned long long sleep_time(unsigned long long nsecs) +{ + return nsecs > skew ? nsecs - skew : 0; +} + +static inline long long timespec_to_us(const struct timespec *ts) +{ + return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + + ts->tv_nsec / UM_NSEC_PER_USEC; +} + +static int after_sleep_interval(struct timespec *ts) +{ + int usec = UM_USEC_PER_SEC / UM_HZ; + long long start_usecs = timespec_to_us(ts); + struct timeval tv; + struct itimerval interval; + + /* + * It seems that rounding can increase the value returned from + * setitimer to larger than the one passed in. Over time, + * this will cause the remaining time to be greater than the + * tick interval. If this happens, then just reduce the first + * tick to the interval value. + */ + if (start_usecs > usec) + start_usecs = usec; + + start_usecs -= skew / UM_NSEC_PER_USEC; + if (start_usecs < 0) + start_usecs = 0; + + tv = ((struct timeval) { .tv_sec = start_usecs / UM_USEC_PER_SEC, + .tv_usec = start_usecs % UM_USEC_PER_SEC }); + interval = ((struct itimerval) { { 0, usec }, tv }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} +#endif + +void idle_sleep(unsigned long long nsecs) +{ + struct timespec ts; + + /* + * nsecs can come in as zero, in which case, this starts a + * busy loop. To prevent this, reset nsecs to the tick + * interval if it is zero. + */ + if (nsecs == 0) + nsecs = UM_NSEC_PER_SEC / UM_HZ; + + nsecs = sleep_time(nsecs); + ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, + .tv_nsec = nsecs % UM_NSEC_PER_SEC }); + + if (nanosleep(&ts, &ts) == 0) + deliver_alarm(); + after_sleep_interval(&ts); +} diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c new file mode 100644 index 00000000..73277801 --- /dev/null +++ b/arch/um/os-Linux/tls.c @@ -0,0 +1,35 @@ +#include <errno.h> +#include <sys/ptrace.h> +#include "sysdep/tls.h" + +/* TLS support - we basically rely on the host's one.*/ + +#ifndef PTRACE_GET_THREAD_AREA +#define PTRACE_GET_THREAD_AREA 25 +#endif + +#ifndef PTRACE_SET_THREAD_AREA +#define PTRACE_SET_THREAD_AREA 26 +#endif + +int os_set_thread_area(user_desc_t *info, int pid) +{ + int ret; + + ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} + +int os_get_thread_area(user_desc_t *info, int pid) +{ + int ret; + + ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c new file mode 100644 index 00000000..b09ff66a --- /dev/null +++ b/arch/um/os-Linux/tty.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "user.h" + +struct grantpt_info { + int fd; + int res; + int err; +}; + +static void grantpt_cb(void *arg) +{ + struct grantpt_info *info = arg; + + info->res = grantpt(info->fd); + info->err = errno; +} + +int get_pty(void) +{ + struct grantpt_info info; + int fd, err; + + fd = open("/dev/ptmx", O_RDWR); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't open /dev/ptmx - " + "err = %d\n", errno); + return err; + } + + info.fd = fd; + initial_thread_cb(grantpt_cb, &info); + + if (info.res < 0) { + err = -info.err; + printk(UM_KERN_ERR "get_pty : Couldn't grant pty - " + "errno = %d\n", -info.err); + goto out; + } + + if (unlockpt(fd) < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't unlock pty - " + "errno = %d\n", errno); + goto out; + } + return fd; +out: + close(fd); + return err; +} diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c new file mode 100644 index 00000000..087ed74f --- /dev/null +++ b/arch/um/os-Linux/uaccess.c @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include "longjmp.h" + +unsigned long __do_user_copy(void *to, const void *from, int n, + void **fault_addr, jmp_buf **fault_catcher, + void (*op)(void *to, const void *from, + int n), int *faulted_out) +{ + unsigned long *faddrp = (unsigned long *) fault_addr, ret; + + jmp_buf jbuf; + *fault_catcher = &jbuf; + if (UML_SETJMP(&jbuf) == 0) { + (*op)(to, from, n); + ret = 0; + *faulted_out = 0; + } + else { + ret = *faddrp; + *faulted_out = 1; + } + *fault_addr = NULL; + *fault_catcher = NULL; + return ret; +} + diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c new file mode 100644 index 00000000..a27defb8 --- /dev/null +++ b/arch/um/os-Linux/umid.c @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> +#include <sys/stat.h> +#include "init.h" +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +#define UML_DIR "~/.uml/" + +#define UMID_LEN 64 + +/* Changed by set_umid, which is run early in boot */ +static char umid[UMID_LEN] = { 0 }; + +/* Changed by set_uml_dir and make_uml_dir, which are run early in boot */ +static char *uml_dir = UML_DIR; + +static int __init make_uml_dir(void) +{ + char dir[512] = { '\0' }; + int len, err; + + if (*uml_dir == '~') { + char *home = getenv("HOME"); + + err = -ENOENT; + if (home == NULL) { + printk(UM_KERN_ERR "make_uml_dir : no value in " + "environment for $HOME\n"); + goto err; + } + strlcpy(dir, home, sizeof(dir)); + uml_dir++; + } + strlcat(dir, uml_dir, sizeof(dir)); + len = strlen(dir); + if (len > 0 && dir[len - 1] != '/') + strlcat(dir, "/", sizeof(dir)); + + err = -ENOMEM; + uml_dir = malloc(strlen(dir) + 1); + if (uml_dir == NULL) { + printf("make_uml_dir : malloc failed, errno = %d\n", errno); + goto err; + } + strcpy(uml_dir, dir); + + if ((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)) { + printf("Failed to mkdir '%s': %s\n", uml_dir, strerror(errno)); + err = -errno; + goto err_free; + } + return 0; + +err_free: + free(uml_dir); +err: + uml_dir = NULL; + return err; +} + +/* + * Unlinks the files contained in @dir and then removes @dir. + * Doesn't handle directory trees, so it's not like rm -rf, but almost such. We + * ignore ENOENT errors for anything (they happen, strangely enough - possibly + * due to races between multiple dying UML threads). + */ +static int remove_files_and_dir(char *dir) +{ + DIR *directory; + struct dirent *ent; + int len; + char file[256]; + int ret; + + directory = opendir(dir); + if (directory == NULL) { + if (errno != ENOENT) + return -errno; + else + return 0; + } + + while ((ent = readdir(directory)) != NULL) { + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1; + if (len > sizeof(file)) { + ret = -E2BIG; + goto out; + } + + sprintf(file, "%s/%s", dir, ent->d_name); + if (unlink(file) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + } + + if (rmdir(dir) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + + ret = 0; +out: + closedir(directory); + return ret; +} + +/* + * This says that there isn't already a user of the specified directory even if + * there are errors during the checking. This is because if these errors + * happen, the directory is unusable by the pre-existing UML, so we might as + * well take it over. This could happen either by + * the existing UML somehow corrupting its umid directory + * something other than UML sticking stuff in the directory + * this boot racing with a shutdown of the other UML + * In any of these cases, the directory isn't useful for anything else. + * + * Boolean return: 1 if in use, 0 otherwise. + */ +static inline int is_umdir_used(char *dir) +{ + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")], *end; + int dead, fd, p, n, err; + + n = snprintf(file, sizeof(file), "%s/pid", dir); + if (n >= sizeof(file)) { + printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n"); + err = -E2BIG; + goto out; + } + + dead = 0; + fd = open(file, O_RDONLY); + if (fd < 0) { + fd = -errno; + if (fd != -ENOENT) { + printk(UM_KERN_ERR "is_umdir_used : couldn't open pid " + "file '%s', err = %d\n", file, -fd); + } + goto out; + } + + err = 0; + n = read(fd, pid, sizeof(pid)); + if (n < 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', err = %d\n", file, errno); + goto out_close; + } else if (n == 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', 0-byte read\n", file); + goto out_close; + } + + p = strtoul(pid, &end, 0); + if (end == pid) { + printk(UM_KERN_ERR "is_umdir_used : couldn't parse pid file " + "'%s', errno = %d\n", file, errno); + goto out_close; + } + + if ((kill(p, 0) == 0) || (errno != ESRCH)) { + printk(UM_KERN_ERR "umid \"%s\" is already in use by pid %d\n", + umid, p); + return 1; + } + +out_close: + close(fd); +out: + return 0; +} + +/* + * Try to remove the directory @dir unless it's in use. + * Precondition: @dir exists. + * Returns 0 for success, < 0 for failure in removal or if the directory is in + * use. + */ +static int umdir_take_if_dead(char *dir) +{ + int ret; + if (is_umdir_used(dir)) + return -EEXIST; + + ret = remove_files_and_dir(dir); + if (ret) { + printk(UM_KERN_ERR "is_umdir_used - remove_files_and_dir " + "failed with err = %d\n", ret); + } + return ret; +} + +static void __init create_pid_file(void) +{ + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")]; + int fd, n; + + if (umid_file_name("pid", file, sizeof(file))) + return; + + fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd < 0) { + printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: " + "%s\n", file, strerror(errno)); + return; + } + + snprintf(pid, sizeof(pid), "%d\n", getpid()); + n = write(fd, pid, strlen(pid)); + if (n != strlen(pid)) + printk(UM_KERN_ERR "Write of pid file failed - err = %d\n", + errno); + + close(fd); +} + +int __init set_umid(char *name) +{ + if (strlen(name) > UMID_LEN - 1) + return -E2BIG; + + strlcpy(umid, name, sizeof(umid)); + + return 0; +} + +/* Changed in make_umid, which is called during early boot */ +static int umid_setup = 0; + +static int __init make_umid(void) +{ + int fd, err; + char tmp[256]; + + if (umid_setup) + return 0; + + make_uml_dir(); + + if (*umid == '\0') { + strlcpy(tmp, uml_dir, sizeof(tmp)); + strlcat(tmp, "XXXXXX", sizeof(tmp)); + fd = mkstemp(tmp); + if (fd < 0) { + printk(UM_KERN_ERR "make_umid - mkstemp(%s) failed: " + "%s\n", tmp, strerror(errno)); + err = -errno; + goto err; + } + + close(fd); + + set_umid(&tmp[strlen(uml_dir)]); + + /* + * There's a nice tiny little race between this unlink and + * the mkdir below. It'd be nice if there were a mkstemp + * for directories. + */ + if (unlink(tmp)) { + err = -errno; + goto err; + } + } + + snprintf(tmp, sizeof(tmp), "%s%s", uml_dir, umid); + err = mkdir(tmp, 0777); + if (err < 0) { + err = -errno; + if (err != -EEXIST) + goto err; + + if (umdir_take_if_dead(tmp) < 0) + goto err; + + err = mkdir(tmp, 0777); + } + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to create '%s' - err = %d\n", umid, + errno); + goto err; + } + + umid_setup = 1; + + create_pid_file(); + + err = 0; + err: + return err; +} + +static int __init make_umid_init(void) +{ + if (!make_umid()) + return 0; + + /* + * If initializing with the given umid failed, then try again with + * a random one. + */ + printk(UM_KERN_ERR "Failed to initialize umid \"%s\", trying with a " + "random umid\n", umid); + *umid = '\0'; + make_umid(); + + return 0; +} + +__initcall(make_umid_init); + +int __init umid_file_name(char *name, char *buf, int len) +{ + int n, err; + + err = make_umid(); + if (err) + return err; + + n = snprintf(buf, len, "%s%s/%s", uml_dir, umid, name); + if (n >= len) { + printk(UM_KERN_ERR "umid_file_name : buffer too short\n"); + return -E2BIG; + } + + return 0; +} + +char *get_umid(void) +{ + return umid; +} + +static int __init set_uml_dir(char *name, int *add) +{ + if (*name == '\0') { + printf("uml_dir can't be an empty string\n"); + return 0; + } + + if (name[strlen(name) - 1] == '/') { + uml_dir = name; + return 0; + } + + uml_dir = malloc(strlen(name) + 2); + if (uml_dir == NULL) { + printf("Failed to malloc uml_dir - error = %d\n", errno); + + /* + * Return 0 here because do_initcalls doesn't look at + * the return value. + */ + return 0; + } + sprintf(uml_dir, "%s/", name); + + return 0; +} + +__uml_setup("uml_dir=", set_uml_dir, +"uml_dir=<directory>\n" +" The location to place the pid and umid files.\n\n" +); + +static void remove_umid_dir(void) +{ + char dir[strlen(uml_dir) + UMID_LEN + 1], err; + + sprintf(dir, "%s%s", uml_dir, umid); + err = remove_files_and_dir(dir); + if (err) + printf("remove_umid_dir - remove_files_and_dir failed with " + "err = %d\n", err); +} + +__uml_exitcall(remove_umid_dir); diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c new file mode 100644 index 00000000..05f5ea8e --- /dev/null +++ b/arch/um/os-Linux/user_syms.c @@ -0,0 +1,115 @@ +#include "linux/types.h" +#include "linux/module.h" + +/* Some of this are builtin function (some are not but could in the future), + * so I *must* declare good prototypes for them and then EXPORT them. + * The kernel code uses the macro defined by include/linux/string.h, + * so I undef macros; the userspace code does not include that and I + * add an EXPORT for the glibc one. + */ + +#undef strlen +#undef strstr +#undef memcpy +#undef memset + +extern size_t strlen(const char *); +extern void *memmove(void *, const void *, size_t); +extern void *memset(void *, int, size_t); +extern int printf(const char *, ...); + +/* If it's not defined, the export is included in lib/string.c.*/ +#ifdef __HAVE_ARCH_STRSTR +EXPORT_SYMBOL(strstr); +#endif + +#ifndef __x86_64__ +extern void *memcpy(void *, const void *, size_t); +EXPORT_SYMBOL(memcpy); +#endif + +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(printf); + +/* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms. + * However, the modules will use the CRC defined *here*, no matter if it is + * good; so the versions of these symbols will always match + */ +#define EXPORT_SYMBOL_PROTO(sym) \ + int sym(void); \ + EXPORT_SYMBOL(sym); + +extern void readdir64(void) __attribute__((weak)); +EXPORT_SYMBOL(readdir64); +extern void truncate64(void) __attribute__((weak)); +EXPORT_SYMBOL(truncate64); + +#ifdef SUBARCH_i386 +EXPORT_SYMBOL(vsyscall_ehdr); +EXPORT_SYMBOL(vsyscall_end); +#endif + +EXPORT_SYMBOL_PROTO(__errno_location); + +EXPORT_SYMBOL_PROTO(access); +EXPORT_SYMBOL_PROTO(open); +EXPORT_SYMBOL_PROTO(open64); +EXPORT_SYMBOL_PROTO(close); +EXPORT_SYMBOL_PROTO(read); +EXPORT_SYMBOL_PROTO(write); +EXPORT_SYMBOL_PROTO(dup2); +EXPORT_SYMBOL_PROTO(__xstat); +EXPORT_SYMBOL_PROTO(__lxstat); +EXPORT_SYMBOL_PROTO(__lxstat64); +EXPORT_SYMBOL_PROTO(__fxstat64); +EXPORT_SYMBOL_PROTO(lseek); +EXPORT_SYMBOL_PROTO(lseek64); +EXPORT_SYMBOL_PROTO(chown); +EXPORT_SYMBOL_PROTO(fchown); +EXPORT_SYMBOL_PROTO(truncate); +EXPORT_SYMBOL_PROTO(ftruncate64); +EXPORT_SYMBOL_PROTO(utime); +EXPORT_SYMBOL_PROTO(utimes); +EXPORT_SYMBOL_PROTO(futimes); +EXPORT_SYMBOL_PROTO(chmod); +EXPORT_SYMBOL_PROTO(fchmod); +EXPORT_SYMBOL_PROTO(rename); +EXPORT_SYMBOL_PROTO(__xmknod); + +EXPORT_SYMBOL_PROTO(symlink); +EXPORT_SYMBOL_PROTO(link); +EXPORT_SYMBOL_PROTO(unlink); +EXPORT_SYMBOL_PROTO(readlink); + +EXPORT_SYMBOL_PROTO(mkdir); +EXPORT_SYMBOL_PROTO(rmdir); +EXPORT_SYMBOL_PROTO(opendir); +EXPORT_SYMBOL_PROTO(readdir); +EXPORT_SYMBOL_PROTO(closedir); +EXPORT_SYMBOL_PROTO(seekdir); +EXPORT_SYMBOL_PROTO(telldir); + +EXPORT_SYMBOL_PROTO(ioctl); + +EXPORT_SYMBOL_PROTO(pread64); +EXPORT_SYMBOL_PROTO(pwrite64); + +EXPORT_SYMBOL_PROTO(statfs); +EXPORT_SYMBOL_PROTO(statfs64); + +EXPORT_SYMBOL_PROTO(getuid); + +EXPORT_SYMBOL_PROTO(fsync); +EXPORT_SYMBOL_PROTO(fdatasync); + +EXPORT_SYMBOL_PROTO(lstat64); +EXPORT_SYMBOL_PROTO(fstat64); +EXPORT_SYMBOL_PROTO(mknod); + +/* Export symbols used by GCC for the stack protector. */ +extern void __stack_smash_handler(void *) __attribute__((weak)); +EXPORT_SYMBOL(__stack_smash_handler); + +extern long __guard __attribute__((weak)); +EXPORT_SYMBOL(__guard); diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c new file mode 100644 index 00000000..5803b188 --- /dev/null +++ b/arch/um/os-Linux/util.c @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <termios.h> +#include <wait.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +void stack_protections(unsigned long address) +{ + if (mprotect((void *) address, UM_THREAD_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC) < 0) + panic("protecting stack failed, errno = %d", errno); +} + +int raw(int fd) +{ + struct termios tt; + int err; + + CATCH_EINTR(err = tcgetattr(fd, &tt)); + if (err < 0) + return -errno; + + cfmakeraw(&tt); + + CATCH_EINTR(err = tcsetattr(fd, TCSADRAIN, &tt)); + if (err < 0) + return -errno; + + /* + * XXX tcsetattr could have applied only some changes + * (and cfmakeraw() is a set of changes) + */ + return 0; +} + +void setup_machinename(char *machine_out) +{ + struct utsname host; + + uname(&host); +#ifdef UML_CONFIG_UML_X86 +# ifndef UML_CONFIG_64BIT + if (!strcmp(host.machine, "x86_64")) { + strcpy(machine_out, "i686"); + return; + } +# else + if (!strcmp(host.machine, "i686")) { + strcpy(machine_out, "x86_64"); + return; + } +# endif +#endif + strcpy(machine_out, host.machine); +} + +void setup_hostinfo(char *buf, int len) +{ + struct utsname host; + + uname(&host); + snprintf(buf, len, "%s %s %s %s %s", host.sysname, host.nodename, + host.release, host.version, host.machine); +} + +/* + * We cannot use glibc's abort(). It makes use of tgkill() which + * has no effect within UML's kernel threads. + * After that glibc would execute an invalid instruction to kill + * the calling process and UML crashes with SIGSEGV. + */ +static inline void __attribute__ ((noreturn)) uml_abort(void) +{ + sigset_t sig; + + fflush(NULL); + + if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT)) + sigprocmask(SIG_UNBLOCK, &sig, 0); + + for (;;) + if (kill(getpid(), SIGABRT) < 0) + exit(127); +} + +void os_dump_core(void) +{ + int pid; + + signal(SIGSEGV, SIG_DFL); + + /* + * We are about to SIGTERM this entire process group to ensure that + * nothing is around to run after the kernel exits. The + * kernel wants to abort, not die through SIGTERM, so we + * ignore it here. + */ + + signal(SIGTERM, SIG_IGN); + kill(0, SIGTERM); + /* + * Most of the other processes associated with this UML are + * likely sTopped, so give them a SIGCONT so they see the + * SIGTERM. + */ + kill(0, SIGCONT); + + /* + * Now, having sent signals to everyone but us, make sure they + * die by ptrace. Processes can survive what's been done to + * them so far - the mechanism I understand is receiving a + * SIGSEGV and segfaulting immediately upon return. There is + * always a SIGSEGV pending, and (I'm guessing) signals are + * processed in numeric order so the SIGTERM (signal 15 vs + * SIGSEGV being signal 11) is never handled. + * + * Run a waitpid loop until we get some kind of error. + * Hopefully, it's ECHILD, but there's not a lot we can do if + * it's something else. Tell os_kill_ptraced_process not to + * wait for the child to report its death because there's + * nothing reasonable to do if that fails. + */ + + while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) + os_kill_ptraced_process(pid, 0); + + uml_abort(); +} + +void um_early_printk(const char *s, unsigned int n) +{ + printf("%.*s", n, s); +} |