aio: optional pollable file descriptor for an AIO completion queue (linux-2.6.18)

When CONFIG_EPOLL is set and io_setup() is called with *ctxp == 1, the context
is created as usual and the syscall returns a file descriptor whose poll()
reports POLLIN | POLLRDNORM while the completion ring holds events.  To build
that fd, ep_getfd() in fs/eventpoll.c is made non-static and takes the
file_operations to install on the new file.

diff -pruN ../orig-linux-2.6.18/fs/aio.c ./fs/aio.c --- ../orig-linux-2.6.18/fs/aio.c 2006-09-20 04:42:06.000000000 +0100 +++ ./fs/aio.c 2007-01-12 16:04:15.000000000 +0000 @@ -34,6 +34,11 @@ #include #include +#ifdef CONFIG_EPOLL +#include +#include +#endif + #if DEBUG > 1 #define dprintk printk #else @@ -1015,6 +1020,10 @@ put_rq: if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); +#ifdef CONFIG_EPOLL + if (ctx->file && waitqueue_active(&ctx->poll_wait)) + wake_up(&ctx->poll_wait); +#endif if (ret) put_ioctx(ctx); @@ -1024,6 +1033,8 @@ put_rq: /* aio_read_evt * Pull an event off of the ioctx's event ring. Returns the number of * events fetched (0 or 1 ;-) + * If ent parameter is NULL, just returns the number of events that would + * be fetched. * FIXME: make this use cmpxchg. * TODO: make the ringbuffer user mmap()able (requires FIXME). */ @@ -1046,13 +1057,18 @@ static int aio_read_evt(struct kioctx *i head = ring->head % info->nr; if (head != ring->tail) { - struct io_event *evp = aio_ring_event(info, head, KM_USER1); - *ent = *evp; - head = (head + 1) % info->nr; - smp_mb(); /* finish reading the event before updatng the head */ - ring->head = head; - ret = 1; - put_aio_ring_event(evp, KM_USER1); + if (ent) { /* event requested */ + struct io_event *evp = + aio_ring_event(info, head, KM_USER1); + *ent = *evp; + head = (head + 1) % info->nr; + /* finish reading the event before updating the head */ + smp_mb(); + ring->head = head; + ret = 1; + put_aio_ring_event(evp, KM_USER1); + } else /* only need to know availability */ + ret = 1; } spin_unlock(&info->ring_lock); @@ -1235,9 +1251,78 @@ static void io_destroy(struct kioctx *io aio_cancel_all(ioctx); wait_for_all_aios(ioctx); +#ifdef CONFIG_EPOLL + /* forget the poll file, but it's up to the user to close it */ + if (ioctx->file) { + ioctx->file->private_data = NULL; + ioctx->file = NULL; + } +#endif put_ioctx(ioctx); /* once for the lookup */ } +#ifdef CONFIG_EPOLL + +static int aio_queue_fd_close(struct inode 
*inode, struct file *file) +{ + struct kioctx *ioctx = file->private_data; + if (ioctx) { + file->private_data = NULL; + spin_lock_irq(&ioctx->ctx_lock); + ioctx->file = NULL; + spin_unlock_irq(&ioctx->ctx_lock); + } + return 0; +} + +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) +{ unsigned int pollflags = 0; + struct kioctx *ioctx = file->private_data; + + if (ioctx) { + + /* Register on our poll wait queue; may sleep, so not under the lock */ + poll_wait(file, &ioctx->poll_wait, wait); + + spin_lock_irq(&ioctx->ctx_lock); + /* Check our condition */ + if (aio_read_evt(ioctx, NULL)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irq(&ioctx->ctx_lock); + } + + return pollflags; +} + +static const struct file_operations aioq_fops = { + .release = aio_queue_fd_close, + .poll = aio_queue_fd_poll +}; + +/* make_aio_fd: + * Create a file descriptor that can be used to poll the event queue. + * Based and piggybacked on the excellent epoll code. + */ + +static int make_aio_fd(struct kioctx *ioctx) +{ + int error, fd; + struct inode *inode; + struct file *file; + + error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); + if (error) + return error; + + /* init the wait queue before aio_complete() can see ioctx->file */ + init_waitqueue_head(&ioctx->poll_wait); + file->private_data = ioctx; + ioctx->file = file; + return fd; +} +#endif + + /* sys_io_setup: * Create an aio_context capable of receiving at least nr_events. * ctxp must not point to an aio_context that already exists, and @@ -1250,18 +1335,30 @@ static void io_destroy(struct kioctx *io * resources are available. May fail with -EFAULT if an invalid * pointer is passed for ctxp. Will fail with -ENOSYS if not * implemented. + * + * To request a selectable fd, the user context has to be initialized + * to 1, instead of 0, and the return value is the fd. + * This keeps the system call compatible, since a non-zero value + * was not allowed so far. 
*/ asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) { struct kioctx *ioctx = NULL; unsigned long ctx; long ret; + int make_fd = 0; ret = get_user(ctx, ctxp); if (unlikely(ret)) goto out; ret = -EINVAL; +#ifdef CONFIG_EPOLL + if (ctx == 1) { + make_fd = 1; + ctx = 0; + } +#endif if (unlikely(ctx || nr_events == 0)) { pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", ctx, nr_events); @@ -1272,8 +1369,12 @@ asmlinkage long sys_io_setup(unsigned nr ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); - if (!ret) - return 0; +#ifdef CONFIG_EPOLL + if (make_fd && ret >= 0) + ret = make_aio_fd(ioctx); +#endif + if (ret >= 0) + return ret; get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ io_destroy(ioctx); diff -pruN ../orig-linux-2.6.18/fs/eventpoll.c ./fs/eventpoll.c --- ../orig-linux-2.6.18/fs/eventpoll.c 2006-09-20 04:42:06.000000000 +0100 +++ ./fs/eventpoll.c 2007-01-12 16:04:41.000000000 +0000 @@ -236,8 +236,6 @@ struct ep_pqueue { static void ep_poll_safewake_init(struct poll_safewake *psw); static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); -static int ep_getfd(int *efd, struct inode **einode, struct file **efile, - struct eventpoll *ep); static int ep_alloc(struct eventpoll **pep); static void ep_free(struct eventpoll *ep); static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); @@ -267,7 +265,7 @@ static int ep_events_transfer(struct eve static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout); static int eventpollfs_delete_dentry(struct dentry *dentry); -static struct inode *ep_eventpoll_inode(void); +static struct inode *ep_eventpoll_inode(const struct file_operations *fops); static int eventpollfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); @@ -517,7 +515,7 @@ asmlinkage long sys_epoll_create(int siz * Creates all the items 
needed to setup an eventpoll file. That is, * a file structure, and inode and a free file descriptor. */ - error = ep_getfd(&fd, &inode, &file, ep); + error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops); if (error) goto eexit_2; @@ -702,8 +700,8 @@ eexit_1: /* * Creates the file descriptor to be used by the epoll interface. */ -static int ep_getfd(int *efd, struct inode **einode, struct file **efile, - struct eventpoll *ep) +int ep_getfd(int *efd, struct inode **einode, struct file **efile, + struct eventpoll *ep, const struct file_operations *fops) { struct qstr this; char name[32]; @@ -719,7 +717,7 @@ static int ep_getfd(int *efd, struct ino goto eexit_1; /* Allocates an inode from the eventpoll file system */ - inode = ep_eventpoll_inode(); + inode = ep_eventpoll_inode(fops); error = PTR_ERR(inode); if (IS_ERR(inode)) goto eexit_2; @@ -750,7 +748,7 @@ static int ep_getfd(int *efd, struct ino file->f_pos = 0; file->f_flags = O_RDONLY; - file->f_op = &eventpoll_fops; + file->f_op = fops; file->f_mode = FMODE_READ; file->f_version = 0; file->private_data = ep; @@ -1569,7 +1567,7 @@ static int eventpollfs_delete_dentry(str } -static struct inode *ep_eventpoll_inode(void) +static struct inode *ep_eventpoll_inode(const struct file_operations *fops) { int error = -ENOMEM; struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); @@ -1577,7 +1575,7 @@ static struct inode *ep_eventpoll_inode( if (!inode) goto eexit_1; - inode->i_fop = &eventpoll_fops; + inode->i_fop = fops; /* * Mark the inode dirty from the very beginning, diff -pruN ../orig-linux-2.6.18/include/linux/aio.h ./include/linux/aio.h --- ../orig-linux-2.6.18/include/linux/aio.h 2006-09-20 04:42:06.000000000 +0100 +++ ./include/linux/aio.h 2007-01-12 16:04:15.000000000 +0000 @@ -191,6 +191,11 @@ struct kioctx { struct aio_ring_info ring_info; struct work_struct wq; +#ifdef CONFIG_EPOLL + /* poll integration */ + wait_queue_head_t poll_wait; + struct file *file; +#endif }; /* prototypes */ diff -pruN 
../orig-linux-2.6.18/include/linux/eventpoll.h ./include/linux/eventpoll.h --- ../orig-linux-2.6.18/include/linux/eventpoll.h 2006-09-20 04:42:06.000000000 +0100 +++ ./include/linux/eventpoll.h 2007-01-12 16:04:15.000000000 +0000 @@ -90,6 +90,12 @@ static inline void eventpoll_release(str eventpoll_release_file(file); } +/* + * called by aio code to create an fd that can poll the aio event queue + */ +struct eventpoll; +int ep_getfd(int *efd, struct inode **einode, struct file **efile, + struct eventpoll *ep, const struct file_operations *fops); #else static inline void eventpoll_init_file(struct file *file) {}