aboutsummaryrefslogtreecommitdiffstats
path: root/tools/blktap2/drivers
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2010-01-29 08:55:27 +0000
committerKeir Fraser <keir.fraser@citrix.com>2010-01-29 08:55:27 +0000
commitb7d8981f39e24bbba2b933c1d17828a47100b7c5 (patch)
treee7ffd7395885141bc7245a600b0bd07d9fa231f9 /tools/blktap2/drivers
parentfedebce36446b00fbff2295d7e29813226a12ca0 (diff)
downloadxen-b7d8981f39e24bbba2b933c1d17828a47100b7c5.tar.gz
xen-b7d8981f39e24bbba2b933c1d17828a47100b7c5.tar.bz2
xen-b7d8981f39e24bbba2b933c1d17828a47100b7c5.zip
blktap2: Prefer AIO eventfd support on kernels >= 2.6.22
Mainline kernel support for eventfd(2) in Linux AIO was added between 2.6.21 and 2.6.22. Libaio versions after 0.3.107 ship the matching header file, but presently few systems provide it. Nor do we want to rely on an up-to-date libc6. Instead, this patch adds a header which defines a custom iocb_common struct, and works around a potentially missing sys/eventfd.h. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com>
Diffstat (limited to 'tools/blktap2/drivers')
-rw-r--r--tools/blktap2/drivers/block-aio.c1
-rw-r--r--tools/blktap2/drivers/libaio-compat.h92
-rw-r--r--tools/blktap2/drivers/tapdisk-queue.c178
-rw-r--r--tools/blktap2/drivers/tapdisk-utils.c32
-rw-r--r--tools/blktap2/drivers/tapdisk-utils.h1
5 files changed, 273 insertions, 31 deletions
diff --git a/tools/blktap2/drivers/block-aio.c b/tools/blktap2/drivers/block-aio.c
index c8d36e0472..f398da267f 100644
--- a/tools/blktap2/drivers/block-aio.c
+++ b/tools/blktap2/drivers/block-aio.c
@@ -28,7 +28,6 @@
#include <errno.h>
-#include <libaio.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/blktap2/drivers/libaio-compat.h b/tools/blktap2/drivers/libaio-compat.h
new file mode 100644
index 0000000000..353c36a869
--- /dev/null
+++ b/tools/blktap2/drivers/libaio-compat.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2010, XenSource Inc.
+ * All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+
+/*
+ * Kernel 2.6.22 added eventfd(2) support, including eventfd
+ * notification for aio. libaio 0.3.107 updated the header file, but
+ * few systems have it. Define a custom iocb_common struct instead,
+ * and work around a potentially missing sys/eventfd.h. This header
+ * should vanish over time.
+ */
+
+#ifndef __LIBAIO_COMPAT
+#define __LIBAIO_COMPAT
+
+#include <libaio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+struct __compat_io_iocb_common { /* overlaid on iocb->u.c by __io_set_eventfd() */
+ char __pad_buf[8]; /* 8 opaque bytes; the name suggests the "buf" slot -- TODO confirm against libaio.h */
+ char __pad_nbytes[8]; /* 8 opaque bytes; the name suggests the "nbytes" slot -- TODO confirm */
+ long long offset; /* not touched by the helpers in this header */
+ long long __pad3; /* padding, never accessed here */
+ unsigned flags; /* bit 0 is set by __io_set_eventfd() */
+ unsigned resfd; /* receives the eventfd descriptor in __io_set_eventfd() */
+};
+
+/*
+ * Tag @iocb with @eventfd: store the descriptor in the request's resfd
+ * slot and raise bit 0 of its flags word. The request is accessed
+ * through the compat layout because the installed libaio header may
+ * not expose these two fields.
+ */
+static inline void __io_set_eventfd(struct iocb *iocb, int eventfd)
+{
+	/* presumably the kernel's IOCB_FLAG_RESFD -- confirm against linux/aio_abi.h */
+	const unsigned resfd_flag = 1u << 0;
+	struct __compat_io_iocb_common *common =
+		(struct __compat_io_iocb_common *)&iocb->u.c;
+
+	common->resfd = eventfd;
+	common->flags |= resfd_flag;
+}
+
+#ifndef SYS_eventfd /* only when the headers included above did not provide SYS_eventfd */
+#ifndef __NR_eventfd /* no raw syscall number either: pick one per architecture */
+# if defined(__alpha__)
+# define __NR_eventfd 478
+# elif defined(__arm__)
+# define __NR_eventfd (__NR_SYSCALL_BASE+351) /* relies on __NR_SYSCALL_BASE being defined elsewhere */
+# elif defined(__ia64__)
+# define __NR_eventfd 1309
+# elif defined(__i386__)
+# define __NR_eventfd 323
+# elif defined(__m68k__)
+# define __NR_eventfd 319
+# elif 0 && defined(__mips__) /* disabled: the "0 &&" means this branch is never taken */
+# error __NR_eventfd?
+# define __NR_eventfd (__NR_Linux + 319)
+# define __NR_eventfd (__NR_Linux + 278)
+# define __NR_eventfd (__NR_Linux + 282)
+# elif defined(__hppa__)
+# define __NR_eventfd (__NR_Linux + 304)
+# elif defined(__PPC__) || defined(__powerpc64__)
+# define __NR_eventfd 307
+# elif defined(__s390__) || defined(__s390x__)
+# define __NR_eventfd 318
+# elif defined(__sparc__)
+# define __NR_eventfd 313
+# elif defined(__x86_64__)
+# define __NR_eventfd 284
+# endif /* no #else: an unlisted architecture leaves __NR_eventfd undefined */
+#else /* __NR_eventfd is defined although SYS_eventfd is not: treated as an error */
+# error __NR_eventfd?
+#endif
+#define SYS_eventfd __NR_eventfd /* alias used by tapdisk_sys_eventfd() */
+#endif
+
+/*
+ * Create an eventfd through the raw system call, so no libc wrapper or
+ * sys/eventfd.h is needed. @initval is the initial counter value.
+ * Returns whatever syscall() reports, narrowed to int.
+ */
+static inline int tapdisk_sys_eventfd(int initval)
+{
+	long rv = syscall(SYS_eventfd, initval, 0);
+
+	return (int)rv;
+}
+
+#endif /* __LIBAIO_COMPAT */
diff --git a/tools/blktap2/drivers/tapdisk-queue.c b/tools/blktap2/drivers/tapdisk-queue.c
index df1692089b..1e48bd946e 100644
--- a/tools/blktap2/drivers/tapdisk-queue.c
+++ b/tools/blktap2/drivers/tapdisk-queue.c
@@ -30,12 +30,18 @@
#include <stdlib.h>
#include <unistd.h>
#include <libaio.h>
+#ifdef __linux__
+#include <linux/version.h>
+#endif
#include "tapdisk.h"
#include "tapdisk-log.h"
#include "tapdisk-queue.h"
#include "tapdisk-filter.h"
#include "tapdisk-server.h"
+#include "tapdisk-utils.h"
+
+#include "libaio-compat.h"
#include "atomicio.h"
#define WARN(_f, _a...) tlog_write(TLOG_WARN, _f, ##_a)
@@ -270,10 +276,122 @@ struct lio {
io_context_t aio_ctx;
struct io_event *aio_events;
- int poll_fd;
+ int event_fd;
int event_id;
+
+ int flags;
};
+#define LIO_FLAG_EVENTFD (1<<0) /* lio->flags bit: event_fd is an eventfd that gets attached to submitted iocbs */
+
+/*
+ * Report whether the running kernel is at least 2.6.22, the version
+ * this file treats as the minimum for eventfd-based aio notification.
+ * A negative (error) result from tapdisk_linux_version() compares as
+ * "too old", so the answer is 0 in that case.
+ */
+static int
+tapdisk_lio_check_resfd(void)
+{
+	const int needed = KERNEL_VERSION(2, 6, 22);
+	int running = tapdisk_linux_version();
+
+	return (running >= needed) ? 1 : 0;
+}
+
+/*
+ * Release the notification descriptor and the AIO context owned by
+ * @queue's lio state. Each resource is released only if it is
+ * currently held, and its field is reset afterwards so that a second
+ * call is harmless.
+ */
+static void
+tapdisk_lio_destroy_aio(struct tqueue *queue)
+{
+	struct lio *state = queue->tio_data;
+	const int fd = state->event_fd;
+
+	/* notification descriptor first, then the context */
+	if (fd >= 0) {
+		state->event_fd = -1;
+		close(fd);
+	}
+
+	if (!state->aio_ctx)
+		return;
+
+	io_destroy(state->aio_ctx);
+	state->aio_ctx = 0;
+}
+
+/*
+ * Set up the AIO context through the "aio-poll" interface: the context
+ * is primed with REQUEST_ASYNC_FD and io_setup() is expected to hand
+ * back a pollable file descriptor, which is stored in lio->event_fd.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int
+__lio_setup_aio_poll(struct tqueue *queue, int qlen)
+{
+	struct lio *lio = queue->tio_data;
+	int fd;
+
+	lio->aio_ctx = REQUEST_ASYNC_FD;
+
+	fd = io_setup(qlen, &lio->aio_ctx);
+	if (fd < 0) {
+		lio->aio_ctx = 0;
+
+		/*
+		 * libaio reports failure through a negated error code in
+		 * the return value and does not set errno. Use the return
+		 * value directly: errno may be stale, possibly even 0,
+		 * which would make a failed setup look like success.
+		 */
+		if (fd == -EINVAL)
+			DPRINTF("Couldn't get fd for AIO poll support. This is probably "
+				"because your kernel does not have the aio-poll patch "
+				"applied.\n");
+
+		return fd;
+	}
+
+	lio->event_fd = fd;
+
+	return 0;
+}
+
+/*
+ * Set up a plain AIO context plus an eventfd used for completion
+ * notification, and mark the lio state with LIO_FLAG_EVENTFD.
+ *
+ * Returns 0 on success or a negative errno value on failure. On
+ * failure no resources are left behind: lio->aio_ctx is 0 and
+ * lio->event_fd is -1, so the caller may safely try another setup
+ * method.
+ */
+static int
+__lio_setup_aio_eventfd(struct tqueue *queue, int qlen)
+{
+	struct lio *lio = queue->tio_data;
+	int err;
+
+	err = io_setup(qlen, &lio->aio_ctx);
+	if (err < 0) {
+		lio->aio_ctx = 0;
+		return err;
+	}
+
+	lio->event_fd = tapdisk_sys_eventfd(0);
+	if (lio->event_fd < 0) {
+		/* save errno before io_destroy() gets a chance to change it */
+		err = -errno;
+
+		/*
+		 * Tear the context down again. Otherwise the poll-based
+		 * fallback would overwrite lio->aio_ctx and leak it.
+		 */
+		io_destroy(lio->aio_ctx);
+		lio->aio_ctx = 0;
+		lio->event_fd = -1;
+
+		return err;
+	}
+
+	lio->flags |= LIO_FLAG_EVENTFD;
+
+	return 0;
+}
+
+/*
+ * Create the AIO context and its notification descriptor for @queue.
+ *
+ * The mainline eventfd(2) mechanism is tried first when the kernel
+ * check passes. If the check fails, or the eventfd setup reports an
+ * error, the poll-fd mechanism is used instead.
+ *
+ * Returns 0 on success, otherwise the error of the last attempt.
+ */
+static int
+tapdisk_lio_setup_aio(struct tqueue *queue, int qlen)
+{
+	struct lio *state = queue->tio_data;
+	int rc = 0;
+	int use_poll;
+
+	state->event_fd = -1;
+	state->aio_ctx = 0;
+
+	use_poll = !tapdisk_lio_check_resfd();
+
+	if (!use_poll) {
+		rc = __lio_setup_aio_eventfd(queue, qlen);
+		use_poll = (rc != 0);
+	}
+
+	if (use_poll)
+		rc = __lio_setup_aio_poll(queue, qlen);
+
+	/* -EAGAIN gets a hint about the system-wide aio request limit */
+	if (rc == -EAGAIN)
+		DPRINTF("Couldn't setup AIO context. If you are trying to "
+			"concurrently use a large number of blktap-based disks, you may "
+			"need to increase the system-wide aio request limit. "
+			"(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
+
+	return rc;
+}
+
+
static void
tapdisk_lio_destroy(struct tqueue *queue)
{
@@ -287,10 +405,7 @@ tapdisk_lio_destroy(struct tqueue *queue)
lio->event_id = -1;
}
- if (lio->aio_ctx) {
- io_destroy(lio->aio_ctx);
- lio->aio_ctx = NULL;
- }
+ tapdisk_lio_destroy_aio(queue);
if (lio->aio_events) {
free(lio->aio_events);
@@ -299,6 +414,27 @@ tapdisk_lio_destroy(struct tqueue *queue)
}
static void
+tapdisk_lio_set_eventfd(struct tqueue *queue, int n, struct iocb **iocbs)
+{
+	/*
+	 * Attach the queue's eventfd to the first @n requests in @iocbs.
+	 * Nothing is done unless the queue runs in eventfd mode.
+	 */
+	struct lio *state = queue->tio_data;
+	int idx = 0;
+
+	if (!(state->flags & LIO_FLAG_EVENTFD))
+		return;
+
+	while (idx < n) {
+		__io_set_eventfd(iocbs[idx], state->event_fd);
+		idx++;
+	}
+}
+
+/*
+ * Acknowledge a completion notification on @queue.
+ *
+ * In eventfd mode the 8-byte counter is read from lio->event_fd, which
+ * resets it so the descriptor stops signalling readiness (see
+ * eventfd(2)). In poll-fd mode there is nothing to acknowledge.
+ *
+ * The read is retried when interrupted by a signal. Any other failure
+ * is logged and otherwise tolerated, because the caller reaps
+ * completions with io_getevents() regardless.
+ */
+static void
+tapdisk_lio_ack_event(struct tqueue *queue)
+{
+	struct lio *lio = queue->tio_data;
+	uint64_t val;
+	ssize_t n;
+
+	if (!(lio->flags & LIO_FLAG_EVENTFD))
+		return;
+
+	do {
+		n = read(lio->event_fd, &val, sizeof(val));
+	} while (n < 0 && errno == EINTR);
+
+	if (n < 0)
+		WARN("failed to ack eventfd %d: %d\n", lio->event_fd, errno);
+}
+
+static void
tapdisk_lio_event(event_id_t id, char mode, void *private)
{
struct tqueue *queue = private;
@@ -308,6 +444,8 @@ tapdisk_lio_event(event_id_t id, char mode, void *private)
struct tiocb *tiocb;
struct io_event *ep;
+ tapdisk_lio_ack_event(queue);
+
lio = queue->tio_data;
ret = io_getevents(lio->aio_ctx, 0,
queue->size, lio->aio_events, NULL);
@@ -336,22 +474,14 @@ tapdisk_lio_setup(struct tqueue *queue, int qlen)
int err;
lio->event_id = -1;
- lio->aio_ctx = REQUEST_ASYNC_FD;
-
- lio->poll_fd = io_setup(qlen, &lio->aio_ctx);
- err = lio->poll_fd;
- if (err < 0) {
- lio->aio_ctx = NULL;
- if (err == -EAGAIN)
- goto fail_rsv;
-
- goto fail_fd;
- }
+ err = tapdisk_lio_setup_aio(queue, qlen);
+ if (err)
+ goto fail;
lio->event_id =
tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
- lio->poll_fd, 0,
+ lio->event_fd, 0,
tapdisk_lio_event,
queue);
err = lio->event_id;
@@ -369,19 +499,6 @@ tapdisk_lio_setup(struct tqueue *queue, int qlen)
fail:
tapdisk_lio_destroy(queue);
return err;
-
-fail_rsv:
- DPRINTF("Couldn't setup AIO context. If you are trying to "
- "concurrently use a large number of blktap-based disks, you may "
- "need to increase the system-wide aio request limit. "
- "(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
- goto fail;
-
-fail_fd:
- DPRINTF("Couldn't get fd for AIO poll support. This is probably "
- "because your kernel does not have the aio-poll patch "
- "applied.\n");
- goto fail;
}
static int
@@ -395,6 +512,7 @@ tapdisk_lio_submit(struct tqueue *queue)
tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
merged = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
+ tapdisk_lio_set_eventfd(queue, merged, queue->iocbs);
submitted = io_submit(lio->aio_ctx, merged, queue->iocbs);
DBG("queued: %d, merged: %d, submitted: %d\n",
diff --git a/tools/blktap2/drivers/tapdisk-utils.c b/tools/blktap2/drivers/tapdisk-utils.c
index 4304ecf0f2..757c0bdc77 100644
--- a/tools/blktap2/drivers/tapdisk-utils.c
+++ b/tools/blktap2/drivers/tapdisk-utils.c
@@ -33,6 +33,10 @@
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
+#include <sys/utsname.h>
+#ifdef __linux__
+#include <linux/version.h>
+#endif
#include "blk.h"
#include "tapdisk.h"
@@ -183,3 +187,31 @@ tapdisk_get_image_size(int fd, uint64_t *_sectors, uint32_t *_sector_size)
return 0;
}
+
+#ifdef __linux__
+
+/*
+ * Return the running kernel's version in KERNEL_VERSION() encoding, so
+ * callers can compare it against KERNEL_VERSION(a, b, c) constants.
+ *
+ * The version is parsed from the leading "major.minor[.sublevel]" part
+ * of uname()'s release string; a missing sublevel counts as 0.
+ *
+ * Returns a negative errno value if uname() fails, or -ENOSYS if the
+ * release string does not start with at least "major.minor".
+ */
+int tapdisk_linux_version(void)
+{
+	struct utsname uts;
+	unsigned int version, patchlevel, sublevel;
+	int n;
+
+	if (uname(&uts))
+		return -errno;
+
+	/* sscanf() leaves sublevel untouched when only two fields match */
+	sublevel = 0;
+
+	n = sscanf(uts.release, "%u.%u.%u", &version, &patchlevel, &sublevel);
+	if (n < 2)
+		return -ENOSYS;
+
+	/*
+	 * KERNEL_VERSION() reserves 8 bits for the sublevel. Long-lived
+	 * stable series go past 255, which would spill into the
+	 * patchlevel bits, so clamp it.
+	 */
+	if (sublevel > 255)
+		sublevel = 255;
+
+	return KERNEL_VERSION(version, patchlevel, sublevel);
+}
+
+#else
+
+/* Kernel version queries are only implemented for Linux. */
+int tapdisk_linux_version(void)
+{
+	return -ENOSYS;
+}
+
+#endif
diff --git a/tools/blktap2/drivers/tapdisk-utils.h b/tools/blktap2/drivers/tapdisk-utils.h
index 216c902377..5e08aa8326 100644
--- a/tools/blktap2/drivers/tapdisk-utils.h
+++ b/tools/blktap2/drivers/tapdisk-utils.h
@@ -38,5 +38,6 @@ int tapdisk_set_resource_limits(void);
int tapdisk_namedup(char **, const char *);
int tapdisk_parse_disk_type(const char *, char **, int *);
int tapdisk_get_image_size(int, uint64_t *, uint32_t *);
+int tapdisk_linux_version(void);
#endif