aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorjchesterfield@dhcp92.uk.xensource.com <jchesterfield@dhcp92.uk.xensource.com>2006-07-13 10:13:26 +0100
committerjchesterfield@dhcp92.uk.xensource.com <jchesterfield@dhcp92.uk.xensource.com>2006-07-13 10:13:26 +0100
commit0da81aa1d4a70baefa42b4e5ff1bbf670abc2711 (patch)
treebb0c9f29e962352c1e9949f5e10699847fb2d652 /tools
parent0929bd9fc08ffc28978dad3208422948adb46811 (diff)
downloadxen-0da81aa1d4a70baefa42b4e5ff1bbf670abc2711.tar.gz
xen-0da81aa1d4a70baefa42b4e5ff1bbf670abc2711.tar.bz2
xen-0da81aa1d4a70baefa42b4e5ff1bbf670abc2711.zip
Added blktap support. Includes kernel driver (enabled as CONFIG_XEN_BLKDEV_TAP=y) and userspace tools. The userspace daemon (blktapctrl) is enabled by default when xend is activated. For further information on using and configuring blktap see tools/blktap/README.
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile2
-rw-r--r--tools/blktap/Makefile28
-rw-r--r--tools/blktap/README122
-rw-r--r--tools/blktap/drivers/Makefile76
-rw-r--r--tools/blktap/drivers/aes.c1319
-rw-r--r--tools/blktap/drivers/aes.h26
-rw-r--r--tools/blktap/drivers/blktapctrl.c704
-rw-r--r--tools/blktap/drivers/blktapctrl.h55
-rw-r--r--tools/blktap/drivers/block-aio.c327
-rw-r--r--tools/blktap/drivers/block-qcow.c1369
-rw-r--r--tools/blktap/drivers/block-ram.c296
-rw-r--r--tools/blktap/drivers/block-sync.c242
-rw-r--r--tools/blktap/drivers/block-vmdk.c415
-rw-r--r--tools/blktap/drivers/bswap.h202
-rw-r--r--tools/blktap/drivers/img2qcow.c289
-rw-r--r--tools/blktap/drivers/qcow-create.c80
-rw-r--r--tools/blktap/drivers/qcow2raw.c346
-rw-r--r--tools/blktap/drivers/tapdisk.c671
-rw-r--r--tools/blktap/drivers/tapdisk.h211
-rw-r--r--tools/blktap/lib/Makefile66
-rw-r--r--tools/blktap/lib/blkif.c185
-rw-r--r--tools/blktap/lib/blktaplib.h223
-rw-r--r--tools/blktap/lib/list.h55
-rw-r--r--tools/blktap/lib/xenbus.c387
-rw-r--r--tools/blktap/lib/xs_api.c364
-rw-r--r--tools/blktap/lib/xs_api.h50
-rw-r--r--tools/examples/Makefile1
-rw-r--r--tools/examples/blktap15
-rwxr-xr-xtools/examples/xen-backend.agent3
-rw-r--r--tools/examples/xen-backend.rules1
-rw-r--r--tools/libaio/COPYING515
-rw-r--r--tools/libaio/ChangeLog43
-rw-r--r--tools/libaio/INSTALL18
-rw-r--r--tools/libaio/Makefile40
-rw-r--r--tools/libaio/TODO4
-rw-r--r--tools/libaio/harness/Makefile37
-rw-r--r--tools/libaio/harness/README19
-rw-r--r--tools/libaio/harness/attic/0.t9
-rw-r--r--tools/libaio/harness/attic/1.t9
-rw-r--r--tools/libaio/harness/cases/10.t53
-rw-r--r--tools/libaio/harness/cases/11.t39
-rw-r--r--tools/libaio/harness/cases/12.t49
-rw-r--r--tools/libaio/harness/cases/13.t66
-rw-r--r--tools/libaio/harness/cases/14.t90
-rw-r--r--tools/libaio/harness/cases/2.t41
-rw-r--r--tools/libaio/harness/cases/3.t25
-rw-r--r--tools/libaio/harness/cases/4.t72
-rw-r--r--tools/libaio/harness/cases/5.t47
-rw-r--r--tools/libaio/harness/cases/6.t57
-rw-r--r--tools/libaio/harness/cases/7.t27
-rw-r--r--tools/libaio/harness/cases/8.t49
-rw-r--r--tools/libaio/harness/cases/aio_setup.h98
-rw-r--r--tools/libaio/harness/cases/common-7-8.h37
-rw-r--r--tools/libaio/harness/main.c39
-rw-r--r--tools/libaio/harness/runtests.sh19
-rw-r--r--tools/libaio/libaio.spec177
-rw-r--r--tools/libaio/man/aio.3315
-rw-r--r--tools/libaio/man/aio_cancel.3137
-rw-r--r--tools/libaio/man/aio_cancel64.350
-rw-r--r--tools/libaio/man/aio_error.381
-rw-r--r--tools/libaio/man/aio_error64.364
-rw-r--r--tools/libaio/man/aio_fsync.3139
-rw-r--r--tools/libaio/man/aio_fsync64.351
-rw-r--r--tools/libaio/man/aio_init.396
-rw-r--r--tools/libaio/man/aio_read.3146
-rw-r--r--tools/libaio/man/aio_read64.360
-rw-r--r--tools/libaio/man/aio_return.371
-rw-r--r--tools/libaio/man/aio_return64.351
-rw-r--r--tools/libaio/man/aio_suspend.3123
-rw-r--r--tools/libaio/man/aio_suspend64.351
-rw-r--r--tools/libaio/man/aio_write.3176
-rw-r--r--tools/libaio/man/aio_write64.361
-rw-r--r--tools/libaio/man/io.3351
-rw-r--r--tools/libaio/man/io_cancel.121
-rw-r--r--tools/libaio/man/io_cancel.365
-rw-r--r--tools/libaio/man/io_destroy.117
-rw-r--r--tools/libaio/man/io_fsync.382
-rw-r--r--tools/libaio/man/io_getevents.129
-rw-r--r--tools/libaio/man/io_getevents.379
-rw-r--r--tools/libaio/man/io_prep_fsync.389
-rw-r--r--tools/libaio/man/io_prep_pread.379
-rw-r--r--tools/libaio/man/io_prep_pwrite.377
-rw-r--r--tools/libaio/man/io_queue_init.363
-rw-r--r--tools/libaio/man/io_queue_release.348
-rw-r--r--tools/libaio/man/io_queue_run.350
-rw-r--r--tools/libaio/man/io_queue_wait.356
-rw-r--r--tools/libaio/man/io_set_callback.344
-rw-r--r--tools/libaio/man/io_setup.115
-rw-r--r--tools/libaio/man/io_submit.1109
-rw-r--r--tools/libaio/man/io_submit.3135
-rw-r--r--tools/libaio/man/lio_listio.3229
-rw-r--r--tools/libaio/man/lio_listio64.339
-rw-r--r--tools/libaio/src/Makefile64
-rw-r--r--tools/libaio/src/compat-0_1.c62
-rw-r--r--tools/libaio/src/io_cancel.c23
-rw-r--r--tools/libaio/src/io_destroy.c23
-rw-r--r--tools/libaio/src/io_getevents.c57
-rw-r--r--tools/libaio/src/io_queue_init.c33
-rw-r--r--tools/libaio/src/io_queue_release.c27
-rw-r--r--tools/libaio/src/io_queue_run.c39
-rw-r--r--tools/libaio/src/io_queue_wait.c31
-rw-r--r--tools/libaio/src/io_setup.c23
-rw-r--r--tools/libaio/src/io_submit.c23
-rw-r--r--tools/libaio/src/libaio.h222
-rw-r--r--tools/libaio/src/libaio.map22
-rw-r--r--tools/libaio/src/raw_syscall.c18
-rw-r--r--tools/libaio/src/syscall-alpha.h209
-rw-r--r--tools/libaio/src/syscall-i386.h72
-rw-r--r--tools/libaio/src/syscall-ia64.h44
-rw-r--r--tools/libaio/src/syscall-ppc.h94
-rw-r--r--tools/libaio/src/syscall-s390.h131
-rw-r--r--tools/libaio/src/syscall-x86_64.h63
-rw-r--r--tools/libaio/src/syscall.h27
-rw-r--r--tools/libaio/src/vsys_def.h24
-rw-r--r--tools/misc/xend7
-rw-r--r--tools/python/xen/xend/XendDomainInfo.py2
-rw-r--r--tools/python/xen/xend/server/BlktapController.py14
-rw-r--r--tools/python/xen/xm/create.py8
-rw-r--r--tools/python/xen/xm/main.py8
-rw-r--r--tools/xenstore/Makefile7
120 files changed, 14531 insertions, 4 deletions
diff --git a/tools/Makefile b/tools/Makefile
index ac41f2f321..2a42254e32 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -16,6 +16,8 @@ SUBDIRS-y += guest-headers
SUBDIRS-$(VTPM_TOOLS) += vtpm_manager
SUBDIRS-$(VTPM_TOOLS) += vtpm
SUBDIRS-y += xenstat
+SUBDIRS-y += libaio
+SUBDIRS-y += blktap
# These don't cross-compile
ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
diff --git a/tools/blktap/Makefile b/tools/blktap/Makefile
new file mode 100644
index 0000000000..fb194f3203
--- /dev/null
+++ b/tools/blktap/Makefile
@@ -0,0 +1,28 @@
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Sub-directories handled by every target below, in this order.
+SUBDIRS-y := lib drivers
+
+.PHONY: all build install clean
+
+# "all" simply forwards to "build".
+all: build
+
+# Run "all" in each sub-directory; "set -e" stops at the first failing sub-make.
+# NOTE(review): mk-symlinks is not defined in this file -- presumably Rules.mk.
+build: mk-symlinks
+	@set -e; \
+	for dir in $(SUBDIRS-y); do $(MAKE) -C $$dir all; done
+
+# Run "install" in each sub-directory.
+install:
+	@set -e; \
+	for dir in $(SUBDIRS-y); do $(MAKE) -C $$dir install; done
+
+# Delete build products in this directory, then run "clean" in each sub-directory.
+# $(LIB) and $(DEPS) are not assigned in this file; they may come from Rules.mk.
+clean:
+	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
+	@set -e; \
+	for dir in $(SUBDIRS-y); do $(MAKE) -C $$dir clean; done
diff --git a/tools/blktap/README b/tools/blktap/README
new file mode 100644
index 0000000000..5e4108030e
--- /dev/null
+++ b/tools/blktap/README
@@ -0,0 +1,122 @@
+Blktap Userspace Tools + Library
+================================
+
+Andrew Warfield and Julian Chesterfield
+16th June 2006
+
+{firstname.lastname}@cl.cam.ac.uk
+
+The blktap userspace toolkit provides a user-level disk I/O
+interface. The blktap mechanism involves a kernel driver that acts
+similarly to the existing Xen/Linux blkback driver, and a set of
+associated user-level libraries. Using these tools, blktap allows
+virtual block devices presented to VMs to be implemented in userspace
+and to be backed by raw partitions, files, network, etc.
+
+The key benefit of blktap is that it makes it easy and fast to write
+arbitrary block backends, and that these user-level backends actually
+perform very well. Specifically:
+
+- Metadata disk formats such as Copy-on-Write, encrypted disks, sparse
+ formats and other compression features can be easily implemented.
+
+- Accessing file-based images from userspace avoids problems related
+ to flushing dirty pages which are present in the Linux loopback
+ driver. (Specifically, doing a large number of writes to an
+ NFS-backed image doesn't result in the OOM killer going berserk.)
+
+- Per-disk handler processes enable easier userspace policing of block
+ resources, and process-granularity QoS techniques (disk scheduling
+ and related tools) may be trivially applied to block devices.
+
+- It's very easy to take advantage of userspace facilities such as
+ networking libraries, compression utilities, peer-to-peer
+ file-sharing systems and so on to build more complex block backends.
+
+- Crashes are contained -- incremental development/debugging is very
+ fast.
+
+How it works (in one paragraph):
+
+Working in conjunction with the kernel blktap driver, all disk I/O
+requests from VMs are passed to the userspace daemon (using a shared
+memory interface) through a character device. Each active disk is
+mapped to an individual device node, allowing per-disk processes to
+implement individual block devices where desired. The userspace
+drivers are implemented using asynchronous (Linux libaio),
+O_DIRECT-based calls to preserve the unbuffered, batched and
+asynchronous request dispatch achieved with the existing blkback
+code. We provide a simple, asynchronous virtual disk interface that
+makes it quite easy to add new disk implementations.
+
+As of June 2006 the current supported disk formats are:
+
+ - Raw Images (both on partitions and in image files)
+ - File-backed Qcow disks
+ - Standalone sparse Qcow disks
+ - Fast shareable RAM disk between VMs (requires some form of cluster-based
+ filesystem support e.g. OCFS2 in the guest kernel)
+ - Some VMDK images - your mileage may vary
+
+Raw and QCow images have asynchronous backends and so should perform
+fairly well. VMDK is based directly on the qemu vmdk driver, which is
+synchronous (a.k.a. slow).
+
+Build and Installation Instructions
+===================================
+
+Make sure to configure the blktap backend driver in your dom0 kernel. It
+will cooperate fine with the existing backend driver, so you can
+experiment with tap disks without breaking existing VM configs.
+
+To build the tools separately, "make && make install" in
+tools/blktap.
+
+
+Using the Tools
+===============
+
+Prepare the image for booting. For qcow files, use the qcow utilities
+installed earlier: qcow-create generates a blank standalone image
+or a file-backed CoW image, and img2qcow takes an existing image or
+partition and creates a sparse, standalone qcow-based file.
+
+The userspace disk agent is configured to start automatically via xend
+(alternatively you can start it manually => 'blktapctrl')
+
+Customise the VM config file to use the 'tap' handler, followed by the
+driver type. e.g. for a raw image such as a file or partition:
+
+disk = ['tap:aio:<FILENAME>,sda1,w']
+
+e.g. for a qcow image:
+
+disk = ['tap:qcow:<FILENAME>,sda1,w']
+
+
+Mounting images in Dom0 using the blktap driver
+===============================================
+Tap (and blkback) disks are also mountable in Dom0 without requiring an
+active VM to attach. You will need to build a xenlinux Dom0 kernel that
+includes the blkfront driver (e.g. the default 'make world' or
+'make kernels' build). Simply use the xm command-line tool to activate
+the backend disks, and blkfront will generate a virtual block device that
+can be accessed in the same way as a loop device or partition:
+
+e.g. for a raw image file <FILENAME> that would normally be mounted using
+the loopback driver (such as 'mount -o loop <FILENAME> /mnt/disk'), do the
+following:
+
+xm block-attach 0 tap:aio:<FILENAME> /dev/xvda1 w 0
+mount /dev/xvda1 /mnt/disk <--- don't use loop driver
+
+In this way, you can use any of the userspace device-type drivers built
+with the blktap userspace toolkit to open and mount disks such as qcow
+or vmdk images:
+
+xm block-attach 0 tap:qcow:<FILENAME> /dev/xvda1 w 0
+mount /dev/xvda1 /mnt/disk
+
+
+
+
diff --git a/tools/blktap/drivers/Makefile b/tools/blktap/drivers/Makefile
new file mode 100644
index 0000000000..6601a4d005
--- /dev/null
+++ b/tools/blktap/drivers/Makefile
@@ -0,0 +1,76 @@
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Extra header search paths: the parent directory and ../lib.
+INCLUDES += -I.. -I../lib
+
+# libaio source directory, used below for both -I and -L.
+LIBAIO_DIR = ../../libaio/src
+
+# Programs built here, and the tool, mode and directory used to install them.
+IBIN         = blktapctrl tapdisk
+QCOW_UTIL    = img2qcow qcow2raw qcow-create
+INSTALL      = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR  = /usr/sbin
+
+# Compiler flags: code generation and warnings first, then include paths,
+# then the large-file and GNU feature-test macros.
+CFLAGS += -fPIC -Wall -Werror -Wno-unused -g3 -fno-strict-aliasing
+CFLAGS += -I $(XEN_LIBXC) -I $(LIBAIO_DIR)
+CFLAGS += $(INCLUDES) -I. -I../../xenstore
+CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+CFLAGS += -D_GNU_SOURCE
+
+# Get gcc to write a hidden dependency file named after each target.
+# DEPS is the glob matching those files (used by "clean" and "-include").
+CFLAGS += -Wp,-MD,.$(@F).d
+DEPS = .*.d
+
+# NOTE(review): THREADLIB is not referenced anywhere in this Makefile.
+THREADLIB := -lpthread -lz
+
+# Library search directories and libraries passed to every link command.
+LIBS := -L. -L.. -L../lib -L$(XEN_LIBXC)
+LIBS += -lblktap -lcrypto -lz
+LIBS += -L$(XEN_XENSTORE) -lxenstore
+
+# Additional link flags for the programs that use libaio.
+AIOLIBS := -L $(LIBAIO_DIR)
+AIOLIBS += -laio -static
+
+# Objects linked into tapdisk and each of the qcow utilities.
+BLK-OBJS := block-aio.o block-sync.o
+BLK-OBJS += block-vmdk.o block-ram.o
+BLK-OBJS += block-qcow.o aes.o
+
+# Build both daemons and all of the qcow helper programs.
+.PHONY: all qcow-util install clean
+all: $(IBIN) qcow-util
+
+LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
+
+# Each program lists its own source as a prerequisite so that editing it
+# triggers a rebuild, and libraries follow the inputs on the link line.
+blktapctrl: blktapctrl.c
+	$(CC) $(CFLAGS) -o $@ $< $(LIBS)
+
+tapdisk: tapdisk.c $(BLK-OBJS)
+	$(CC) $(CFLAGS) -o $@ $(BLK-OBJS) $< $(AIOLIBS) $(LIBS)
+
+# One rule instance per helper, so each binary is a real, separately built target.
+$(QCOW_UTIL): %: %.c $(BLK-OBJS)
+	$(CC) $(CFLAGS) -o $@ $(BLK-OBJS) $< $(AIOLIBS) $(LIBS)
+
+# Alias retained so "make qcow-util" keeps building all of the helpers.
+qcow-util: $(QCOW_UTIL)
+
+# Create the destination directory first; it may be missing under $(DESTDIR).
+install: all
+	$(INSTALL) -d -m0755 $(DESTDIR)$(INSTALL_DIR)
+	$(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INSTALL_DIR)
+
+clean:
+	rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL)
+
+-include $(DEPS)
diff --git a/tools/blktap/drivers/aes.c b/tools/blktap/drivers/aes.c
new file mode 100644
index 0000000000..4d83fac957
--- /dev/null
+++ b/tools/blktap/drivers/aes.c
@@ -0,0 +1,1319 @@
+/**
+ *
+ * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project.
+ */
+/*
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+//#include "vl.h"
+#include <inttypes.h>
+#include <string.h>
+#include "aes.h"
+
+//#define NDEBUG
+#include <assert.h>
+
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+#define MAXKC (256/32)
+#define MAXKB (256/8)
+#define MAXNR 14
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+
+/*
+Te0[x] = S [x].[02, 01, 01, 03];
+Te1[x] = S [x].[03, 02, 01, 01];
+Te2[x] = S [x].[01, 03, 02, 01];
+Te3[x] = S [x].[01, 01, 03, 02];
+Te4[x] = S [x].[01, 01, 01, 01];
+
+Td0[x] = Si[x].[0e, 09, 0d, 0b];
+Td1[x] = Si[x].[0b, 0e, 09, 0d];
+Td2[x] = Si[x].[0d, 0b, 0e, 09];
+Td3[x] = Si[x].[09, 0d, 0b, 0e];
+Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+static const u32 Te0[256] = {
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+static const u32 Te1[256] = {
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+static const u32 Te2[256] = {
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+static const u32 Te3[256] = {
+
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+/*
+ * Te4: 256-entry table in which every word carries a single byte value
+ * repeated in all four byte positions (e.g. 0x63636363).  Callers index it
+ * with one byte and mask out the byte lane they need; it is used by the key
+ * schedule and by the last encryption round.
+ * NOTE(review): the byte values look like the AES forward S-box -- confirm
+ * against the AES specification.
+ */
+static const u32 Te4[256] = {
+ 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+ 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+ 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+ 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+ 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+ 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+ 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+ 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+ 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+ 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+ 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+ 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+ 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+ 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+ 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+ 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+ 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+ 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+ 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+ 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+ 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+ 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+ 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+ 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+ 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+ 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+ 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+ 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+ 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+ 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+ 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+ 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+ 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+ 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+ 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+ 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+ 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+ 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+ 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+ 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+ 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+ 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+ 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+ 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+ 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+ 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+ 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+ 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+ 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+ 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+ 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+ 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+ 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+ 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+ 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+ 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+ 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+ 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+ 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+ 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+ 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+ 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+ 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+ 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
+};
+/*
+ * Td0: 256-entry word table indexed by a single byte.  It is looked up with
+ * the top byte (x >> 24) of a word, both when AES_set_decrypt_key transforms
+ * the round keys and in the full decryption rounds.  Td1, Td2 and Td3 hold
+ * the same words rotated right by 8, 16 and 24 bits respectively.
+ */
+static const u32 Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+/*
+ * Td1: the Td0 words rotated right by 8 bits (Td0[0] is 0x51f4a750, Td1[0]
+ * is 0x5051f4a7).  It is looked up with the second-highest byte of a word,
+ * (x >> 16) & 0xff.
+ */
+static const u32 Td1[256] = {
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+/*
+ * Td2: the Td0 words rotated right by 16 bits (Td0[0] is 0x51f4a750, Td2[0]
+ * is 0xa75051f4).  It is looked up with the third byte of a word,
+ * (x >> 8) & 0xff.
+ */
+static const u32 Td2[256] = {
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+/*
+ * Td3: the Td0 words rotated right by 24 bits (Td0[0] is 0x51f4a750, Td3[0]
+ * is 0xf4a75051).  It is looked up with the lowest byte of a word, x & 0xff.
+ */
+static const u32 Td3[256] = {
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+/*
+ * Td4: 256-entry table in which every word carries a single byte value
+ * repeated in all four byte positions (e.g. 0x52525252), the same layout
+ * as Te4.
+ * NOTE(review): presumably the inverse S-box consumed by the last
+ * decryption round -- confirm against AES_decrypt and the AES specification.
+ */
+static const u32 Td4[256] = {
+ 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+ 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+ 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+ 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+ 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+ 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+ 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+ 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+ 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+ 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+ 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+ 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+ 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+ 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+ 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+ 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+ 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+ 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+ 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+ 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+ 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+ 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+ 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+ 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+ 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+ 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+ 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+ 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+ 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+ 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+ 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+ 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+ 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+ 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+ 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+ 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+ 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+ 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+ 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+ 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+ 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+ 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+ 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+ 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+ 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+ 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+ 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+ 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+ 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+ 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+ 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+ 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+ 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+ 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+ 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+ 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+ 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+ 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+ 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+ 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+ 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+ 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+ 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+ 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
+};
+/*
+ * Key-schedule round constants.  rcon[i] is XORed into the first word
+ * produced by expansion step i in AES_set_encrypt_key; only the top byte of
+ * each constant is non-zero.
+ */
+static const u32 rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/*
+ * Te4 lookup of every byte of w, with the result bytes rotated one position
+ * to the left: byte 2 of w lands in the top lane, the top byte of w lands in
+ * the bottom lane.
+ */
+static u32 aes_ks_rot_sub(u32 w)
+{
+    return (Te4[(w >> 16) & 0xff] & 0xff000000) ^
+           (Te4[(w >>  8) & 0xff] & 0x00ff0000) ^
+           (Te4[(w      ) & 0xff] & 0x0000ff00) ^
+           (Te4[(w >> 24)       ] & 0x000000ff);
+}
+
+/* Te4 lookup of every byte of w, each byte staying in its own lane. */
+static u32 aes_ks_sub(u32 w)
+{
+    return (Te4[(w >> 24)       ] & 0xff000000) ^
+           (Te4[(w >> 16) & 0xff] & 0x00ff0000) ^
+           (Te4[(w >>  8) & 0xff] & 0x0000ff00) ^
+           (Te4[(w      ) & 0xff] & 0x000000ff);
+}
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * userKey  raw key; bits / 32 words are read from it with GETU32
+ * bits     key length, must be 128, 192 or 256
+ * key      receives the schedule in rd_key and the round count
+ *          (10, 12 or 14) in rounds
+ *
+ * Returns 0 on success, -1 when userKey or key is NULL, -2 when bits is
+ * not one of the supported lengths.
+ */
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+                        AES_KEY *key) {
+
+    u32 *w;
+    int nk, n, step;
+
+    if (!userKey || !key)
+        return -1;
+    if (bits != 128 && bits != 192 && bits != 256)
+        return -2;
+
+    /* number of 32-bit words in the user key: 4, 6 or 8 */
+    nk = bits / 32;
+
+    w = key->rd_key;
+    key->rounds = nk + 6;
+
+    /* the schedule starts with the user key itself */
+    for (n = 0; n < nk; n++)
+        w[n] = GETU32(userKey + 4 * n);
+
+    if (nk == 4) {
+        /* ten steps, each deriving four new words from the previous four */
+        for (step = 0; step < 10; step++, w += 4) {
+            w[4] = w[0] ^ aes_ks_rot_sub(w[3]) ^ rcon[step];
+            w[5] = w[1] ^ w[4];
+            w[6] = w[2] ^ w[5];
+            w[7] = w[3] ^ w[6];
+        }
+        return 0;
+    }
+
+    if (nk == 6) {
+        for (step = 0; ; w += 6) {
+            w[6] = w[0] ^ aes_ks_rot_sub(w[5]) ^ rcon[step];
+            w[7] = w[1] ^ w[6];
+            w[8] = w[2] ^ w[7];
+            w[9] = w[3] ^ w[8];
+            /* the eighth step only needs its first four words */
+            if (++step == 8)
+                return 0;
+            w[10] = w[4] ^ w[ 9];
+            w[11] = w[5] ^ w[10];
+        }
+    }
+
+    /* nk == 8: the only length left after the validation above */
+    for (step = 0; ; w += 8) {
+        w[ 8] = w[0] ^ aes_ks_rot_sub(w[7]) ^ rcon[step];
+        w[ 9] = w[1] ^ w[ 8];
+        w[10] = w[2] ^ w[ 9];
+        w[11] = w[3] ^ w[10];
+        /* the seventh step only needs its first four words */
+        if (++step == 7)
+            return 0;
+        /* second half of the step: substitution without rotation or rcon */
+        w[12] = w[4] ^ aes_ks_sub(w[11]);
+        w[13] = w[5] ^ w[12];
+        w[14] = w[6] ^ w[13];
+        w[15] = w[7] ^ w[14];
+    }
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * Builds the encryption schedule with AES_set_encrypt_key, reverses the
+ * order of its four-word round keys, then passes every round key except the
+ * first and the last through the Td0..Td3 tables.
+ *
+ * Returns 0 on success, or the negative status reported by
+ * AES_set_encrypt_key.
+ */
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+                        AES_KEY *key) {
+
+    u32 *w;
+    u32 swap;
+    int rc, head, tail, n, round;
+
+    /* first, start with an encryption schedule */
+    rc = AES_set_encrypt_key(userKey, bits, key);
+    if (rc < 0)
+        return rc;
+
+    w = key->rd_key;
+
+    /* invert the order of the round keys, exchanging them from both ends: */
+    for (head = 0, tail = 4 * (key->rounds); head < tail; head += 4, tail -= 4) {
+        for (n = 0; n < 4; n++) {
+            swap = w[head + n];
+            w[head + n] = w[tail + n];
+            w[tail + n] = swap;
+        }
+    }
+
+    /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+    for (round = 1; round < (key->rounds); round++) {
+        w += 4;
+        for (n = 0; n < 4; n++) {
+            const u32 v = w[n];
+
+            /* Te4[..] & 0xff yields one byte, which then indexes a Td table */
+            w[n] = Td0[Te4[(v >> 24)       ] & 0xff] ^
+                   Td1[Te4[(v >> 16) & 0xff] & 0xff] ^
+                   Td2[Te4[(v >>  8) & 0xff] & 0xff] ^
+                   Td3[Te4[(v      ) & 0xff] & 0xff];
+        }
+    }
+    return 0;
+}
+
+#ifndef AES_ASM
+/*
+ * Encrypt a single block
+ * in and out can overlap
+ */
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key);
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ Te0[(s0 >> 24) ] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3 ) & 0xff] ^
+ rk[4];
+ t1 =
+ Te0[(s1 >> 24) ] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0 ) & 0xff] ^
+ rk[5];
+ t2 =
+ Te0[(s2 >> 24) ] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1 ) & 0xff] ^
+ rk[6];
+ t3 =
+ Te0[(s3 >> 24) ] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ Te0[(t0 >> 24) ] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[(t3 ) & 0xff] ^
+ rk[0];
+ s1 =
+ Te0[(t1 >> 24) ] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[(t0 ) & 0xff] ^
+ rk[1];
+ s2 =
+ Te0[(t2 >> 24) ] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[(t1 ) & 0xff] ^
+ rk[2];
+ s3 =
+ Te0[(t3 >> 24) ] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[(t2 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Te4[(t0 >> 24) ] & 0xff000000) ^
+ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (Te4[(t1 >> 24) ] & 0xff000000) ^
+ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (Te4[(t2 >> 24) ] & 0xff000000) ^
+ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (Te4[(t3 >> 24) ] & 0xff000000) ^
+ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+/*
+ * Decrypt a single block
+ * in and out can overlap
+ *
+ * in:  input block, read as four 32-bit words at byte offsets
+ *      0, 4, 8 and 12 through GETU32.
+ * out: output block, written as four 32-bit words at the same
+ *      offsets through PUTU32.
+ * key: supplies the round-key words (key->rd_key) and the round
+ *      count (key->rounds); presumably a schedule prepared by
+ *      AES_set_decrypt_key -- confirm against the caller.
+ *
+ * All four input words are loaded into s0..s3 before the first
+ * store to out.  The middle rounds are Td0..Td3 table lookups that
+ * ping-pong between s0..s3 and t0..t3; the last round uses Td4 with
+ * per-byte masks and the final four round-key words.
+ */
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key);
+ rk = key->rd_key; /* rk indexes the round-key words from here on */
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2; /* 4 words per round: rk[0..3] is now the last round key */
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1; /* each pass of the loop below performs two rounds */
+ for (;;) {
+ t0 =
+ Td0[(s0 >> 24) ] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1 ) & 0xff] ^
+ rk[4];
+ t1 =
+ Td0[(s1 >> 24) ] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2 ) & 0xff] ^
+ rk[5];
+ t2 =
+ Td0[(s2 >> 24) ] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3 ) & 0xff] ^
+ rk[6];
+ t3 =
+ Td0[(s3 >> 24) ] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0 ) & 0xff] ^
+ rk[7];
+
+ rk += 8; /* 8 words = the two round keys addressed per loop pass */
+ if (--r == 0) { /* leave after the odd round; state is in t0..t3 */
+ break;
+ }
+
+ s0 =
+ Td0[(t0 >> 24) ] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[(t1 ) & 0xff] ^
+ rk[0];
+ s1 =
+ Td0[(t1 >> 24) ] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[(t2 ) & 0xff] ^
+ rk[1];
+ s2 =
+ Td0[(t2 >> 24) ] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[(t3 ) & 0xff] ^
+ rk[2];
+ s3 =
+ Td0[(t3 >> 24) ] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[(t0 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Td4[(t0 >> 24) ] & 0xff000000) ^
+ (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (Td4[(t1 >> 24) ] & 0xff000000) ^
+ (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (Td4[(t2 >> 24) ] & 0xff000000) ^
+ (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (Td4[(t3 >> 24) ] & 0xff000000) ^
+ (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+#endif /* AES_ASM */
+
+/*
+ * CBC-mode wrapper around AES_encrypt()/AES_decrypt().
+ *
+ * in/out: source and destination buffers.
+ * length: number of bytes to process.
+ * key:    key schedule handed through to the block primitive.
+ * ivec:   AES_BLOCK_SIZE-byte chaining value; read on entry and
+ *         overwritten so that a later call can continue the chain.
+ * enc:    non-zero encrypts, zero decrypts.
+ *
+ * A trailing partial block is handled as follows: when encrypting, the
+ * missing plaintext bytes are treated as zero and a whole block is
+ * written to out; when decrypting, a whole block is read from in but
+ * only the remaining `length` bytes are written to out.
+ */
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+		     const unsigned long length, const AES_KEY *key,
+		     unsigned char *ivec, const int enc)
+{
+	unsigned char block[AES_BLOCK_SIZE];
+	unsigned long remaining;
+	unsigned long i;
+
+	assert(in && out && key && ivec);
+
+	if (!enc) {
+		for (remaining = length; remaining >= AES_BLOCK_SIZE;
+		     remaining -= AES_BLOCK_SIZE) {
+			/* keep the ciphertext: in and out may be the same buffer */
+			memcpy(block, in, AES_BLOCK_SIZE);
+			AES_decrypt(in, out, key);
+			for (i = 0; i < AES_BLOCK_SIZE; i++)
+				out[i] ^= ivec[i];
+			memcpy(ivec, block, AES_BLOCK_SIZE);
+			in += AES_BLOCK_SIZE;
+			out += AES_BLOCK_SIZE;
+		}
+		if (remaining != 0) {
+			memcpy(block, in, AES_BLOCK_SIZE);
+			AES_decrypt(block, block, key);
+			for (i = 0; i < remaining; i++)
+				out[i] = block[i] ^ ivec[i];
+			memcpy(ivec, block, AES_BLOCK_SIZE);
+		}
+		return;
+	}
+
+	for (remaining = length; remaining >= AES_BLOCK_SIZE;
+	     remaining -= AES_BLOCK_SIZE) {
+		for (i = 0; i < AES_BLOCK_SIZE; i++)
+			block[i] = in[i] ^ ivec[i];
+		AES_encrypt(block, out, key);
+		memcpy(ivec, out, AES_BLOCK_SIZE);
+		in += AES_BLOCK_SIZE;
+		out += AES_BLOCK_SIZE;
+	}
+	if (remaining != 0) {
+		/* bytes past the end of the input contribute only the IV */
+		for (i = 0; i < AES_BLOCK_SIZE; i++)
+			block[i] = (i < remaining) ? (in[i] ^ ivec[i]) : ivec[i];
+		AES_encrypt(block, block, key);
+		memcpy(out, block, AES_BLOCK_SIZE);
+		memcpy(ivec, block, AES_BLOCK_SIZE);
+	}
+}
diff --git a/tools/blktap/drivers/aes.h b/tools/blktap/drivers/aes.h
new file mode 100644
index 0000000000..a0167eb7d5
--- /dev/null
+++ b/tools/blktap/drivers/aes.h
@@ -0,0 +1,26 @@
+#ifndef QEMU_AES_H
+#define QEMU_AES_H
+
+#include <stdint.h>	/* uint32_t: keeps this header self-contained */
+
+#define AES_MAXNR 14		/* largest round count the key schedule is sized for */
+#define AES_BLOCK_SIZE 16	/* cipher block size in bytes */
+
+/*
+ * Expanded key schedule: four 32-bit words per round key, with room
+ * for AES_MAXNR + 1 round keys, plus the round count actually in use.
+ */
+struct aes_key_st {
+	uint32_t rd_key[4 *(AES_MAXNR + 1)];
+	int rounds;
+};
+typedef struct aes_key_st AES_KEY;
+
+/* Expand a user key of `bits` bits into *key for encryption/decryption. */
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+			AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+			AES_KEY *key);
+
+/* Process exactly one AES_BLOCK_SIZE-byte block from in to out. */
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+		 const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+		 const AES_KEY *key);
+
+/*
+ * CBC mode over `length` bytes; ivec is updated in place and enc selects
+ * encryption (non-zero) or decryption (zero).
+ */
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+		     const unsigned long length, const AES_KEY *key,
+		     unsigned char *ivec, const int enc);
+
+#endif
diff --git a/tools/blktap/drivers/blktapctrl.c b/tools/blktap/drivers/blktapctrl.c
new file mode 100644
index 0000000000..f4ade5b780
--- /dev/null
+++ b/tools/blktap/drivers/blktapctrl.c
@@ -0,0 +1,704 @@
+/*
+ * blktapctrl.c
+ *
+ * userspace controller for the blktap disks.
+ * As requests for new block devices arrive,
+ * the controller spawns off a separate process
+ * per-disk.
+ *
+ *
+ * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#include <err.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <linux/types.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <unistd.h>
+#include <xs.h>
+#include <printf.h>
+#include <sys/time.h>
+#include <syslog.h>
+
+#include "blktaplib.h"
+#include "blktapctrl.h"
+#include "tapdisk.h"
+
+#define NUM_POLL_FDS 2 /* size of the pollfd array in main(): control device + xenstore */
+#define MSG_SIZE 4096 /* size of the receive buffer allocated by read_msg() */
+#define MAX_TIMEOUT 10 /* initial value of max_timeout, in seconds */
+#define MAX_RAND_VAL 0xFFFF /* modulus applied to lrand48() when a blkif cookie is generated */
+
+int run = 1; /* main() polls while this is non-zero; sig_handler() clears it */
+int max_timeout = MAX_TIMEOUT; /* select() timeout in seconds used by write_msg()/read_msg() */
+int ctlfd = 0; /* descriptor of the blktap control device, opened in main() */
+
+static int open_ctrl_socket(char *devname);
+static int write_msg(int fd, int msgtype, void *ptr, void *ptr2);
+static int read_msg(int fd, int msgtype, void *ptr);
+static driver_list_entry_t *active_disks[MAX_DISK_TYPES]; /* per-driver-type list heads, indexed by disk type id */
+
+/* Signal handler: ask the main poll loop to stop by clearing `run`. */
+void sig_handler(int sig)
+{
+	(void)sig;	/* the signal number is not inspected */
+	run = 0;
+}
+
+/* Reset every per-type list head in active_disks[] to the empty list. */
+static void init_driver_list(void)
+{
+	driver_list_entry_t **slot = active_disks;
+	driver_list_entry_t **end = active_disks + MAX_DISK_TYPES;
+
+	while (slot != end)
+		*slot++ = NULL;
+}
+
+/*
+ * Seed the drand48 family from the microsecond field of the current
+ * time of day.
+ */
+static void init_rng(void)
+{
+	struct timeval now;
+	uint32_t usec;
+
+	gettimeofday(&now, NULL);
+	usec = now.tv_usec;
+	srand48(usec);
+}
+
+/*
+ * Make sure the character device node `devname` exists.
+ *
+ * If lstat() cannot find it, BLKTAP_DEV_DIR is created (an already
+ * existing directory is not an error) and the node is made with the
+ * given major/minor numbers and mode 0600.  Failures are logged; the
+ * function has no return value, so callers detect a missing node when
+ * they try to open it.
+ */
+static void make_blktap_dev(char *devname, int major, int minor)
+{
+	struct stat st;
+
+	if (lstat(devname, &st) == 0) {
+		DPRINTF("%s device already exists\n",devname);
+		return;
+	}
+
+	/*Need to create device*/
+	if (mkdir(BLKTAP_DEV_DIR, 0755) == 0) {
+		DPRINTF("Created %s directory\n",BLKTAP_DEV_DIR);
+	} else if (errno != EEXIST) {
+		DPRINTF("Unable to create %s directory (%d)\n",
+			BLKTAP_DEV_DIR, errno);
+	}
+
+	if (mknod(devname, S_IFCHR|0600, makedev(major, minor)) == 0) {
+		DPRINTF("Created %s device\n",devname);
+	} else {
+		DPRINTF("Unable to create %s device (%d)\n", devname, errno);
+	}
+}
+
+/*
+ * Ask the blktap control device for a new interface for `blkif` and
+ * make sure the matching device node exists.
+ *
+ * On success *minor and *major hold the numbers reported by the
+ * driver and 0 is returned.  On failure -1 is returned; if a minor
+ * had already been handed out it is given back with
+ * BLKTAP_IOCTL_FREEINTF so that it is not leaked.
+ */
+static int get_new_dev(int *major, int *minor, blkif_t *blkif)
+{
+	domid_translate_t tr;
+	int ret;
+	char *devname;
+
+	tr.domid = blkif->domid;
+	tr.busid = (unsigned short)blkif->be_id;
+	/* the translate record is passed by value as the ioctl argument */
+	ret = ioctl(ctlfd, BLKTAP_IOCTL_NEWINTF, tr );
+
+	if ( (ret <= 0)||(ret > MAX_TAP_DEV) ) {
+		DPRINTF("Incorrect Dev ID [%d]\n",ret);
+		return -1;
+	}
+
+	*minor = ret;
+	*major = ioctl(ctlfd, BLKTAP_IOCTL_MAJOR, ret );
+	if (*major < 0) {
+		DPRINTF("Incorrect Major ID [%d]\n",*major);
+		ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, *minor);
+		return -1;
+	}
+
+	if (asprintf(&devname,"%s/%s%d",BLKTAP_DEV_DIR, BLKTAP_DEV_NAME,
+		     *minor) == -1) {
+		DPRINTF("Unable to allocate device name\n");
+		ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, *minor);
+		return -1;
+	}
+	make_blktap_dev(devname,*major,*minor);
+	free(devname);	/* only needed while creating the node */
+
+	DPRINTF("Received device id %d and major %d, "
+		"sent domid %d and be_id %d\n",
+		*minor, *major, tr.domid, tr.busid);
+	return 0;
+}
+
+/*
+ * Query the tapdisk process behind `blkif` for its pid: send
+ * CTLMSG_PID and wait for CTLMSG_PID_RSP (read_msg() stores the pid in
+ * blkif->tappid).  Returns 1 on success, -EINVAL if either step fails.
+ */
+static int get_tapdisk_pid(blkif_t *blkif)
+{
+	int rc;
+
+	rc = write_msg(blkif->fds[WRITE], CTLMSG_PID, blkif, NULL);
+	if (rc <= 0) {
+		DPRINTF("Write_msg failed - CTLMSG_PID(%d)\n", rc);
+		return -EINVAL;
+	}
+
+	rc = read_msg(blkif->fds[READ], CTLMSG_PID_RSP, blkif);
+	if (rc <= 0) {
+		DPRINTF("Read_msg failure - CTLMSG_PID(%d)\n", rc);
+		return -EINVAL;
+	}
+
+	return 1;
+}
+
+/*
+ * Split a "<handle>:<device>" parameter string and look the handle up
+ * in dtypes[].
+ *
+ * path: parameter string; not modified.
+ * dev:  set to the text after the first ':', or to NULL when path has
+ *       no ':' at all.
+ * type: set to the matching driver's idnum, or to MAX_DISK_TYPES + 1
+ *       when no driver matches.
+ *
+ * Returns the blkif of an already running tapdisk when the matching
+ * driver is a single-handler type with an active entry, NULL otherwise.
+ *
+ * The handle is copied into a fixed 10-byte buffer, so a longer prefix
+ * is rejected instead of overrunning it, and the comparison is an exact
+ * string match (the previous strncmp() length was derived from pointers
+ * into two different arrays).
+ */
+static blkif_t *test_path(char *path, char **dev, int *type)
+{
+	char *sep, handle[10];
+	size_t hlen;
+	int i, size;
+
+	size = sizeof(dtypes)/sizeof(disk_info_t *);
+	*type = MAX_DISK_TYPES + 1;
+
+	sep = strchr(path, ':');
+	if (sep == NULL) {
+		*dev = NULL;
+		return NULL;
+	}
+	*dev = sep + 1;
+
+	hlen = (size_t)(sep - path);
+	if (hlen >= sizeof(handle)) {
+		/* cannot be one of our handles, and would not fit anyway */
+		DPRINTF("Handle too long in [%s]\n", path);
+		return NULL;
+	}
+	memcpy(handle, path, hlen);
+	handle[hlen] = '\0';
+	DPRINTF("Detected handle: [%s]\n",handle);
+
+	for (i = 0; i < size; i++) {
+		if (strcmp(handle, dtypes[i]->handle) != 0)
+			continue;
+
+		*type = dtypes[i]->idnum;
+
+		if (dtypes[i]->single_handler == 1) {
+			/* Check whether tapdisk process
+			   already exists */
+			if (active_disks[dtypes[i]->idnum] == NULL)
+				return NULL;
+			return active_disks[dtypes[i]->idnum]->blkif;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Append `blkif` to the active_disks[] list for driver type `type`.
+ *
+ * Types outside 0 .. MAX_DISK_TYPES - 1 are ignored: active_disks[] has
+ * exactly MAX_DISK_TYPES slots, so `type == MAX_DISK_TYPES` must be
+ * rejected as well.  An allocation failure is logged and leaves the
+ * list unchanged.
+ */
+static void add_disktype(blkif_t *blkif, int type)
+{
+	driver_list_entry_t *entry, *tail;
+
+	if (type < 0 || type >= MAX_DISK_TYPES) return;
+
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL) {
+		DPRINTF("add_disktype: out of memory\n");
+		return;
+	}
+	entry->blkif = blkif;
+	entry->next = NULL;
+
+	if (active_disks[type] == NULL) {
+		entry->prev = NULL;
+		active_disks[type] = entry;
+		return;
+	}
+
+	/*Walk to the end of the list*/
+	for (tail = active_disks[type]; tail->next != NULL; tail = tail->next)
+		;
+	tail->next = entry;
+	entry->prev = tail;
+}
+
+/*
+ * Remove `blkif` from the active_disks[] list of its driver type.
+ *
+ * Returns non-zero when the caller should close the control
+ * descriptors: always for drivers whose single_handler is 0, and for
+ * single_handler == 1 drivers only when the removed entry was the last
+ * one on the list.  Also returns 1 when the type is out of range or no
+ * entry matches.
+ *
+ * The range check uses >= because both active_disks[] and dtypes[] are
+ * indexed with `type` below.
+ */
+static int del_disktype(blkif_t *blkif)
+{
+	driver_list_entry_t *ptr, *last, *next;
+	int type = blkif->drivertype;
+	int should_close = 0;
+
+	if (type < 0 || type >= MAX_DISK_TYPES) return 1;
+
+	last = NULL;
+	for (ptr = active_disks[type]; ptr != NULL;
+	     last = ptr, ptr = ptr->next) {
+		if (ptr->blkif != blkif)
+			continue;
+
+		/* unlink: fix up the predecessor (or head) and successor */
+		next = ptr->next;
+		if (last != NULL)
+			last->next = next;
+		else
+			active_disks[type] = next;
+		if (next != NULL)
+			next->prev = last;
+
+		if (dtypes[type]->single_handler == 0)
+			should_close = 1;
+		else if (dtypes[type]->single_handler == 1 &&
+			 active_disks[type] == NULL)
+			should_close = 1;	/* last user of the shared process */
+
+		DPRINTF("DEL_DISKTYPE: Freeing entry\n");
+		free(ptr);
+		return should_close;
+	}
+
+	DPRINTF("DEL_DISKTYPE: No match\n");
+	return 1;
+}
+
+/*
+ * Build a control message of type `msgtype` for `ptr` (a blkif_t *) and
+ * write it to `fd`.
+ *
+ * msgtype: CTLMSG_PARAMS, CTLMSG_NEWDEV, CTLMSG_CLOSE or CTLMSG_PID.
+ * ptr2:    for CTLMSG_PARAMS, the NUL-terminated path sent as payload;
+ *          unused for the other types.
+ *
+ * Every message starts with a msg_hdr_t carrying the type, total
+ * length, driver type and cookie of the blkif.  The function waits up
+ * to max_timeout seconds for `fd` to become writable.
+ *
+ * Returns the result of write(), 0 if the descriptor did not become
+ * writable in time, or -1 for an unknown message type, a missing
+ * CTLMSG_PARAMS path or an allocation failure.
+ */
+static int write_msg(int fd, int msgtype, void *ptr, void *ptr2)
+{
+	blkif_t *blkif = (blkif_t *)ptr;
+	blkif_info_t *blk = blkif->info;
+	msg_hdr_t *msg;
+	msg_newdev_t *msg_dev;
+	char *buf, *path = NULL;
+	size_t payload = 0;
+	int msglen, len = 0;
+	fd_set writefds;
+	struct timeval timeout;
+
+	switch (msgtype)
+	{
+	case CTLMSG_PARAMS:
+		path = (char *)ptr2;
+		if (path == NULL) {
+			/* params without a "<type>:" prefix leave no path */
+			DPRINTF("Write_msg: CTLMSG_PARAMS without a path\n");
+			return -1;
+		}
+		DPRINTF("Write_msg called: CTLMSG_PARAMS, sending [%s, %s]\n",
+			blk->params, path);
+		payload = strlen(path) + 1;
+		break;
+
+	case CTLMSG_NEWDEV:
+		DPRINTF("Write_msg called: CTLMSG_NEWDEV\n");
+		payload = sizeof(msg_newdev_t);
+		break;
+
+	case CTLMSG_CLOSE:
+		DPRINTF("Write_msg called: CTLMSG_CLOSE\n");
+		break;
+
+	case CTLMSG_PID:
+		DPRINTF("Write_msg called: CTLMSG_PID\n");
+		break;
+
+	default:
+		return -1;
+	}
+
+	msglen = (int)(sizeof(msg_hdr_t) + payload);
+	/* zeroed so that no uninitialised heap bytes go out on the pipe */
+	buf = calloc(1, msglen);
+	if (buf == NULL) {
+		DPRINTF("Write_msg: out of memory\n");
+		return -1;
+	}
+
+	/*Assign header fields*/
+	msg = (msg_hdr_t *)buf;
+	msg->type = msgtype;
+	msg->len = msglen;
+	msg->drivertype = blkif->drivertype;
+	msg->cookie = blkif->cookie;
+
+	if (msgtype == CTLMSG_PARAMS) {
+		DPRINTF("Generated cookie, %d\n",blkif->cookie);
+		/*Copy the path (including its NUL) after the header*/
+		memcpy(buf + sizeof(msg_hdr_t), path, payload);
+	} else if (msgtype == CTLMSG_NEWDEV) {
+		msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
+		msg_dev->devnum = blkif->minor;
+		msg_dev->domid = blkif->domid;
+	}
+
+	/*Now send the message*/
+	FD_ZERO(&writefds);
+	FD_SET(fd,&writefds);
+	timeout.tv_sec = max_timeout; /*Wait for up to max_timeout seconds*/
+	timeout.tv_usec = 0;
+	if (select(fd+1, (fd_set *) 0, &writefds,
+		   (fd_set *) 0, &timeout) > 0) {
+		len = write(fd, buf, msglen);
+		if (len == -1) DPRINTF("Write failed: (%d)\n",errno);
+	}
+	free(buf);
+
+	return len;
+}
+
+/*
+ * Wait up to max_timeout seconds for a control message on `fd`, read
+ * it and act on it for `ptr` (a blkif_t *).
+ *
+ * msgtype is the response the caller expects.  CTLMSG_IMG copies
+ * size/secsize/info into the image_t at blkif->prv, and an expected
+ * CTLMSG_PID_RSP stores the pid in blkif->tappid.
+ *
+ * Returns the number of bytes read when the expected message arrived;
+ * 0 on timeout, on a failure/unknown/unexpected message or on a message
+ * too short for its type; the read() result if read() itself failed;
+ * -1 if the receive buffer cannot be allocated.
+ *
+ * NOTE(review): the payload length checks assume the peer sends a whole
+ * image_t / msg_pid_t after the header -- confirm against tapdisk.
+ */
+static int read_msg(int fd, int msgtype, void *ptr)
+{
+	blkif_t *blkif = (blkif_t *)ptr;
+	image_t *image = blkif->prv, *img;
+	msg_hdr_t *msg;
+	msg_pid_t *msg_pid;
+	char *buf;
+	int ret = 0;
+	fd_set readfds;
+	struct timeval timeout;
+
+	buf = malloc(MSG_SIZE);
+	if (buf == NULL) {
+		DPRINTF("Read_msg: out of memory\n");
+		return -1;
+	}
+
+	FD_ZERO(&readfds);
+	FD_SET(fd,&readfds);
+	timeout.tv_sec = max_timeout; /*Wait for up to max_timeout seconds*/
+	timeout.tv_usec = 0;
+	if (select(fd+1, &readfds, (fd_set *) 0,
+		   (fd_set *) 0, &timeout) > 0) {
+		ret = read(fd, buf, MSG_SIZE);
+	}
+
+	if (ret > 0 && (size_t)ret < sizeof(msg_hdr_t)) {
+		/* not even a complete header: nothing can be trusted */
+		DPRINTF("Read_msg: short message (%d bytes)\n", ret);
+		ret = 0;
+	}
+
+	if (ret > 0) {
+		msg = (msg_hdr_t *)buf;
+		switch (msg->type)
+		{
+		case CTLMSG_IMG:
+			if (image == NULL || (size_t)ret <
+			    sizeof(msg_hdr_t) + sizeof(image_t)) {
+				DPRINTF("Read_msg: bad CTLMSG_IMG (%d bytes)\n",
+					ret);
+				ret = 0;
+				break;
+			}
+			img = (image_t *)(buf + sizeof(msg_hdr_t));
+			image->size = img->size;
+			image->secsize = img->secsize;
+			image->info = img->info;
+
+			DPRINTF("Received CTLMSG_IMG: %lu, %lu, %lu\n",
+				image->size, image->secsize, image->info);
+			if(msgtype != CTLMSG_IMG) ret = 0;
+			break;
+
+		case CTLMSG_IMG_FAIL:
+			DPRINTF("Received CTLMSG_IMG_FAIL, "
+				"unable to open image\n");
+			ret = 0;
+			break;
+
+		case CTLMSG_NEWDEV_RSP:
+			DPRINTF("Received CTLMSG_NEWDEV_RSP\n");
+			if(msgtype != CTLMSG_NEWDEV_RSP) ret = 0;
+			break;
+
+		case CTLMSG_NEWDEV_FAIL:
+			DPRINTF("Received CTLMSG_NEWDEV_FAIL\n");
+			ret = 0;
+			break;
+
+		case CTLMSG_CLOSE_RSP:
+			DPRINTF("Received CTLMSG_CLOSE_RSP\n");
+			if (msgtype != CTLMSG_CLOSE_RSP) ret = 0;
+			break;
+
+		case CTLMSG_PID_RSP:
+			DPRINTF("Received CTLMSG_PID_RSP\n");
+			if (msgtype != CTLMSG_PID_RSP) {
+				ret = 0;
+			} else if ((size_t)ret <
+				   sizeof(msg_hdr_t) + sizeof(msg_pid_t)) {
+				DPRINTF("Read_msg: bad CTLMSG_PID_RSP "
+					"(%d bytes)\n", ret);
+				ret = 0;
+			} else {
+				msg_pid = (msg_pid_t *)
+					(buf + sizeof(msg_hdr_t));
+				blkif->tappid = msg_pid->pid;
+				DPRINTF("\tPID: [%d]\n",blkif->tappid);
+			}
+			break;
+		default:
+			DPRINTF("UNKNOWN MESSAGE TYPE RECEIVED\n");
+			ret = 0;
+			break;
+		}
+	}
+
+	free(buf);
+
+	return ret;
+}
+
+/*
+ * New-blkif hook: allocate a blktap device for `blkif`, attach it to a
+ * tapdisk process (spawning one when test_path() reports none that can
+ * be shared) and fetch the image geometry from that process.
+ *
+ * Returns 0 on success.  Returns -1 when blkif carries no params, when
+ * no device could be allocated or when tapdisk could not be launched;
+ * -EINVAL for every other failure.  Once a device has been allocated,
+ * every failure path releases it with BLKTAP_IOCTL_FREEINTF and frees
+ * the temporary FIFO path strings.
+ *
+ * NOTE(review): after add_disktype() a failure leaves the list entry,
+ * the descriptors and blkif->prv in place, as before; this presumably
+ * relies on the caller running the unmap hook -- confirm.
+ */
+int blktapctrl_new_blkif(blkif_t *blkif)
+{
+	blkif_info_t *blk;
+	int major, minor, type;
+	int ret = -EINVAL;
+	char *rdctldev = NULL, *wrctldev = NULL, *cmd = NULL, *ptr = NULL;
+	image_t *image;
+	blkif_t *exist;
+
+	DPRINTF("Received a poll for a new vbd\n");
+	blk = blkif->info;
+	if (blk == NULL || blk->params == NULL)
+		return -1;
+
+	if (get_new_dev(&major, &minor, blkif)<0)
+		return -1;
+
+	exist = test_path(blk->params, &ptr, &type);
+	blkif->drivertype = type;
+	blkif->cookie = lrand48() % MAX_RAND_VAL;
+
+	if (ptr == NULL) {
+		/* no "<type>:" prefix, so there is no path to hand over */
+		DPRINTF("No disk type prefix in params [%s]\n", blk->params);
+		goto fail;
+	}
+
+	if (!exist) {
+		DPRINTF("Process does not exist:\n");
+		if (asprintf(&rdctldev, "/dev/xen/tapctrlread%d",
+			     minor) == -1) {
+			rdctldev = NULL;
+			goto fail;
+		}
+		blkif->fds[READ] = open_ctrl_socket(rdctldev);
+
+		if (asprintf(&wrctldev, "/dev/xen/tapctrlwrite%d",
+			     minor) == -1) {
+			wrctldev = NULL;
+			goto fail;
+		}
+		blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
+
+		if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1)
+			goto fail;
+
+		/*launch the new process*/
+		if (asprintf(&cmd, "tapdisk %s %s", wrctldev,
+			     rdctldev) == -1) {
+			cmd = NULL;
+			goto fail;
+		}
+		DPRINTF("Launching process, CMDLINE [%s]\n",cmd);
+		if (system(cmd) == -1) {
+			DPRINTF("Unable to fork, cmdline: [%s]\n",cmd);
+			ret = -1;	/* keep the historical return value */
+			goto fail;
+		}
+	} else {
+		DPRINTF("Process exists!\n");
+		blkif->fds[READ] = exist->fds[READ];
+		blkif->fds[WRITE] = exist->fds[WRITE];
+	}
+
+	add_disktype(blkif, type);
+	blkif->major = major;
+	blkif->minor = minor;
+
+	image = (image_t *)malloc(sizeof(image_t));
+	blkif->prv = (void *)image;
+	blkif->ops = &tapdisk_ops;
+	if (image == NULL) {
+		DPRINTF("Unable to allocate image record\n");
+		goto fail;
+	}
+
+	/*Retrieve the PID of the new process*/
+	if (get_tapdisk_pid(blkif) <= 0) {
+		DPRINTF("Unable to contact disk process\n");
+		goto fail;
+	}
+
+	/* Both of the following read and write calls will block up to
+	 * max_timeout val*/
+	if (write_msg(blkif->fds[WRITE], CTLMSG_PARAMS, blkif, ptr)
+	    <= 0) {
+		DPRINTF("Write_msg failed - CTLMSG_PARAMS\n");
+		goto fail;
+	}
+
+	if (read_msg(blkif->fds[READ], CTLMSG_IMG, blkif) <= 0) {
+		DPRINTF("Read_msg failure - CTLMSG_IMG\n");
+		goto fail;
+	}
+
+	free(rdctldev);
+	free(wrctldev);
+	free(cmd);
+	return 0;
+
+fail:
+	free(rdctldev);
+	free(wrctldev);
+	free(cmd);
+	ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor);
+	return ret;
+}
+
+/*
+ * New-devmap hook: tell the tapdisk process about the device with
+ * CTLMSG_NEWDEV and wait for CTLMSG_NEWDEV_RSP.
+ * Returns blkif->minor - 1 on success, -EINVAL if either step fails.
+ */
+int map_new_blktapctrl(blkif_t *blkif)
+{
+	int rc;
+
+	DPRINTF("Received a poll for a new devmap\n");
+
+	rc = write_msg(blkif->fds[WRITE], CTLMSG_NEWDEV, blkif, NULL);
+	if (rc <= 0) {
+		DPRINTF("Write_msg failed - CTLMSG_NEWDEV\n");
+		return -EINVAL;
+	}
+
+	rc = read_msg(blkif->fds[READ], CTLMSG_NEWDEV_RSP, blkif);
+	if (rc <= 0) {
+		DPRINTF("Read_msg failed - CTLMSG_NEWDEV_RSP\n");
+		return -EINVAL;
+	}
+
+	DPRINTF("Exiting map_new_blktapctrl\n");
+	return blkif->minor - 1;
+}
+
+/*
+ * Unmap hook: send CTLMSG_CLOSE to the tapdisk process, drop `blkif`
+ * from the active list and close the control descriptors when
+ * del_disktype() says so.
+ * Returns 0, or -EINVAL if the close message could not be sent (in
+ * which case the list and the descriptors are left untouched).
+ */
+int unmap_blktapctrl(blkif_t *blkif)
+{
+	int sent;
+
+	DPRINTF("Unmapping vbd\n");
+
+	sent = write_msg(blkif->fds[WRITE], CTLMSG_CLOSE, blkif, NULL);
+	if (sent <= 0) {
+		DPRINTF("Write_msg failed - CTLMSG_CLOSE\n");
+		return -EINVAL;
+	}
+
+	if (!del_disktype(blkif))
+		return 0;
+
+	close(blkif->fds[WRITE]);
+	close(blkif->fds[READ]);
+	return 0;
+}
+
+/*
+ * Create (if necessary) and open the control FIFO `devname`.
+ *
+ * The FIFO is opened O_RDWR|O_NONBLOCK.  Returns the descriptor, or -1
+ * if the FIFO can neither be created nor already exists, or if it
+ * cannot be opened.  A creation failure used to call exit(0), which
+ * ended the whole daemon with a success status; callers already handle
+ * -1, so the error is reported that way instead.
+ *
+ * NOTE(review): the FIFO is requested with mode 0777 (before umask) --
+ * consider whether a control channel should be this permissive.
+ */
+int open_ctrl_socket(char *devname)
+{
+	int ret;
+	int ipc_fd;
+
+	ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
+	if ( (ret != 0) && (errno != EEXIST) ) {
+		DPRINTF("ERROR: pipe failed (%d)\n", errno);
+		return -1;
+	}
+
+	ipc_fd = open(devname,O_RDWR|O_NONBLOCK);
+
+	if (ipc_fd < 0) {
+		DPRINTF("FD open failed\n");
+		return -1;
+	}
+
+	return ipc_fd;
+}
+
+/* Log the controller version followed by the name of every dtypes[] entry. */
+static void print_drivers(void)
+{
+	int count = sizeof(dtypes)/sizeof(disk_info_t *);
+	int idx = 0;
+
+	DPRINTF("blktapctrl: v1.0.0\n");
+	while (idx < count) {
+		DPRINTF("Found driver: [%s]\n",dtypes[idx]->name);
+		idx++;
+	}
+}
+
+/*
+ * blktapctrl entry point.
+ *
+ * Registers the blkif hooks, opens the blktap0 control device,
+ * connects to xenstore and installs the block-device probe watch,
+ * switches the driver to interpose mode and then services xenstore
+ * watches until `run` is cleared.  Returns 0 after a clean shutdown
+ * and -1 if start-up fails; in both cases the control descriptor and
+ * the xenstore handle are released.
+ *
+ * NOTE(review): nothing visible here installs sig_handler(), so `run`
+ * is never cleared by a signal -- confirm whether signal() calls were
+ * intended.
+ * NOTE(review): the blktap0 node is created with a hard-coded major
+ * number (254) -- confirm it matches the driver.
+ */
+int main(int argc, char *argv[])
+{
+	char *devname = NULL;
+	int tap_pfd, store_pfd, ret, timeout, pfd_count;
+	int ctl_open = 0;	/* set once ctlfd refers to an open device */
+	struct xs_handle *h = NULL;
+	struct pollfd pfd[NUM_POLL_FDS];
+	pid_t process;
+
+	__init_blkif();
+	openlog("BLKTAPCTRL", LOG_CONS|LOG_ODELAY, LOG_DAEMON);
+
+	print_drivers();
+	init_driver_list();
+	init_rng();
+
+	register_new_blkif_hook(blktapctrl_new_blkif);
+	register_new_devmap_hook(map_new_blktapctrl);
+	register_new_unmap_hook(unmap_blktapctrl);
+
+	/*Attach to blktap0 */
+	if (asprintf(&devname,"%s/%s0", BLKTAP_DEV_DIR,
+		     BLKTAP_DEV_NAME) == -1) {
+		DPRINTF("Unable to allocate device name\n");
+		goto open_failed;
+	}
+	make_blktap_dev(devname,254,0);
+	ctlfd = open(devname, O_RDWR);
+	free(devname);	/* the path is not needed once the open was tried */
+	if (ctlfd == -1) {
+		DPRINTF("blktap0 open failed\n");
+		goto open_failed;
+	}
+	ctl_open = 1;
+
+	/* Set up store connection and watch. */
+	h = xs_daemon_open();
+	if (h == NULL) {
+		DPRINTF("xs_daemon_open failed -- "
+			"is xenstore running?\n");
+		goto open_failed;
+	}
+
+	ret = add_blockdevice_probe_watch(h, "Domain-0");
+	if (ret != 0) {
+		DPRINTF("adding device probewatch\n");
+		goto open_failed;
+	}
+
+	ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
+
+	process = getpid();
+	ret = ioctl(ctlfd, BLKTAP_IOCTL_SENDPID, process );
+
+	/*Static pollhooks*/
+	pfd_count = 0;
+	tap_pfd = pfd_count++;
+	pfd[tap_pfd].fd = ctlfd;
+	pfd[tap_pfd].events = POLLIN;
+
+	store_pfd = pfd_count++;
+	pfd[store_pfd].fd = xs_fileno(h);
+	pfd[store_pfd].events = POLLIN;
+
+	while (run) {
+		timeout = 1000; /*Milliseconds*/
+		ret = poll(pfd, pfd_count, timeout);
+
+		if (ret > 0) {
+			if (pfd[store_pfd].revents) {
+				ret = xs_fire_next_watch(h);
+			}
+		}
+	}
+
+	ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
+	xs_daemon_close(h);
+	close(ctlfd);
+	closelog();
+
+	return 0;
+
+ open_failed:
+	DPRINTF("Unable to start blktapctrl\n");
+	if (h != NULL)
+		xs_daemon_close(h);
+	if (ctl_open)
+		close(ctlfd);
+	closelog();
+	return -1;
+}
diff --git a/tools/blktap/drivers/blktapctrl.h b/tools/blktap/drivers/blktapctrl.h
new file mode 100644
index 0000000000..4a5e59577e
--- /dev/null
+++ b/tools/blktap/drivers/blktapctrl.h
@@ -0,0 +1,55 @@
+/* blktapctrl.h
+ *
+ * controller image utils.
+ *
+ * (c) 2004-6 Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+/* Return the `size` field of the image_t stored in blkif->prv. */
+static inline long int tapdisk_get_size(blkif_t *blkif)
+{
+	return ((image_t *)blkif->prv)->size;
+}
+
+/* Return the `secsize` field of the image_t stored in blkif->prv. */
+static inline long int tapdisk_get_secsize(blkif_t *blkif)
+{
+	return ((image_t *)blkif->prv)->secsize;
+}
+
+/* Return the `info` field of the image_t stored in blkif->prv. */
+static inline unsigned tapdisk_get_info(blkif_t *blkif)
+{
+	return ((image_t *)blkif->prv)->info;
+}
+
+struct blkif_ops tapdisk_ops = { /* getter table; blktapctrl_new_blkif() stores its address in blkif->ops */
+ .get_size = tapdisk_get_size, /* reads image_t.size via blkif->prv */
+ .get_secsize = tapdisk_get_secsize, /* reads image_t.secsize via blkif->prv */
+ .get_info = tapdisk_get_info, /* reads image_t.info via blkif->prv */
+}; /* NOTE(review): non-static object defined in a header; a second includer would presumably clash at link time -- confirm */
diff --git a/tools/blktap/drivers/block-aio.c b/tools/blktap/drivers/block-aio.c
new file mode 100644
index 0000000000..ebcfc35f56
--- /dev/null
+++ b/tools/blktap/drivers/block-aio.c
@@ -0,0 +1,327 @@
+/* block-aio.c
+ *
+ * libaio-based raw disk implementation.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ * NB: This code is not thread-safe.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <errno.h>
+#include <libaio.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include "tapdisk.h"
+
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+
+struct pending_aio { /* completion bookkeeping for one queued iocb */
+ td_callback_t cb; /* invoked from tdaio_do_callbacks() when the iocb completes */
+ int id; /* caller-supplied id, handed back to cb */
+ void *private; /* caller-supplied pointer, handed back to cb */
+};
+
+struct tdaio_state { /* per-disk private state of the libaio raw driver */
+ int fd; /* image file or block device, opened by tdaio_open() */
+
+ /* libaio state */
+ io_context_t aio_ctx; /* context used with io_setup/io_submit/io_getevents */
+ struct iocb iocb_list [MAX_AIO_REQS]; /* backing storage for every iocb */
+ struct iocb *iocb_free [MAX_AIO_REQS]; /* stack of currently unused iocbs */
+ struct pending_aio pending_aio[MAX_AIO_REQS]; /* indexed like iocb_list */
+ int iocb_free_count; /* number of valid entries in iocb_free */
+ struct iocb *iocb_queue[MAX_AIO_REQS]; /* prepared but not yet submitted */
+ int iocb_queued; /* number of valid entries in iocb_queue */
+ int poll_fd; /* NB: we require aio_poll support */
+ struct io_event aio_events[MAX_AIO_REQS]; /* output buffer for io_getevents() */
+};
+
+#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
+
+/*
+ * Fill in s->size, s->sector_size and s->info for the image open on 'fd'.
+ *
+ * For a block device the size comes from the BLKGETSIZE ioctl and the
+ * sector size from BLKSSZGET (when available); for anything else the size
+ * is st_size >> SECTOR_SHIFT and the sector size is DEFAULT_SECTOR_SIZE.
+ *
+ * Returns 0 on success, -EINVAL if fstat() or BLKGETSIZE fails.
+ */
+static int get_image_info(struct td_state *s, int fd)
+{
+    int ret;
+    struct stat st;
+
+    ret = fstat(fd, &st);
+    if (ret != 0) {
+        DPRINTF("ERROR: fstat failed, Couldn't stat image");
+        return -EINVAL;
+    }
+
+    if (S_ISBLK(st.st_mode)) {
+        /*
+         * BLKGETSIZE stores an unsigned long.  Read it into a local of
+         * exactly that type instead of writing through a pointer to
+         * s->size, whose width may differ.
+         */
+        unsigned long nr_sectors = 0;
+
+        /*Accessing block device directly*/
+        s->size = 0;
+        if (ioctl(fd, BLKGETSIZE, &nr_sectors) != 0) {
+            DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
+            return -EINVAL;
+        }
+        s->size = nr_sectors;
+
+        DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
+                "sector_shift [%llu]\n",
+                (long long unsigned)(s->size << SECTOR_SHIFT),
+                (long long unsigned)s->size);
+
+        /*Get the sector size*/
+#if defined(BLKSSZGET)
+        {
+            /* BLKSSZGET stores an int; same reasoning as above. */
+            int ssz = DEFAULT_SECTOR_SIZE;
+
+            s->sector_size = DEFAULT_SECTOR_SIZE;
+            /* A failing ioctl leaves the default in place. */
+            if (ioctl(fd, BLKSSZGET, &ssz) == 0)
+                s->sector_size = ssz;
+
+            if (s->sector_size != DEFAULT_SECTOR_SIZE)
+                DPRINTF("Note: sector size is %ld (not %d)\n",
+                        s->sector_size, DEFAULT_SECTOR_SIZE);
+        }
+#else
+        s->sector_size = DEFAULT_SECTOR_SIZE;
+#endif
+
+    } else {
+        /*Local file? try fstat instead*/
+        s->size = (st.st_size >> SECTOR_SHIFT);
+        s->sector_size = DEFAULT_SECTOR_SIZE;
+        DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
+                "sector_shift [%llu]\n",
+                (long long unsigned)(s->size << SECTOR_SHIFT),
+                (long long unsigned)s->size);
+    }
+
+    /*
+     * A zero size is replaced by a fixed value.
+     * NOTE(review): 16836057 looks like a placeholder - confirm intent.
+     */
+    if (s->size == 0) {
+        s->size =((uint64_t) 16836057);
+        s->sector_size = DEFAULT_SECTOR_SIZE;
+    }
+    s->info = 0;
+
+    return 0;
+}
+
+/*
+ * Open the disk image 'name' and initialise the libaio state held in
+ * s->private.
+ *
+ * The image is opened O_DIRECT first; if that fails with EINVAL it is
+ * reopened without O_DIRECT.  On success prv->fd and prv->poll_fd are
+ * valid and the td_state size fields have been filled in.
+ *
+ * Returns 0 on success or a negative value on failure.  On failure after
+ * io_setup() succeeded, the aio context (and the image fd, if it was
+ * opened) is released again so nothing leaks.
+ */
+int tdaio_open (struct td_state *s, const char *name)
+{
+    int i, fd, ret = 0;
+    struct tdaio_state *prv = (struct tdaio_state *)s->private;
+
+    DPRINTF("XXX: block-aio open('%s')", name);
+    /* Initialize AIO */
+    prv->iocb_free_count = MAX_AIO_REQS;
+    prv->iocb_queued = 0;
+
+    prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+    prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
+
+    if (prv->poll_fd < 0) {
+        ret = prv->poll_fd;
+        DPRINTF("Couldn't get fd for AIO poll support. This is "
+                "probably because your kernel does not have the "
+                "aio-poll patch applied.\n");
+        goto done;
+    }
+
+    for (i=0;i<MAX_AIO_REQS;i++)
+        prv->iocb_free[i] = &prv->iocb_list[i];
+
+    /* Open the file */
+    fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
+
+    if ( (fd == -1) && (errno == EINVAL) ) {
+
+        /* Maybe O_DIRECT isn't supported. */
+        fd = open(name, O_RDWR | O_LARGEFILE);
+        if (fd != -1) DPRINTF("WARNING: Accessing image without "
+                              "O_DIRECT! (%s)\n", name);
+
+    } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
+
+    if (fd == -1) {
+        /* Capture errno before logging can overwrite it. */
+        ret = 0 - errno;
+        DPRINTF("Unable to open [%s] (%d)!\n", name, ret);
+        goto fail_ctx;
+    }
+
+    prv->fd = fd;
+
+    ret = get_image_info(s, fd);
+    if (ret != 0) {
+        close(fd);
+        goto fail_ctx;
+    }
+done:
+    return ret;
+
+fail_ctx:
+    /* Undo io_setup() so a failed open does not leak the context. */
+    io_destroy(prv->aio_ctx);
+    return ret;
+}
+
+/*
+ * Prepare a read of 'nb_sectors' sectors starting at 'sector' into 'buf'
+ * and append it to the submit queue; nothing is issued until
+ * tdaio_submit().  cb/id/private are stored for the completion path.
+ *
+ * Returns 0 once queued, -ENOMEM when no iocb is free.
+ */
+int tdaio_queue_read(struct td_state *s, uint64_t sector,
+                     int nb_sectors, char *buf, td_callback_t cb,
+                     int id, void *private)
+{
+    struct tdaio_state *state = (struct tdaio_state *)s->private;
+    struct pending_aio *slot;
+    struct iocb *iocb;
+    uint64_t pos;
+    long idx;
+    int nbytes;
+
+    if (state->iocb_free_count == 0)
+        return -ENOMEM;
+
+    nbytes = nb_sectors * s->sector_size;
+    pos    = sector * (uint64_t)s->sector_size;
+
+    /* Pop the top entry of the free stack. */
+    state->iocb_free_count--;
+    iocb = state->iocb_free[state->iocb_free_count];
+    idx  = IOCB_IDX(state, iocb);
+
+    /* Remember who to notify on completion. */
+    slot = &state->pending_aio[idx];
+    slot->private = private;
+    slot->id      = id;
+    slot->cb      = cb;
+
+    io_prep_pread(iocb, state->fd, buf, nbytes, pos);
+    iocb->data = (void *)idx;
+
+    state->iocb_queue[state->iocb_queued] = iocb;
+    state->iocb_queued++;
+
+    return 0;
+}
+
+/*
+ * Prepare a write of 'nb_sectors' sectors from 'buf' to 'sector' and
+ * append it to the submit queue; nothing is issued until tdaio_submit().
+ * cb/id/private are stored for the completion path.
+ *
+ * Returns 0 once queued, -ENOMEM when no iocb is free.
+ */
+int tdaio_queue_write(struct td_state *s, uint64_t sector,
+                      int nb_sectors, char *buf, td_callback_t cb,
+                      int id, void *private)
+{
+    struct tdaio_state *state = (struct tdaio_state *)s->private;
+    struct pending_aio *slot;
+    struct iocb *iocb;
+    uint64_t pos;
+    long idx;
+    int nbytes;
+
+    if (state->iocb_free_count == 0)
+        return -ENOMEM;
+
+    nbytes = nb_sectors * s->sector_size;
+    pos    = sector * (uint64_t)s->sector_size;
+
+    /* Pop the top entry of the free stack. */
+    state->iocb_free_count--;
+    iocb = state->iocb_free[state->iocb_free_count];
+    idx  = IOCB_IDX(state, iocb);
+
+    /* Remember who to notify on completion. */
+    slot = &state->pending_aio[idx];
+    slot->private = private;
+    slot->id      = id;
+    slot->cb      = cb;
+
+    io_prep_pwrite(iocb, state->fd, buf, nbytes, pos);
+    iocb->data = (void *)idx;
+
+    state->iocb_queue[state->iocb_queued] = iocb;
+    state->iocb_queued++;
+
+    return 0;
+}
+
+/*
+ * Hand every queued iocb to io_submit() and empty the queue.
+ * Returns whatever io_submit() returned.
+ */
+int tdaio_submit(struct td_state *s)
+{
+    struct tdaio_state *state = (struct tdaio_state *)s->private;
+    int pending = state->iocb_queued;
+
+    /* The queue is considered drained regardless of the outcome. */
+    state->iocb_queued = 0;
+
+    /* XXX: TODO: Handle error conditions here. */
+    return io_submit(state->aio_ctx, pending, state->iocb_queue);
+}
+
+/*
+ * Return a freshly allocated array of MAX_IOFD descriptors for the caller
+ * to poll on.  Slot 0 holds the aio poll fd; all other slots are 0.
+ *
+ * Returns NULL if the array cannot be allocated.
+ */
+int *tdaio_get_fd(struct td_state *s)
+{
+    struct tdaio_state *prv = (struct tdaio_state *)s->private;
+    int *fds;
+
+    /* calloc() zero-fills, which is the value used for unused slots. */
+    fds = calloc(MAX_IOFD, sizeof(int));
+    if (fds == NULL)
+        return NULL;
+
+    fds[0] = prv->poll_fd;
+
+    return fds;
+}
+
+/* Tear down the aio context and close the image descriptor. Always returns 0. */
+int tdaio_close(struct td_state *s)
+{
+    struct tdaio_state *state = (struct tdaio_state *)s->private;
+
+    io_destroy(state->aio_ctx);
+    close(state->fd);
+    return 0;
+}
+
+/*
+ * Reap every completed aio request without blocking, invoke its stored
+ * callback and return its iocb to the free stack.
+ *
+ * The callback receives a non-zero status when the kernel reported one in
+ * res2, or when fewer bytes were transferred than requested (previously
+ * the latter was only logged and then reported as success).
+ *
+ * 'sid' is unused here.  Returns the sum of the callbacks' return values.
+ */
+int tdaio_do_callbacks(struct td_state *s, int sid)
+{
+    int nr_events, i, rsp = 0;
+    struct tdaio_state *prv = (struct tdaio_state *)s->private;
+
+    /* Non-blocking test for completed io. */
+    nr_events = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS,
+                             prv->aio_events, NULL);
+
+    /* A negative (error) return simply skips the loop. */
+    for (i = 0; i < nr_events; i++) {
+        struct io_event *ep = &prv->aio_events[i];
+        struct iocb *io = ep->obj;
+        struct pending_aio *pio;
+        int status = ep->res2;
+
+        /* io->data carries the index stored at queue time. */
+        pio = &prv->pending_aio[(long)io->data];
+
+        if (ep->res != io->u.c.nbytes) {
+            /* TODO: handle this case better. */
+            DPRINTF("AIO did less than I asked it to. \n");
+            /* Make sure a short/failed transfer is not reported as OK. */
+            if (status == 0)
+                status = -1;
+        }
+        rsp += pio->cb(s, status, pio->id, pio->private);
+
+        prv->iocb_free[prv->iocb_free_count++] = io;
+    }
+    return rsp;
+}
+
+struct tap_disk tapdisk_aio = { /* positional initialiser: order must match struct tap_disk (declared elsewhere) */
+ "tapdisk_aio", /* presumably the driver name - confirm against struct tap_disk */
+ sizeof(struct tdaio_state), /* size of this driver's private state */
+ tdaio_open, /* open the image and set up aio */
+ tdaio_queue_read, /* queue a read */
+ tdaio_queue_write, /* queue a write */
+ tdaio_submit, /* submit everything queued */
+ tdaio_get_fd, /* descriptors to poll on */
+ tdaio_close, /* release aio context and image fd */
+ tdaio_do_callbacks, /* reap completions and run callbacks */
+};
diff --git a/tools/blktap/drivers/block-qcow.c b/tools/blktap/drivers/block-qcow.c
new file mode 100644
index 0000000000..7eab8c9834
--- /dev/null
+++ b/tools/blktap/drivers/block-qcow.c
@@ -0,0 +1,1369 @@
+/* block-qcow.c
+ *
+ * Asynchronous Qemu copy-on-write disk implementation.
+ * Code based on the Qemu implementation
+ * (see copyright notice below)
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ */
+
+/*
+ * Block driver for the QCOW format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files(the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <string.h>
+#include <zlib.h>
+#include <inttypes.h>
+#include <libaio.h>
+#include <openssl/md5.h>
+#include "bswap.h"
+#include "aes.h"
+#include "tapdisk.h"
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+
+/******AIO DEFINES******/
+#define REQUEST_ASYNC_FD 1
+#define MAX_QCOW_IDS 0xFFFF
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+
+struct pending_aio { /* bookkeeping for one queued iocb, filled by async_read()/async_write() */
+ td_callback_t cb; /* completion callback supplied by the caller */
+ int id; /* caller-supplied id */
+ void *private; /* caller-supplied pointer */
+ int nb_sectors; /* request length in 512-byte sectors (size / 512) */
+ char *buf; /* data buffer of the request */
+ uint64_t sector; /* sector number the request was issued for */
+ int qcow_idx; /* request slot index, see nr_reqs[] / get_free_idx() */
+};
+
+#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
+
+/* Non-zero iff _b is non-zero; the argument is parenthesised so that
+ * expressions with lower-precedence operators expand correctly. */
+#define ZERO_TEST(_b) ((_b) | 0x00)
+
+/**************************************************************/
+/* QEMU COW block driver with compression and encryption support */
+
+/* Magic numbers: the three ASCII letters followed by 0xfb. */
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define XEN_MAGIC (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb)
+#define QCOW_VERSION 1
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+/* Bit 63 of an L2 entry flags a compressed cluster.  Unsigned, because
+ * shifting a signed 1 into the sign bit is undefined behaviour. */
+#define QCOW_OFLAG_COMPRESSED (1ULL << 63)
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+typedef struct QCowHeader { /* image header; tdqcow_open() converts the multi-byte fields from big-endian */
+ uint32_t magic; /* must equal QCOW_MAGIC */
+ uint32_t version; /* rejected when greater than QCOW_VERSION */
+ uint64_t backing_file_offset; /* file offset of the backing file name, 0 if none */
+ uint32_t backing_file_size; /* length of the backing file name in bytes */
+ uint32_t mtime;
+ uint64_t size; /* in bytes */
+ uint8_t cluster_bits; /* log2 of the cluster size in bytes (at least 9) */
+ uint8_t l2_bits; /* log2 of the number of entries per L2 table */
+ uint32_t crypt_method; /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
+ uint64_t l1_table_offset; /* file offset of the L1 table */
+} QCowHeader;
+
+/*Extended header for Xen enhancements*/
+typedef struct QCowHeader_ext { /* read from the first sector, directly after QCowHeader */
+ uint32_t xmagic; /* must equal XEN_MAGIC for the extension to be used */
+ uint32_t cksum; /* compared with gen_cksum() over the L1 table */
+ uint32_t min_cluster_alloc; /* copied into tdqcow_state.min_cluster_alloc */
+} QCowHeader_ext;
+
+#define L2_CACHE_SIZE 16 /*Fixed allocation in Qemu*/
+
+struct tdqcow_state { /* per-disk private state of the qcow driver */
+ int fd; /*Main Qcow file descriptor */
+ uint64_t fd_end; /*Store a local record of file length */
+ int bfd; /*Backing file descriptor, -1 if none*/
+ char *name; /*Record of the filename*/
+ int poll_pipe[2]; /*dummy fd for polling on */
+ int encrypted; /*File contents are encrypted or plain*/
+ int cluster_bits; /*Determines length of cluster as
+ *indicated by file hdr*/
+ int cluster_size; /*Length of cluster in bytes
+ *(1 << cluster_bits)*/
+ int cluster_sectors; /*Number of sectors per cluster*/
+ int cluster_alloc; /*Blktap fix for allocating full
+ *extents*/
+ int min_cluster_alloc; /*Blktap historical extent alloc*/
+ int l2_bits; /*log2 of the number of entries per
+ *L2 table*/
+ int l2_size; /*Entries per L2 table (1 << l2_bits)*/
+ int l1_size; /*Number of L1 table entries*/
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset; /*L1 table offset from beginning of
+ *file*/
+ uint64_t *l1_table; /*L1 table entries*/
+ uint64_t *l2_cache; /*We maintain a cache of size
+ *L2_CACHE_SIZE of most read entries*/
+ uint64_t l2_cache_offsets[L2_CACHE_SIZE]; /*File offset of the L2 table
+ *held in each cache slot*/
+ uint32_t l2_cache_counts[L2_CACHE_SIZE]; /*Cache access record*/
+ uint8_t *cluster_cache; /*Most recently decompressed cluster*/
+ uint8_t *cluster_data; /*Scratch buffer, one cluster long*/
+ uint8_t *sector_lock; /*Locking bitmap for AIO reads/writes*/
+ uint64_t cluster_cache_offset; /*File offset of the cluster held in
+ *cluster_cache, -1 if none*/
+ uint32_t crypt_method; /*current crypt method, 0 if no
+ *key yet */
+ uint32_t crypt_method_header; /*crypt method read from the header*/
+ AES_KEY aes_encrypt_key; /*AES key*/
+ AES_KEY aes_decrypt_key; /*AES key*/
+ /* libaio state */
+ io_context_t aio_ctx;
+ int nr_reqs [MAX_QCOW_IDS]; /*Queued async I/Os per request slot;
+ *0 means the slot is free*/
+ struct iocb iocb_list [MAX_AIO_REQS];
+ struct iocb *iocb_free [MAX_AIO_REQS];
+ struct pending_aio pending_aio[MAX_AIO_REQS];
+ int iocb_free_count;
+ struct iocb *iocb_queue[MAX_AIO_REQS];
+ int iocb_queued;
+ int poll_fd; /* NB: we require aio_poll support */
+ struct io_event aio_events[MAX_AIO_REQS];
+};
+
+static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
+
+/*
+ * Allocate the per-sector lock array (one byte per sector of bs->size)
+ * and set up the libaio context, free-iocb stack and request counters.
+ *
+ * Returns 0 on success, -1 on failure.  On failure nothing allocated here
+ * is left behind and s->sector_lock is NULL.
+ */
+static int init_aio_state(struct td_state *bs)
+{
+    int i;
+    struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+
+    /*Initialize Locking bitmap*/
+    s->sector_lock = calloc(1, bs->size);
+
+    if (!s->sector_lock) {
+        DPRINTF("Failed to allocate sector lock\n");
+        goto fail;
+    }
+
+    /* Initialize AIO */
+    s->iocb_free_count = MAX_AIO_REQS;
+    s->iocb_queued = 0;
+
+    /*Signal kernel to create Poll FD for Async completion events*/
+    s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+    s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);
+
+    if (s->poll_fd < 0) {
+        DPRINTF("Retrieving Async poll fd failed\n");
+        goto fail_free;
+    }
+
+    for (i=0;i<MAX_AIO_REQS;i++)
+        s->iocb_free[i] = &s->iocb_list[i];
+    for (i=0;i<MAX_QCOW_IDS;i++)
+        s->nr_reqs[i] = 0;
+    DPRINTF("AIO state initialised\n");
+
+    return 0;
+
+ fail_free:
+    /* Do not leak the lock array when io_setup() fails. */
+    free(s->sector_lock);
+    s->sector_lock = NULL;
+ fail:
+    return -1;
+}
+
+/*
+ * Scan 'len' bytes at 'buf'.
+ * Return:
+ * 1 if every byte is zero (also when len <= 0)
+ * 0 as soon as a non-zero byte is seen
+ */
+static inline int IS_ZERO(char *buf, int len)
+{
+    char *p;
+    int left;
+
+    for (p = buf, left = len; left > 0; left--, p++) {
+        if (ZERO_TEST(*p))
+            return 0;
+    }
+    return 1;
+}
+
+/*
+ * Compute a 32-bit checksum of 'len' bytes at 'ptr': the first four bytes
+ * of the MD5 digest, copied as-is into a uint32_t.
+ *
+ * Returns 0 if MD5() does not hand back the digest buffer.  The digest
+ * lives on the stack, so there is nothing to allocate or leak.
+ */
+static uint32_t gen_cksum(char *ptr, int len)
+{
+    unsigned char md[MD5_DIGEST_LENGTH];
+    uint32_t ret;
+
+    if (MD5((unsigned char *)ptr, len, md) != md)
+        return 0;
+
+    memcpy(&ret, md, sizeof(uint32_t));
+    return ret;
+}
+
+/*
+ * Install 'key' as the AES-128 key for this image.  At most the first 16
+ * characters of the string are used; shorter keys are zero padded.
+ * s->crypt_method is switched to the method recorded in the header before
+ * the key schedules are built.
+ *
+ * Returns 0 on success, -1 if either key schedule cannot be set up.
+ */
+static int qcow_set_key(struct td_state *bs, const char *key)
+{
+    struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+    uint8_t keybuf[16];
+    int i;
+
+    memset(keybuf, 0, 16);
+    /* XXX: we could compress the chars to 7 bits to increase
+       entropy */
+    for (i = 0; i < 16 && key[i] != '\0'; i++)
+        keybuf[i] = key[i];
+
+    s->crypt_method = s->crypt_method_header;
+
+    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0 ||
+        AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+        return -1;
+#if 0
+    /* test */
+    {
+        uint8_t in[16];
+        uint8_t out[16];
+        uint8_t tmp[16];
+        for (i=0; i<16; i++)
+            in[i] = i;
+        AES_encrypt(in, tmp, &s->aes_encrypt_key);
+        AES_decrypt(tmp, out, &s->aes_decrypt_key);
+        for (i = 0; i < 16; i++)
+            DPRINTF(" %02x", tmp[i]);
+        DPRINTF("\n");
+        for (i = 0; i < 16; i++)
+            DPRINTF(" %02x", out[i]);
+        DPRINTF("\n");
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Prepare an asynchronous read of 'size' bytes at file offset 'offset' of
+ * 'fd' into 'buf' and append it to the submit queue.  The callback data
+ * (cb, id, private) and the request description (sector, qcow_idx,
+ * size / 512 sectors) are recorded for the completion path.
+ *
+ * Returns 1 when an iocb was queued, 0 when no free iocb was available
+ * (popping from an empty free list would index iocb_free[-1]).
+ */
+static int async_read(struct tdqcow_state *s, int fd, int size,
+                      uint64_t offset,
+                      char *buf, td_callback_t cb,
+                      int id, uint64_t sector, int qcow_idx, void *private)
+{
+    struct iocb *io;
+    struct pending_aio *pio;
+    long ioidx;
+
+    if (s->iocb_free_count <= 0)
+        return 0;
+
+    io = s->iocb_free[--s->iocb_free_count];
+
+    ioidx = IOCB_IDX(s, io);
+    pio = &s->pending_aio[ioidx];
+    pio->cb = cb;
+    pio->id = id;
+    pio->private = private;
+    pio->nb_sectors = size/512;
+    pio->buf = buf;
+    pio->sector = sector;
+    pio->qcow_idx = qcow_idx;
+
+    io_prep_pread(io, fd, buf, size, offset);
+    /* The index travels with the iocb so completion can find 'pio'. */
+    io->data = (void *)ioidx;
+
+    s->iocb_queue[s->iocb_queued++] = io;
+
+    return 1;
+}
+
+/*
+ * Prepare an asynchronous write of 'size' bytes from 'buf' to file offset
+ * 'offset' of 'fd' and append it to the submit queue.  The callback data
+ * (cb, id, private) and the request description (sector, qcow_idx,
+ * size / 512 sectors) are recorded for the completion path.
+ *
+ * Returns 1 when an iocb was queued, 0 when no free iocb was available
+ * (popping from an empty free list would index iocb_free[-1]).
+ */
+static int async_write(struct tdqcow_state *s, int fd, int size,
+                       uint64_t offset,
+                       char *buf, td_callback_t cb,
+                       int id, uint64_t sector, int qcow_idx, void *private)
+{
+    struct iocb *io;
+    struct pending_aio *pio;
+    long ioidx;
+
+    if (s->iocb_free_count <= 0)
+        return 0;
+
+    io = s->iocb_free[--s->iocb_free_count];
+
+    ioidx = IOCB_IDX(s, io);
+    pio = &s->pending_aio[ioidx];
+    pio->cb = cb;
+    pio->id = id;
+    pio->private = private;
+    pio->nb_sectors = size/512;
+    pio->buf = buf;
+    pio->sector = sector;
+    pio->qcow_idx = qcow_idx;
+
+    io_prep_pwrite(io, fd, buf, size, offset);
+    /* The index travels with the iocb so completion can find 'pio'. */
+    io->data = (void *)ioidx;
+
+    s->iocb_queue[s->iocb_queued++] = io;
+
+    return 1;
+}
+
+/*TODO: Fix sector span!*/
+/* Return 1 when the lock count of 'sector' is zero, 0 otherwise. */
+static int aio_can_lock(struct tdqcow_state *s, uint64_t sector)
+{
+    if (s->sector_lock[sector])
+        return 0;
+    return 1;
+}
+
+/* Bump the lock count of 'sector' and return the new count. */
+static int aio_lock(struct tdqcow_state *s, uint64_t sector)
+{
+    s->sector_lock[sector] += 1;
+    return s->sector_lock[sector];
+}
+
+/* Drop one lock reference on 'sector'; a count of zero is left alone. */
+static void aio_unlock(struct tdqcow_state *s, uint64_t sector)
+{
+    if (s->sector_lock[sector] != 0)
+        s->sector_lock[sector] -= 1;
+}
+
+/*TODO - Use a freelist*/
+/*
+ * Linear search for the lowest request slot whose nr_reqs[] count is 0.
+ * Returns the slot index, or -1 when all MAX_QCOW_IDS slots are busy.
+ */
+static int get_free_idx(struct tdqcow_state *s)
+{
+    int idx = 0;
+
+    while (idx < MAX_QCOW_IDS) {
+        if (s->nr_reqs[idx] == 0)
+            return idx;
+        idx++;
+    }
+    return -1;
+}
+
+/*
+ * The crypt function is compatible with the linux cryptoloop
+ * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+ * supported .
+ *
+ * Each 512-byte sector is run through AES-CBC on its own, with an IV
+ * whose first 8 bytes are the little-endian sector number and whose
+ * remaining 8 bytes are zero.  'enc' is passed straight to
+ * AES_cbc_encrypt() to select the direction.
+ */
+static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
+                            uint8_t *out_buf, const uint8_t *in_buf,
+                            int nb_sectors, int enc,
+                            const AES_KEY *key)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } iv;
+    const uint8_t *src = in_buf;
+    uint8_t *dst = out_buf;
+    int64_t cur = sector_num;
+    int left = nb_sectors;
+
+    while (left > 0) {
+        iv.ll[0] = cpu_to_le64(cur);
+        iv.ll[1] = 0;
+        AES_cbc_encrypt(src, dst, 512, key, iv.b, enc);
+
+        src += 512;
+        dst += 512;
+        cur++;
+        left--;
+    }
+}
+
+
+/*
+ * Translate the guest byte offset 'offset' into the file offset of the
+ * cluster that holds it, walking the L1 table and the (cached) L2 table.
+ *
+ * 'allocate' is:
+ *
+ * 0 to not allocate.
+ *
+ * 1 to allocate a normal cluster (for sector indexes 'n_start' to
+ * 'n_end')
+ *
+ * 2 to allocate a compressed cluster of size
+ * 'compressed_size'. 'compressed_size' must be > 0 and <
+ * cluster_size
+ *
+ * return 0 if not allocated.
+ *
+ * 0 is also returned when a metadata read/write or a bounce-buffer
+ * allocation fails; this function returns an unsigned offset, so a
+ * negative error code cannot be represented and 0 is the only failure
+ * value.
+ *
+ * L1 and L2 metadata writes go through 4 KByte aligned bounce buffers
+ * because the image is opened O_DIRECT.  The L1 update is written
+ * synchronously; the L2 update is queued with async_write() (id -2), and
+ * the bounce buffer is handed over to that request.
+ */
+static uint64_t get_cluster_offset(struct td_state *bs,
+                                   uint64_t offset, int allocate,
+                                   int compressed_size,
+                                   int n_start, int n_end)
+{
+    struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+    int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
+    char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
+    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
+    uint32_t min_count;
+    int new_l2_table;
+
+    /*Check L1 table for the extent offset*/
+    l1_index = offset >> (s->l2_bits + s->cluster_bits);
+    l2_offset = s->l1_table[l1_index];
+    new_l2_table = 0;
+    if (!l2_offset) {
+        if (!allocate)
+            return 0;
+        /*
+         * allocating a new l2 entry + extent
+         * at the end of the file, we must also
+         * update the L1 entry safely.
+         */
+        l2_offset = s->fd_end;
+
+        /* round to cluster size */
+        l2_offset = (l2_offset + s->cluster_size - 1)
+            & ~(s->cluster_size - 1);
+
+        /* update the L1 entry */
+        s->l1_table[l1_index] = l2_offset;
+
+        /*Truncate file for L2 table
+         *(initialised to zero in case we crash)*/
+        ftruncate(s->fd, l2_offset + (s->l2_size * sizeof(uint64_t)));
+        s->fd_end += (s->l2_size * sizeof(uint64_t));
+
+        /*Update the L1 table entry on disk
+         * (for O_DIRECT we write 4KByte blocks)*/
+        l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
+        l1_ptr = (char *)s->l1_table + (l1_sector << 12);
+
+        if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
+            DPRINTF("ERROR allocating memory for L1 table\n");
+            /* tmp_ptr is not valid; do not copy into it. */
+            return 0;
+        }
+        memcpy(tmp_ptr, l1_ptr, 4096);
+
+        /*
+         * Issue non-asynchronous L1 write.
+         * For safety, we must ensure that
+         * entry is written before blocks.
+         */
+        lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
+        if (write(s->fd, tmp_ptr, 4096) != 4096) {
+            free(tmp_ptr);
+            return 0;
+        }
+        free(tmp_ptr);
+
+        new_l2_table = 1;
+        goto cache_miss;
+    } else if (s->min_cluster_alloc == s->l2_size) {
+        /*Fast-track the request*/
+        cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
+        l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+        return cluster_offset + (l2_index * s->cluster_size);
+    }
+
+    /*Check to see if L2 entry is already cached*/
+    for (i = 0; i < L2_CACHE_SIZE; i++) {
+        if (l2_offset == s->l2_cache_offsets[i]) {
+            /* increment the hit count */
+            if (++s->l2_cache_counts[i] == 0xffffffff) {
+                for (j = 0; j < L2_CACHE_SIZE; j++) {
+                    s->l2_cache_counts[j] >>= 1;
+                }
+            }
+            l2_table = s->l2_cache + (i << s->l2_bits);
+            goto found;
+        }
+    }
+
+cache_miss:
+    /* not found: load a new entry in the least used one */
+    min_index = 0;
+    min_count = 0xffffffff;
+    for (i = 0; i < L2_CACHE_SIZE; i++) {
+        if (s->l2_cache_counts[i] < min_count) {
+            min_count = s->l2_cache_counts[i];
+            min_index = i;
+        }
+    }
+    l2_table = s->l2_cache + (min_index << s->l2_bits);
+
+    /*If extent pre-allocated, read table from disk,
+     *otherwise write new table to disk*/
+    if (new_l2_table) {
+        /*Should we allocate the whole extent? Adjustable parameter.*/
+        if (s->cluster_alloc == s->l2_size) {
+            cluster_offset = l2_offset +
+                (s->l2_size * sizeof(uint64_t));
+            cluster_offset = (cluster_offset + s->cluster_size - 1)
+                & ~(s->cluster_size - 1);
+            ftruncate(s->fd, cluster_offset +
+                      (s->cluster_size * s->l2_size));
+            s->fd_end = cluster_offset +
+                (s->cluster_size * s->l2_size);
+            for (i = 0; i < s->l2_size; i++) {
+                l2_table[i] = cpu_to_be64(cluster_offset +
+                                          (i*s->cluster_size));
+            }
+        } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+
+        lseek(s->fd, l2_offset, SEEK_SET);
+        if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
+            s->l2_size * sizeof(uint64_t))
+            return 0;
+    } else {
+        lseek(s->fd, l2_offset, SEEK_SET);
+        if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
+            s->l2_size * sizeof(uint64_t))
+            return 0;
+    }
+
+    /*Update the cache entries*/
+    s->l2_cache_offsets[min_index] = l2_offset;
+    s->l2_cache_counts[min_index] = 1;
+
+found:
+    /*The extent is split into 's->l2_size' blocks of
+     *size 's->cluster_size'*/
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+    if (!cluster_offset ||
+        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
+        if (!allocate)
+            return 0;
+
+        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
+            (n_end - n_start) < s->cluster_sectors) {
+            /* cluster is already allocated but compressed, we must
+               decompress it in the case it is not completely
+               overwritten */
+            if (decompress_cluster(s, cluster_offset) < 0)
+                return 0;
+            cluster_offset = lseek(s->fd, 0, SEEK_END);
+            cluster_offset = (cluster_offset + s->cluster_size - 1)
+                & ~(s->cluster_size - 1);
+            /* write the cluster content - not asynchronous */
+            lseek(s->fd, cluster_offset, SEEK_SET);
+            if (write(s->fd, s->cluster_cache, s->cluster_size) !=
+                s->cluster_size)
+                return 0;
+        } else {
+            /* allocate a new cluster */
+            cluster_offset = lseek(s->fd, 0, SEEK_END);
+            if (allocate == 1) {
+                /* round to cluster size */
+                cluster_offset =
+                    (cluster_offset + s->cluster_size - 1)
+                    & ~(s->cluster_size - 1);
+                ftruncate(s->fd, cluster_offset +
+                          s->cluster_size);
+                /* if encrypted, we must initialize the cluster
+                   content which won't be written */
+                if (s->crypt_method &&
+                    (n_end - n_start) < s->cluster_sectors) {
+                    uint64_t start_sect;
+                    start_sect = (offset &
+                                  ~(s->cluster_size - 1))
+                        >> 9;
+                    memset(s->cluster_data + 512,
+                           0xaa, 512);
+                    for (i = 0; i < s->cluster_sectors;i++)
+                    {
+                        if (i < n_start || i >= n_end)
+                        {
+                            encrypt_sectors(s, start_sect + i,
+                                            s->cluster_data,
+                                            s->cluster_data + 512, 1, 1,
+                                            &s->aes_encrypt_key);
+                            lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
+                            if (write(s->fd, s->cluster_data, 512) != 512)
+                                return 0;
+                        }
+                    }
+                }
+            } else {
+                cluster_offset |= QCOW_OFLAG_COMPRESSED |
+                    (uint64_t)compressed_size
+                    << (63 - s->cluster_bits);
+            }
+        }
+        /* update L2 table */
+        tmp = cpu_to_be64(cluster_offset);
+        l2_table[l2_index] = tmp;
+
+        /*For IO_DIRECT we write 4KByte blocks*/
+        l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
+        l2_ptr = (char *)l2_table + (l2_sector << 12);
+
+        if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
+            DPRINTF("ERROR allocating memory for L2 table\n");
+            /* tmp_ptr2 is not valid; do not copy into it. */
+            return 0;
+        }
+        memcpy(tmp_ptr2, l2_ptr, 4096);
+        aio_lock(s, offset >> 9);
+        async_write(s, s->fd, 4096, l2_offset + (l2_sector << 12),
+                    tmp_ptr2, 0, -2, offset >> 9, 0, NULL);
+    }
+    return cluster_offset;
+}
+
+/*
+ * Warm the L2 cache: look up the first sector of each cluster in turn
+ * (without allocating) until L2_CACHE_SIZE lookups have found an
+ * allocated cluster or the end of the image is reached.
+ *
+ * The sector index is 64 bits wide so that neither the loop counter nor
+ * the 'sector << 9' byte offset overflows on images larger than 2 GiB.
+ */
+static void init_cluster_cache(struct td_state *bs)
+{
+    struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+    uint32_t count = 0;
+    uint64_t sec;
+    int cluster_entries;
+
+    cluster_entries = s->cluster_size / 512;
+    DPRINTF("Initialising Cluster cache, %d sectors per cluster (%d cluster size)\n",
+            cluster_entries, s->cluster_size);
+
+    for (sec = 0; sec < bs->size; sec += cluster_entries) {
+        if (get_cluster_offset(bs, sec << 9, 0, 0, 0, 1)) count++;
+        if (count >= L2_CACHE_SIZE) return;
+    }
+    DPRINTF("Finished cluster initialisation, added %d entries\n", count);
+    return;
+}
+
+/*
+ * Report whether the cluster containing 'sector_num' has a mapping.
+ * *pnum receives the number of sectors, starting at 'sector_num' and
+ * capped at 'nb_sectors', that lie within that same cluster.
+ *
+ * Returns 1 if mapped, 0 if not.
+ */
+static int qcow_is_allocated(struct td_state *bs, int64_t sector_num,
+                             int nb_sectors, int *pnum)
+{
+    struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+    uint64_t mapping;
+    int in_cluster, run;
+
+    mapping = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+
+    in_cluster = sector_num & (s->cluster_sectors - 1);
+    run = s->cluster_sectors - in_cluster;
+    *pnum = (run > nb_sectors) ? nb_sectors : run;
+
+    return (mapping != 0);
+}
+
+/*
+ * Inflate 'buf_size' bytes of raw deflate data (window bits -12) from
+ * 'buf' into 'out_buf'.
+ *
+ * Returns 0 only if inflate() ended with Z_STREAM_END or Z_BUF_ERROR and
+ * produced exactly 'out_buf_size' bytes; -1 otherwise.
+ */
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+                             const uint8_t *buf, int buf_size)
+{
+    z_stream zs;
+    int rc, produced, ok;
+
+    memset(&zs, 0, sizeof(zs));
+
+    zs.next_in = (uint8_t *)buf;
+    zs.avail_in = buf_size;
+    zs.next_out = out_buf;
+    zs.avail_out = out_buf_size;
+
+    if (inflateInit2(&zs, -12) != Z_OK)
+        return -1;
+
+    rc = inflate(&zs, Z_FINISH);
+    produced = zs.next_out - out_buf;
+    inflateEnd(&zs);
+
+    ok = (rc == Z_STREAM_END || rc == Z_BUF_ERROR) &&
+         (produced == out_buf_size);
+    return ok ? 0 : -1;
+}
+
+/*
+ * Make s->cluster_cache hold the decompressed contents of the compressed
+ * cluster described by the L2 entry 'cluster_offset'.  Nothing is done
+ * when the cache already holds that cluster.
+ *
+ * Returns 0 on success, -1 on a short read or an inflate failure.  On an
+ * inflate failure the cache is marked empty, since its buffer may have
+ * been partly overwritten and must not be served under the old tag.
+ */
+static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
+{
+    int ret, csize;
+    uint64_t coffset;
+
+    coffset = cluster_offset & s->cluster_offset_mask;
+    if (s->cluster_cache_offset != coffset) {
+        /* The compressed length sits in the bits above the offset. */
+        csize = cluster_offset >> (63 - s->cluster_bits);
+        csize &= (s->cluster_size - 1);
+        lseek(s->fd, coffset, SEEK_SET);
+        ret = read(s->fd, s->cluster_data, csize);
+        if (ret != csize)
+            return -1;
+
+        /* cluster_cache is about to be clobbered: drop the old tag. */
+        s->cluster_cache_offset = -1;
+        if (decompress_buffer(s->cluster_cache, s->cluster_size,
+                              s->cluster_data, csize) < 0) {
+            return -1;
+        }
+        s->cluster_cache_offset = coffset;
+    }
+    return 0;
+}
+
+/*
+ * Open the qcow image 'name' and initialise the driver state in
+ * bs->private.
+ *
+ * Steps: create the dummy poll pipe, open the image O_DIRECT, read and
+ * validate the header from the first 512 bytes, load the L1 table,
+ * allocate the L2 cache and cluster buffers, open the backing file if the
+ * header names one, honour the Xen extended header when its magic and L1
+ * checksum match, and finally set up the AIO state.
+ *
+ * Returns 0 on success; -errno if the pipe cannot be created; -1 on any
+ * other failure.  Every failure path releases all descriptors and buffers
+ * acquired so far.
+ */
+int tdqcow_open (struct td_state *bs, const char *name)
+{
+    int fd, tmp_fd = -1, len, shift, ret, size, l1_table_size;
+    struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+    char *buf = NULL;
+    QCowHeader *header;
+    QCowHeader_ext *exthdr;
+    uint32_t cksum;
+
+    /* Put everything the fail path releases into a known state. */
+    s->fd = -1;
+    s->bfd = -1;
+    s->name = NULL;
+    s->l1_table = NULL;
+    s->l2_cache = NULL;
+    s->cluster_cache = NULL;
+    s->cluster_data = NULL;
+    s->sector_lock = NULL;
+
+    DPRINTF("QCOW: Opening %s\n",name);
+    /* set up a pipe so that we can hand back a poll fd that won't fire.*/
+    ret = pipe(s->poll_pipe);
+    if (ret != 0)
+        return (0 - errno);
+
+    fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
+    if (fd < 0) {
+        DPRINTF("Unable to open %s (%d)\n",name,0 - errno);
+        close(s->poll_pipe[0]);
+        close(s->poll_pipe[1]);
+        return -1;
+    }
+
+    s->fd = fd;
+    if (asprintf(&s->name,"%s", name) < 0) {
+        /* contents of s->name are undefined after a failure */
+        s->name = NULL;
+        goto fail;
+    }
+
+    /* Both headers are parsed out of the single 512-byte read below. */
+    ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512);
+
+    ret = posix_memalign((void **)&buf, 512, 512);
+    if (ret != 0) {
+        buf = NULL;
+        goto fail;
+    }
+
+    if (read(fd, buf, 512) != 512)
+        goto fail;
+
+    header = (QCowHeader *)buf;
+    be32_to_cpus(&header->magic);
+    be32_to_cpus(&header->version);
+    be64_to_cpus(&header->backing_file_offset);
+    be32_to_cpus(&header->backing_file_size);
+    be32_to_cpus(&header->mtime);
+    be64_to_cpus(&header->size);
+    be32_to_cpus(&header->crypt_method);
+    be64_to_cpus(&header->l1_table_offset);
+
+    if (header->magic != QCOW_MAGIC || header->version > QCOW_VERSION)
+        goto fail;
+    if (header->size <= 1 || header->cluster_bits < 9)
+        goto fail;
+    if (header->crypt_method > QCOW_CRYPT_AES)
+        goto fail;
+    /*
+     * The bit counts come straight from the file and are used as shift
+     * counts on int and long long values below; reject anything that
+     * would shift out of range.
+     */
+    if (header->cluster_bits > 30 || header->l2_bits > 30 ||
+        header->cluster_bits + header->l2_bits > 62)
+        goto fail;
+    s->crypt_method_header = header->crypt_method;
+    if (s->crypt_method_header)
+        s->encrypted = 1;
+    s->cluster_bits = header->cluster_bits;
+    s->cluster_size = 1 << s->cluster_bits;
+    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+    s->l2_bits = header->l2_bits;
+    s->l2_size = 1 << s->l2_bits;
+    s->cluster_alloc = s->l2_size;
+    bs->size = header->size / 512;
+    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
+
+    /* read the level 1 table */
+    shift = s->cluster_bits + s->l2_bits;
+    s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
+
+    s->l1_table_offset = header->l1_table_offset;
+
+    /*allocate a 4Kbyte multiple of memory*/
+    l1_table_size = s->l1_size * sizeof(uint64_t);
+    if (l1_table_size % 4096 > 0) {
+        l1_table_size = ((l1_table_size >> 12) + 1) << 12;
+    }
+    ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
+    if (ret != 0) {
+        s->l1_table = NULL;
+        goto fail;
+    }
+    memset(s->l1_table, 0x00, l1_table_size);
+
+    DPRINTF("L1 Table offset detected: %llu, size %d (%d)\n",
+            (unsigned long long)s->l1_table_offset,
+            (int) (s->l1_size * sizeof(uint64_t)),
+            l1_table_size);
+
+    lseek(fd, s->l1_table_offset, SEEK_SET);
+    if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
+        goto fail;
+    /* NB: L1 entries are used exactly as read, without byte swapping. */
+
+    /* alloc L2 cache */
+    size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
+    ret = posix_memalign((void **)&s->l2_cache, 4096, size);
+    if (ret != 0) {
+        s->l2_cache = NULL;
+        goto fail;
+    }
+
+    size = s->cluster_size;
+    ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
+    if (ret != 0) {
+        s->cluster_cache = NULL;
+        goto fail;
+    }
+
+    ret = posix_memalign((void **)&s->cluster_data, 4096, size);
+    if (ret != 0) {
+        s->cluster_data = NULL;
+        goto fail;
+    }
+    s->cluster_cache_offset = -1;
+
+    /* read the backing file name */
+    if (header->backing_file_offset != 0) {
+        DPRINTF("Reading backing file data\n");
+        len = header->backing_file_size;
+        if (len > 1023)
+            len = 1023;
+
+        /*TODO - Fix read size for O_DIRECT and use original fd!*/
+        /* A separate, non-O_DIRECT descriptor is used for this read. */
+        tmp_fd = open(name, O_RDONLY | O_LARGEFILE);
+        if (tmp_fd < 0)
+            goto fail;
+
+        lseek(tmp_fd, header->backing_file_offset, SEEK_SET);
+        if (read(tmp_fd, bs->backing_file, len) != len)
+            goto fail;
+        bs->backing_file[len] = '\0';
+        close(tmp_fd);
+        tmp_fd = -1;
+        /***********************************/
+
+        /*Open backing file*/
+        s->bfd = open(bs->backing_file,
+                      O_RDONLY | O_DIRECT | O_LARGEFILE);
+        if (s->bfd < 0) {
+            DPRINTF("Unable to open backing file: %s\n",
+                    bs->backing_file);
+            s->bfd = -1;
+            goto fail;
+        }
+        s->cluster_alloc = 1; /*Cannot use pre-alloc*/
+    }
+
+    bs->sector_size = 512;
+    bs->info = 0;
+
+    /*Detect min_cluster_alloc*/
+    s->min_cluster_alloc = 1; /*Default*/
+    if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) {
+        /*We test to see if the xen magic # exists*/
+        exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
+        be32_to_cpus(&exthdr->xmagic);
+        if(exthdr->xmagic != XEN_MAGIC)
+            goto end_xenhdr;
+
+        /*Finally check the L1 table cksum*/
+        be32_to_cpus(&exthdr->cksum);
+        cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
+        if(exthdr->cksum != cksum)
+            goto end_xenhdr;
+
+        be32_to_cpus(&exthdr->min_cluster_alloc);
+        s->min_cluster_alloc = exthdr->min_cluster_alloc;
+    }
+
+ end_xenhdr:
+    if (init_aio_state(bs)!=0) {
+        DPRINTF("Unable to initialise AIO state\n");
+        goto fail;
+    }
+    s->fd_end = lseek(s->fd, 0, SEEK_END);
+
+    /* The header sector is no longer needed. */
+    free(buf);
+
+    return 0;
+
+fail:
+    DPRINTF("QCOW Open failed\n");
+    free(buf);
+    free(s->sector_lock);
+    s->sector_lock = NULL;
+    free(s->l1_table);
+    free(s->l2_cache);
+    free(s->cluster_cache);
+    free(s->cluster_data);
+    free(s->name);
+    s->l1_table = NULL;
+    s->l2_cache = NULL;
+    s->cluster_cache = NULL;
+    s->cluster_data = NULL;
+    s->name = NULL;
+    if (tmp_fd >= 0)
+        close(tmp_fd);
+    if (s->bfd >= 0) {
+        close(s->bfd);
+        s->bfd = -1;
+    }
+    /* Close the image itself, not whichever fd was opened last. */
+    close(s->fd);
+    s->fd = -1;
+    close(s->poll_pipe[0]);
+    close(s->poll_pipe[1]);
+    return -1;
+}
+
+/*
+ * Queue a read of 'nb_sectors' sectors starting at 'sector' into 'buf'.
+ *
+ * The request is split at cluster boundaries.  For each piece:
+ *  - unmapped with a backing file: async read from the backing file;
+ *  - unmapped without one:         buffer is zero-filled;
+ *  - compressed:                   cluster is inflated and copied;
+ *  - otherwise:                    async read from the image.
+ * The zero-fill and compressed pieces complete synchronously, so their
+ * sector lock is released here; async pieces keep theirs.
+ *
+ * Returns -EBUSY if any sector is already locked, -ENOMEM if no request
+ * slot or iocb is available, the callback's return value if no async
+ * I/O was queued, and 0 otherwise.
+ */
+int tdqcow_queue_read(struct td_state *bs, uint64_t sector,
+                      int nb_sectors, char *buf, td_callback_t cb,
+                      int id, void *private)
+{
+    struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+    int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
+    uint64_t cluster_offset;
+
+    /*Check we can get a lock*/
+    for (i = 0; i < nb_sectors; i++)
+        if (!aio_can_lock(s, sector + i)) {
+            DPRINTF("AIO_CAN_LOCK failed [%llu]\n",
+                    (long long) sector + i);
+            return -EBUSY;
+        }
+
+    /*We store a local record of the request*/
+    qcow_idx = get_free_idx(s);
+    if (qcow_idx < 0) {
+        /* -1 must never be used to index nr_reqs[]. */
+        DPRINTF("No free QCOW request index\n");
+        return -ENOMEM;
+    }
+
+    while (nb_sectors > 0) {
+        cluster_offset =
+            get_cluster_offset(bs, sector << 9, 0, 0, 0, 0);
+        index_in_cluster = sector & (s->cluster_sectors - 1);
+        n = s->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors)
+            n = nb_sectors;
+
+        if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
+            DPRINTF("AIO_LOCK or iocb_free_count (%d) failed"
+                    "[%llu]\n", s->iocb_free_count,
+                    (long long) sector);
+            return -ENOMEM;
+        }
+
+        if (!cluster_offset && (s->bfd > 0)) {
+            s->nr_reqs[qcow_idx]++;
+            asubmit += async_read(s, s->bfd, n * 512, sector << 9,
+                                  buf, cb, id, sector,
+                                  qcow_idx, private);
+        } else if(!cluster_offset) {
+            memset(buf, 0, 512 * n);
+            aio_unlock(s, sector);
+        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+            /*
+             * Handled synchronously: no completion will ever
+             * release the lock taken above, so drop it now.
+             */
+            aio_unlock(s, sector);
+            if (decompress_cluster(s, cluster_offset) < 0) {
+                ret = -1;
+                goto done;
+            }
+            memcpy(buf, s->cluster_cache + index_in_cluster * 512,
+                   512 * n);
+        } else {
+            s->nr_reqs[qcow_idx]++;
+            asubmit += async_read(s, s->fd, n * 512,
+                                  (cluster_offset +
+                                   index_in_cluster * 512),
+                                  buf, cb, id, sector,
+                                  qcow_idx, private);
+        }
+        nb_sectors -= n;
+        sector += n;
+        buf += n * 512;
+    }
+done:
+    /*Callback if no async requests outstanding*/
+    if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
+
+    return 0;
+}
+
+/*
+ * Queue a write of nb_sectors 512-byte sectors, starting at `sector`, from buf.
+ *
+ * The request is split at cluster boundaries.  A chunk whose data is not all
+ * zero (IS_ZERO) asks get_cluster_offset() to allocate; an all-zero chunk
+ * only looks the cluster up and is skipped when none exists.  When
+ * s->crypt_method is set the chunk is encrypted into s->cluster_data and
+ * written from there.
+ *
+ * Returns -EBUSY if aio_can_lock() refuses any sector of the range, -ENOMEM
+ * if no iocb is free or aio_lock() fails, the return value of cb() when no
+ * asynchronous request was queued, and 0 otherwise.
+ *
+ * NOTE(review): every encrypted chunk of a request shares s->cluster_data;
+ * if async_write() only queues the iocb, a later chunk may overwrite the
+ * buffer before it is submitted -- confirm.
+ */
+int tdqcow_queue_write(struct td_state *bs, uint64_t sector,
+		       int nb_sectors, char *buf, td_callback_t cb,
+		       int id, void *private)
+{
+	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+	int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
+	int allocate;
+	uint64_t cluster_offset;
+	char *src;
+
+	/*Check we can get a lock*/
+	for (i = 0; i < nb_sectors; i++)
+		if (!aio_can_lock(s, sector + i)) {
+			DPRINTF("AIO_CAN_LOCK failed [%llu]\n",
+				(unsigned long long)(sector + i));
+			return -EBUSY;
+		}
+
+	/*We store a local record of the request*/
+	qcow_idx = get_free_idx(s);
+	while (nb_sectors > 0) {
+		index_in_cluster = sector & (s->cluster_sectors - 1);
+		n = s->cluster_sectors - index_in_cluster;
+		if (n > nb_sectors)
+			n = nb_sectors;
+
+		if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
+			DPRINTF("AIO_LOCK or iocb_free_count (%d) failed"
+				"[%llu]\n", s->iocb_free_count,
+				(unsigned long long)sector);
+			return -ENOMEM;
+		}
+
+		/*Only non-zero data forces a cluster to be allocated*/
+		allocate = !IS_ZERO(buf, n * 512);
+		cluster_offset = get_cluster_offset(bs, sector << 9,
+						    allocate, 0,
+						    index_in_cluster,
+						    index_in_cluster + n);
+		if (!cluster_offset) {
+			/*Nothing will be submitted for this chunk, so the
+			  lock taken above must be released here*/
+			aio_unlock(s, sector);
+			if (allocate) {
+				DPRINTF("Ooops, no write cluster offset!\n");
+				ret = -1;
+				goto done;
+			}
+			/*Zero data for an unallocated cluster: no write*/
+		} else {
+			src = buf;
+			if (s->crypt_method) {
+				encrypt_sectors(s, sector, s->cluster_data,
+						(unsigned char *)buf, n, 1,
+						&s->aes_encrypt_key);
+				src = (char *)s->cluster_data;
+			}
+			s->nr_reqs[qcow_idx]++;
+			asubmit += async_write(s, s->fd, n * 512,
+					       (cluster_offset +
+						index_in_cluster * 512),
+					       src, cb, id, sector,
+					       qcow_idx, private);
+		}
+		nb_sectors -= n;
+		sector += n;
+		buf += n * 512;
+	}
+	s->cluster_cache_offset = -1; /* disable compressed cache */
+
+done:
+	/*Callback if no async requests outstanding*/
+	if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
+
+	return 0;
+}
+
+/*
+ * Hand every queued iocb (prv->iocb_queue, prv->iocb_queued entries) to
+ * io_submit() and reset the queue counter.
+ *
+ * Returns io_submit()'s return value unchanged.  A failure or a partial
+ * submission is logged but not retried: the queue counter is cleared either
+ * way, as before.
+ */
+int tdqcow_submit(struct td_state *bs)
+{
+	int ret, queued;
+	struct tdqcow_state *prv = (struct tdqcow_state *)bs->private;
+
+	queued = prv->iocb_queued;
+	ret = io_submit(prv->aio_ctx, queued, prv->iocb_queue);
+
+	/* XXX: TODO: Handle error conditions here. */
+	if (ret != queued)
+		DPRINTF("io_submit: submitted %d of %d queued requests\n",
+			ret, queued);
+
+	prv->iocb_queued = 0;
+
+	return ret;
+}
+
+
+/*
+ * Build a MAX_IOFD-entry descriptor array: slot 0 holds s->poll_fd, every
+ * other slot is 0.
+ *
+ * Returns the heap-allocated array (presumably released by the caller --
+ * confirm), or NULL if the allocation fails.
+ */
+int *tdqcow_get_fd(struct td_state *bs)
+{
+	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+	int *fds;
+
+	/*calloc zero-fills, which initialises the FD array*/
+	fds = calloc(MAX_IOFD, sizeof(int));
+	if (fds == NULL)
+		return NULL;
+
+	fds[0] = s->poll_fd;
+	return fds;
+}
+
+/*
+ * Release the resources held for a qcow image.
+ *
+ * When s->min_cluster_alloc equals s->l2_size, the L1-table checksum is
+ * recomputed and stored big-endian at offset
+ * sizeof(QCowHeader) + sizeof(uint32_t) of the image, through a separate
+ * descriptor opened without O_DIRECT.  A failure to store it is logged and
+ * does not stop the teardown.
+ *
+ * Always returns 0.
+ */
+int tdqcow_close(struct td_state *bs)
+{
+	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+	uint32_t cksum, out;
+	int fd;
+	off_t offset;
+
+	/*Update the hdr cksum*/
+	if(s->min_cluster_alloc == s->l2_size) {
+		cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
+		printf("Writing cksum: %d",cksum);
+		fd = open(s->name, O_WRONLY | O_LARGEFILE); /*Open without O_DIRECT*/
+		if (fd < 0) {
+			DPRINTF("Unable to open %s to update cksum\n",
+				s->name);
+		} else {
+			offset = sizeof(QCowHeader) + sizeof(uint32_t);
+			out = cpu_to_be32(cksum);
+			if (lseek(fd, offset, SEEK_SET) == (off_t)-1 ||
+			    write(fd, &out, sizeof(out)) !=
+			    (ssize_t)sizeof(out))
+				DPRINTF("Unable to write cksum to %s\n",
+					s->name);
+			close(fd);
+		}
+	}
+
+	/*The backing file descriptor is opened in tdqcow_open; the "> 0"
+	  test mirrors the one used in tdqcow_queue_read*/
+	if (s->bfd > 0)
+		close(s->bfd);
+
+	free(s->name);
+	free(s->l1_table);
+	free(s->l2_cache);
+	free(s->cluster_cache);
+	free(s->cluster_data);
+	close(s->fd);
+	return 0;
+}
+
+/*
+ * Reap completed AIO events without blocking and run the callbacks of the
+ * requests they finish.
+ *
+ * For each event: the sector lock is released; for entries with id >= 0 the
+ * buffer is run through encrypt_sectors() with the decrypt key when
+ * s->crypt_method is set, the request's outstanding count is decremented and
+ * the callback fires once it reaches zero; entries with id == -2 only have
+ * their buffer freed.  The iocb is returned to the free list.
+ *
+ * An event whose byte count differs from the requested one is reported to
+ * the callback as -EIO (unless res2 already carries a status) instead of
+ * being passed off as success.
+ *
+ * Returns 1 when sid > MAX_IOFD, otherwise the sum of the callback return
+ * values.
+ *
+ * NOTE(review): a request split into several iocbs reports only the status
+ * of the iocb that completes last -- confirm whether earlier failures need
+ * to be accumulated.
+ */
+int tdqcow_do_callbacks(struct td_state *s, int sid)
+{
+	int ret, i, rsp = 0, status;
+	struct io_event *ep;
+	struct tdqcow_state *prv = (struct tdqcow_state *)s->private;
+
+	if (sid > MAX_IOFD) return 1;
+
+	/* Non-blocking test for completed io. */
+	ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
+			   NULL);
+
+	for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+		struct iocb *io = ep->obj;
+		struct pending_aio *pio;
+
+		pio = &prv->pending_aio[(long)io->data];
+
+		status = ep->res2;
+		if (ep->res != io->u.c.nbytes) {
+			/* TODO: handle this case better. */
+			DPRINTF("AIO did less than I asked it to "
+				"[%lu,%lu,%d]\n",
+				ep->res, io->u.c.nbytes, (int)ep->res);
+			if (status == 0)
+				status = -EIO;
+		}
+		aio_unlock(prv, pio->sector);
+		if (pio->id >= 0) {
+			if (prv->crypt_method)
+				encrypt_sectors(prv, pio->sector,
+						(unsigned char *)pio->buf,
+						(unsigned char *)pio->buf,
+						pio->nb_sectors, 0,
+						&prv->aes_decrypt_key);
+			prv->nr_reqs[pio->qcow_idx]--;
+			if (prv->nr_reqs[pio->qcow_idx] == 0)
+				rsp += pio->cb(s, status, pio->id,
+					       pio->private);
+		} else if (pio->id == -2) free(pio->buf);
+
+		prv->iocb_free[prv->iocb_free_count++] = io;
+	}
+	return rsp;
+}
+
+/*
+ * Create a new qcow image at `filename`.
+ *
+ * total_size is given in bytes.  With a backing file (other than the literal
+ * "fat:") the image uses 512-byte clusters and records the backing file
+ * name after the headers; when that name is a local path it is resolved with
+ * realpath() and the image takes the backing file's size.  Without a backing
+ * file the image uses 4 KB clusters.  A non-zero `flags` selects
+ * QCOW_CRYPT_AES.
+ *
+ * The file receives the QCowHeader, the QCowHeader_ext (Xen magic, checksum
+ * of the all-zero L1 table, min_cluster_alloc), the optional backing file
+ * name, and a zeroed L1 table at a 4 KB aligned offset; its length is then
+ * rounded up to a 4 KB boundary.
+ *
+ * Returns 0 on success and -1 on any failure (open, path resolution, stat,
+ * allocation, write, seek or truncate).  The descriptor is closed on every
+ * path; a partially written file is left in place.
+ */
+int qcow_create(const char *filename, uint64_t total_size,
+		const char *backing_file, int flags)
+{
+	int fd, header_size, backing_filename_len, l1_size;
+	int shift, length, have_stat = 0;
+	QCowHeader header;
+	QCowHeader_ext exthdr;
+	char backing_filename[1024], *ptr = NULL, *resolved;
+	uint64_t size;
+	size_t l1_bytes;
+	struct stat st;
+
+	DPRINTF("Qcow_create: size %llu\n",(long long unsigned)total_size);
+
+	fd = open(filename,
+		  O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+		  0644);
+	if (fd < 0)
+		return -1;
+
+	/*Zero everything that is later written to disk or read back, so no
+	  uninitialised bytes end up in the image*/
+	memset(&header, 0, sizeof(header));
+	memset(&exthdr, 0, sizeof(exthdr));
+	memset(&st, 0, sizeof(st));
+	header.magic = cpu_to_be32(QCOW_MAGIC);
+	header.version = cpu_to_be32(QCOW_VERSION);
+
+	/*Create extended header fields*/
+	exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
+
+	header_size = sizeof(header) + sizeof(QCowHeader_ext);
+	backing_filename_len = 0;
+	size = (total_size >> SECTOR_SHIFT);
+	if (backing_file) {
+		if (strcmp(backing_file, "fat:")) {
+			const char *p;
+			/* XXX: this is a hack: we do not attempt to
+			 *check for URL like syntax */
+			p = strchr(backing_file, ':');
+			if (p && (p - backing_file) >= 2) {
+				/* URL like but exclude "c:" like filenames */
+				strncpy(backing_filename, backing_file,
+					sizeof(backing_filename) - 1);
+				backing_filename[sizeof(backing_filename) - 1]
+					= '\0';
+			} else {
+				/*Let realpath() allocate: the local buffer
+				  is smaller than PATH_MAX*/
+				resolved = realpath(backing_file, NULL);
+				if (resolved == NULL ||
+				    strlen(resolved) >=
+				    sizeof(backing_filename)) {
+					free(resolved);
+					goto fail;
+				}
+				strcpy(backing_filename, resolved);
+				free(resolved);
+				if (stat(backing_filename, &st) != 0)
+					goto fail;
+				have_stat = 1;
+			}
+			header.backing_file_offset = cpu_to_be64(header_size);
+			backing_filename_len = strlen(backing_filename);
+			header.backing_file_size = cpu_to_be32(
+				backing_filename_len);
+			header_size += backing_filename_len;
+
+			/*Set to the backing file size (only known when the
+			  backing file was stat()ed)*/
+			if (have_stat) {
+				size = (st.st_size >> SECTOR_SHIFT);
+				DPRINTF("Backing file size detected: %lld sectors"
+					"(total %lld [%lld MB])\n",
+					(long long)size,
+					(long long)(size << SECTOR_SHIFT),
+					(long long)(size >> 11));
+			}
+		} else {
+			backing_file = NULL;
+			DPRINTF("Setting file size: %lld (total %lld)\n",
+				(long long) total_size,
+				(long long) (total_size << SECTOR_SHIFT));
+		}
+		/*st is zeroed unless stat() succeeded above*/
+		header.mtime = cpu_to_be32(st.st_mtime);
+		header.cluster_bits = 9; /* 512 byte cluster to avoid copying
+					    unmodified sectors */
+		header.l2_bits = 12; /* 32 KB L2 tables */
+		exthdr.min_cluster_alloc = cpu_to_be32(1);
+	} else {
+		DPRINTF("Setting file size: %lld sectors"
+			"(total %lld [%lld MB])\n",
+			(long long) size,
+			(long long) (size << SECTOR_SHIFT),
+			(long long) (size >> 11));
+		header.cluster_bits = 12; /* 4 KB clusters */
+		header.l2_bits = 9; /* 4 KB L2 tables */
+		exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
+	}
+	/*Set the header size value*/
+	header.size = cpu_to_be64(size * 512);
+
+	header_size = (header_size + 7) & ~7;
+	if (header_size % 4096 > 0) {
+		header_size = ((header_size >> 12) + 1) << 12;
+	}
+
+	shift = header.cluster_bits + header.l2_bits;
+	l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
+	l1_bytes = l1_size * sizeof(uint64_t);
+
+	header.l1_table_offset = cpu_to_be64(header_size);
+	DPRINTF("L1 Table offset: %d, size %d\n",
+		header_size,
+		(int)(l1_size * sizeof(uint64_t)));
+	if (flags) {
+		header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+	} else {
+		header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+	}
+
+	/*Zeroed L1 table: checksummed here and written out below*/
+	ptr = calloc(1, l1_bytes ? l1_bytes : sizeof(uint64_t));
+	if (ptr == NULL)
+		goto fail;
+	exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_bytes));
+	printf("Created cksum: %d\n",exthdr.cksum);
+
+	/* write all the data */
+	if (write(fd, &header, sizeof(header)) != (ssize_t)sizeof(header) ||
+	    write(fd, &exthdr, sizeof(exthdr)) != (ssize_t)sizeof(exthdr))
+		goto fail;
+	if (backing_file &&
+	    write(fd, backing_filename, backing_filename_len) !=
+	    backing_filename_len)
+		goto fail;
+	if (lseek(fd, header_size, SEEK_SET) == (off_t)-1)
+		goto fail;
+	if (l1_bytes > 0 && write(fd, ptr, l1_bytes) != (ssize_t)l1_bytes)
+		goto fail;
+
+	/*adjust file length to 4 KByte boundary*/
+	length = header_size + l1_size * sizeof(uint64_t);
+	if (length % 4096 > 0) {
+		length = ((length >> 12) + 1) << 12;
+		if (ftruncate(fd, length) != 0)
+			goto fail;
+		DPRINTF("Adjusted filelength to %d for 4 "
+			"Kbyte alignment\n",length);
+	}
+
+	free(ptr);
+	close(fd);
+
+	return 0;
+
+fail:
+	free(ptr);
+	close(fd);
+	return -1;
+}
+
+/*
+ * Reset the image to an empty state: zero the in-memory L1 table, write it
+ * back at s->l1_table_offset, clear the L2 cache bookkeeping and truncate the
+ * file right after the L1 table.
+ *
+ * Returns 0 on success, -1 if the seek, the (complete) write or the truncate
+ * fails.
+ */
+int qcow_make_empty(struct td_state *bs)
+{
+	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+	uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+
+	memset(s->l1_table, 0, l1_length);
+	if (lseek(s->fd, s->l1_table_offset, SEEK_SET) == (off_t)-1)
+		return -1;
+	if (write(s->fd, s->l1_table, l1_length) != (ssize_t)l1_length)
+		return -1;
+
+	/*The on-disk L1 table is empty from here on, so the cached L2 state
+	  is stale whether or not the truncate below succeeds*/
+	memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+	memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
+	memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
+
+	if (ftruncate(s->fd, s->l1_table_offset + l1_length) != 0)
+		return -1;
+
+	/*Refresh the cached end-of-file offset the same way tdqcow_open
+	  computes it*/
+	s->fd_end = lseek(s->fd, 0, SEEK_END);
+
+	return 0;
+}
+
+/* Report the cluster_size recorded in the image's private qcow state. */
+int qcow_get_cluster_size(struct td_state *bs)
+{
+	return ((struct tdqcow_state *)bs->private)->cluster_size;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+   tables to avoid losing bytes in alignment */
+/*
+ * Deflate one cluster (s->cluster_size bytes at buf) and, if the result is
+ * smaller than a cluster, store it at the offset returned by
+ * get_cluster_offset() for sector_num.
+ *
+ * Returns 0 on success and -1 on allocation, zlib, cluster-lookup, seek or
+ * write failure.
+ *
+ * NOTE(review): when the data does not compress, nothing is written and 0
+ * is still returned (the fallback write below is commented out) -- confirm
+ * callers do not rely on the data having been stored.
+ */
+int qcow_compress_cluster(struct td_state *bs, int64_t sector_num,
+			  const uint8_t *buf)
+{
+	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+	z_stream strm;
+	int ret, out_len, rc = -1;
+	uint8_t *out_buf;
+	uint64_t cluster_offset;
+
+	out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+	if (!out_buf)
+		return -1;
+
+	/* default compression level, small window, no zlib header
+	   (negative windowBits) */
+	memset(&strm, 0, sizeof(strm));
+	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+			   Z_DEFLATED, -12,
+			   9, Z_DEFAULT_STRATEGY);
+	if (ret != 0)
+		goto out;
+
+	strm.avail_in = s->cluster_size;
+	strm.next_in = (uint8_t *)buf;
+	/* output is capped at one cluster: anything larger is not worth
+	   storing compressed */
+	strm.avail_out = s->cluster_size;
+	strm.next_out = out_buf;
+
+	ret = deflate(&strm, Z_FINISH);
+	out_len = strm.next_out - out_buf;
+	deflateEnd(&strm);
+	if (ret != Z_STREAM_END && ret != Z_OK)
+		goto out;
+
+	if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+		/* could not compress: write normal cluster */
+		//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
+	} else {
+		cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
+						    out_len, 0, 0);
+		/* 0 means no cluster was obtained; writing there would
+		   overwrite the image header */
+		if (!cluster_offset)
+			goto out;
+		cluster_offset &= s->cluster_offset_mask;
+		if (lseek(s->fd, cluster_offset, SEEK_SET) == (off_t)-1)
+			goto out;
+		if (write(s->fd, out_buf, out_len) != out_len)
+			goto out;
+	}
+	rc = 0;
+
+out:
+	free(out_buf);
+	return rc;
+}
+
+struct tap_disk tapdisk_qcow = { /* positional initializer: entry order must match struct tap_disk, which is declared outside this chunk */
+ "tapdisk_qcow", /* string identifying this driver */
+ sizeof(struct tdqcow_state), /* size of the driver's private state */
+ tdqcow_open,
+ tdqcow_queue_read,
+ tdqcow_queue_write,
+ tdqcow_submit,
+ tdqcow_get_fd,
+ tdqcow_close,
+ tdqcow_do_callbacks,
+};
+
diff --git a/tools/blktap/drivers/block-ram.c b/tools/blktap/drivers/block-ram.c
new file mode 100644
index 0000000000..4c378ed427
--- /dev/null
+++ b/tools/blktap/drivers/block-ram.c
@@ -0,0 +1,296 @@
+/* block-ram.c
+ *
+ * Fast Ramdisk implementation.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <string.h>
+#include "tapdisk.h"
+
+#define MAX_DISK_SIZE 1024000 /* 500MB disk limit, counted in sectors: tdram_open compares it with s->size */
+
+char *img; /* in-memory copy of the disk image, allocated and filled by tdram_open */
+long int disksector_size; /* copy of s->sector_size saved by get_image_info for later opens */
+long int disksize; /* copy of s->size saved by get_image_info for later opens */
+long int diskinfo; /* copy of s->info saved by get_image_info for later opens */
+static int connections = 0; /* incremented by tdram_open, decremented by tdram_close */
+
+struct tdram_state { /* per-connection private state of the ram driver */
+ int fd; /* descriptor of the image file; tdram_open sets it to -1 when the image is already loaded */
+ int poll_pipe[2]; /* dummy fd for polling on: filled by pipe(), element 0 is handed out by tdram_get_fd */
+};
+
+/*
+ * Get Image size, secsize.
+ *
+ * Fills s->size (in sectors), s->sector_size and s->info for the image open
+ * on fd.  Block devices are sized with the BLKGETSIZE/BLKSSZGET ioctls,
+ * anything else with fstat().  A size of 0 is replaced by MAX_DISK_SIZE.
+ * The values are also saved in disksector_size/disksize/diskinfo so that
+ * later opens can reuse them.
+ *
+ * Returns 0 on success, -EINVAL if fstat() or BLKGETSIZE fails.
+ */
+static int get_image_info(struct td_state *s, int fd)
+{
+	int ret;
+	struct stat st;
+
+	ret = fstat(fd, &st);
+	if (ret != 0) {
+		DPRINTF("ERROR: fstat failed, Couldn't stat image");
+		return -EINVAL;
+	}
+
+	if (S_ISBLK(st.st_mode)) {
+		/*Accessing block device directly*/
+		/*BLKGETSIZE stores an unsigned long: fetch it into a
+		  correctly typed local instead of writing through
+		  &s->size*/
+		unsigned long nr_sectors = 0;
+
+		if (ioctl(fd, BLKGETSIZE, &nr_sectors) != 0) {
+			DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
+			return -EINVAL;
+		}
+		s->size = nr_sectors;
+
+		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
+			"sector_shift [%llu]\n",
+			(long long unsigned)(s->size << SECTOR_SHIFT),
+			(long long unsigned)s->size);
+
+		/*Get the sector size*/
+#if defined(BLKSSZGET)
+		{
+			/*BLKSSZGET stores an int*/
+			int arg = 0;
+			s->sector_size = DEFAULT_SECTOR_SIZE;
+			if (ioctl(fd, BLKSSZGET, &arg) == 0 && arg > 0)
+				s->sector_size = arg;
+
+			if (s->sector_size != DEFAULT_SECTOR_SIZE)
+				DPRINTF("Note: sector size is %ld (not %d)\n",
+					s->sector_size, DEFAULT_SECTOR_SIZE);
+		}
+#else
+		s->sector_size = DEFAULT_SECTOR_SIZE;
+#endif
+
+	} else {
+		/*Local file? try fstat instead*/
+		s->size = (st.st_size >> SECTOR_SHIFT);
+		s->sector_size = DEFAULT_SECTOR_SIZE;
+		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
+			"sector_shift [%llu]\n",
+			(long long unsigned)(s->size << SECTOR_SHIFT),
+			(long long unsigned)s->size);
+	}
+
+	if (s->size == 0) {
+		s->size =((uint64_t) MAX_DISK_SIZE);
+		s->sector_size = DEFAULT_SECTOR_SIZE;
+	}
+	s->info = 0;
+
+	/*Store variables locally*/
+	disksector_size = s->sector_size;
+	disksize        = s->size;
+	diskinfo        = s->info;
+	DPRINTF("Image sector_size: \n\t[%lu]\n",
+		s->sector_size);
+
+	return 0;
+}
+
+/*
+ * Open the disk file and initialize ram state.
+ *
+ * The first connection opens `name` (with O_DIRECT when supported), sizes it
+ * with get_image_info() and reads the whole image into `img`.  Later
+ * connections (connections > 1) reuse the saved geometry and the loaded
+ * image and get fd == -1.
+ *
+ * Returns 0 on success and a negative value on failure.  On failure the
+ * descriptors opened here are closed (and set to -1), the image buffer is
+ * released and the connection count is restored.
+ */
+int tdram_open (struct td_state *s, const char *name)
+{
+	int fd = -1, ret = 0, err;
+	struct tdram_state *prv = (struct tdram_state *)s->private;
+	const size_t max_chunk = 1 << 20;
+	uint64_t total, count = 0;
+	size_t chunk;
+	ssize_t got;
+	void *mem = NULL;
+
+	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
+	ret = pipe(prv->poll_pipe);
+	if (ret != 0)
+		return (0 - errno);
+
+	/* counted only once the pipe exists, so a failed open never leaves
+	   a phantom connection behind */
+	connections++;
+	prv->fd = -1;
+
+	if (connections > 1) {
+		s->sector_size = disksector_size;
+		s->size        = disksize;
+		s->info        = diskinfo;
+		DPRINTF("Image already open, returning parameters:\n");
+		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
+			"sector_shift [%llu]\n",
+			(long long unsigned)(s->size << SECTOR_SHIFT),
+			(long long unsigned)s->size);
+		DPRINTF("Image sector_size: \n\t[%lu]\n",
+			s->sector_size);
+
+		return 0;
+	}
+
+	/* Open the file */
+	fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
+
+	if ((fd == -1) && (errno == EINVAL)) {
+
+		/* Maybe O_DIRECT isn't supported. */
+		fd = open(name, O_RDWR | O_LARGEFILE);
+		if (fd != -1) DPRINTF("WARNING: Accessing image without"
+				      "O_DIRECT! (%s)\n", name);
+
+	} else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
+
+	if (fd == -1) {
+		/* save errno before logging can change it */
+		err = errno;
+		DPRINTF("Unable to open [%s]!\n",name);
+		ret = 0 - err;
+		goto fail;
+	}
+
+	prv->fd = fd;
+
+	ret = get_image_info(s, fd);
+	if (ret != 0)
+		goto fail;
+
+	if (s->size > (uint64_t)MAX_DISK_SIZE) {
+		DPRINTF("Disk exceeds limit, must be less than [%d]MB",
+			(MAX_DISK_SIZE<<SECTOR_SHIFT)>>20);
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/*Read the image into memory*/
+	total = (uint64_t)s->size << SECTOR_SHIFT;
+	/* drop any image left over from an earlier session */
+	free(img);
+	img = NULL;
+	/* page-aligned buffer: O_DIRECT reads need an aligned destination */
+	if (posix_memalign(&mem, 4096, total) != 0) {
+		DPRINTF("Mem malloc failed\n");
+		ret = -1;
+		goto fail;
+	}
+	img = mem;
+	DPRINTF("Reading %llu bytes.......",(long long unsigned)s->size << SECTOR_SHIFT);
+
+	/* bounded by the buffer size in bytes, whatever the sector size */
+	while (count < total) {
+		chunk = max_chunk;
+		if (total - count < chunk)
+			chunk = total - count;
+		got = read(fd, img + count, chunk);
+		if (got <= 0)
+			break;
+		count += got;
+	}
+	DPRINTF("[%d]\n",(int)count);
+	if (count != total) {
+		ret = -1;
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	if (fd != -1)
+		close(fd);
+	prv->fd = -1;
+	close(prv->poll_pipe[0]);
+	close(prv->poll_pipe[1]);
+	prv->poll_pipe[0] = prv->poll_pipe[1] = -1;
+	free(img);
+	img = NULL;
+	connections--;
+	return ret;
+}
+
+/*
+ * Copy nb_sectors sectors starting at `sector` from the in-memory image
+ * into buf and invoke cb immediately.
+ *
+ * A request that is negative, or that does not lie inside the
+ * s->size << SECTOR_SHIFT bytes allocated for the image, is completed with
+ * -EINVAL instead of being copied.
+ *
+ * Returns the number of bytes copied, or 1 after completing a rejected
+ * request (tdsync_queue_read likewise returns 1 once it has invoked the
+ * callback).
+ */
+int tdram_queue_read(struct td_state *s, uint64_t sector,
+		     int nb_sectors, char *buf, td_callback_t cb,
+		     int id, void *private)
+{
+	int      size   = nb_sectors * s->sector_size;
+	uint64_t offset = sector * (uint64_t)s->sector_size;
+	uint64_t limit  = (uint64_t)s->size << SECTOR_SHIFT;
+
+	if (img == NULL || nb_sectors < 0 || size < 0 ||
+	    offset > limit || (uint64_t)size > limit - offset) {
+		cb(s, -EINVAL, id, private);
+		return 1;
+	}
+
+	memcpy(buf, img + offset, size);
+
+	cb(s, 0, id, private);
+
+	return size;
+}
+
+/*
+ * Copy nb_sectors sectors from buf into the in-memory image starting at
+ * `sector` and invoke cb immediately.
+ *
+ * A request that is negative, or that does not lie inside the
+ * s->size << SECTOR_SHIFT bytes allocated for the image, is completed with
+ * -EINVAL instead of being copied.
+ *
+ * Returns the number of bytes copied, or 1 after completing a rejected
+ * request (tdsync_queue_write likewise returns 1 once it has invoked the
+ * callback).
+ */
+int tdram_queue_write(struct td_state *s, uint64_t sector,
+		      int nb_sectors, char *buf, td_callback_t cb,
+		      int id, void *private)
+{
+	int      size   = nb_sectors * s->sector_size;
+	uint64_t offset = sector * (uint64_t)s->sector_size;
+	uint64_t limit  = (uint64_t)s->size << SECTOR_SHIFT;
+
+	if (img == NULL || nb_sectors < 0 || size < 0 ||
+	    offset > limit || (uint64_t)size > limit - offset) {
+		cb(s, -EINVAL, id, private);
+		return 1;
+	}
+
+	/*We assume that write access is controlled at a higher level for multiple disks*/
+	memcpy(img + offset, buf, size);
+
+	cb(s, 0, id, private);
+
+	return size;
+}
+
+/*
+ * Nothing to submit: tdram_queue_read and tdram_queue_write finish their
+ * copy and invoke the callback before returning.  Always returns 0.
+ */
+int tdram_submit(struct td_state *s)
+{
+	(void)s;
+
+	return 0;
+}
+
+
+/*
+ * Build a MAX_IOFD-entry descriptor array: slot 0 holds the read end of the
+ * dummy poll pipe, every other slot is 0.
+ *
+ * Returns the heap-allocated array (presumably released by the caller --
+ * confirm), or NULL if the allocation fails.
+ */
+int *tdram_get_fd(struct td_state *s)
+{
+	struct tdram_state *prv = (struct tdram_state *)s->private;
+	int *fds;
+
+	/*calloc zero-fills, which initialises the FD array*/
+	fds = calloc(MAX_IOFD, sizeof(int));
+	if (fds == NULL)
+		return NULL;
+
+	fds[0] = prv->poll_pipe[0];
+	return fds;
+}
+
+/*
+ * Drop one connection to the ramdisk.
+ *
+ * Closes this connection's image descriptor (when it has one) and its poll
+ * pipe, and releases the in-memory image once the last connection is gone;
+ * the next tdram_open reloads it from the file in any case.
+ *
+ * Always returns 0.
+ */
+int tdram_close(struct td_state *s)
+{
+	struct tdram_state *prv = (struct tdram_state *)s->private;
+
+	/* tdram_open stores -1 for connections that did not open the file */
+	if (prv->fd != -1) {
+		close(prv->fd);
+		prv->fd = -1;
+	}
+	close(prv->poll_pipe[0]);
+	close(prv->poll_pipe[1]);
+
+	if (connections > 0)
+		connections--;
+
+	if (connections == 0) {
+		free(img);
+		img = NULL;
+	}
+
+	return 0;
+}
+
+/* No completion work is done here; the function unconditionally returns 1. */
+int tdram_do_callbacks(struct td_state *s, int sid)
+{
+	(void)s;
+	(void)sid;
+
+	/* always ask for a kick */
+	return 1;
+}
+
+struct tap_disk tapdisk_ram = { /* positional initializer: entry order must match struct tap_disk, which is declared outside this chunk */
+ "tapdisk_ram", /* string identifying this driver */
+ sizeof(struct tdram_state), /* size of the driver's private state */
+ tdram_open,
+ tdram_queue_read,
+ tdram_queue_write,
+ tdram_submit,
+ tdram_get_fd,
+ tdram_close,
+ tdram_do_callbacks,
+};
+
diff --git a/tools/blktap/drivers/block-sync.c b/tools/blktap/drivers/block-sync.c
new file mode 100644
index 0000000000..77865cc1ab
--- /dev/null
+++ b/tools/blktap/drivers/block-sync.c
@@ -0,0 +1,242 @@
+/* block-sync.c
+ *
+ * simple slow synchronous raw disk implementation.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include "tapdisk.h"
+
+struct tdsync_state { /* per-disk private state of the synchronous driver */
+ int fd; /* descriptor of the image, opened by tdsync_open */
+ int poll_pipe[2]; /* dummy fd for polling on: filled by pipe(), element 0 is handed out by tdsync_get_fd */
+};
+
+/*
+ * Get Image size, secsize.
+ *
+ * Fills s->size (in sectors), s->sector_size and s->info for the image open
+ * on fd.  Block devices are sized with the BLKGETSIZE/BLKSSZGET ioctls,
+ * anything else with fstat().
+ *
+ * Returns 0 on success, -EINVAL if fstat() or BLKGETSIZE fails or the image
+ * holds no sectors.
+ */
+static int get_image_info(struct td_state *s, int fd)
+{
+	int ret;
+	struct stat st;
+
+	ret = fstat(fd, &st);
+	if (ret != 0) {
+		DPRINTF("ERROR: fstat failed, Couldn't stat image");
+		return -EINVAL;
+	}
+
+	if (S_ISBLK(st.st_mode)) {
+		/*Accessing block device directly*/
+		/*BLKGETSIZE stores an unsigned long: fetch it into a
+		  correctly typed local instead of writing through
+		  &s->size*/
+		unsigned long nr_sectors = 0;
+
+		if (ioctl(fd, BLKGETSIZE, &nr_sectors) != 0) {
+			DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
+			return -EINVAL;
+		}
+		s->size = nr_sectors;
+
+		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
+			"sector_shift [%llu]\n",
+			(long long unsigned)(s->size << SECTOR_SHIFT),
+			(long long unsigned)s->size);
+
+		/*Get the sector size*/
+#if defined(BLKSSZGET)
+		{
+			/*BLKSSZGET stores an int*/
+			int arg = 0;
+			s->sector_size = DEFAULT_SECTOR_SIZE;
+			if (ioctl(fd, BLKSSZGET, &arg) == 0 && arg > 0)
+				s->sector_size = arg;
+
+			if (s->sector_size != DEFAULT_SECTOR_SIZE)
+				DPRINTF("Note: sector size is %ld (not %d)\n",
+					s->sector_size, DEFAULT_SECTOR_SIZE);
+		}
+#else
+		s->sector_size = DEFAULT_SECTOR_SIZE;
+#endif
+
+	} else {
+		/*Local file? try fstat instead*/
+		s->size = (st.st_size >> SECTOR_SHIFT);
+		s->sector_size = DEFAULT_SECTOR_SIZE;
+		/*format fixed: "%lluu" printed a stray 'u' after each value*/
+		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
+			"sector_shift [%llu]\n",
+			(long long unsigned)(s->size << SECTOR_SHIFT),
+			(long long unsigned)s->size);
+	}
+
+	if (s->size == 0)
+		return -EINVAL;
+
+	s->info = 0;
+
+	return 0;
+}
+
+/*
+ * Open the disk file and record its geometry.
+ *
+ * Creates the dummy poll pipe, opens `name` (with O_DIRECT when supported,
+ * without it when that open fails with EINVAL) and fills s->size,
+ * s->sector_size and s->info through get_image_info().
+ *
+ * Returns 0 on success and a negative value on failure.  On failure the
+ * descriptors opened here are closed and set to -1.
+ */
+int tdsync_open (struct td_state *s, const char *name)
+{
+	int fd, ret = 0, err;
+	struct tdsync_state *prv = (struct tdsync_state *)s->private;
+
+	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
+	ret = pipe(prv->poll_pipe);
+	if (ret != 0)
+		return (0 - errno);
+
+	prv->fd = -1;
+
+	/* Open the file */
+	fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
+
+	if ( (fd == -1) && (errno == EINVAL) ) {
+
+		/* Maybe O_DIRECT isn't supported. */
+		fd = open(name, O_RDWR | O_LARGEFILE);
+		if (fd != -1) DPRINTF("WARNING: Accessing image without"
+				      "O_DIRECT! (%s)\n", name);
+
+	} else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
+
+	if (fd == -1) {
+		/* save errno before logging can change it */
+		err = errno;
+		DPRINTF("Unable to open [%s]!\n",name);
+		ret = 0 - err;
+		goto fail;
+	}
+
+	ret = get_image_info(s, fd);
+	if (ret != 0) {
+		close(fd);
+		goto fail;
+	}
+
+	prv->fd = fd;
+	return 0;
+
+fail:
+	close(prv->poll_pipe[0]);
+	close(prv->poll_pipe[1]);
+	prv->poll_pipe[0] = prv->poll_pipe[1] = -1;
+	return ret;
+}
+
+/*
+ * Synchronously read nb_sectors sectors starting at `sector` into buf, then
+ * invoke cb with 0 on success or a negative errno value on failure.  A read
+ * that returns fewer bytes than requested is reported as -EIO.
+ *
+ * Always returns 1.
+ */
+int tdsync_queue_read(struct td_state *s, uint64_t sector,
+		      int nb_sectors, char *buf, td_callback_t cb,
+		      int id, void *private)
+{
+	struct tdsync_state *prv = (struct tdsync_state *)s->private;
+	int      size   = nb_sectors * s->sector_size;
+	uint64_t offset = sector * (uint64_t)s->sector_size;
+	off_t    pos;
+	ssize_t  done;
+	int      ret = 0;
+
+	/* keep the seek result in an off_t: an int would truncate it */
+	pos = lseek(prv->fd, offset, SEEK_SET);
+	if (pos == (off_t)-1) {
+		ret = 0 - errno;
+	} else {
+		done = read(prv->fd, buf, size);
+		if (done < 0)
+			ret = 0 - errno;
+		else if (done != size)
+			ret = -EIO; /* short read: errno is not set */
+	}
+
+	cb(s, ret, id, private);
+
+	return 1;
+}
+
+/*
+ * Synchronously write nb_sectors sectors from buf starting at `sector`, then
+ * invoke cb with 0 on success or a negative errno value on failure.  A write
+ * that stores fewer bytes than requested is reported as -EIO.
+ *
+ * Always returns 1.
+ */
+int tdsync_queue_write(struct td_state *s, uint64_t sector,
+		       int nb_sectors, char *buf, td_callback_t cb,
+		       int id, void *private)
+{
+	struct tdsync_state *prv = (struct tdsync_state *)s->private;
+	int      size   = nb_sectors * s->sector_size;
+	uint64_t offset = sector * (uint64_t)s->sector_size;
+	off_t    pos;
+	ssize_t  done;
+	int      ret = 0;
+
+	/* keep the seek result in an off_t: an int would truncate it */
+	pos = lseek(prv->fd, offset, SEEK_SET);
+	if (pos == (off_t)-1) {
+		ret = 0 - errno;
+	} else {
+		done = write(prv->fd, buf, size);
+		if (done < 0)
+			ret = 0 - errno;
+		else if (done != size)
+			ret = -EIO; /* short write: errno is not set */
+	}
+
+	cb(s, ret, id, private);
+
+	return 1;
+}
+
+/*
+ * Nothing to submit: tdsync_queue_read and tdsync_queue_write perform the
+ * I/O and invoke the callback before returning.  Always returns 0.
+ */
+int tdsync_submit(struct td_state *s)
+{
+	(void)s;
+
+	return 0;
+}
+
+
+/*
+ * Build a MAX_IOFD-entry descriptor array: slot 0 holds the read end of the
+ * dummy poll pipe, every other slot is 0.
+ *
+ * Returns the heap-allocated array (presumably released by the caller --
+ * confirm), or NULL if the allocation fails.
+ */
+int *tdsync_get_fd(struct td_state *s)
+{
+	struct tdsync_state *prv = (struct tdsync_state *)s->private;
+	int *fds;
+
+	/*calloc zero-fills, which initialises the FD array*/
+	fds = calloc(MAX_IOFD, sizeof(int));
+	if (fds == NULL)
+		return NULL;
+
+	fds[0] = prv->poll_pipe[0];
+	return fds;
+}
+
+/*
+ * Close the image descriptor and both ends of the dummy poll pipe.
+ * Always returns 0.
+ */
+int tdsync_close(struct td_state *s)
+{
+	struct tdsync_state *state = (struct tdsync_state *)s->private;
+	int *pipe_fds = state->poll_pipe;
+
+	close(state->fd);
+	close(pipe_fds[0]);
+	close(pipe_fds[1]);
+
+	return 0;
+}
+
+/* No completion work is done here; the function unconditionally returns 1. */
+int tdsync_do_callbacks(struct td_state *s, int sid)
+{
+	(void)s;
+	(void)sid;
+
+	/* always ask for a kick */
+	return 1;
+}
+
+struct tap_disk tapdisk_sync = { /* positional initializer: entry order must match struct tap_disk, which is declared outside this chunk */
+ "tapdisk_sync", /* string identifying this driver */
+ sizeof(struct tdsync_state), /* size of the driver's private state */
+ tdsync_open,
+ tdsync_queue_read,
+ tdsync_queue_write,
+ tdsync_submit,
+ tdsync_get_fd,
+ tdsync_close,
+ tdsync_do_callbacks,
+};
+
diff --git a/tools/blktap/drivers/block-vmdk.c b/tools/blktap/drivers/block-vmdk.c
new file mode 100644
index 0000000000..437cd5c01f
--- /dev/null
+++ b/tools/blktap/drivers/block-vmdk.c
@@ -0,0 +1,415 @@
+/* block-vmdk.c
+ *
+ * VMware Disk format implementation.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ * This is largely the same as the vmdk driver in Qemu, I've just twisted it
+ * to match our interfaces. The original (BSDish) Copyright message appears
+ * below:
+ */
+
+/*
+ * Block driver for the VMDK format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2005 Filip Navara
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <string.h>
+#include "tapdisk.h"
+#include "bswap.h"
+
+/*
+ * free() the pointer held in lvalue _x (if it is not NULL) and reset it to
+ * NULL.  Expands to a single statement: the caller supplies the trailing
+ * semicolon, so it is safe inside an unbraced if/else.
+ */
+#define safer_free(_x)       \
+    do {                     \
+        if (NULL != (_x)) {  \
+            free(_x);        \
+            (_x) = NULL;     \
+        }                    \
+    } while (0)
+
+/*
+ * File signatures ("COWD" and "KDMV").  tdvmdk_open() compares them with
+ * the first four bytes of the image interpreted as a big-endian 32-bit value.
+ */
+#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
+#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
+
+/*
+ * Header that follows the 4-byte magic in a VMDK3 image.  tdvmdk_open()
+ * reads it straight from the file and decodes the fields it uses as
+ * little-endian.
+ */
+typedef struct {
+ uint32_t version;
+ uint32_t flags;
+ uint32_t disk_sectors; /* becomes s->size */
+ uint32_t granularity; /* becomes cluster_sectors */
+ uint32_t l1dir_offset; /* in 512-byte units; shifted left by 9 for the L1 table offset */
+ uint32_t l1dir_size;
+ uint32_t file_sectors;
+ uint32_t cylinders;
+ uint32_t heads;
+ uint32_t sectors_per_track;
+} VMDK3Header;
+
+/*
+ * Header that follows the 4-byte magic in a VMDK4 image.  The struct is
+ * packed because it is read directly from the file by tdvmdk_open().
+ */
+typedef struct {
+ uint32_t version;
+ uint32_t flags;
+ int64_t capacity; /* becomes s->size */
+ int64_t granularity; /* becomes cluster_sectors */
+ int64_t desc_offset;
+ int64_t desc_size;
+ int32_t num_gtes_per_gte; /* becomes l2_size (entries per L2 table) */
+ int64_t rgd_offset; /* in 512-byte units; source of l1_table_offset */
+ int64_t gd_offset; /* in 512-byte units; source of l1_backup_table_offset */
+ int64_t grain_offset;
+ char filler[1];
+ char check_bytes[4];
+} __attribute__((packed)) VMDK4Header;
+
+/* Number of L2 tables kept in memory by get_cluster_offset(). */
+#define L2_CACHE_SIZE 16
+
+/* Per-disk private state of the VMDK driver. */
+struct tdvmdk_state {
+ int fd;
+ int poll_pipe[2]; /* dummy fd for polling on */
+
+ unsigned int l1_size; /* number of entries in l1_table */
+ int64_t l1_table_offset; /* byte offset of the L1 table in the file */
+ int64_t l1_backup_table_offset; /* byte offset of the backup L1 table, 0 if none */
+ uint32_t l1_entry_sectors; /* sectors covered by one L1 entry (l2_size * cluster_sectors) */
+ unsigned int l2_size; /* number of entries in one L2 table */
+
+ uint32_t *l1_table;
+ uint32_t *l1_backup_table;
+ uint32_t *l2_cache; /* L2_CACHE_SIZE tables of l2_size entries each */
+ uint32_t l2_cache_offsets[L2_CACHE_SIZE]; /* L1 entry value cached in each slot */
+ uint32_t l2_cache_counts[L2_CACHE_SIZE]; /* hit counter per slot, used to pick a victim */
+
+ unsigned int cluster_sectors; /* sectors per cluster */
+};
+
+
+/*
+ * Open a VMDK3/VMDK4 image and load its L1 table(s).
+ *
+ * s->private must point to a struct tdvmdk_state; it does not need to be
+ * zeroed, every field used later is initialised here.  On success s->size
+ * holds the disk size taken from the header and 0 is returned.  On any
+ * failure everything acquired so far (tables, image fd, poll pipe) is
+ * released and -1 is returned.
+ */
+static int tdvmdk_open (struct td_state *s, const char *name)
+{
+    int fd = -1;
+    unsigned int i;
+    size_t l1_bytes;
+    uint32_t magic;
+    struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+
+    /*
+     * Callers may hand us malloc()ed (non-zeroed) private state, so make
+     * the pointers safe to free and the L2 cache bookkeeping empty before
+     * anything can fail or be looked up.
+     */
+    prv->fd = -1;
+    prv->l1_table = NULL;
+    prv->l1_backup_table = NULL;
+    prv->l2_cache = NULL;
+    memset(prv->l2_cache_offsets, 0, sizeof(prv->l2_cache_offsets));
+    memset(prv->l2_cache_counts, 0, sizeof(prv->l2_cache_counts));
+
+    /* set up a pipe so that we can hand back a poll fd that won't fire.*/
+    if (pipe(prv->poll_pipe) != 0)
+        return -1;
+
+    /* Open the file */
+    fd = open(name, O_RDWR | O_LARGEFILE);
+    if (fd == -1) {
+        DPRINTF("Unable to open [%s]!\n",name);
+        goto fail;
+    }
+    DPRINTF("open(%s)\n", name);
+
+    prv->fd = fd;
+
+    /* Grok the vmdk header. */
+    if (read(fd, &magic, sizeof(magic)) != sizeof(magic))
+        goto fail;
+    magic = be32_to_cpu(magic);
+    if (magic == VMDK3_MAGIC) {
+        VMDK3Header header;
+
+        if (read(fd, &header, sizeof(header)) != sizeof(header))
+            goto fail;
+        prv->cluster_sectors = le32_to_cpu(header.granularity);
+        prv->l2_size = 1 << 9;
+        prv->l1_size = 1 << 6;
+        s->size = le32_to_cpu(header.disk_sectors);
+        /* widen before shifting so large directory offsets survive */
+        prv->l1_table_offset =
+            (int64_t)le32_to_cpu(header.l1dir_offset) << 9;
+        prv->l1_backup_table_offset = 0;
+        prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors;
+    } else if (magic == VMDK4_MAGIC) {
+        VMDK4Header header;
+
+        if (read(fd, &header, sizeof(header)) != sizeof(header))
+            goto fail;
+        /* capacity and granularity are 64-bit fields */
+        s->size = le64_to_cpu(header.capacity);
+        prv->cluster_sectors = le64_to_cpu(header.granularity);
+        prv->l2_size = le32_to_cpu(header.num_gtes_per_gte);
+        prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors;
+        if (prv->l1_entry_sectors == 0)
+            goto fail;
+        prv->l1_size = (s->size + prv->l1_entry_sectors - 1)
+            / prv->l1_entry_sectors;
+        prv->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
+        prv->l1_backup_table_offset =
+            le64_to_cpu(header.gd_offset) << 9;
+    } else {
+        goto fail;
+    }
+
+    /* a zero cluster or table size would divide by zero on every lookup */
+    if (prv->l1_entry_sectors == 0)
+        goto fail;
+
+    /* read the L1 table */
+    l1_bytes = (size_t)prv->l1_size * sizeof(uint32_t);
+    prv->l1_table = malloc(l1_bytes);
+    if (!prv->l1_table)
+        goto fail;
+    if (lseek(fd, prv->l1_table_offset, SEEK_SET) == (off_t)-1)
+        goto fail;
+    if (read(fd, prv->l1_table, l1_bytes) != (ssize_t)l1_bytes)
+        goto fail;
+    for (i = 0; i < prv->l1_size; i++) {
+        le32_to_cpus(&prv->l1_table[i]);
+    }
+
+    if (prv->l1_backup_table_offset) {
+        prv->l1_backup_table = malloc(l1_bytes);
+        if (!prv->l1_backup_table)
+            goto fail;
+        if (lseek(fd, prv->l1_backup_table_offset, SEEK_SET) == (off_t)-1)
+            goto fail;
+        if (read(fd, prv->l1_backup_table, l1_bytes) != (ssize_t)l1_bytes)
+            goto fail;
+        for (i = 0; i < prv->l1_size; i++) {
+            le32_to_cpus(&prv->l1_backup_table[i]);
+        }
+    }
+
+    prv->l2_cache = malloc((size_t)prv->l2_size * L2_CACHE_SIZE *
+                           sizeof(uint32_t));
+    if (!prv->l2_cache)
+        goto fail;
+
+    DPRINTF("VMDK File opened successfully\n");
+    return 0;
+
+fail:
+    DPRINTF("VMDK File open failed.\n");
+    safer_free(prv->l1_backup_table);
+    safer_free(prv->l1_table);
+    safer_free(prv->l2_cache);
+    if (fd != -1)
+        close(fd);
+    prv->fd = -1;
+    close(prv->poll_pipe[0]);
+    close(prv->poll_pipe[1]);
+    return -1;
+}
+
+/*
+ * Translate a byte offset within the virtual disk into the byte offset of
+ * the cluster that backs it in the image file.
+ *
+ * s        - disk whose private data is a struct tdvmdk_state
+ * offset   - byte offset into the virtual disk
+ * allocate - when non-zero, a missing cluster is appended to the file and
+ *            recorded in the L2 table (and in the backup L2 table if any)
+ *
+ * Returns the cluster's byte offset in the file, or 0 when the cluster is
+ * not allocated (and allocate is 0), the offset is beyond the L1 table, or
+ * an I/O error occurred.  The file position of prv->fd is left undefined.
+ */
+static uint64_t get_cluster_offset(struct td_state *s,
+                                   uint64_t offset, int allocate)
+{
+    struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+    unsigned int l1_index, l2_offset, l2_index;
+    int min_index, i, j;
+    uint32_t min_count, *l2_table, tmp;
+    uint64_t cluster_offset, l1_index64;
+    size_t l2_bytes = (size_t)prv->l2_size * sizeof(uint32_t);
+    off_t end;
+
+    /* bounds-check in 64 bits so a huge offset cannot wrap into range */
+    l1_index64 = (offset >> 9) / prv->l1_entry_sectors;
+    if (l1_index64 >= prv->l1_size)
+        return 0;
+    l1_index = (unsigned int)l1_index64;
+    l2_offset = prv->l1_table[l1_index];
+    if (!l2_offset)
+        return 0;
+    for (i = 0; i < L2_CACHE_SIZE; i++) {
+        if (l2_offset == prv->l2_cache_offsets[i]) {
+            /* increment the hit count */
+            if (++prv->l2_cache_counts[i] == 0xffffffff) {
+                for (j = 0; j < L2_CACHE_SIZE; j++) {
+                    prv->l2_cache_counts[j] >>= 1;
+                }
+            }
+            l2_table = prv->l2_cache + (i * prv->l2_size);
+            goto found;
+        }
+    }
+    /* not found: load a new entry in the least used one */
+    min_index = 0;
+    min_count = 0xffffffff;
+    for (i = 0; i < L2_CACHE_SIZE; i++) {
+        if (prv->l2_cache_counts[i] < min_count) {
+            min_count = prv->l2_cache_counts[i];
+            min_index = i;
+        }
+    }
+    l2_table = prv->l2_cache + (min_index * prv->l2_size);
+    /*
+     * Invalidate the slot before overwriting it: if the read below fails
+     * part-way the old tag must not keep pointing at clobbered data.
+     * (A tag of 0 never matches, since a zero L1 entry returns early.)
+     */
+    prv->l2_cache_offsets[min_index] = 0;
+    prv->l2_cache_counts[min_index] = 0;
+    if (lseek(prv->fd, (int64_t)l2_offset * 512, SEEK_SET) == (off_t)-1)
+        return 0;
+    if (read(prv->fd, l2_table, l2_bytes) != (ssize_t)l2_bytes)
+        return 0;
+    prv->l2_cache_offsets[min_index] = l2_offset;
+    prv->l2_cache_counts[min_index] = 1;
+ found:
+    l2_index = ((offset >> 9) / prv->cluster_sectors) % prv->l2_size;
+    cluster_offset = le32_to_cpu(l2_table[l2_index]);
+    if (!cluster_offset) {
+        if (!allocate)
+            return 0;
+        /* the new cluster goes at the current end of the file */
+        end = lseek(prv->fd, 0, SEEK_END);
+        if (end == (off_t)-1)
+            return 0;
+        cluster_offset = (uint64_t)end >> 9;
+        /* L2 entries are 32 bits wide; refuse what cannot be recorded */
+        if (cluster_offset > 0xffffffffULL)
+            return 0;
+        if (ftruncate(prv->fd, end +
+                      ((off_t)prv->cluster_sectors << 9)) != 0)
+            return 0;
+        /* update L2 table on disk first, then the cached copy */
+        tmp = cpu_to_le32(cluster_offset);
+        if (lseek(prv->fd, ((int64_t)l2_offset * 512) +
+                  (l2_index * sizeof(tmp)), SEEK_SET) == (off_t)-1)
+            return 0;
+        if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+            return 0;
+        l2_table[l2_index] = tmp;
+        /* update backup L2 table */
+        if (prv->l1_backup_table_offset != 0) {
+            l2_offset = prv->l1_backup_table[l1_index];
+            if (lseek(prv->fd, ((int64_t)l2_offset * 512) +
+                      (l2_index * sizeof(tmp)), SEEK_SET) == (off_t)-1)
+                return 0;
+            if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+                return 0;
+        }
+    }
+    cluster_offset <<= 9;
+    return cluster_offset;
+}
+
+/*
+ * Read nb_sectors 512-byte sectors starting at sector into buf.
+ *
+ * The request is split at cluster boundaries.  Clusters for which
+ * get_cluster_offset() returns 0 are returned as zeroes.  The I/O is done
+ * before returning and cb is invoked with 0 on success or -1 on failure.
+ *
+ * Always returns 1.
+ */
+static int tdvmdk_queue_read(struct td_state *s, uint64_t sector,
+                             int nb_sectors, char *buf, td_callback_t cb,
+                             int id, void *private)
+{
+    struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+    int index_in_cluster, n;
+    uint64_t cluster_offset;
+    int ret = 0;
+
+    while (nb_sectors > 0) {
+        cluster_offset = get_cluster_offset(s, sector << 9, 0);
+        index_in_cluster = sector % prv->cluster_sectors;
+        n = prv->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors)
+            n = nb_sectors;
+        if (!cluster_offset) {
+            memset(buf, 0, 512 * n);
+        } else {
+            /* a failed seek would otherwise read from a stale position */
+            if (lseek(prv->fd, cluster_offset + index_in_cluster * 512,
+                      SEEK_SET) == (off_t)-1 ||
+                read(prv->fd, buf, n * 512) != n * 512) {
+                ret = -1;
+                break;
+            }
+        }
+        nb_sectors -= n;
+        sector += n;
+        buf += n * 512;
+    }
+
+    cb(s, ret, id, private);
+
+    return 1;
+}
+
+/*
+ * Write nb_sectors 512-byte sectors from buf starting at sector.
+ *
+ * The request is split at cluster boundaries and missing clusters are
+ * allocated through get_cluster_offset().  The I/O is done before
+ * returning and cb is invoked with 0 on success or -1 on failure.
+ *
+ * Always returns 1.
+ */
+static int tdvmdk_queue_write(struct td_state *s, uint64_t sector,
+                              int nb_sectors, char *buf, td_callback_t cb,
+                              int id, void *private)
+{
+    struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+    int index_in_cluster, n;
+    uint64_t cluster_offset;
+    int ret = 0;
+
+    while (nb_sectors > 0) {
+        /*
+         * Use the same modulo as the read path; masking with
+         * (cluster_sectors - 1) is only right for power-of-two sizes.
+         */
+        index_in_cluster = sector % prv->cluster_sectors;
+        n = prv->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors)
+            n = nb_sectors;
+        cluster_offset = get_cluster_offset(s, sector << 9, 1);
+        if (!cluster_offset) {
+            ret = -1;
+            break;
+        }
+        /* a failed seek would otherwise write at a stale position */
+        if (lseek(prv->fd, cluster_offset + index_in_cluster * 512,
+                  SEEK_SET) == (off_t)-1 ||
+            write(prv->fd, buf, n * 512) != n * 512) {
+            ret = -1;
+            break;
+        }
+        nb_sectors -= n;
+        sector += n;
+        buf += n * 512;
+    }
+
+    cb(s, ret, id, private);
+
+    return 1;
+}
+
+/* Submit hook of the VMDK driver: does nothing and returns 0. */
+static int tdvmdk_submit(struct td_state *s)
+{
+    (void)s;
+
+    return 0;
+}
+
+
+/*
+ * Build the array of descriptors the caller should poll for this disk.
+ *
+ * Returns a heap-allocated array of MAX_IOFD ints, zero-filled except for
+ * slot 0 which holds the read end of the driver's poll pipe, or NULL if the
+ * allocation fails.
+ */
+static int *tdvmdk_get_fd(struct td_state *s)
+{
+    struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+    int *fds;
+
+    /* calloc() gives the zero-initialised array in one step */
+    fds = calloc(MAX_IOFD, sizeof(int));
+    if (fds == NULL)
+        return NULL;
+
+    fds[0] = prv->poll_pipe[0];
+    return fds;
+}
+
+/*
+ * Release the L1/L2 tables, then close the image descriptor and both ends
+ * of the poll pipe.  Returns 0.
+ */
+static int tdvmdk_close(struct td_state *s)
+{
+    struct tdvmdk_state *state = (struct tdvmdk_state *)s->private;
+    int end;
+
+    safer_free(state->l1_table);
+    safer_free(state->l1_backup_table);
+    safer_free(state->l2_cache);
+
+    close(state->fd);
+    for (end = 0; end < 2; end++)
+        close(state->poll_pipe[end]);
+
+    return 0;
+}
+
+/* Callback hook of the VMDK driver: unconditionally returns 1. */
+static int tdvmdk_do_callbacks(struct td_state *s, int sid)
+{
+    (void)s;
+    (void)sid;
+
+    /* always ask for a kick */
+    return 1;
+}
+
+/*
+ * Driver descriptor for the VMDK backend.
+ *
+ * The initialiser is positional, so the order of the entries below must
+ * match the member order of struct tap_disk (declared outside this file).
+ */
+struct tap_disk tapdisk_vmdk = {
+ "tapdisk_vmdk",
+ sizeof(struct tdvmdk_state),
+ tdvmdk_open,
+ tdvmdk_queue_read,
+ tdvmdk_queue_write,
+ tdvmdk_submit,
+ tdvmdk_get_fd,
+ tdvmdk_close,
+ tdvmdk_do_callbacks,
+};
+
diff --git a/tools/blktap/drivers/bswap.h b/tools/blktap/drivers/bswap.h
new file mode 100644
index 0000000000..bb9de92b25
--- /dev/null
+++ b/tools/blktap/drivers/bswap.h
@@ -0,0 +1,202 @@
+#ifndef BSWAP_H
+#define BSWAP_H
+
+//#include "config-host.h"
+
+#include <inttypes.h>
+
+#ifdef HAVE_BYTESWAP_H
+#include <byteswap.h>
+#else
+
+/*
+ * Fallback byte-swap macros, used only when HAVE_BYTESWAP_H is not defined.
+ * Each one is a GCC statement expression that copies its argument into a
+ * local first, so the argument is evaluated exactly once.
+ */
+
+/* Swap the two bytes of a 16-bit value. */
+#define bswap_16(x) \
+({ \
+ uint16_t __x = (x); \
+ ((uint16_t)( \
+ (((uint16_t)(__x) & (uint16_t)0x00ffU) << 8) | \
+ (((uint16_t)(__x) & (uint16_t)0xff00U) >> 8) )); \
+})
+
+/* Reverse the four bytes of a 32-bit value. */
+#define bswap_32(x) \
+({ \
+ uint32_t __x = (x); \
+ ((uint32_t)( \
+ (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
+ (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \
+ (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \
+ (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \
+})
+
+/* Reverse the eight bytes of a 64-bit value. */
+#define bswap_64(x) \
+({ \
+ uint64_t __x = (x); \
+ ((uint64_t)( \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) << 8) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \
+})
+
+#endif /* !HAVE_BYTESWAP_H */
+
+/* Typed wrappers around the bswap_16/32/64 macros. */
+
+/* Return x with its two bytes swapped. */
+static inline uint16_t bswap16(uint16_t x)
+{
+    uint16_t swapped = bswap_16(x);
+
+    return swapped;
+}
+
+/* Return x with its four bytes reversed. */
+static inline uint32_t bswap32(uint32_t x)
+{
+    uint32_t swapped = bswap_32(x);
+
+    return swapped;
+}
+
+/* Return x with its eight bytes reversed. */
+static inline uint64_t bswap64(uint64_t x)
+{
+    uint64_t swapped = bswap_64(x);
+
+    return swapped;
+}
+
+/* In-place variants: byte-swap the value that s points to. */
+
+static inline void bswap16s(uint16_t *s)
+{
+    uint16_t value = *s;
+
+    *s = bswap16(value);
+}
+
+static inline void bswap32s(uint32_t *s)
+{
+    uint32_t value = *s;
+
+    *s = bswap32(value);
+}
+
+static inline void bswap64s(uint64_t *s)
+{
+    uint64_t value = *s;
+
+    *s = bswap64(value);
+}
+
+/*
+ * Helper macros consumed by CPU_CONVERT below (and #undef'd at the end of
+ * this header).  For the host's own byte order the value form is the
+ * identity and the in-place form expands to nothing; for the other byte
+ * order they call bswap<size>().  The in-place forms carry their own
+ * trailing semicolon because CPU_CONVERT uses them without one.
+ */
+#if defined(WORDS_BIGENDIAN)
+#define be_bswap(v, size) (v)
+#define le_bswap(v, size) bswap ## size(v)
+#define be_bswaps(v, size)
+#define le_bswaps(p, size) *p = bswap ## size(*p);
+#else
+#define le_bswap(v, size) (v)
+#define be_bswap(v, size) bswap ## size(v)
+#define le_bswaps(v, size)
+#define be_bswaps(p, size) *p = bswap ## size(*p);
+#endif
+
+/*
+ * Generate the conversion helpers for one byte order and width.  For
+ * endian = le/be and size = 16/32/64 this defines:
+ *
+ *   <endian><size>_to_cpu(v)       value: file order -> host order
+ *   cpu_to_<endian><size>(v)       value: host order -> file order
+ *   <endian><size>_to_cpus(p)      in place: file order -> host order
+ *   cpu_to_<endian><size>s(p)      in place: host order -> file order
+ *   <endian><size>_to_cpup(p)      load *p and convert to host order
+ *   cpu_to_<endian><size>w(p, v)   convert v and store it in *p
+ */
+#define CPU_CONVERT(endian, size, type)\
+static inline type endian ## size ## _to_cpu(type v)\
+{\
+ return endian ## _bswap(v, size);\
+}\
+\
+static inline type cpu_to_ ## endian ## size(type v)\
+{\
+ return endian ## _bswap(v, size);\
+}\
+\
+static inline void endian ## size ## _to_cpus(type *p)\
+{\
+ endian ## _bswaps(p, size)\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## s(type *p)\
+{\
+ endian ## _bswaps(p, size)\
+}\
+\
+static inline type endian ## size ## _to_cpup(const type *p)\
+{\
+ return endian ## size ## _to_cpu(*p);\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\
+{\
+ *p = cpu_to_ ## endian ## size(v);\
+}
+
+/* Instantiate the helpers for big- and little-endian 16/32/64-bit values. */
+CPU_CONVERT(be, 16, uint16_t)
+CPU_CONVERT(be, 32, uint32_t)
+CPU_CONVERT(be, 64, uint64_t)
+
+CPU_CONVERT(le, 16, uint16_t)
+CPU_CONVERT(le, 32, uint32_t)
+CPU_CONVERT(le, 64, uint64_t)
+
+/* unaligned versions (optimized for frequent unaligned accesses)*/
+
+/*
+ * On i386 and powerpc the "unaligned" helpers are plain aliases of the
+ * pointer helpers generated by CPU_CONVERT; every other target uses the
+ * byte-by-byte functions in the #else branch.
+ */
+#if defined(__i386__) || defined(__powerpc__)
+
+#define cpu_to_le16wu(p, v) cpu_to_le16w(p, v)
+#define cpu_to_le32wu(p, v) cpu_to_le32w(p, v)
+#define le16_to_cpupu(p) le16_to_cpup(p)
+#define le32_to_cpupu(p) le32_to_cpup(p)
+
+#define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
+#define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)
+
+#else
+
+/* Store v at p as two little-endian bytes, one byte at a time. */
+static inline void cpu_to_le16wu(uint16_t *p, uint16_t v)
+{
+    uint8_t *bytes = (uint8_t *)p;
+    int k;
+
+    for (k = 0; k < 2; k++)
+        bytes[k] = (uint8_t)(v >> (8 * k));
+}
+
+/* Store v at p as four little-endian bytes, one byte at a time. */
+static inline void cpu_to_le32wu(uint32_t *p, uint32_t v)
+{
+    uint8_t *bytes = (uint8_t *)p;
+    int k;
+
+    for (k = 0; k < 4; k++)
+        bytes[k] = (uint8_t)(v >> (8 * k));
+}
+
+/* Load a little-endian 16-bit value from p, one byte at a time. */
+static inline uint16_t le16_to_cpupu(const uint16_t *p)
+{
+    const uint8_t *bytes = (const uint8_t *)p;
+    int low = bytes[0];
+    int high = bytes[1];
+
+    return (high << 8) | low;
+}
+
+/*
+ * Load a little-endian 32-bit value from p, one byte at a time.
+ *
+ * Each byte is widened to uint32_t before shifting: a uint8_t promotes to
+ * (signed) int, and shifting a byte >= 0x80 left by 24 would overflow it.
+ */
+static inline uint32_t le32_to_cpupu(const uint32_t *p)
+{
+    const uint8_t *p1 = (const uint8_t *)p;
+
+    return (uint32_t)p1[0] |
+           ((uint32_t)p1[1] << 8) |
+           ((uint32_t)p1[2] << 16) |
+           ((uint32_t)p1[3] << 24);
+}
+
+/* Store v at p as two big-endian bytes, one byte at a time. */
+static inline void cpu_to_be16wu(uint16_t *p, uint16_t v)
+{
+    uint8_t *bytes = (uint8_t *)p;
+    int k;
+
+    for (k = 0; k < 2; k++)
+        bytes[k] = (uint8_t)(v >> (8 * (1 - k)));
+}
+
+/* Store v at p as four big-endian bytes, one byte at a time. */
+static inline void cpu_to_be32wu(uint32_t *p, uint32_t v)
+{
+    uint8_t *bytes = (uint8_t *)p;
+    int k;
+
+    for (k = 0; k < 4; k++)
+        bytes[k] = (uint8_t)(v >> (8 * (3 - k)));
+}
+
+#endif
+
+/* cpu_to_32wu: unaligned 32-bit store in the host's own byte order. */
+#ifdef WORDS_BIGENDIAN
+#define cpu_to_32wu cpu_to_be32wu
+#else
+#define cpu_to_32wu cpu_to_le32wu
+#endif
+
+#undef le_bswap
+#undef be_bswap
+#undef le_bswaps
+#undef be_bswaps
+
+#endif /* BSWAP_H */
diff --git a/tools/blktap/drivers/img2qcow.c b/tools/blktap/drivers/img2qcow.c
new file mode 100644
index 0000000000..2c9974c7fc
--- /dev/null
+++ b/tools/blktap/drivers/img2qcow.c
@@ -0,0 +1,289 @@
+/* img2qcow.c
+ *
+ * Generates a qcow format disk and fills it from an existing image.
+ *
+ * (c) 2006 Julian Chesterfield and Andrew Warfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <string.h>
+#include "tapdisk.h"
+
+#if 1
+#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
+#else
+#define DFPRINTF(_f, _a...) ((void)0)
+#endif
+
+#define TAPDISK 1
+#define BLOCK_PROCESSSZ 4096
+
+static int maxfds, *io_fd, running = 1, complete = 0;
+static int returned_events = 0, submit_events = 0;
+static uint64_t prev = 0;
+static char output[25];
+
+/*
+ * Debug helper: dump length bytes starting at ptr to stderr as hex.
+ *
+ * Bytes are printed as two hex digits each, grouped in pairs, sixteen
+ * bytes per line.
+ */
+void print_bytes(void *ptr, int length) {
+
+    int k;
+    unsigned char *p = ptr;
+
+    DFPRINTF("Buf dump, length %d:\n",length);
+    for (k = 0; k < length; k++) {
+        /* %02x keeps every byte two digits wide so the dump is unambiguous */
+        DFPRINTF("%02x", p[k]);
+        /* count completed bytes (k + 1) so groups start at the first byte */
+        if ((k + 1) % 16 == 0) DFPRINTF("\n");
+        else if ((k + 1) % 2 == 0) DFPRINTF(" ");
+    }
+    DFPRINTF("\n");
+    return;
+}
+
+/*
+ * Advance the textual progress bar held in the file-scope `output` buffer.
+ *
+ * progress - amount processed so far
+ * size     - total amount to process (same unit as progress)
+ *
+ * Each call moves the bar forward by at most one 5% step and reprints it
+ * on stderr.  Nothing happens for totals smaller than 20 units or once
+ * all 20 steps have been drawn.
+ */
+void debug_output(uint64_t progress, uint64_t size)
+{
+    uint64_t blocks = size/20;
+
+    /* totals below 20 would make the step size zero (division by zero) */
+    if (blocks == 0)
+        return;
+
+    /*
+     * main() puts '[' at output[0] and ']' at output[22]; the two-byte
+     * "=>" written at output[prev + 1] must stay inside output[1..21].
+     */
+    if (prev >= 20)
+        return;
+
+    /*Output progress every 5% */
+    if (progress/blocks > prev) {
+        memcpy(output+prev+1,"=>",2);
+        prev++;
+        DFPRINTF("\r%s %llu%%", output,
+                 (unsigned long long)(prev-1)*5);
+    }
+    return;
+}
+
+/*
+ * Prepare readfds for select(): clear it, add the driver's poll descriptor
+ * (io_fd[0]) and record io_fd[0] + 1 in maxfds.
+ */
+static inline void LOCAL_FD_SET(fd_set *readfds)
+{
+    /* the caller's fd_set is an uninitialised local; start from empty */
+    FD_ZERO(readfds);
+    FD_SET(io_fd[0], readfds);
+    maxfds = io_fd[0] + 1;
+
+    return;
+}
+
+/*
+ * Fill in s->size (in sectors) and s->sector_size for the open image fd.
+ *
+ * Block devices are queried with the BLKGETSIZE (and, when available,
+ * BLKSSZGET) ioctls; anything else is sized from fstat() using
+ * DEFAULT_SECTOR_SIZE.
+ *
+ * Returns 0 on success, -EINVAL if the image cannot be examined.
+ */
+static int get_image_info(struct td_state *s, int fd)
+{
+    int ret;
+    struct stat stat;
+
+    ret = fstat(fd, &stat);
+    if (ret != 0) {
+        DFPRINTF("ERROR: fstat failed, Couldn't stat image");
+        return -EINVAL;
+    }
+
+    if (S_ISBLK(stat.st_mode)) {
+        /*Accessing block device directly*/
+        s->size = 0;
+        if (ioctl(fd,BLKGETSIZE,&s->size)!=0) {
+            DFPRINTF("ERR: BLKGETSIZE failed, "
+                     "couldn't stat image");
+            return -EINVAL;
+        }
+
+        DFPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
+                 "sector_shift [%llu]\n",
+                 (long long unsigned)(s->size << SECTOR_SHIFT),
+                 (long long unsigned)s->size);
+
+        /*Get the sector size*/
+#if defined(BLKSSZGET)
+        {
+            /* keep the default if the ioctl fails */
+            s->sector_size = DEFAULT_SECTOR_SIZE;
+            ioctl(fd, BLKSSZGET, &s->sector_size);
+
+            if (s->sector_size != DEFAULT_SECTOR_SIZE)
+                DFPRINTF("Note: sector size is %ld (not %d)\n",
+                         s->sector_size, DEFAULT_SECTOR_SIZE);
+        }
+#else
+        s->sector_size = DEFAULT_SECTOR_SIZE;
+#endif
+
+    } else {
+        /*Local file? try fstat instead*/
+        s->size = (stat.st_size >> SECTOR_SHIFT);
+        s->sector_size = DEFAULT_SECTOR_SIZE;
+        DFPRINTF("Image size: [%llu]\n",
+                 (long long unsigned)s->size);
+    }
+
+    return 0;
+}
+
+/*
+ * Completion callback for queued writes: logs a failure if res is
+ * negative, counts the completion and frees the buffer passed as private.
+ * Always returns 0.
+ */
+static int send_responses(struct td_state *s, int res, int idx, void *private)
+{
+    if (res < 0) {
+        DFPRINTF("AIO FAILURE: res [%d]!\n",res);
+    }
+
+    returned_events += 1;
+    free(private);
+
+    return 0;
+}
+
+/*
+ * img2qcow <QCOW FILENAME> <SRC IMAGE>
+ *
+ * Creates a qcow file sized after the source image and copies the image
+ * into it in BLOCK_PROCESSSZ chunks through the tapdisk_qcow driver,
+ * drawing a progress bar on stderr.  Exits with -1 on any setup failure.
+ */
+int main(int argc, char *argv[])
+{
+    struct tap_disk *drv;
+    struct td_state *s;
+    int ret = -1, fd, len;
+    fd_set readfds;
+    struct timeval timeout;
+    uint64_t i;
+    char *buf;
+
+    if (argc != 3) {
+        fprintf(stderr, "Qcow-utils: v1.0.0\n");
+        fprintf(stderr, "usage: %s <QCOW FILENAME> <SRC IMAGE>\n",
+                argv[0]);
+        exit(-1);
+    }
+
+    s = malloc(sizeof(struct td_state));
+    if (s == NULL) {
+        DFPRINTF("Unable to allocate disk state\n");
+        exit(-1);
+    }
+
+    /*Open image*/
+    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
+
+    if (fd == -1) {
+        DFPRINTF("Unable to open [%s], (err %d)!\n",argv[2],0 - errno);
+        exit(-1);
+    }
+
+    /* s->size is used below; do not continue with an unknown size */
+    if (get_image_info(s, fd) != 0) {
+        DFPRINTF("Unable to determine size of [%s]\n", argv[2]);
+        exit(-1);
+    }
+
+    /*Create qcow file*/
+    ret = qcow_create(argv[1],s->size<<SECTOR_SHIFT,NULL,0);
+
+    if (ret < 0) {
+        DFPRINTF("Unable to create QCOW file\n");
+        exit(-1);
+    } else DFPRINTF("Qcow file created: size %llu sectors\n",
+                    (long long unsigned)s->size);
+
+    drv = &tapdisk_qcow;
+    s->private = malloc(drv->private_data_size);
+    if (s->private == NULL) {
+        DFPRINTF("Unable to allocate driver state\n");
+        exit(-1);
+    }
+
+    /*Open qcow file*/
+    if (drv->td_open(s, argv[1])!=0) {
+        DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]);
+        exit(-1);
+    }
+
+    io_fd = drv->td_get_fd(s);
+
+    /*Initialise the output string*/
+    memset(output,0x20,25);
+    output[0] = '[';
+    output[22] = ']';
+    output[23] = '\0';
+    DFPRINTF("%s",output);
+
+    i = 0;
+    while (running) {
+        timeout.tv_sec = 0;
+
+        if (!complete) {
+            /*Read sector from image*/
+            if (lseek(fd, i, SEEK_SET) == (off_t)-1) {
+                DFPRINTF("Unable to access file offset %llu\n",
+                         (long long)i);
+                exit(-1);
+            }
+
+            if( (ret = posix_memalign((void **)&buf,
+                                      BLOCK_PROCESSSZ,
+                                      BLOCK_PROCESSSZ)) != 0) {
+                DFPRINTF("Unable to read memalign buf (%d)\n",ret);
+                exit(-1);
+            }
+
+            /*We attempt to read 4k sized blocks*/
+            len = read(fd, buf, BLOCK_PROCESSSZ);
+            if (len < 512) {
+                DFPRINTF("Unable to read sector %llu\n",
+                         (long long unsigned) (i >> 9));
+                /* this buffer was never queued, so nobody else frees it */
+                free(buf);
+                complete = 1;
+                continue;
+            }
+
+            if (len % 512) {
+                len = (len >> 9) << 9;
+            }
+
+            /* on completion send_responses() frees buf */
+            ret = drv->td_queue_write(s, i >> 9,
+                                      len >> 9, buf,
+                                      send_responses, 0, buf);
+
+            if (!ret) submit_events++;
+
+            if (ret < 0) {
+                DFPRINTF("UNABLE TO WRITE block [%llu]\n",
+                         (long long unsigned) (i >> 9));
+            } else i += len;
+
+            if (i >> 9 == s->size) complete = 1;
+
+            debug_output(i,s->size << 9);
+
+            if ((submit_events % 10 == 0) || complete)
+                drv->td_submit(s);
+            timeout.tv_usec = 0;
+
+        } else {
+            timeout.tv_usec = 1000;
+            if (!submit_events) running = 0;
+        }
+
+
+        /*Check AIO FD*/
+        LOCAL_FD_SET(&readfds);
+        ret = select(maxfds + 1, &readfds, (fd_set *) 0,
+                     (fd_set *) 0, &timeout);
+
+        if (ret > 0) drv->td_do_callbacks(s, 0);
+        if (complete && (returned_events == submit_events))
+            running = 0;
+    }
+    memcpy(output+prev+1,"=",1);
+    DFPRINTF("\r%s 100%%\nTRANSFER COMPLETE\n\n", output);
+    drv->td_close(s);
+    free(s->private);
+    free(s);
+    close(fd);
+
+    return 0;
+}
diff --git a/tools/blktap/drivers/qcow-create.c b/tools/blktap/drivers/qcow-create.c
new file mode 100644
index 0000000000..be473934e8
--- /dev/null
+++ b/tools/blktap/drivers/qcow-create.c
@@ -0,0 +1,80 @@
+/* qcow-create.c
+ *
+ * Generates a qcow format disk.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <string.h>
+#include "tapdisk.h"
+
+#if 1
+#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
+#else
+#define DFPRINTF(_f, _a...) ((void)0)
+#endif
+
+
+/*
+ * qcow-create <SIZE(MB)> <FILENAME> [<BACKING_FILENAME>]
+ *
+ * Creates a qcow image of the requested size, optionally on top of a
+ * backing file.  Exits with a non-zero status if the arguments are invalid
+ * or the image cannot be created.
+ */
+int main(int argc, char *argv[])
+{
+    int ret = -1;
+    uint64_t size;
+    unsigned long long mbytes;
+    char *end;
+
+    if ( (argc < 3) || (argc > 4) ) {
+        fprintf(stderr, "Qcow-utils: v1.0.0\n");
+        fprintf(stderr,
+                "usage: %s <SIZE(MB)> <FILENAME> "
+                "[<BACKING_FILENAME>]\n",
+                argv[0]);
+        exit(-1);
+    }
+
+    /*
+     * Parse the size strictly: reject non-numeric text, negative values
+     * and anything that would overflow once converted to bytes.
+     */
+    errno = 0;
+    mbytes = strtoull(argv[1], &end, 10);
+    if (errno != 0 || end == argv[1] || *end != '\0' ||
+        strchr(argv[1], '-') != NULL || mbytes > (~0ULL >> 20)) {
+        fprintf(stderr, "Invalid size in MB: [%s]\n", argv[1]);
+        exit(-1);
+    }
+
+    size = (uint64_t)mbytes << 20;
+    DFPRINTF("Creating file size %llu\n",(long long unsigned)size);
+    switch(argc) {
+    case 3:
+        ret = qcow_create(argv[2],size,NULL,0);
+        break;
+    case 4:
+        ret = qcow_create(argv[2],size,argv[3],0);
+        break;
+    }
+    if (ret < 0) DPRINTF("Unable to create QCOW file\n");
+    else DPRINTF("QCOW file successfully created\n");
+
+    /* let callers and scripts see whether the image was created */
+    return (ret < 0) ? 1 : 0;
+}
diff --git a/tools/blktap/drivers/qcow2raw.c b/tools/blktap/drivers/qcow2raw.c
new file mode 100644
index 0000000000..a7abc1bfa5
--- /dev/null
+++ b/tools/blktap/drivers/qcow2raw.c
@@ -0,0 +1,346 @@
+/* qcow2raw.c
+ *
+ * Generates raw image data from an existing qcow image
+ *
+ * (c) 2006 Julian Chesterfield and Andrew Warfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <string.h>
+#include "tapdisk.h"
+
+#if 1
+#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
+#else
+#define DFPRINTF(_f, _a...) ((void)0)
+#endif
+
+#define TAPDISK 1
+#define BLOCK_PROCESSSZ 4096
+
+static int maxfds, *qcowio_fd, *aio_fd, running = 1, complete = 0;
+static int read_complete = 0, write_complete = 0;
+static int returned_read_events = 0, returned_write_events = 0;
+static int submit_events = 0;
+static uint32_t read_idx = 0, write_idx = 0;
+struct tap_disk *drv1, *drv2;
+struct td_state *sqcow, *saio;
+static uint64_t prev = 0, written = 0;
+static char output[25];
+
+/*
+ * Debug helper: dump length bytes starting at ptr to stderr as hex.
+ *
+ * Bytes are printed as two hex digits each, grouped in pairs, sixteen
+ * bytes per line.
+ */
+void print_bytes(void *ptr, int length) {
+
+    int k;
+    unsigned char *p = ptr;
+
+    DFPRINTF("Buf dump, length %d:\n",length);
+    for (k = 0; k < length; k++) {
+        /* %02x keeps every byte two digits wide so the dump is unambiguous */
+        DFPRINTF("%02x", p[k]);
+        /* count completed bytes (k + 1) so groups start at the first byte */
+        if ((k + 1) % 16 == 0) DFPRINTF("\n");
+        else if ((k + 1) % 2 == 0) DFPRINTF(" ");
+    }
+    DFPRINTF("\n");
+    return;
+}
+
+/*
+ * Advance the textual progress bar held in the file-scope `output` buffer.
+ *
+ * progress - amount processed so far
+ * size     - total amount to process (same unit as progress)
+ *
+ * Each call moves the bar forward by at most one 5% step and reprints it
+ * on stderr.  Nothing happens for totals smaller than 20 units or once
+ * all 20 steps have been drawn.
+ */
+void debug_output(uint64_t progress, uint64_t size)
+{
+    /*Output progress every 5% */
+    uint64_t blocks = size/20;
+
+    /* totals below 20 would make the step size zero (division by zero) */
+    if (blocks == 0)
+        return;
+
+    /*
+     * main() puts '[' at output[0] and ']' at output[22]; the two-byte
+     * "=>" written at output[prev + 1] must stay inside output[1..21].
+     */
+    if (prev >= 20)
+        return;
+
+    if (progress/blocks > prev) {
+        memcpy(output+prev+1,"=>",2);
+        prev++;
+        DFPRINTF("\r%s %llu%%",
+                 output, (unsigned long long)((prev-1)*5));
+    }
+    return;
+}
+
+/*
+ * Prepare readfds for select(): clear it, add the poll descriptors of the
+ * qcow source (qcowio_fd[0]) and the aio destination (aio_fd[0]), and
+ * record the larger of the two plus one in maxfds.
+ */
+static inline void LOCAL_FD_SET(fd_set *readfds)
+{
+    /* the caller's fd_set is an uninitialised local; start from empty */
+    FD_ZERO(readfds);
+    FD_SET(qcowio_fd[0], readfds);
+    FD_SET(aio_fd[0], readfds);
+
+    maxfds = (qcowio_fd[0] > aio_fd[0] ? qcowio_fd[0] : aio_fd[0]) + 1;
+
+    return;
+}
+
+/*
+ * Completion callback for writes to the destination.
+ *
+ * Counts the completion, updates the progress bar on success and frees the
+ * block buffer passed as private.  A failed write is logged but still
+ * counted and freed: main() waits for returned_write_events to reach
+ * submit_events, so dropping a failed event would stall it forever.
+ *
+ * Always returns 0.
+ */
+static int send_write_responses(struct td_state *s, int res, int idx, void *private)
+{
+    if (res < 0)
+        DFPRINTF("AIO FAILURE: res [%d]!\n",res);
+    else
+        written += BLOCK_PROCESSSZ;
+
+    returned_write_events++;
+    write_idx = idx;
+    if (complete && (returned_write_events == submit_events))
+        write_complete = 1;
+
+    if (res >= 0)
+        debug_output(written, s->size << 9);
+    free(private);
+    return 0;
+}
+
+/*
+ * Completion callback for reads from the qcow source.
+ *
+ * Counts the completion, then queues the block just read (private) as a
+ * write to the destination at sector idx.  The destination queue is
+ * submitted after every tenth completed read and once the last read has
+ * completed.  Always returns 0.
+ */
+static int send_read_responses(struct td_state *s, int res, int idx, void *private)
+{
+    int rc;
+    int flush_now;
+
+    if (res < 0) {
+        DFPRINTF("AIO FAILURE: res [%d]!\n",res);
+    }
+
+    returned_read_events++;
+    read_idx = idx;
+    if (complete && (returned_read_events == submit_events)) {
+        read_complete = 1;
+    }
+
+    rc = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private,
+                              send_write_responses, idx, private);
+    if (rc != 0) {
+        DFPRINTF("ERROR in submitting queue write!\n");
+        return 0;
+    }
+
+    flush_now = (returned_read_events % 10 == 0) ||
+                (complete && returned_read_events == submit_events);
+    if (flush_now) {
+        drv2->td_submit(saio);
+    }
+
+    return 0;
+}
+
+/*
+ * qcow2raw <Dest File descriptor> <Qcow SRC IMAGE>
+ *
+ * Copies the contents of a qcow image into a raw file or block device.
+ * A missing destination is created at the image size; an existing one is
+ * overwritten after confirmation (regular files are resized, block
+ * devices must be at least as large as the image).  Blocks are read
+ * through tapdisk_qcow and written through tapdisk_aio while a progress
+ * bar is drawn on stderr.  Exits with -1 on any setup failure.
+ */
+int main(int argc, char *argv[])
+{
+    int ret = -1, fd;
+    unsigned long size;
+    fd_set readfds;
+    struct timeval timeout;
+    uint64_t i;
+    char *buf;
+    struct stat finfo;
+
+    if (argc != 3) {
+        fprintf(stderr, "Qcow-utils: v1.0.0\n");
+        fprintf(stderr, "usage: %s <Dest File descriptor> "
+                "<Qcow SRC IMAGE>\n",
+                argv[0]);
+        exit(-1);
+    }
+
+    sqcow = malloc(sizeof(struct td_state));
+    saio = malloc(sizeof(struct td_state));
+    if (sqcow == NULL || saio == NULL) {
+        DFPRINTF("Unable to allocate disk state\n");
+        exit(-1);
+    }
+
+    /*Open qcow source file*/
+    drv1 = &tapdisk_qcow;
+    sqcow->private = malloc(drv1->private_data_size);
+    if (sqcow->private == NULL) {
+        DFPRINTF("Unable to allocate driver state\n");
+        exit(-1);
+    }
+
+    if (drv1->td_open(sqcow, argv[2])!=0) {
+        DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]);
+        exit(-1);
+    } else DFPRINTF("QCOW file opened, size %llu\n",
+                    (long long unsigned)sqcow->size);
+
+    qcowio_fd = drv1->td_get_fd(sqcow);
+
+    /*Setup aio destination file*/
+    ret = stat(argv[1],&finfo);
+    if (ret == -1) {
+        /*Check errno*/
+        switch(errno) {
+        case ENOENT:
+            /*File doesn't exist, create*/
+            fd = open(argv[1],
+                      O_RDWR | O_LARGEFILE | O_CREAT, 0644);
+            if (fd < 0) {
+                DFPRINTF("ERROR creating file [%s] "
+                         "(errno %d)\n",
+                         argv[1], 0 - errno);
+                exit(-1);
+            }
+            if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
+                DFPRINTF("Unable to create file "
+                         "[%s] of size %llu (errno %d). "
+                         "Exiting...\n",
+                         argv[1],
+                         (long long unsigned)sqcow->size<<9,
+                         0 - errno);
+                close(fd);
+                exit(-1);
+            }
+            close(fd);
+            break;
+        case ENXIO:
+            DFPRINTF("ERROR Device [%s] does not exist\n",argv[1]);
+            exit(-1);
+        default:
+            DFPRINTF("An error occurred opening Device [%s] "
+                     "(errno %d)\n",
+                     argv[1], 0 - errno);
+            exit(-1);
+        }
+    } else {
+        fprintf(stderr, "WARNING: All existing data in "
+                "%s will be overwritten.\nDo you wish to continue? "
+                "(y or n) ",
+                argv[1]);
+        if (getchar() != 'y') {
+            DFPRINTF("Exiting...\n");
+            exit(-1);
+        }
+
+        /*TODO - Test the existing file or device for adequate space*/
+        fd = open(argv[1], O_RDWR | O_LARGEFILE);
+        if (fd < 0) {
+            DFPRINTF("ERROR: opening file [%s] (errno %d)\n",
+                     argv[1], 0 - errno);
+            exit(-1);
+        }
+
+        if (S_ISBLK(finfo.st_mode)) {
+            size = 0;
+            if(ioctl(fd,BLKGETSIZE,&size)!=0) {
+                DFPRINTF("ERROR: BLKGETSIZE failed, "
+                         "couldn't stat image [%s]\n",
+                         argv[1]);
+                close(fd);
+                exit(-1);
+            }
+            /*
+             * BLKGETSIZE reports 512-byte sectors, the same unit
+             * as sqcow->size, so compare sectors with sectors.
+             */
+            if ((uint64_t)size < sqcow->size) {
+                DFPRINTF("ERROR: Not enough space on device "
+                         "%s (%llu bytes available, %llu bytes required)\n",
+                         argv[1],
+                         (long long unsigned)size<<9,
+                         (long long unsigned)sqcow->size<<9);
+                close(fd);
+                exit(-1);
+            }
+        } else {
+            if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
+                DFPRINTF("Unable to create file "
+                         "[%s] of size %llu (errno %d). "
+                         "Exiting...\n",
+                         argv[1],
+                         (long long unsigned)sqcow->size<<9,
+                         0 - errno);
+                close(fd);
+                exit(-1);
+            } else DFPRINTF("File [%s] truncated to length %llu "
+                            "(%llu)\n",
+                            argv[1],
+                            (long long unsigned)sqcow->size<<9,
+                            (long long unsigned)sqcow->size);
+        }
+        close(fd);
+    }
+
+    /*Open aio destination file*/
+    drv2 = &tapdisk_aio;
+    saio->private = malloc(drv2->private_data_size);
+    if (saio->private == NULL) {
+        DFPRINTF("Unable to allocate driver state\n");
+        exit(-1);
+    }
+
+    if (drv2->td_open(saio, argv[1])!=0) {
+        DFPRINTF("Unable to open destination file [%s]\n", argv[1]);
+        exit(-1);
+    }
+
+    aio_fd = drv2->td_get_fd(saio);
+
+    /*Initialise the output string*/
+    memset(output,0x20,25);
+    output[0] = '[';
+    output[22] = ']';
+    output[23] = '\0';
+    DFPRINTF("%s",output);
+
+    i = 0;
+    while (running) {
+        timeout.tv_sec = 0;
+
+        if (!complete) {
+            /*Read Pages from qcow image*/
+            if ( (ret = posix_memalign((void **)&buf,
+                                       BLOCK_PROCESSSZ,
+                                       BLOCK_PROCESSSZ))
+                 != 0) {
+                DFPRINTF("Unable to alloc memory (%d)\n",ret);
+                exit(-1);
+            }
+
+            /*Attempt to read 4k sized blocks*/
+            /* buf travels as the private pointer: read cb -> write cb */
+            ret = drv1->td_queue_read(sqcow, i>>9,
+                                      BLOCK_PROCESSSZ>>9, buf,
+                                      send_read_responses, i>>9, buf);
+
+            if (ret < 0) {
+                DFPRINTF("UNABLE TO READ block [%llu]\n",
+                         (long long unsigned)i);
+                exit(-1);
+            } else {
+                i += BLOCK_PROCESSSZ;
+                submit_events++;
+            }
+
+            if (i >= sqcow->size<<9) {
+                complete = 1;
+            }
+
+            if ((submit_events % 10 == 0) || complete)
+                drv1->td_submit(sqcow);
+            timeout.tv_usec = 0;
+
+        } else {
+            timeout.tv_usec = 1000;
+            if (!submit_events) running = 0;
+        }
+
+
+        /*Check AIO FD*/
+        LOCAL_FD_SET(&readfds);
+        ret = select(maxfds + 1, &readfds, (fd_set *) 0,
+                     (fd_set *) 0, &timeout);
+
+        if (ret > 0) {
+            if (FD_ISSET(qcowio_fd[0], &readfds))
+                drv1->td_do_callbacks(sqcow, 0);
+            if (FD_ISSET(aio_fd[0], &readfds))
+                drv2->td_do_callbacks(saio, 0);
+        }
+        if (complete && (returned_write_events == submit_events))
+            running = 0;
+    }
+    memcpy(output+prev+1,"=",1);
+    DFPRINTF("\r%s 100%%\nTRANSFER COMPLETE\n\n", output);
+
+    return 0;
+}
diff --git a/tools/blktap/drivers/tapdisk.c b/tools/blktap/drivers/tapdisk.c
new file mode 100644
index 0000000000..f817a89a46
--- /dev/null
+++ b/tools/blktap/drivers/tapdisk.c
@@ -0,0 +1,671 @@
+/* tapdisk.c
+ *
+ * separate disk process, spawned by blktapctrl. Inherits code from driver
+ * plugins
+ *
+ * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
+ *
+ */
+
+#define MSG_SIZE 4096
+#define TAPDISK
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
+#include <time.h>
+#include <err.h>
+#include <poll.h>
+#include <sys/statvfs.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include "blktaplib.h"
+#include "tapdisk.h"
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define INPUT 0
+#define OUTPUT 1
+
+/* Highest descriptor passed to select(), and the control fifo pair
+ * (indexed with READ / WRITE). */
+static int maxfds, fds[2];
+
+/*
+ * Main-loop flag.  It is cleared from sig_handler(), which is installed as
+ * a signal handler, so it has to be volatile sig_atomic_t for that store
+ * to be well defined and to be re-read by the loops that test it.
+ */
+static volatile sig_atomic_t run = 1;
+
+/* Our own pid; filled in when CTLMSG_PID is handled and handed to the tap
+ * device with BLKTAP_IOCTL_SENDPID. */
+static pid_t process;
+/* Incremented on CTLMSG_NEWDEV, decremented on CTLMSG_CLOSE. */
+int connected_disks = 0;
+/* Head of the doubly linked list of per-disk descriptor entries. */
+fd_list_entry_t *fd_start = NULL;
+
+/* Print the version banner and the command-line synopsis on stderr, then
+ * terminate the process with exit status -1.  Never returns. */
+void usage(void)
+{
+	fputs("blktap-utils: v1.0.0\n", stderr);
+	fputs("usage: tapdisk <READ fifo> <WRITE fifo>\n", stderr);
+	exit(-1);
+}
+
+/*
+ * Detach from the launching process by forking once: the parent exits with
+ * status 0 and the child carries on.  Does nothing when the parent is
+ * already init (ppid 1).
+ *
+ * A failed fork() used to be indistinguishable from "I am the parent" and
+ * made the process exit 0 without ever starting a child; it is now reported
+ * on stderr (syslog is not open yet at this point) and exits with -1.
+ */
+void daemonize(void)
+{
+	int i;
+	pid_t pid;
+
+	if (getppid()==1) return; /* already a daemon */
+
+	pid = fork();
+	if (pid < 0) {
+		perror("tapdisk: fork");
+		exit(-1);
+	}
+	if (pid != 0) exit(0); /* parent: the child continues */
+
+#if 0
+	/*Set new program session ID and close all descriptors*/
+	setsid();
+	for (i = getdtablesize(); i >= 0; --i) close(i);
+
+	/*Send all I/O to /dev/null */
+	i = open("/dev/null",O_RDWR);
+	dup(i);
+	dup(i);
+#endif
+	return;
+}
+
+/*
+ * Tear down one disk: close the driver, unmap and close the tap device,
+ * unlink the disk's entry from the fd_start list and release every
+ * allocation that hangs off the state, including the state itself.
+ * The caller must not touch s or its list entry afterwards.
+ */
+static void unmap_disk(struct td_state *s)
+{
+	tapdev_info_t *info = s->ring_info;
+	struct tap_disk *drv = s->drv;
+	fd_list_entry_t *entry, *prev, *next;
+
+	drv->td_close(s);
+
+	if (info != NULL) {
+		/* mmap() reports failure with MAP_FAILED, not with a value
+		 * that can be ordered against 0. */
+		if (info->mem != NULL && info->mem != MAP_FAILED)
+			munmap(info->mem, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE);
+		/* Kept inside the NULL check: the old code dereferenced
+		 * info here unconditionally. */
+		close(info->fd);
+	}
+
+	entry = s->fd_entry;
+	if (entry != NULL) {
+		prev = entry->prev;
+		next = entry->next;
+
+		if (prev)
+			prev->next = next;	/*There are entries earlier in the list*/
+		else
+			fd_start = next;	/*We were the first entry in list*/
+		if (next)
+			next->prev = prev;
+
+		free(entry);
+	}
+
+	free(s->blkif);
+	free(s->ring_info);
+	/* The private area is allocated by read_msg() and was previously
+	 * leaked whenever a disk was closed through CTLMSG_CLOSE. */
+	free(s->private);
+	free(s);
+
+	return;
+
+}
+
+/*
+ * Shutdown request, raised by a signal or called directly once a disk has
+ * been closed.  The request is ignored while any disk is still connected;
+ * otherwise the main loop is told to stop.
+ */
+void sig_handler(int sig)
+{
+	if (connected_disks >= 1)
+		return;
+
+	run = 0;
+}
+
+/*
+ * Add the descriptors of every disk on the fd_start list to *readfds and
+ * raise the file-scope maxfds to the largest descriptor seen.  Entries
+ * whose tap_fd is still 0 are skipped altogether, io descriptors included.
+ * Always returns 0.
+ */
+static inline int LOCAL_FD_SET(fd_set *readfds)
+{
+	fd_list_entry_t *e;
+	int n;
+
+	for (e = fd_start; e != NULL; e = e->next) {
+		if (!e->tap_fd)
+			continue;
+
+		FD_SET(e->tap_fd, readfds);
+		if (e->tap_fd > maxfds)
+			maxfds = e->tap_fd;
+
+		for (n = 0; n < MAX_IOFD; n++) {
+			/* A zero slot is treated as "no descriptor" */
+			if (e->io_fd[n])
+				FD_SET(e->io_fd[n], readfds);
+			if (e->io_fd[n] > maxfds)
+				maxfds = e->io_fd[n];
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a descriptor entry for disk state s, record tap_fd and a copy of
+ * the MAX_IOFD io descriptors in it, cross-link entry and state, and append
+ * the entry to the tail of the fd_start list.  The cookie field is left for
+ * the caller to fill in.  Returns the new entry.
+ */
+static inline fd_list_entry_t *add_fd_entry(int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
+{
+	fd_list_entry_t *node, *tail;
+	int k;
+
+	DPRINTF("Adding fd_list_entry\n");
+
+	node = malloc(sizeof(fd_list_entry_t));
+	s->fd_entry = node;
+
+	node->tap_fd = tap_fd;
+	for (k = 0; k < MAX_IOFD; k++)
+		node->io_fd[k] = io_fd[k];
+	node->s = s;
+	node->next = NULL;
+
+	if (fd_start == NULL) {
+		/* Empty list: the new node becomes the head */
+		node->prev = NULL;
+		fd_start = node;
+		return node;
+	}
+
+	/* Walk to the current tail and hang the node behind it */
+	for (tail = fd_start; tail->next != NULL; tail = tail->next)
+		;
+	tail->next = node;
+	node->prev = tail;
+
+	return node;
+}
+
+/* Return the disk state whose list entry carries the given cookie, or NULL
+ * when no entry on the fd_start list matches. */
+static inline struct td_state *get_state(int cookie)
+{
+	fd_list_entry_t *e;
+
+	for (e = fd_start; e != NULL; e = e->next)
+		if (e->cookie == cookie)
+			return e->s;
+
+	return NULL;
+}
+
+/*
+ * Map the driver type sent by blktapctrl to its tap_disk implementation.
+ * Returns NULL for a type that has no slot in dtypes[]; the value comes
+ * straight from a control message, so it must be range-checked before it
+ * is used as an index.
+ */
+static struct tap_disk *get_driver(int drivertype)
+{
+	int ntypes = sizeof(dtypes) / sizeof(dtypes[0]);
+
+	if (drivertype < 0 || drivertype >= ntypes)
+		return NULL;
+
+	return dtypes[drivertype]->drv;
+}
+
+/*
+ * Allocate a disk state together with its blkif and ring-info blocks.
+ * Everything is zero-filled, which also gives each pending_list slot a
+ * count of 0, and the tap descriptor starts out as -1 so that tearing down
+ * a disk that was never mapped does not close or unmap anything real.
+ *
+ * Returns the new state, or NULL if any allocation fails (nothing is
+ * leaked in that case).
+ */
+static struct td_state *state_init(void)
+{
+	struct td_state *s;
+	blkif_t *blkif;
+	tapdev_info_t *info;
+
+	s = calloc(1, sizeof(struct td_state));
+	blkif = calloc(1, sizeof(blkif_t));
+	info = calloc(1, sizeof(tapdev_info_t));
+
+	if (s == NULL || blkif == NULL || info == NULL) {
+		free(info);
+		free(blkif);
+		free(s);
+		return NULL;
+	}
+
+	info->fd = -1;		/* no tap device opened yet */
+	s->blkif = blkif;
+	s->ring_info = info;
+
+	return s;
+}
+
+/*
+ * Open tap device number `minor`, map its shared region, initialise the
+ * back ring on the first page and switch the device to interpose mode.
+ * On success the list entry belonging to s gets the tap descriptor.
+ *
+ * Returns minor on success and -1 on failure.  On failure no descriptor is
+ * left open, and info->fd / info->mem are reset so that a later teardown
+ * does not act on stale values.
+ */
+static int map_new_dev(struct td_state *s, int minor)
+{
+	int tap_fd;
+	tapdev_info_t *info = s->ring_info;
+	char *devname = NULL;
+	fd_list_entry_t *ptr;
+
+	/* The contents of devname are undefined when asprintf() fails */
+	if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME,
+		     minor) < 0) {
+		DPRINTF("asprintf failed for tap device %d\n", minor);
+		return -1;
+	}
+
+	tap_fd = open(devname, O_RDWR);
+	if (tap_fd == -1)
+	{
+		DPRINTF("open failed on dev %s!",devname);
+		goto fail;
+	}
+	info->fd = tap_fd;
+
+	/*Map the shared memory*/
+	info->mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
+			 PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0);
+	if (info->mem == MAP_FAILED)
+	{
+		DPRINTF("mmap failed on dev %s!\n",devname);
+		goto fail_close;
+	}
+
+	/* assign the rings to the mapped memory */
+	info->sring = (blkif_sring_t *)((unsigned long)info->mem);
+	BACK_RING_INIT(&info->fe_ring, info->sring, PAGE_SIZE);
+
+	/* Data pages start after the ring page(s) */
+	info->vstart =
+		(unsigned long)info->mem + (BLKTAP_RING_PAGES << PAGE_SHIFT);
+
+	ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );
+	ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
+	free(devname);
+
+	/*Update the fd entry*/
+	ptr = fd_start;
+	while (ptr != NULL) {
+		if (s == ptr->s) {
+			ptr->tap_fd = tap_fd;
+			break;
+		}
+		ptr = ptr->next;
+	}
+
+	return minor;
+
+ fail_close:
+	/* Do not leak the descriptor or leave MAP_FAILED behind */
+	info->mem = NULL;
+	info->fd = -1;
+	close(tap_fd);
+ fail:
+	free(devname);
+	return -1;
+}
+
+/*
+ * Read one control message from blktapctrl into buf (MSG_SIZE bytes) and
+ * act on it, writing any reply back through the same buffer:
+ *
+ *   CTLMSG_PARAMS  pick the driver, allocate the disk state, open the
+ *                  image; reply CTLMSG_IMG (with image_t) or
+ *                  CTLMSG_IMG_FAIL.
+ *   CTLMSG_NEWDEV  map the tap device for the disk with this cookie; reply
+ *                  CTLMSG_NEWDEV_RSP or CTLMSG_NEWDEV_FAIL.
+ *   CTLMSG_CLOSE   tear the disk down and stop the main loop if it was the
+ *                  last one.  No reply.
+ *   CTLMSG_PID     reply CTLMSG_PID_RSP carrying our pid.
+ *
+ * Returns 1 when a known message was handled, 0 otherwise (short read,
+ * read error or unknown type).
+ */
+static int read_msg(char *buf)
+{
+	int length, len, msglen, *io_fd;
+	char *ptr, *path = NULL;
+	image_t *img;
+	msg_hdr_t *msg;
+	msg_newdev_t *msg_dev;
+	msg_pid_t *msg_pid;
+	struct tap_disk *drv;
+	int ret = -1;
+	struct td_state *s = NULL;
+	fd_list_entry_t *entry;
+
+	length = read(fds[READ], buf, MSG_SIZE);
+
+	if (length > 0 && (size_t)length >= sizeof(msg_hdr_t))
+	{
+		msg = (msg_hdr_t *)buf;
+		DPRINTF("Tapdisk: Received msg, len %d, type %d, UID %d\n",
+			length,msg->type,msg->cookie);
+
+		switch (msg->type) {
+		case CTLMSG_PARAMS:
+			ptr = buf + sizeof(msg_hdr_t);
+			len = (length - sizeof(msg_hdr_t));
+
+			/* One extra zeroed byte keeps path NUL-terminated
+			 * even when the sender did not include the
+			 * terminator in the payload. */
+			path = calloc(1, len + 1);
+			if (path == NULL)
+				goto params_done;
+
+			memcpy(path, ptr, len);
+			DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);
+
+			/*Assign driver*/
+			drv = get_driver(msg->drivertype);
+			if (drv == NULL)
+				goto params_done;
+
+			DPRINTF("Loaded driver: name [%s], type [%d]\n",
+				drv->disk_type, msg->drivertype);
+
+			/* Allocate the disk structs */
+			s = state_init();
+			if (s == NULL)
+				goto params_done;
+
+			s->drv = drv;
+			s->private = malloc(drv->private_data_size);
+			if (s->private == NULL) {
+				/* Release everything state_init() handed out,
+				 * not just the outer struct */
+				free(s->blkif);
+				free(s->ring_info);
+				free(s);
+				s = NULL;
+				goto params_done;
+			}
+
+			/*Open file*/
+			ret = drv->td_open(s, path);
+			if (ret != 0) {
+				/* A disk that failed to open must not be
+				 * registered as live: drop its state and
+				 * answer CTLMSG_IMG_FAIL. */
+				free(s->private);
+				free(s->blkif);
+				free(s->ring_info);
+				free(s);
+				s = NULL;
+				goto params_done;
+			}
+			io_fd = drv->td_get_fd(s);
+
+			/* tap_fd stays 0 until CTLMSG_NEWDEV maps the device */
+			entry = add_fd_entry(0, io_fd, s);
+			entry->cookie = msg->cookie;
+			DPRINTF("Entered cookie %d\n",entry->cookie);
+
+			memset(buf, 0x00, MSG_SIZE);
+
+		params_done:
+			if (ret == 0) {
+				msglen = sizeof(msg_hdr_t) + sizeof(image_t);
+				msg->type = CTLMSG_IMG;
+				img = (image_t *)(buf + sizeof(msg_hdr_t));
+				img->size = s->size;
+				img->secsize = s->sector_size;
+				img->info = s->info;
+			} else {
+				msglen = sizeof(msg_hdr_t);
+				msg->type = CTLMSG_IMG_FAIL;
+				msg->len = msglen;
+			}
+			len = write(fds[WRITE], buf, msglen);
+			free(path);
+			return 1;
+
+
+
+		case CTLMSG_NEWDEV:
+			msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
+
+			s = get_state(msg->cookie);
+			DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK"));
+			if (s != NULL) {
+				ret = ((map_new_dev(s, msg_dev->devnum)
+					== msg_dev->devnum ? 0: -1));
+				connected_disks++;
+			}
+
+			memset(buf, 0x00, MSG_SIZE);
+			msglen = sizeof(msg_hdr_t);
+			msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP
+				     : CTLMSG_NEWDEV_FAIL);
+			msg->len = msglen;
+
+			len = write(fds[WRITE], buf, msglen);
+			return 1;
+
+		case CTLMSG_CLOSE:
+			s = get_state(msg->cookie);
+			if (s) unmap_disk(s);
+
+			connected_disks--;
+			/* Stops the main loop once no disks remain */
+			sig_handler(SIGINT);
+
+			return 1;
+
+		case CTLMSG_PID:
+			memset(buf, 0x00, MSG_SIZE);
+			msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);
+			msg->type = CTLMSG_PID_RSP;
+			msg->len = msglen;
+
+			msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));
+			process = getpid();
+			msg_pid->pid = process;
+
+			len = write(fds[WRITE], buf, msglen);
+			return 1;
+
+		default:
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Copy *rsp into the response slot at the ring's private producer index
+ * and advance that index.  Nothing is published here: kick_responses()
+ * is the function that calls RING_PUSH_RESPONSES.  Always returns 0.
+ */
+static inline int write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp)
+{
+ tapdev_info_t *info = s->ring_info;
+ blkif_response_t *rsp_d;
+
+ rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt);
+ memcpy(rsp_d, rsp, sizeof(blkif_response_t));
+ /* Barrier between filling the slot and bumping the producer index */
+ wmb();
+ info->fe_ring.rsp_prod_pvt++;
+
+ return 0;
+}
+
+/*
+ * Publish any responses queued since the last push and notify the tap
+ * device with BLKTAP_IOCTL_KICK_FE.  Does nothing when the private
+ * producer index already equals the shared one.
+ */
+static inline void kick_responses(struct td_state *s)
+{
+	tapdev_info_t *ring = s->ring_info;
+
+	if (ring->fe_ring.rsp_prod_pvt == ring->fe_ring.sring->rsp_prod)
+		return;
+
+	RING_PUSH_RESPONSES(&ring->fe_ring);
+	ioctl(ring->fd, BLKTAP_IOCTL_KICK_FE);
+}
+
+/*
+ * Let the disk driver run its completion callbacks for descriptor slot
+ * sid, and push the responses out if the driver reports that any were
+ * queued.  Does nothing once shutdown has been requested.
+ */
+void io_done(struct td_state *s, int sid)
+{
+	struct tap_disk *disk = s->drv;
+	int queued;
+
+	if (!run)
+		return; /*We have received signal to close*/
+
+	queued = disk->td_do_callbacks(s, sid);
+	if (queued > 0)
+		kick_responses(s);
+}
+
+/*
+ * Completion callback handed to the disk drivers (td_callback_t).  Called
+ * once per finished segment of request slot idx; res is the driver's
+ * result for that segment (0 on success).
+ *
+ * Each call accounts for one segment.  When the last segment of the
+ * request has completed, the response is built in place over the stored
+ * request and queued on the ring.  A failed segment marks the whole
+ * request BLKIF_RSP_ERROR but is still counted; returning early on error,
+ * as this function used to, left the count above zero for ever, so the
+ * request was never answered and its slot could never be reused.
+ *
+ * Returns the number of responses queued by this call (0 or 1).
+ * `private` is unused.
+ */
+int send_responses(struct td_state *s, int res, int idx, void *private)
+{
+	blkif_request_t *req;
+	int responses_queued = 0;
+	blkif_t *blkif = s->blkif;
+
+	/* Validate idx before it is used to index pending_list at all */
+	if ( (idx < 0) || (idx > MAX_REQUESTS-1) ||
+	     (blkif->pending_list[idx].count == 0) )
+	{
+		DPRINTF("invalid index returned(%u)!\n", idx);
+		return 0;
+	}
+
+	req = &blkif->pending_list[idx].req;
+
+	if (res != 0) {
+		DPRINTF("*** request error %d! \n", res);
+		blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+	}
+
+	blkif->pending_list[idx].count--;
+
+	if (blkif->pending_list[idx].count == 0)
+	{
+		blkif_request_t tmp;
+		blkif_response_t *rsp;
+
+		/* The response overlays the request storage, so take a
+		 * copy of the request before its fields are overwritten */
+		tmp = blkif->pending_list[idx].req;
+		rsp = (blkif_response_t *)req;
+
+		rsp->id = tmp.id;
+		rsp->operation = tmp.operation;
+		rsp->status = blkif->pending_list[idx].status;
+
+		write_rsp_to_ring(s, rsp);
+		responses_queued++;
+	}
+	return responses_queued;
+}
+
+/*
+ * Drain the request ring of disk s.  Every request is copied into its
+ * pending_list slot (indexed by the request id) and each of its segments
+ * is queued on the driver as a read or a write, with send_responses() as
+ * the completion callback.  After the batch the driver's td_submit() is
+ * called, and if any segment completed synchronously the responses are
+ * flushed through io_done().
+ *
+ * NOTE(review): segments skipped by the range checks below are never
+ * counted as completed, so a request containing one is never answered.
+ */
+static void get_io_request(struct td_state *s)
+{
+	RING_IDX rp, j, i;
+	/* Must be signed: the queue functions return negative errno values,
+	 * and in an unsigned variable those satisfy "ret > 0" and get
+	 * counted as early completions. */
+	int ret;
+	blkif_request_t *req;
+	int idx, nsects;
+	uint64_t sector_nr;
+	char *page;
+	int early = 0; /* count early completions */
+	struct tap_disk *drv = s->drv;
+	blkif_t *blkif = s->blkif;
+	tapdev_info_t *info = s->ring_info;
+
+	if (!run) return; /*We have received signal to close*/
+
+	rp = info->fe_ring.sring->req_prod;
+	rmb();
+	for (j = info->fe_ring.req_cons; j != rp; j++)
+	{
+		req = RING_GET_REQUEST(&info->fe_ring, j);
+		++info->fe_ring.req_cons;
+
+		if (req == NULL) continue;
+
+		/* The id indexes pending_list[]; an out-of-range id would
+		 * read and write outside the array. */
+		if (req->id >= MAX_REQUESTS) {
+			DPRINTF("Request id out of range (%llu), dropped\n",
+				(long long unsigned)req->id);
+			continue;
+		}
+
+		idx = req->id;
+		ASSERT(blkif->pending_list[idx].count == 0);
+		memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
+		blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
+		blkif->pending_list[idx].count = req->nr_segments;
+
+		sector_nr = req->sector_number;
+
+		for (i = 0; i < req->nr_segments; i++) {
+			nsects = req->seg[i].last_sect -
+				 req->seg[i].first_sect + 1;
+
+			if ((req->seg[i].last_sect >= PAGE_SIZE >> 9) ||
+			    (nsects <= 0))
+				continue;
+
+			/* Data page of this segment inside the mapping */
+			page = (char *)MMAP_VADDR(info->vstart,
+						  (unsigned long)req->id, i);
+			page += (req->seg[i].first_sect << SECTOR_SHIFT);
+
+			if (sector_nr >= s->size) {
+				DPRINTF("Sector request failed:\n");
+				DPRINTF("%s request, idx [%d,%d] size [%llu], "
+					"sector [%llu,%llu]\n",
+					(req->operation == BLKIF_OP_WRITE ?
+					 "WRITE" : "READ"),
+					idx,i,
+					(long long unsigned)
+						nsects<<SECTOR_SHIFT,
+					(long long unsigned)
+						sector_nr<<SECTOR_SHIFT,
+					(long long unsigned) sector_nr);
+				continue;
+			}
+
+			switch (req->operation)
+			{
+			case BLKIF_OP_WRITE:
+				ret = drv->td_queue_write(s, sector_nr,
+						nsects, page, send_responses,
+						idx, NULL);
+				if (ret > 0) early += ret;
+				else if (ret == -EBUSY) {
+					/*
+					 * TODO: Sector is locked         *
+					 * Need to put req back on queue  *
+					 */
+				}
+				break;
+			case BLKIF_OP_READ:
+				ret = drv->td_queue_read(s, sector_nr,
+						nsects, page, send_responses,
+						idx, NULL);
+				if (ret > 0) early += ret;
+				else if (ret == -EBUSY) {
+					/*
+					 * TODO: Sector is locked         *
+					 * Need to put req back on queue  *
+					 */
+				}
+				break;
+			default:
+				DPRINTF("Unknown block operation\n");
+				break;
+			}
+			sector_nr += nsects;
+		}
+	}
+
+	/*Batch done*/
+	drv->td_submit(s);
+
+	if (early > 0)
+		io_done(s,10);
+
+	return;
+}
+
+/*
+ * tapdisk entry point.
+ *
+ *   argv[1]  fifo that control messages are read from
+ *   argv[2]  fifo that replies are written to
+ *
+ * After daemonizing, the process loops on select() over the control fifo
+ * and the descriptors of every connected disk, dispatching ring requests,
+ * driver completions and control messages until `run` is cleared.  All
+ * remaining disks are then torn down.  Returns 0.
+ */
+int main(int argc, char *argv[])
+{
+	int ret, i;
+	char *buf;
+	fd_set readfds;
+	struct timeval timeout;
+	fd_list_entry_t *ptr;
+
+	if (argc != 3) usage();
+
+	daemonize();
+
+	openlog("TAPDISK", LOG_CONS|LOG_ODELAY, LOG_DAEMON);
+	/*Setup signal handlers*/
+	signal (SIGBUS, sig_handler);
+	signal (SIGINT, sig_handler);
+
+	/*Open the control channel*/
+	fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK);
+	fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK);
+
+	if ( (fds[READ] < 0) || (fds[WRITE] < 0) )
+	{
+		DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]);
+		exit(-1);
+	}
+
+	buf = calloc(MSG_SIZE, 1);
+
+	if (buf == NULL)
+	{
+		DPRINTF("ERROR: allocating memory.\n");
+		exit(-1);
+	}
+
+	while (run)
+	{
+		ret = 0;
+		FD_ZERO(&readfds);
+		FD_SET(fds[READ], &readfds);
+		maxfds = fds[READ];
+
+		/*Set all tap fds*/
+		LOCAL_FD_SET(&readfds);
+
+		timeout.tv_sec = 0;
+		timeout.tv_usec = 1000;
+
+		/*Wait for incoming messages*/
+		ret = select(maxfds + 1, &readfds, (fd_set *) 0,
+			     (fd_set *) 0, &timeout);
+
+		if (ret > 0)
+		{
+			ptr = fd_start;
+			while (ptr != NULL) {
+				/* tap_fd is 0 until the device has been
+				 * mapped.  LOCAL_FD_SET() never adds such an
+				 * entry, so do not test descriptor 0 on its
+				 * behalf: it may be the control fifo. */
+				if (ptr->tap_fd &&
+				    FD_ISSET(ptr->tap_fd, &readfds))
+					get_io_request(ptr->s);
+				for (i = 0; i < MAX_IOFD; i++) {
+					if (ptr->io_fd[i] &&
+					    FD_ISSET(ptr->io_fd[i], &readfds))
+						io_done(ptr->s, i);
+				}
+
+				ptr = ptr->next;
+			}
+
+			/* Handled last: a CTLMSG_CLOSE frees a list entry */
+			if (FD_ISSET(fds[READ], &readfds))
+				read_msg(buf);
+		}
+	}
+	free(buf);
+	close(fds[READ]);
+	close(fds[WRITE]);
+
+	/*
+	 * unmap_disk() closes the driver and the tap device, unlinks the
+	 * list entry and frees both the entry and the state.  Fetch the
+	 * successor first and touch neither afterwards; the previous loop
+	 * went on to use and free them a second time.
+	 */
+	ptr = fd_start;
+	while (ptr != NULL) {
+		fd_list_entry_t *next = ptr->next;
+
+		unmap_disk(ptr->s);
+		ptr = next;
+	}
+	closelog();
+
+	return 0;
+}
diff --git a/tools/blktap/drivers/tapdisk.h b/tools/blktap/drivers/tapdisk.h
new file mode 100644
index 0000000000..1f03156456
--- /dev/null
+++ b/tools/blktap/drivers/tapdisk.h
@@ -0,0 +1,211 @@
+/* tapdisk.h
+ *
+ * Generic disk interface for blktap-based image adapters.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ *
+ * Some notes on the tap_disk interface:
+ *
+ * tap_disk aims to provide a generic interface to easily implement new
+ * types of image accessors. The structure-of-function-calls is similar
+ * to disk interfaces used in qemu/denali/etc, with the significant
+ * difference being the expectation of asynchronous rather than synchronous
+ * I/O. The asynchronous interface is intended to allow lots of requests to
+ * be pipelined through a disk, without the disk requiring any of its own
+ * threads of control. As such, a batch of requests is delivered to the disk
+ * using:
+ *
+ * td_queue_[read,write]()
+ *
+ * and passing in a completion callback, which the disk is responsible for
+ * tracking. The end of a batch is marked with a call to:
+ *
+ * td_submit()
+ *
+ * The disk implementation must provide a file handle, which is used to
+ * indicate that it needs to do work. tapdisk will add this file handle
+ * (returned from td_get_fd()) to its poll set, and will call into the disk
+ * using td_do_callbacks() whenever there is data pending.
+ *
+ * Two disk implementations demonstrate how this interface may be used to
+ * implement disks with both asynchronous and synchronous calls. block-aio.c
+ * maps this interface down onto the linux libaio calls, while block-sync uses
+ * normal posix read/write.
+ *
+ * A few things to realize about the sync case, which doesn't need to defer
+ * io completions:
+ *
+ * - td_queue_[read,write]() call read/write directly, and then call the
+ * callback immediately. They MUST then return a value greater than 0
+ * in order to tell tapdisk that requests have finished early, and to
+ * force responses to be kicked to the clients.
+ *
+ * - The fd used for poll is an otherwise unused pipe, which allows poll to
+ * be safely called without ever returning anything.
+ *
+ */
+
+#ifndef TAPDISK_H_
+#define TAPDISK_H_
+
+#include <stdint.h>
+#include <syslog.h>
+#include "blktaplib.h"
+
+/*If enabled, log all debug messages to syslog*/
+#if 1
+#define DPRINTF(_f, _a...) syslog( LOG_DEBUG, _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* Things disks need to know about, these should probably be in a higher-level
+ * header. */
+#define MAX_REQUESTS 64
+#define MAX_SEGMENTS_PER_REQ 11
+#define SECTOR_SHIFT 9
+#define DEFAULT_SECTOR_SIZE 512
+
+/* This structure represents the state of an active virtual disk. */
+/* The pointer members are untyped here; the notes give the types that */
+/* tapdisk.c stores in them. */
+struct td_state {
+ void *private; /* driver data, drv->private_data_size bytes */
+ void *drv; /* struct tap_disk * servicing this disk */
+ void *blkif; /* blkif_t * holding the pending request list */
+ void *image; /* NOTE(review): not referenced in tapdisk.c */
+ void *ring_info; /* tapdev_info_t * for the mapped tap device */
+ void *fd_entry; /* fd_list_entry_t * for this disk */
+ char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/
+ long int sector_size; /* reported to blktapctrl as image_t.secsize */
+ uint64_t size; /* tapdisk.c compares request sector numbers against this */
+ long int info; /* reported to blktapctrl as image_t.info */
+};
+
+/* Prototype of the callback to activate as requests complete. */
+typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv);
+
+/* Structure describing the interface to a virtual disk implementation. */
+/* See note at the top of this file describing this interface. */
+struct tap_disk {
+ /* Name of the disk type, used in log messages */
+ const char *disk_type;
+ /* Bytes tapdisk allocates for td_state.private before td_open */
+ int private_data_size;
+ /* Open image `name`; tapdisk treats 0 as success */
+ int (*td_open) (struct td_state *s, const char *name);
+ /* Queue a read of nb_sectors starting at sector into buf; cb is */
+ /* called with id and prv on completion. A return value > 0 is */
+ /* counted by tapdisk as requests that completed early. */
+ int (*td_queue_read) (struct td_state *s, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *prv);
+ /* Same contract as td_queue_read, for writes */
+ int (*td_queue_write) (struct td_state *s, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *prv);
+ /* Marks the end of a batch of queued requests */
+ int (*td_submit) (struct td_state *s);
+ /* Descriptors to poll; tapdisk copies MAX_IOFD ints from the result */
+ int *(*td_get_fd) (struct td_state *s);
+ /* Release the image */
+ int (*td_close) (struct td_state *s);
+ /* Run pending completion callbacks for descriptor slot sid; tapdisk */
+ /* pushes responses to the ring when this returns > 0 */
+ int (*td_do_callbacks)(struct td_state *s, int sid);
+};
+
+typedef struct disk_info {
+ int idnum;
+ char name[50]; /* e.g. "RAMDISK" */
+ char handle[10]; /* xend handle, e.g. 'ram' */
+ int single_handler; /* is there a single controller for all */
+ /* instances of disk type? */
+#ifdef TAPDISK
+ struct tap_disk *drv;
+#endif
+} disk_info_t;
+
+void debug_fe_ring(struct td_state *s);
+
+extern struct tap_disk tapdisk_aio;
+extern struct tap_disk tapdisk_sync;
+extern struct tap_disk tapdisk_vmdk;
+extern struct tap_disk tapdisk_ram;
+extern struct tap_disk tapdisk_qcow;
+
+#define MAX_DISK_TYPES 20
+#define MAX_IOFD 2
+
+#define DISK_TYPE_AIO 0
+#define DISK_TYPE_SYNC 1
+#define DISK_TYPE_VMDK 2
+#define DISK_TYPE_RAM 3
+#define DISK_TYPE_QCOW 4
+
+
+/*Define Individual Disk Parameters here */
+static disk_info_t aio_disk = {
+ DISK_TYPE_AIO,
+ "raw image (aio)",
+ "aio",
+ 0,
+#ifdef TAPDISK
+ &tapdisk_aio,
+#endif
+};
+
+static disk_info_t sync_disk = {
+ DISK_TYPE_SYNC,
+ "raw image (sync)",
+ "sync",
+ 0,
+#ifdef TAPDISK
+ &tapdisk_sync,
+#endif
+};
+
+static disk_info_t vmdk_disk = {
+ DISK_TYPE_VMDK,
+ "vmware image (vmdk)",
+ "vmdk",
+ 1,
+#ifdef TAPDISK
+ &tapdisk_vmdk,
+#endif
+};
+
+static disk_info_t ram_disk = {
+ DISK_TYPE_RAM,
+ "ramdisk image (ram)",
+ "ram",
+ 1,
+#ifdef TAPDISK
+ &tapdisk_ram,
+#endif
+};
+
+static disk_info_t qcow_disk = {
+ DISK_TYPE_QCOW,
+ "qcow disk (qcow)",
+ "qcow",
+ 0,
+#ifdef TAPDISK
+ &tapdisk_qcow,
+#endif
+};
+
+/*Main disk info array */
+/* tapdisk indexes this array directly with the driver type received from */
+/* blktapctrl, so each entry must sit at the position given by its */
+/* DISK_TYPE_* value (aio = 0 ... qcow = 4). */
+static disk_info_t *dtypes[] = {
+ &aio_disk,
+ &sync_disk,
+ &vmdk_disk,
+ &ram_disk,
+ &qcow_disk,
+};
+
+typedef struct driver_list_entry {
+ void *blkif;
+ void *prev;
+ void *next;
+} driver_list_entry_t;
+
+/* One node per disk in tapdisk's doubly linked list of descriptors. */
+typedef struct fd_list_entry {
+ int cookie; /* identifier from the control message; used to look up the disk */
+ int tap_fd; /* tap device descriptor; 0 until the device is mapped */
+ int io_fd[MAX_IOFD]; /* driver descriptors; a 0 slot is treated as unused */
+ struct td_state *s; /* disk state owning this entry */
+ void *prev; /* previous fd_list_entry_t, NULL at the head */
+ void *next; /* next fd_list_entry_t, NULL at the tail */
+} fd_list_entry_t;
+
+int qcow_create(const char *filename, uint64_t total_size,
+ const char *backing_file, int flags);
+
+#endif /*TAPDISK_H_*/
diff --git a/tools/blktap/lib/Makefile b/tools/blktap/lib/Makefile
new file mode 100644
index 0000000000..c0eb28bde1
--- /dev/null
+++ b/tools/blktap/lib/Makefile
@@ -0,0 +1,66 @@
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+MAJOR = 3.0
+MINOR = 0
+SONAME = libblktap.so.$(MAJOR)
+
+BLKTAP_INSTALL_DIR = /usr/sbin
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+INCLUDES += -I. -I.. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
+
+LIBS := -lz
+
+SRCS :=
+SRCS += xenbus.c blkif.c xs_api.c
+
+CFLAGS += -Werror
+CFLAGS += -Wno-unused
+CFLAGS += -fno-strict-aliasing -fPIC
+CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+# get asprintf():
+CFLAGS += -D _GNU_SOURCE
+
+# Get gcc to generate the dependencies for us.
+CFLAGS += -Wp,-MD,.$(@F).d
+CFLAGS += $(INCLUDES)
+DEPS = .*.d
+
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+IBINS :=
+
+LIB = libblktap.a libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
+
+all: build
+
+build:
+ $(MAKE) libblktap
+
+install: all
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+ $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
+
+clean:
+ rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
+
+# Link the shared library, create its soname and development symlinks, and
+# build the static archive.  The archive is made from the object files: an
+# archive that wraps the shared object cannot be linked against statically.
+# It is removed first so that no member from an earlier build survives.
+libblktap: $(OBJS)
+	$(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \
+	      -L$(XEN_XENSTORE) -l xenstore \
+	      -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
+	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
+	ln -sf libblktap.so.$(MAJOR) $@.so
+	rm -f libblktap.a
+	$(AR) rc libblktap.a $^
+
+.PHONY: TAGS all build clean install libblktap
+
+TAGS:
+ etags -t $(SRCS) *.h
+
+-include $(DEPS)
+
diff --git a/tools/blktap/lib/blkif.c b/tools/blktap/lib/blkif.c
new file mode 100644
index 0000000000..9a195960a0
--- /dev/null
+++ b/tools/blktap/lib/blkif.c
@@ -0,0 +1,185 @@
+/*
+ * tools/blktap/lib/blkif.c
+ *
+ * The blkif interface for blktap. A blkif describes an in-use virtual disk.
+ * (c) 2005 Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <err.h>
+#include <unistd.h>
+
+#include "blktaplib.h"
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+/* Look up the blkif registered for (domid, handle) in the hash table.
+ * Returns the matching blkif, or NULL if there is none. */
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+	blkif_t *cur;
+
+	for (cur = blkif_hash[BLKIF_HASH(domid, handle)];
+	     cur != NULL;
+	     cur = cur->hash_next) {
+		if ((cur->domid == domid) && (cur->handle == handle))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/* Allocate a zero-filled blkif for domain domid with devnum preset to -1.
+ * Returns NULL when memory cannot be obtained. */
+blkif_t *alloc_blkif(domid_t domid)
+{
+	blkif_t *fresh;
+
+	DPRINTF("Alloc_blkif called [%d]\n",domid);
+
+	fresh = calloc(1, sizeof(*fresh));
+	if (fresh != NULL) {
+		fresh->domid = domid;
+		fresh->devnum = -1;
+	}
+
+	return fresh;
+}
+
+/*Controller callbacks*/
+/* Each hook starts out NULL and is installed by its register function. */
+
+/* Called by blkif_init() after the blkif is hashed; its return value */
+/* becomes blkif->devnum, with -1 treated as failure. */
+static int (*new_devmap_hook)(blkif_t *blkif) = NULL;
+void register_new_devmap_hook(int (*fn)(blkif_t *blkif))
+{
+ new_devmap_hook = fn;
+}
+
+/* Called by free_blkif() just before the blkif itself is freed. */
+static int (*new_unmap_hook)(blkif_t *blkif) = NULL;
+void register_new_unmap_hook(int (*fn)(blkif_t *blkif))
+{
+ new_unmap_hook = fn;
+}
+
+/* Called first by blkif_init(); a non-zero return aborts initialisation. */
+static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
+void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
+{
+ new_blkif_hook = fn;
+}
+
+/*
+ * Finish setting up a blkif obtained from alloc_blkif(): record handle,
+ * pdev and readonly, run the new_blkif hook, insert the blkif into the
+ * hash table and run the devmap hook to obtain its device number.
+ *
+ * Returns 0 on success, -EINVAL when blkif is NULL, and -1 when a hook is
+ * missing or fails or when (domid, handle) is already registered.
+ *
+ * NOTE(review): on a devmap failure the blkif has already been inserted
+ * into the hash table and is left there.
+ */
+int blkif_init(blkif_t *blkif, long int handle, long int pdev,
+ long int readonly)
+{
+ domid_t domid;
+ blkif_t **pblkif;
+ int devnum;
+
+ if (blkif == NULL)
+ return -EINVAL;
+
+ domid = blkif->domid;
+ blkif->handle = handle;
+ blkif->pdev = pdev;
+ blkif->readonly = readonly;
+
+ /*
+ * Call out to the new_blkif_hook.
+ * The tap application should define this,
+ * and it should return having set blkif->ops
+ *
+ */
+ if (new_blkif_hook == NULL)
+ {
+ DPRINTF("Probe detected a new blkif, but no new_blkif_hook!");
+ return -1;
+ }
+ if (new_blkif_hook(blkif)!=0) {
+ DPRINTF("BLKIF: Image open failed\n");
+ return -1;
+ }
+
+ /* Now wire it in. */
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ DPRINTF("Created hash entry: %d [%d,%ld]\n",
+ BLKIF_HASH(domid, handle), domid, handle);
+
+ /* Walk to the end of the bucket chain, rejecting duplicates */
+ while ( *pblkif != NULL )
+ {
+ if ( ((*pblkif)->domid == domid) &&
+ ((*pblkif)->handle == handle) )
+ {
+ DPRINTF("Could not create blkif: already exists\n");
+ return -1;
+ }
+ pblkif = &(*pblkif)->hash_next;
+ }
+ blkif->hash_next = NULL;
+ *pblkif = blkif;
+
+ if (new_devmap_hook == NULL)
+ {
+ DPRINTF("Probe setting up new blkif but no devmap hook!");
+ return -1;
+ }
+
+ devnum = new_devmap_hook(blkif);
+ if (devnum == -1)
+ return -1;
+ blkif->devnum = devnum;
+
+ return 0;
+}
+
+/*
+ * Remove blkif from the hash table and release it together with its prv
+ * and info blocks.  The unmap hook, if one is registered, runs just before
+ * the blkif itself is freed.  Passing NULL is a no-op; the earlier version
+ * read blkif->domid before it checked for NULL.
+ *
+ * NOTE(review): the hook is invoked after prv and info have been freed
+ * (order kept from the original), so it must not dereference them.
+ */
+void free_blkif(blkif_t *blkif)
+{
+	blkif_t **pblkif, *curs;
+
+	if (blkif == NULL)
+		return;
+
+	pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
+	while ( (curs = *pblkif) != NULL )
+	{
+		if ( blkif == curs )
+		{
+			/* blkif_init() rejects duplicates, so a blkif can
+			 * appear on the chain at most once */
+			*pblkif = curs->hash_next;
+			break;
+		}
+		pblkif = &curs->hash_next;
+	}
+
+	free(blkif->prv);	/* free(NULL) is a no-op */
+	free(blkif->info);
+	if (new_unmap_hook != NULL) new_unmap_hook(blkif);
+	free(blkif);
+}
+
+/* Reset the blkif hash table so that every bucket is empty. */
+void __init_blkif(void)
+{
+ memset(blkif_hash, 0, sizeof(blkif_hash));
+}
diff --git a/tools/blktap/lib/blktaplib.h b/tools/blktap/lib/blktaplib.h
new file mode 100644
index 0000000000..ceab6b7d51
--- /dev/null
+++ b/tools/blktap/lib/blktaplib.h
@@ -0,0 +1,223 @@
+/* blktaplib.h
+ *
+ * Blktap library userspace code.
+ *
+ * (c) 2005 Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __BLKTAPLIB_H__
+#define __BLKTAPLIB_H__
+
+#include <xenctrl.h>
+#include <sys/user.h>
+#include <xen/xen.h>
+#include <xen/io/blkif.h>
+#include <xen/io/ring.h>
+#include <xs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+
+/* size of the extra VMA area to map in attached pages. */
+#define BLKTAP_VMA_PAGES BLK_RING_SIZE
+
+/* blktap IOCTLs: These must correspond with the blktap driver ioctls*/
+#define BLKTAP_IOCTL_KICK_FE 1
+#define BLKTAP_IOCTL_KICK_BE 2
+#define BLKTAP_IOCTL_SETMODE 3
+#define BLKTAP_IOCTL_SENDPID 4
+#define BLKTAP_IOCTL_NEWINTF 5
+#define BLKTAP_IOCTL_MINOR 6
+#define BLKTAP_IOCTL_MAJOR 7
+#define BLKTAP_QUERY_ALLOC_REQS 8
+#define BLKTAP_IOCTL_FREEINTF 9
+#define BLKTAP_IOCTL_PRINT_IDXS 100
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
+
+#define BLKTAP_MODE_INTERPOSE \
+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+/*
+ * Return 1 when arg is a mode accepted for BLKTAP_IOCTL_SETMODE:
+ * passthrough, front-end intercept, or full interpose.  Back-end intercept
+ * on its own is not accepted.  Returns 0 for anything else.
+ */
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+	switch (arg) {
+	case BLKTAP_MODE_PASSTHROUGH:
+	case BLKTAP_MODE_INTERCEPT_FE:
+	case BLKTAP_MODE_INTERPOSE:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+#define MAX_REQUESTS 64
+
+#define BLKTAP_IOCTL_KICK 1
+#define MAX_PENDING_REQS 64
+#define BLKTAP_DEV_DIR "/dev/xen"
+#define BLKTAP_DEV_NAME "blktap"
+#define BLKTAP_DEV_MAJOR 254
+#define BLKTAP_DEV_MINOR 0
+
+#define BLKTAP_RING_PAGES 1 /* Front */
+#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
+
+struct blkif;
+
+typedef struct {
+ blkif_request_t req;
+ struct blkif *blkif;
+ int count;
+ int16_t status;
+} pending_req_t;
+
+struct blkif_ops {
+ long int (*get_size)(struct blkif *blkif);
+ long int (*get_secsize)(struct blkif *blkif);
+ unsigned (*get_info)(struct blkif *blkif);
+};
+
+typedef struct blkif {
+ domid_t domid;
+ long int handle;
+
+ long int pdev;
+ long int readonly;
+
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } state;
+
+ struct blkif_ops *ops;
+ struct blkif *hash_next;
+
+ void *prv; /* device-specific data */
+ void *info; /*Image parameter passing */
+ pending_req_t pending_list[MAX_REQUESTS];
+ int devnum;
+ int fds[2];
+ int be_id;
+ int major;
+ int minor;
+ pid_t tappid;
+ int drivertype;
+ uint16_t cookie;
+} blkif_t;
+
+typedef struct blkif_info {
+ char *params;
+} blkif_info_t;
+
+void register_new_devmap_hook(int (*fn)(blkif_t *blkif));
+void register_new_unmap_hook(int (*fn)(blkif_t *blkif));
+void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+blkif_t *alloc_blkif(domid_t domid);
+int blkif_init(blkif_t *blkif, long int handle, long int pdev,
+ long int readonly);
+void free_blkif(blkif_t *blkif);
+void __init_blkif(void);
+
+/* Per-device mapping state kept by a tapdisk process. */
+typedef struct tapdev_info {
+ int fd; /* descriptor of the opened tap device node */
+ char *mem; /* start of the mmap()ed region of that device */
+ blkif_sring_t *sring; /* shared ring, located at the start of mem */
+ blkif_back_ring_t fe_ring; /* back ring initialised over sring */
+ unsigned long vstart; /* mem plus BLKTAP_RING_PAGES pages: base for MMAP_VADDR */
+ blkif_t *blkif; /* NOTE(review): not assigned in tapdisk.c */
+} tapdev_info_t;
+
+typedef struct domid_translate {
+ unsigned short domid;
+ unsigned short busid;
+} domid_translate_t ;
+
+typedef struct image {
+ long int size;
+ long int secsize;
+ long int info;
+} image_t;
+
+typedef struct msg_hdr {
+ uint16_t type;
+ uint16_t len;
+ uint16_t drivertype;
+ uint16_t cookie;
+} msg_hdr_t;
+
+typedef struct msg_newdev {
+ uint8_t devnum;
+ uint16_t domid;
+} msg_newdev_t;
+
+typedef struct msg_pid {
+ pid_t pid;
+} msg_pid_t;
+
+#define READ 0
+#define WRITE 1
+
+/*Control Messages between manager and tapdev*/
+#define CTLMSG_PARAMS 1
+#define CTLMSG_IMG 2
+#define CTLMSG_IMG_FAIL 3
+#define CTLMSG_NEWDEV 4
+#define CTLMSG_NEWDEV_RSP 5
+#define CTLMSG_NEWDEV_FAIL 6
+#define CTLMSG_CLOSE 7
+#define CTLMSG_CLOSE_RSP 8
+#define CTLMSG_PID 9
+#define CTLMSG_PID_RSP 10
+
+/* xenstore/xenbus: */
+extern int add_blockdevice_probe_watch(struct xs_handle *h,
+ const char *domname);
+int xs_fire_next_watch(struct xs_handle *h);
+
+
+/* Arbitrary values, must match the underlying driver... */
+#define MAX_PENDING_REQS 64
+#define MAX_TAP_DEV 100
+
+/* Accessing attached data page mappings */
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_vstart,_req,_seg) \
+ ((_vstart) + \
+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+/* Defines that are only used by library clients */
+
+#ifndef __COMPILING_BLKTAP_LIB
+
+static char *blkif_op_name[] = {
+ [BLKIF_OP_READ] = "READ",
+ [BLKIF_OP_WRITE] = "WRITE",
+};
+
+#endif /* __COMPILING_BLKTAP_LIB */
+
+#endif /* __BLKTAPLIB_H__ */
diff --git a/tools/blktap/lib/list.h b/tools/blktap/lib/list.h
new file mode 100644
index 0000000000..bda5f46a38
--- /dev/null
+++ b/tools/blktap/lib/list.h
@@ -0,0 +1,55 @@
+/*
+ * list.h
+ *
+ * This is a subset of linux's list.h intended to be used in user-space.
+ *
+ */
+
+#ifndef __LIST_H__
+#define __LIST_H__
+
+/* Marker values written into the links of an entry removed by list_del(). */
+#define LIST_POISON1 ((void *) 0x00100100)
+#define LIST_POISON2 ((void *) 0x00200200)
+
+/* Link pair embedded in every structure that is kept on a list. */
+struct list_head {
+    struct list_head *next, *prev;
+};
+
+/* Initialiser for an empty list: both links point back at the head. */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* Define and initialise an empty list head called 'name'. */
+#define LIST_HEAD(name) \
+    struct list_head name = LIST_HEAD_INIT(name)
+
+/* Link 'new' between two nodes that are currently adjacent. */
+static inline void __list_add(struct list_head *new,
+                              struct list_head *prev,
+                              struct list_head *next)
+{
+    next->prev = new;
+    new->next = next;
+    new->prev = prev;
+    prev->next = new;
+}
+
+/* Insert 'new' immediately after 'head', i.e. at the front of the list. */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+    struct list_head *first = head->next;
+
+    __list_add(new, head, first);
+}
+
+/* Make 'prev' and 'next' adjacent, dropping whatever was between them. */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+    next->prev = prev;
+    prev->next = next;
+}
+
+/* Unlink 'entry' from its list and poison its links. */
+static inline void list_del(struct list_head *entry)
+{
+    struct list_head *before = entry->prev;
+    struct list_head *after = entry->next;
+
+    __list_del(before, after);
+    entry->next = LIST_POISON1;
+    entry->prev = LIST_POISON2;
+}
+
+/* Recover the structure of type 'type' that embeds 'ptr' as field 'member'. */
+#define list_entry(ptr, type, member) \
+    ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/*
+ * Walk every structure on the list at 'head', front to back; 'pos' is the
+ * cursor and 'member' names the embedded list_head field.
+ */
+#define list_for_each_entry(pos, head, member) \
+    for (pos = list_entry((head)->next, typeof(*pos), member); \
+         &pos->member != (head); \
+         pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#endif /* __LIST_H__ */
diff --git a/tools/blktap/lib/xenbus.c b/tools/blktap/lib/xenbus.c
new file mode 100644
index 0000000000..91cdd00536
--- /dev/null
+++ b/tools/blktap/lib/xenbus.c
@@ -0,0 +1,387 @@
+/*
+ * xenbus.c
+ *
+ * xenbus interface to the blocktap.
+ *
+ * this handles the top-half of integration with block devices through the
+ * store -- the tap driver negotiates the device channel etc, while the
+ * userland tap client needs to sort out the disk parameters etc.
+ *
+ * (c) 2005 Andrew Warfield and Julian Chesterfield
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <printf.h>
+#include <string.h>
+#include <err.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <xs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <time.h>
+#include <sys/time.h>
+#include "blktaplib.h"
+#include "list.h"
+#include "xs_api.h"
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* One tracked tap backend; instances are linked together on belist. */
+struct backend_info
+{
+ /* our communications channel */
+ blkif_t *blkif; /* NULL until ueblktap_setup() allocates it */
+
+ long int frontend_id; /* value of the backend's "frontend-id" xenstore key */
+ long int pdev; /* "physical-device" key, or derived from "params" when that key cannot be read */
+ long int readonly; /* set to 1 when a "read-only" node exists under the backend path */
+
+ char *backpath; /* heap copy of the backend xenstore path; freed by backend_remove() */
+ char *frontpath; /* value of the "frontend" key; freed by backend_remove() */
+
+ struct list_head list; /* link on belist */
+};
+
+static LIST_HEAD(belist);
+
+/*
+ * Return the offset in 'str' of the occurrence of 'c' that follows 'len'
+ * earlier occurrences.  If the string ends after exactly 'len' occurrences
+ * the string length is returned; with fewer occurrences the result is
+ * -ERANGE.
+ */
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+    const char *cur;
+    unsigned int remaining = len;
+
+    for (cur = str; *cur != '\0'; cur++) {
+        if (*cur != c)
+            continue;
+        if (remaining == 0)
+            return (int)(cur - str);
+        remaining--;
+    }
+
+    if (remaining != 0)
+        return -ERANGE;
+    return (int)(cur - str);
+}
+
+/*
+ * Return the number held in the path component that follows the seventh
+ * '/' of 'str' (e.g. 2049 for /local/domain/0/backend/tap/1/2049), or -1
+ * when the path has fewer than seven '/' characters.
+ *
+ * The digits are parsed in place.  The previous implementation copied the
+ * tail into a 10-byte buffer without a bound, and for a path with exactly
+ * six '/' characters it wrote before that buffer and read past the end of
+ * 'str'.
+ */
+static int get_be_id(const char *str)
+{
+    int len;
+
+    len = strsep_len(str, '/', 6);
+
+    /* str[len] == '\0' means strsep_len ran out of string: no seventh '/'. */
+    if ( (len < 0) || (str[len] == '\0') )
+        return -1;
+
+    return atoi(str + len + 1);
+}
+
+/* Find the tracked backend whose backend path equals 'bepath', or NULL. */
+static struct backend_info *be_lookup_be(const char *bepath)
+{
+    struct backend_info *cur;
+
+    list_for_each_entry(cur, &belist, list) {
+        if (strcmp(bepath, cur->backpath) != 0)
+            continue;
+        return cur;
+    }
+
+    return NULL;
+}
+
+/* Return 1 when a backend with path 'bepath' is already tracked, else 0. */
+static int be_exists_be(const char *bepath)
+{
+    struct backend_info *found = be_lookup_be(bepath);
+
+    return found ? 1 : 0;
+}
+
+/* Find the tracked backend whose frontend path equals 'fepath', or NULL. */
+static struct backend_info *be_lookup_fe(const char *fepath)
+{
+    struct backend_info *cur;
+
+    list_for_each_entry(cur, &belist, list) {
+        if (strcmp(fepath, cur->frontpath) != 0)
+            continue;
+        return cur;
+    }
+
+    return NULL;
+}
+
+/*
+ * Unlink 'be' from belist and release it together with its blkif (if any)
+ * and both path strings.  'h' is not used.  Always returns 0.
+ */
+static int backend_remove(struct xs_handle *h, struct backend_info *be)
+{
+    blkif_t *blkif = be->blkif;
+
+    /* Unhook from be list. */
+    list_del(&be->list);
+
+    /* Free everything else. */
+    if (blkif != NULL) {
+        DPRINTF("Freeing blkif dev [%d]\n",be->blkif->devnum);
+        free_blkif(blkif);
+    }
+
+    /* free(NULL) is a no-op, so the paths need no guards. */
+    free(be->frontpath);
+    free(be->backpath);
+    free(be);
+
+    return 0;
+}
+
+/*
+ * Finish bringing up the tracked backend at 'bepath'.
+ *
+ * Reads "physical-device" and checks for a "read-only" node; if the backend
+ * has no blkif yet, allocates one, reads "params" and calls blkif_init().
+ * It then publishes "sectors", "sector-size" and "info" under the backend
+ * path and marks the blkif CONNECTED.
+ *
+ * On failure the backend is dropped with backend_remove() only if a blkif
+ * had been allocated; otherwise it stays on belist.  backend_remove() frees
+ * be->backpath, which the caller in this file passes as 'bepath', so
+ * 'bepath' must not be used after the fail label.
+ */
+static void ueblktap_setup(struct xs_handle *h, char *bepath)
+{
+    struct backend_info *be;
+    char *path = NULL, *p;
+    int er, deverr;
+    long int pdev = 0, handle;
+    blkif_info_t *blk;
+
+    be = be_lookup_be(bepath);
+    if (be == NULL)
+    {
+        DPRINTF("ERROR: backend changed called for nonexistent "
+                "backend! (%s)\n", bepath);
+        goto fail;
+    }
+
+    deverr = xs_gather(h, bepath, "physical-device", "%li", &pdev, NULL);
+    if (!deverr) {
+        DPRINTF("pdev set to %ld\n",pdev);
+        if (be->pdev && be->pdev != pdev) {
+            DPRINTF("changing physical-device not supported");
+            goto fail;
+        }
+        be->pdev = pdev;
+    }
+
+    /*Check to see if device is to be opened read-only*/
+    if (asprintf(&path, "%s/%s", bepath, "read-only") == -1) {
+        /* The pointer is undefined after a failed asprintf. */
+        path = NULL;
+        goto fail;
+    }
+    if (xs_exists(h, path))
+        be->readonly = 1;
+
+    if (be->blkif == NULL) {
+
+        /* Front end dir is a number, which is used as the handle. */
+        p = strrchr(be->frontpath, '/');
+        /* No '/' at all: parse the whole string instead of NULL + 1. */
+        p = (p != NULL) ? p + 1 : be->frontpath;
+        handle = strtoul(p, NULL, 0);
+
+        be->blkif = alloc_blkif(be->frontend_id);
+
+        if (be->blkif == NULL)
+            goto fail;
+
+        be->blkif->be_id = get_be_id(bepath);
+
+        /*Insert device specific info*/
+        blk = malloc(sizeof(blkif_info_t));
+        if (!blk) {
+            DPRINTF("Out of memory - blkif_info_t\n");
+            goto fail;
+        }
+        er = xs_gather(h, bepath, "params", NULL, &blk->params, NULL);
+        if (er) {
+            /* blk is not attached to the blkif yet, so free it here. */
+            free(blk);
+            goto fail;
+        }
+        be->blkif->info = blk;
+
+        if (deverr) {
+            /*Dev number was not available, try to set manually*/
+            pdev = convert_dev_name_to_num(blk->params);
+            be->pdev = pdev;
+        }
+
+        er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
+
+        if (er != 0) {
+            DPRINTF("Unable to open device %s\n",blk->params);
+            goto fail;
+        }
+
+        DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", bepath);
+    }
+
+    /* Supply the information about the device to xenstore */
+    /* xs_printf() returns 0 on failure. */
+    er = xs_printf(h, be->backpath, "sectors", "%lu",
+                   be->blkif->ops->get_size(be->blkif));
+
+    if (er == 0) {
+        DPRINTF("ERROR: Failed writing sectors");
+        goto fail;
+    }
+
+    er = xs_printf(h, be->backpath, "sector-size", "%lu",
+                   be->blkif->ops->get_secsize(be->blkif));
+
+    if (er == 0) {
+        DPRINTF("ERROR: Failed writing sector-size");
+        goto fail;
+    }
+
+    er = xs_printf(h, be->backpath, "info", "%u",
+                   be->blkif->ops->get_info(be->blkif));
+
+    if (er == 0) {
+        DPRINTF("ERROR: Failed writing info");
+        goto fail;
+    }
+
+    be->blkif->state = CONNECTED;
+    DPRINTF("[SETUP] Complete\n\n");
+    goto close;
+
+fail:
+    if ( (be != NULL) && (be->blkif != NULL) )
+        backend_remove(h, be);
+close:
+    free(path);
+    return;
+}
+
+/**
+ * Xenstore watch callback entry point. This code replaces the hotplug scripts,
+ * and is called as soon as the xenstore backend driver entries are created.
+ *
+ * 'bepath_im' is the path that changed; it is truncated to the backend
+ * directory before use.  'w' is not used.
+ *
+ * If "frontend-id" or "frontend" cannot be read, any backend tracked under
+ * that path is removed.  Otherwise a new backend_info is added to belist
+ * (unless one is already tracked) and ueblktap_setup() is run on it.
+ */
+static void ueblktap_probe(struct xs_handle *h, struct xenbus_watch *w,
+                           const char *bepath_im)
+{
+    struct backend_info *be = NULL;
+    char *frontend = NULL, *bepath = NULL;
+    int er, len;
+
+    bepath = strdup(bepath_im);
+
+    if (!bepath) {
+        DPRINTF("No path\n");
+        return;
+    }
+
+    /*
+     *asserts that xenstore structure is always 7 levels deep
+     *e.g. /local/domain/0/backend/vbd/1/2049
+     */
+    len = strsep_len(bepath, '/', 7);
+    if (len < 0)
+        goto free_be;
+    bepath[len] = '\0';
+
+    be = malloc(sizeof(*be));
+    if (!be) {
+        DPRINTF("ERROR: allocating backend structure\n");
+        goto free_be;
+    }
+    memset(be, 0, sizeof(*be));
+
+    er = xs_gather(h, bepath,
+                   "frontend-id", "%li", &be->frontend_id,
+                   "frontend", NULL, &frontend,
+                   NULL);
+
+    if (er) {
+        /*
+         *Unable to find frontend entries,
+         *bus-id is no longer valid
+         */
+        DPRINTF("ERROR: Frontend-id check failed, removing backend: "
+                "[%s]\n",bepath);
+
+        /**
+         * BE info should already exist,
+         * free new mem and find old entry
+         */
+        free(be);
+        be = be_lookup_be(bepath);
+
+        /*
+         * backend_remove() copes with a NULL blkif, so use it for every
+         * tracked entry.  The old code free()d an entry without a blkif
+         * while it was still linked on belist.
+         */
+        if (be != NULL)
+            backend_remove(h, be);
+
+        /* Whatever 'be' pointed at is gone; do not free it again below. */
+        be = NULL;
+        goto free_be;
+    }
+
+    /* Are we already tracking this device? */
+    if (be_exists_be(bepath)) {
+        goto free_be;
+    }
+
+    /* From here on 'be' owns both strings. */
+    be->backpath = bepath;
+    be->frontpath = frontend;
+
+    list_add(&be->list, &belist);
+
+    DPRINTF("[PROBE]\tADDED NEW DEVICE (%s)\n", bepath);
+    DPRINTF("\tFRONTEND (%s),(%ld)\n", frontend,be->frontend_id);
+
+    ueblktap_setup(h, bepath);
+    return;
+
+ free_be:
+    /* frontend may be set even on error if xs_gather failed late. */
+    free(frontend);
+    free(bepath);
+    free(be);
+    return;
+}
+
+/**
+ *We set a general watch on the backend vbd directory
+ *ueblktap_probe is called for every update
+ *Our job is to monitor for new entries. As they
+ *are created, we initialise the state and attach a disk.
+ */
+
+/*
+ * Register ueblktap_probe() as a watch on /local/domain/<id>/backend/tap,
+ * where <id> is the domid of the domain named 'domname'.
+ *
+ * Returns 0 on success, -ENOENT when the domain cannot be found, -ENOMEM
+ * when the path cannot be built and -EINVAL when the watch cannot be
+ * allocated or registered.  On success the watch and its path stay
+ * allocated because the watch list refers to them.
+ */
+int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
+{
+    char *domid, *path;
+    struct xenbus_watch *vbd_watch;
+    int er;
+
+    domid = get_dom_domid(h, domname);
+
+    DPRINTF("%s: %s\n",
+            domname, (domid != NULL) ? domid : "[ not found! ]");
+
+    /* Do not format a NULL domid into the watch path. */
+    if (domid == NULL)
+        return -ENOENT;
+
+    er = asprintf(&path, "/local/domain/%s/backend/tap", domid);
+    free(domid);
+    if (er == -1)
+        return -ENOMEM;
+
+    vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
+    if (!vbd_watch) {
+        DPRINTF("ERROR: unable to malloc vbd_watch [%s]\n", path);
+        free(path);
+        return -EINVAL;
+    }
+    vbd_watch->node = path;
+    vbd_watch->callback = ueblktap_probe;
+    er = register_xenbus_watch(h, vbd_watch);
+    if (er == 0) {
+        DPRINTF("ERROR: adding vbd probe watch %s\n", path);
+        /* A watch that failed to register is not on the watch list. */
+        free(vbd_watch);
+        free(path);
+        return -EINVAL;
+    }
+    return 0;
+}
diff --git a/tools/blktap/lib/xs_api.c b/tools/blktap/lib/xs_api.c
new file mode 100644
index 0000000000..44abcf2080
--- /dev/null
+++ b/tools/blktap/lib/xs_api.c
@@ -0,0 +1,364 @@
+/*
+ * xs_api.c
+ *
+ * blocktap interface functions to xenstore
+ *
+ * (c) 2005 Andrew Warfield and Julian Chesterfield
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <printf.h>
+#include <string.h>
+#include <err.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <xs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <poll.h>
+#include "blktaplib.h"
+#include "list.h"
+#include "xs_api.h"
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+static LIST_HEAD(watches);
+#define BASE_DEV_VAL 2048
+
+/*
+ * Read several nodes below 'dir' inside one xenstore transaction.
+ *
+ * The variable arguments are (name, fmt, result) triples ended by a NULL
+ * name.  With a non-NULL fmt the node value is parsed with sscanf(fmt) into
+ * 'result'.  With a NULL fmt 'result' is a char ** that receives the
+ * xs_read() buffer, which the caller must free.
+ *
+ * Returns 0 on success or a positive errno value: ENOMEM, ENOENT for a
+ * missing node, EINVAL for a value that does not parse, or the errno left
+ * by a failed transaction end.  Strings stored by earlier triples are not
+ * released when a later triple fails.
+ */
+int xs_gather(struct xs_handle *xs, const char *dir, ...)
+{
+    va_list ap;
+    const char *name;
+    char *path;
+    int ret = 0;
+    unsigned int len;
+    xs_transaction_t xth;
+
+again:
+    if ( (xth = xs_transaction_start(xs)) == XBT_NULL) {
+        DPRINTF("unable to start xs trasanction\n");
+        ret = ENOMEM;
+        return ret;
+    }
+
+    va_start(ap, dir);
+    while ( (ret == 0) && (name = va_arg(ap, char *)) != NULL) {
+        const char *fmt = va_arg(ap, char *);
+        void *result = va_arg(ap, void *);
+        char *p;
+
+        if (asprintf(&path, "%s/%s", dir, name) == -1)
+        {
+            printf("allocation error in xs_gather!\n");
+            ret = ENOMEM;
+            break;
+        }
+
+        p = xs_read(xs, xth, path, &len);
+
+        free(path);
+        if (p == NULL) {
+            ret = ENOENT;
+            break;
+        }
+        if (fmt) {
+            /*
+             * Exactly one conversion must succeed.  sscanf returns EOF,
+             * not 0, for an empty value; "== 0" let that through with
+             * 'result' left unset.
+             */
+            if (sscanf(p, fmt, result) != 1)
+                ret = EINVAL;
+            free(p);
+        } else
+            *(char **)result = p;
+    }
+    va_end(ap);
+
+    /* A non-zero ret aborts the transaction instead of committing it. */
+    if (!xs_transaction_end(xs, xth, ret)) {
+        if (ret == 0 && errno == EAGAIN)
+            goto again;
+        else
+            ret = errno;
+    }
+
+    return ret;
+}
+
+
+/*
+ * Single printf and write: formats the value and writes it, including its
+ * terminating NUL, to the node "<dir>/<node>".
+ *
+ * Returns the xs_write() result, which callers in this library test as
+ * non-zero for success and 0 for failure.  An allocation failure also
+ * returns 0.
+ */
+int xs_printf(struct xs_handle *h, const char *dir, const char *node,
+              const char *fmt, ...)
+{
+    char *buf, *path;
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = vasprintf(&buf, fmt, ap);
+    va_end(ap);
+
+    /* After a failed (v)asprintf the pointer is undefined: test the result. */
+    if (ret == -1)
+        return 0;
+
+    if (asprintf(&path, "%s/%s", dir, node) == -1) {
+        free(buf);
+        return 0;
+    }
+
+    ret = xs_write(h, XBT_NULL, path, buf, strlen(buf)+1);
+
+    free(buf);
+    free(path);
+
+    return ret;
+}
+
+
+/*
+ * Return 1 when xs_directory() can list 'path', 0 when it cannot or when no
+ * transaction could be started.
+ */
+int xs_exists(struct xs_handle *h, const char *path)
+{
+    unsigned int count;
+    char **entries;
+    xs_transaction_t xth = xs_transaction_start(h);
+
+    if (xth == XBT_NULL) {
+        printf("unable to start xs trasanction\n");
+        return 0;
+    }
+
+    entries = xs_directory(h, xth, path, &count);
+    xs_transaction_end(h, xth, 0);
+
+    if (entries != NULL) {
+        free(entries);
+        return 1;
+    }
+    return 0;
+}
+
+
+
+/**
+ * Look up the domid of the domain called 'name' (e.g. "Domain-0") by
+ * scanning /local/domain/<n>/name.  This assumes that the domain name we
+ * are looking for is unique.
+ *
+ * Returns the "domid" value as an xs_read() string that the caller must
+ * free, or NULL when no domain matches or xenstore cannot be read.
+ */
+char *get_dom_domid(struct xs_handle *h, const char *name)
+{
+    char **e, *val, *domid = NULL;
+    unsigned int num, len, i;
+    char *path;
+    xs_transaction_t xth;
+
+    if ( (xth = xs_transaction_start(h)) == XBT_NULL) {
+        warn("unable to start xs trasanction\n");
+        return NULL;
+    }
+
+    e = xs_directory(h, xth, "/local/domain", &num);
+    if (e == NULL)
+        goto out;
+
+    /*
+     * The for loop advances 'i' on every path.  The old while loop skipped
+     * its i++ on "continue" and spun forever on a domain without a name.
+     */
+    for (i = 0; i < num; i++) {
+        if (asprintf(&path, "/local/domain/%s/name", e[i]) == -1)
+            break;
+        val = xs_read(h, xth, path, &len);
+        free(path);
+        if (val == NULL)
+            continue;
+
+        if (strcmp(val, name) == 0) {
+            /* match! */
+            if (asprintf(&path, "/local/domain/%s/domid", e[i]) != -1) {
+                domid = xs_read(h, xth, path, &len);
+                free(path);
+            }
+            free(val);
+            break;
+        }
+        free(val);
+    }
+
+out:
+    xs_transaction_end(h, xth, 0);
+
+    free(e);
+    return domid;
+}
+
+/*
+ * Index of 'c' within "abcdefghijklmnop", or 16 when 'c' is not one of
+ * those letters (this includes the terminating NUL).
+ */
+static int dev_letter_index(char c)
+{
+    static const char alpha[] = "abcdefghijklmnop";
+    int i;
+
+    for (i = 0; alpha[i] != '\0'; i++)
+        if (alpha[i] == c)
+            break;
+    return i;
+}
+
+/*
+ * Derive a device number from a device name:
+ *   /dev/sd<l><n>   ->  BASE_DEV_VAL + 16*index(l) + n
+ *   /dev/hd<l><n>   ->  majors[index(l)/2]*256 + n
+ *   /dev/xvd<l><n>  ->  202*256 + 16*index(l) + n
+ *   plx<n>          ->  n
+ * Any other name yields BASE_DEV_VAL.
+ *
+ * The prefix is located with strstr() and parsing starts right after the
+ * match, so a name with a leading qualifier is handled.  The old code
+ * always offset from the start of 'name', leaked five asprintf() buffers
+ * per call and stepped past the terminator when nothing followed the
+ * prefix.
+ *
+ * NOTE(review): the hd formula gives both disks that share a major the
+ * same number (no per-disk minor offset) -- confirm this is intended.
+ */
+int convert_dev_name_to_num(char *name) {
+    static const int majors[10] = {3,22,33,34,56,57,88,89,90,91};
+    const char *p;
+    int i;
+
+    if ((p = strstr(name, "/dev/sd")) != NULL) {
+        p += strlen("/dev/sd");
+        i = dev_letter_index(*p);
+        if (*p != '\0')
+            p++;    /* step over the drive letter, never the NUL */
+        return BASE_DEV_VAL + (16*i) + atoi(p);
+    } else if ((p = strstr(name, "/dev/hd")) != NULL) {
+        p += strlen("/dev/hd");
+        i = dev_letter_index(*p);
+        if (*p != '\0')
+            p++;
+        /* i is at most 16, so i/2 stays inside majors[]. */
+        return (majors[i/2]*256) + atoi(p);
+    } else if ((p = strstr(name, "/dev/xvd")) != NULL) {
+        p += strlen("/dev/xvd");
+        i = dev_letter_index(*p);
+        if (*p != '\0')
+            p++;
+        return (202*256) + (16*i) + atoi(p);
+    } else if ((p = strstr(name, "plx")) != NULL) {
+        p += strlen("plx");
+        return atoi(p);
+    }
+
+    DPRINTF("Unknown device type, setting to default.\n");
+    return BASE_DEV_VAL;
+}
+
+/**
+ * Map a watch token back to its registered watch.  The token is the watch
+ * address in hexadecimal.  A little paranoia: the decoded pointer is only
+ * returned if it is actually on the watches list; otherwise NULL.
+ */
+static struct xenbus_watch *find_watch(const char *token)
+{
+    struct xenbus_watch *wanted = (void *)strtoul(token, NULL, 16);
+    struct xenbus_watch *cur;
+
+    list_for_each_entry(cur, &watches, list) {
+        if (cur != wanted)
+            continue;
+        return cur;
+    }
+
+    return NULL;
+}
+
+/**
+ * Register callback to watch this node.
+ * like xs_watch, return 0 on failure
+ *
+ * The token passed to xs_watch() is the address of 'watch' in hexadecimal.
+ * On success the watch is linked on the watches list, so it and its node
+ * string must stay valid while registered.  An already registered watch is
+ * rejected with 0; it used to return -EINVAL, which callers testing for 0
+ * read as success.
+ */
+int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
+{
+    /* Pointer in ascii is the token. */
+    char token[sizeof(watch) * 2 + 1];
+    int er;
+
+    snprintf(token, sizeof(token), "%lX", (unsigned long)watch);
+    if (find_watch(token))
+    {
+        DPRINTF("watch collision!\n");
+        return 0;
+    }
+
+    er = xs_watch(h, watch->node, token);
+    if (er != 0) {
+        list_add(&watch->list, &watches);
+    }
+
+    return er;
+}
+
+/*
+ * Remove a registered watch: issue xs_unwatch() and unlink it from the
+ * watches list.  Returns -EINVAL if 'watch' is not registered, else 0; the
+ * watch is unlinked and 0 returned even when xs_unwatch() fails.
+ */
+int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
+{
+    char token[sizeof(watch) * 2 + 1];
+    int unwatched;
+
+    sprintf(token, "%lX", (long)watch);
+    if (find_watch(token) == NULL)
+    {
+        DPRINTF("no such watch!\n");
+        return -EINVAL;
+    }
+
+    unwatched = xs_unwatch(h, watch->node, token);
+    list_del(&watch->list);
+
+    if (!unwatched)
+        DPRINTF("XENBUS Failed to release watch %s: %i\n",
+                watch->node, unwatched);
+    return 0;
+}
+
+/**
+ * Re-issue xs_watch() for every watch on the watches list, using the same
+ * pointer-derived token as at registration.  Results are ignored.
+ */
+void reregister_xenbus_watches(struct xs_handle *h)
+{
+    struct xenbus_watch *cur;
+    char token[sizeof(cur) * 2 + 1];
+
+    list_for_each_entry(cur, &watches, list) {
+        sprintf(token, "%lX", (long)cur);
+        xs_watch(h, cur->node, token);
+    }
+}
+
+/**
+ * based on watch_thread()
+ *
+ * Read one watch event and run the callback of the watch its token names.
+ * Returns -EAGAIN when no event could be read, otherwise 1, including when
+ * the token matches no registered watch.
+ */
+int xs_fire_next_watch(struct xs_handle *h)
+{
+    unsigned int count;
+    struct xenbus_watch *watch;
+    char **event = xs_read_watch(h, &count);
+
+    if (event == NULL)
+        return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
+
+    watch = find_watch(event[XS_WATCH_TOKEN]);
+    if (watch != NULL)
+        watch->callback(h, watch, event[XS_WATCH_PATH]);
+    else
+        DPRINTF("unregistered watch fired\n");
+
+    free(event);
+    return 1;
+}
diff --git a/tools/blktap/lib/xs_api.h b/tools/blktap/lib/xs_api.h
new file mode 100644
index 0000000000..c4183a2dde
--- /dev/null
+++ b/tools/blktap/lib/xs_api.h
@@ -0,0 +1,50 @@
+/*
+ * xs_api.h
+ *
+ * (c) 2005 Andrew Warfield and Julian Chesterfield
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+struct xenbus_watch
+{
+ struct list_head list;
+ char *node;
+ void (*callback)(struct xs_handle *h,
+ struct xenbus_watch *,
+ const char *node);
+};
+
+int xs_gather(struct xs_handle *xs, const char *dir, ...);
+int xs_printf(struct xs_handle *h, const char *dir, const char *node,
+ const char *fmt, ...);
+int xs_exists(struct xs_handle *h, const char *path);
+char *get_dom_domid(struct xs_handle *h, const char *name);
+int convert_dev_name_to_num(char *name);
+int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch);
+int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch);
+void reregister_xenbus_watches(struct xs_handle *h);
+int xs_fire_next_watch(struct xs_handle *h);
diff --git a/tools/examples/Makefile b/tools/examples/Makefile
index c5ccb6c8af..1280081577 100644
--- a/tools/examples/Makefile
+++ b/tools/examples/Makefile
@@ -26,6 +26,7 @@ XEN_SCRIPTS += network-route vif-route
XEN_SCRIPTS += network-nat vif-nat
XEN_SCRIPTS += block
XEN_SCRIPTS += block-enbd block-nbd
+XEN_SCRIPTS += blktap
XEN_SCRIPTS += vtpm vtpm-delete
XEN_SCRIPTS += xen-hotplug-cleanup
XEN_SCRIPTS += external-device-migrate
diff --git a/tools/examples/blktap b/tools/examples/blktap
new file mode 100644
index 0000000000..ba9f4ee52f
--- /dev/null
+++ b/tools/examples/blktap
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Copyright (c) 2005, XenSource Ltd.
+
+# Hotplug script for "tap" backends.  It only acknowledges the 'add' action.
+# NOTE(review): findCommand and success come from xen-hotplug-common.sh;
+# findCommand presumably sets $command from the arguments -- confirm there.
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+
+findCommand "$@"
+
+# '=' is the POSIX string comparison; '==' is a bash extension that other
+# /bin/sh implementations reject.
+if [ "$command" = 'add' ]
+then
+    success
+fi
+
+exit 0
diff --git a/tools/examples/xen-backend.agent b/tools/examples/xen-backend.agent
index e662015da2..3a01a2c7ea 100755
--- a/tools/examples/xen-backend.agent
+++ b/tools/examples/xen-backend.agent
@@ -7,6 +7,9 @@ PATH=/etc/xen/scripts:$PATH
claim_lock xenbus_hotplug_global
case "$XENBUS_TYPE" in
+ tap)
+ /etc/xen/scripts/blktap "$ACTION"
+ ;;
vbd)
/etc/xen/scripts/block "$ACTION"
;;
diff --git a/tools/examples/xen-backend.rules b/tools/examples/xen-backend.rules
index 91f0b06107..21c6d8c8fc 100644
--- a/tools/examples/xen-backend.rules
+++ b/tools/examples/xen-backend.rules
@@ -1,3 +1,4 @@
+SUBSYSTEM=="xen-backend", KERNEL=="tap*", RUN+="/etc/xen/scripts/blktap $env{ACTION}"
SUBSYSTEM=="xen-backend", KERNEL=="vbd*", RUN+="/etc/xen/scripts/block $env{ACTION}"
SUBSYSTEM=="xen-backend", KERNEL=="vtpm*", RUN+="/etc/xen/scripts/vtpm $env{ACTION}"
SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="online", RUN+="$env{script} online"
diff --git a/tools/libaio/COPYING b/tools/libaio/COPYING
new file mode 100644
index 0000000000..c4792dd27a
--- /dev/null
+++ b/tools/libaio/COPYING
@@ -0,0 +1,515 @@
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+^L
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+^L
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+^L
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+^L
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+^L
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+^L
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+^L
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+^L
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+ <one line to give the library's name and a brief idea of what it
+does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper
+mail.
+
+You should also get your employer (if you work as a programmer) or
+your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James
+Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/tools/libaio/ChangeLog b/tools/libaio/ChangeLog
new file mode 100644
index 0000000000..ddcf6e3841
--- /dev/null
+++ b/tools/libaio/ChangeLog
@@ -0,0 +1,43 @@
+0.4.0
+ - remove libredhat-kernel
+ - add rough outline for man pages
+ - make the compiled io_getevents() add the extra parameter and
+ pass the timeout for updating as per 2.5
+ - fixes for ia64, now works
+ - fixes for x86-64
+ - powerpc support from Gianni Tedesco <gianni@ecsc.co.uk>
+ - disable the NULL check in harness/cases/4.t on ia64: ia64
+ maps the 0 page and causes this check to fail.
+
+0.3.15
+ - use real syscall interface, but don't break source compatibility
+ yet (that will happen with 0.4.0)
+
+0.3.13
+ - add test cases
+
+0.3.11
+ - use library versioning of libredhat-kernel to always provide a
+ fallback
+
+0.3.9
+ - add io_queue_release function
+
+0.3.8
+ - make clean deletes libredhat-kernel.so.1
+ - const struct timespec *
+ - add make srpm target
+
+0.3.7
+ - fix assembly function .types
+ - export io_getevents
+ - fix io_submit function prototype to match the kernel
+ - provide /usr/lib/libredhat-kernel.so link for compilation
+ (do NOT link against libredhat-kernel.so directly)
+ - fix soname to libaio.so.1
+ - fix dummy libredhat-kernel's soname
+ - work around nfs bug
+ - provide and install libredhat-kernel.so.1 stub
+ - Makefile improvements
+ - make sure dummy libredhat-kernel.so only returns -ENOSYS
+
diff --git a/tools/libaio/INSTALL b/tools/libaio/INSTALL
new file mode 100644
index 0000000000..29b907797a
--- /dev/null
+++ b/tools/libaio/INSTALL
@@ -0,0 +1,18 @@
+To install the library, execute the command:
+
+ make prefix=`pwd`/usr install
+
+which will install the binaries and header files into the directory
+usr. Set prefix=/usr to get them installed into the main system.
+
+Please note: Do not attempt to install on the system the
+"libredhat-kernel.so" file. It is a dummy shared library
+provided only for the purpose of being able to bootstrap
+this facility while running on systems without the correct
+libredhat-kernel.so built. The contents of the included
+libredhat-kernel.so are only stubs; this library is NOT
+functional for anything except the internal purpose of
+linking libaio.so against the provided stubs. At runtime,
+libaio.so requires a real libredhat-kernel.so library; this
+is provided by the Red Hat kernel RPM packages with async
+I/O functionality.
diff --git a/tools/libaio/Makefile b/tools/libaio/Makefile
new file mode 100644
index 0000000000..06d8775e33
--- /dev/null
+++ b/tools/libaio/Makefile
@@ -0,0 +1,57 @@
+# Top-level makefile for libaio: delegates the build to src/ and provides
+# CVS/RPM packaging helpers.
+
+NAME=libaio
+SPECFILE=$(NAME).spec
+# Kept recursively expanded (=): the awk and rpmbuild probes below then run
+# only when a packaging target expands them, not on every invocation of make.
+VERSION=$(shell awk '/Version:/ { print $$2 }' $(SPECFILE))
+RELEASE=$(shell awk '/Release:/ { print $$2 }' $(SPECFILE))
+# CVS tag derived from version and release with dots replaced by dashes.
+CVSTAG = $(NAME)_$(subst .,-,$(VERSION))_$(subst .,-,$(RELEASE))
+# Use rpmbuild when it is on PATH, otherwise fall back to rpm.  Written with
+# POSIX sh syntax only: make runs $(shell ...) under /bin/sh, where the
+# bash-style ">&/dev/null" redirection is not portable.
+RPMBUILD=$(shell command -v rpmbuild >/dev/null 2>&1 && echo "rpmbuild" || echo "rpm")
+
+prefix=/usr
+includedir=$(prefix)/include
+libdir=$(prefix)/lib
+
+# None of these targets name a file they create.
+.PHONY: default all install clean tag-archive create-archive archive srpm
+
+default: all
+
+all:
+	@$(MAKE) -C src
+
+# Only builds; this makefile copies nothing into $(prefix).
+install: all
+
+clean:
+	@$(MAKE) -C src clean
+	@$(MAKE) -C harness clean
+
+# Force-move the CVS tag for the current version/release.
+tag-archive:
+	@cvs -Q tag -F $(CVSTAG)
+
+# Export the tagged tree via /tmp and pack it as ./$(NAME)-$(VERSION).tar.gz.
+create-archive: tag-archive
+	@rm -rf /tmp/$(NAME)
+	@cd /tmp && cvs -Q -d $(CVSROOT) export -r$(CVSTAG) $(NAME) || echo GRRRrrrrr -- ignore [export aborted]
+	@mv /tmp/$(NAME) /tmp/$(NAME)-$(VERSION)
+	@cd /tmp && tar czSpf $(NAME)-$(VERSION).tar.gz $(NAME)-$(VERSION)
+	@rm -rf /tmp/$(NAME)-$(VERSION)
+	@cp /tmp/$(NAME)-$(VERSION).tar.gz .
+	@rm -f /tmp/$(NAME)-$(VERSION).tar.gz
+	@echo " "
+	@echo "The final archive is ./$(NAME)-$(VERSION).tar.gz."
+
+archive: clean tag-archive create-archive
+
+# Build a source RPM from the archive, using the current directory for both
+# sources and output.
+srpm: create-archive
+	$(RPMBUILD) --define "_sourcedir `pwd`" --define "_srcrpmdir `pwd`" --nodeps -bs $(SPECFILE)
diff --git a/tools/libaio/TODO b/tools/libaio/TODO
new file mode 100644
index 0000000000..0a9ac15b19
--- /dev/null
+++ b/tools/libaio/TODO
@@ -0,0 +1,4 @@
+- Write man pages.
+- Make -static links against libaio work.
+- Fallback on userspace if the kernel calls return -ENOSYS.
+
diff --git a/tools/libaio/harness/Makefile b/tools/libaio/harness/Makefile
new file mode 100644
index 0000000000..d2483fdda2
--- /dev/null
+++ b/tools/libaio/harness/Makefile
@@ -0,0 +1,44 @@
+# Test harness for libaio.  Every cases/*.t file is built into its own
+# cases/*.p program by compiling main.c with TEST_NAME set to that file's
+# path; "make check" hands the programs to runtests.sh.
+TEST_SRCS:=$(shell find cases/ -name \*.t | sort -n -t/ -k2)
+PROGS:=$(patsubst %.t,%.p,$(TEST_SRCS))
+HARNESS_SRCS:=main.c
+# io_queue.c
+
+CFLAGS=-Wall -Werror -g -O
+# Libraries are listed in LDLIBS and placed after the sources on the link
+# line: the linker resolves symbols left to right, so a library named before
+# main.c may contribute nothing and leave the io_* calls undefined.
+LDLIBS=-laio
+#-lpthread -lrt
+
+.PHONY: all clean check FORCE
+
+all: $(PROGS)
+
+$(PROGS): %.p: %.t $(HARNESS_SRCS)
+	$(CC) $(CFLAGS) -DTEST_NAME=\"$<\" -o $@ $(HARNESS_SRCS) $(LDLIBS)
+
+# Also removes the fixture files that the rules below create under testdir/.
+clean:
+	rm -f $(PROGS) *.o runtests.out rofile wofile rwfile
+	rm -f testdir/rofile testdir/wofile testdir/rwfile
+
+# Depending on FORCE makes a rule run every time.  A separate target is
+# used because .PHONY's own prerequisite list declares the phony targets.
+FORCE:
+
+# Fixture files, recreated on every "make check" with known contents and
+# permissions: read-only, write-only and read-write for the owner.
+testdir/rofile: MODE := 400
+testdir/wofile: MODE := 200
+testdir/rwfile: MODE := 600
+testdir/rofile testdir/wofile testdir/rwfile: FORCE
+	rm -f $@
+	echo "test" >$@
+	chmod $(MODE) $@
+
+check: $(PROGS) testdir/rofile testdir/rwfile testdir/wofile
+	./runtests.sh $(PROGS)
+
diff --git a/tools/libaio/harness/README b/tools/libaio/harness/README
new file mode 100644
index 0000000000..5557370589
--- /dev/null
+++ b/tools/libaio/harness/README
@@ -0,0 +1,19 @@
+Notes on running this test suite:
+
+To run the test suite, run "make check". All test cases should pass
+and there should be 0 fails.
+
+Several of the test cases require a directory on the filesystem under
+test for the creation of test files, as well as the generation of
+error conditions. The test cases assume the directories (or symlinks
+to directories) are as follows:
+
+ testdir/
+ - used for general read/write test cases. Must have at
+ least as much free space as the machine has RAM (up
+ to 768MB).
+ testdir.enospc/
+ - a filesystem that has space for writing 8KB out, but
+ fails with -ENOSPC beyond 8KB.
+ testdir.ext2/
+ - must be an ext2 filesystem.
diff --git a/tools/libaio/harness/attic/0.t b/tools/libaio/harness/attic/0.t
new file mode 100644
index 0000000000..033e62c1b2
--- /dev/null
+++ b/tools/libaio/harness/attic/0.t
@@ -0,0 +1,9 @@
+/* 0.t
+ Test harness check: okay.
+*/
+int test_main(void)
+{
+ printf("test_main: okay\n");
+ return 0;
+}
+
diff --git a/tools/libaio/harness/attic/1.t b/tools/libaio/harness/attic/1.t
new file mode 100644
index 0000000000..799ffd179a
--- /dev/null
+++ b/tools/libaio/harness/attic/1.t
@@ -0,0 +1,9 @@
+/* 1.t
+ Test harness check: fail.
+*/
+int test_main(void)
+{
+ printf("test_main: fail\n");
+ return 1;
+}
+
diff --git a/tools/libaio/harness/cases/10.t b/tools/libaio/harness/cases/10.t
new file mode 100644
index 0000000000..9d3beb2fdb
--- /dev/null
+++ b/tools/libaio/harness/cases/10.t
@@ -0,0 +1,53 @@
+/* 10.t - uses testdir.enospc/rwfile
+- Check results on out-of-space and out-of-quota. (10.t)
+ - write that fills filesystem but does not go over should succeed
+ - write that fills filesystem and goes over should be partial
+ - write to full filesystem should return -ENOSPC
+ - read beyond end of file after ENOSPC should return 0
+*/
+#include "aio_setup.h"
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+/*
+ * Exercise writes and reads around the point where testdir.enospc/rwfile
+ * reaches LIMIT bytes. Each attempt_rw() call passes the expected result as
+ * its last argument; mismatches are OR-ed into status, which is returned.
+ */
+int test_main(void)
+{
+/* Note: changing either of these requires updating the ext2-enospc.img
+ * filesystem image. Also, if SIZE is less than PAGE_SIZE, problems
+ * crop up due to ext2's preallocation.
+ */
+#define LIMIT 65536
+#define SIZE 65536
+ char *buf;
+ int rwfd;
+ int status = 0, res;
+
+ rwfd = open("testdir.enospc/rwfile", O_RDWR|O_CREAT|O_TRUNC, 0600);
+ assert(rwfd != -1);
+ res = ftruncate(rwfd, 0); assert(res == 0);
+ buf = malloc(SIZE); assert(buf != NULL);
+ memset(buf, 0, SIZE);
+
+
+ /* SIZE bytes ending exactly at LIMIT: expected to transfer in full */
+ status |= attempt_rw(rwfd, buf, SIZE, LIMIT-SIZE, WRITE, SIZE);
+ status |= attempt_rw(rwfd, buf, SIZE, LIMIT-SIZE, READ, SIZE);
+
+ /* starting at LIMIT: write expects -ENOSPC, read expects 0 bytes */
+ status |= attempt_rw(rwfd, buf, SIZE, LIMIT, WRITE, -ENOSPC);
+ status |= attempt_rw(rwfd, buf, SIZE, LIMIT, READ, 0);
+
+ /* start over with an empty file */
+ res = ftruncate(rwfd, 0); assert(res == 0);
+
+ /* shifted by one byte so the request crosses LIMIT: expect SIZE-1 */
+ status |= attempt_rw(rwfd, buf, SIZE, 1+LIMIT-SIZE, WRITE, SIZE-1);
+ status |= attempt_rw(rwfd, buf, SIZE, 1+LIMIT-SIZE, READ, SIZE-1);
+ status |= attempt_rw(rwfd, buf, SIZE, LIMIT, READ, 0);
+
+ /* at LIMIT again; the final zero-length write expects 0 */
+ status |= attempt_rw(rwfd, buf, SIZE, LIMIT, WRITE, -ENOSPC);
+ status |= attempt_rw(rwfd, buf, SIZE, LIMIT, READ, 0);
+ status |= attempt_rw(rwfd, buf, 0, LIMIT, WRITE, 0);
+
+ res = close(rwfd); assert(res == 0);
+ res = unlink("testdir.enospc/rwfile"); assert(res == 0);
+ return status;
+}
+
diff --git a/tools/libaio/harness/cases/11.t b/tools/libaio/harness/cases/11.t
new file mode 100644
index 0000000000..efcf6d45f3
--- /dev/null
+++ b/tools/libaio/harness/cases/11.t
@@ -0,0 +1,39 @@
+/* 11.t - uses testdir/rwfile
+- repeated read / write of same page (to check accounting) (11.t)
+*/
+#include "aio_setup.h"
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+/*
+ * Write the same SIZE bytes at offset 0 of testdir/rwfile COUNT times, then
+ * read them back COUNT times. Each loop stops at the first attempt_rw()
+ * mismatch. Returns 0 when every request matched, non-zero otherwise.
+ */
+int test_main(void)
+{
+#define COUNT	1000000
+#define SIZE	256
+	char *buf;
+	int rwfd;
+	int status = 0, res;
+	int i;
+
+	rwfd = open("testdir/rwfile", O_RDWR|O_CREAT|O_TRUNC, 0600);
+	assert(rwfd != -1);
+	buf = malloc(SIZE);
+	assert(buf != NULL);
+	memset(buf, 0, SIZE);
+
+	for (i=0; i<COUNT; i++) {
+		status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE_SILENT, SIZE);
+		if (status)
+			break;
+	}
+	printf("completed %d out of %d writes\n", i, COUNT);
+	for (i=0; i<COUNT; i++) {
+		status |= attempt_rw(rwfd, buf, SIZE, 0, READ_SILENT, SIZE);
+		if (status)
+			break;
+	}
+	printf("completed %d out of %d reads\n", i, COUNT);
+
+	/* release the buffer and descriptor instead of leaking them */
+	free(buf);
+	res = close(rwfd);
+	assert(res == 0);
+
+	return status;
+}
+
diff --git a/tools/libaio/harness/cases/12.t b/tools/libaio/harness/cases/12.t
new file mode 100644
index 0000000000..3499204440
--- /dev/null
+++ b/tools/libaio/harness/cases/12.t
@@ -0,0 +1,49 @@
+/* 12.t
+- ioctx access across fork() (12.t)
+ */
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include "aio_setup.h"
+
+/*
+ * Body of the forked child: submit zero iocbs on the inherited io_ctx,
+ * expecting -EINVAL, and exit with attempt_io_submit()'s verdict
+ * (0 = result matched, 1 = mismatch). Never returns.
+ */
+void test_child(void)
+{
+	int verdict = attempt_io_submit(io_ctx, 0, NULL, -EINVAL);
+
+	/* _exit() skips stdio cleanup, so push the output out by hand */
+	fflush(stdout);
+	_exit(verdict);
+}
+
+/*
+ * Check that io_ctx works in the parent, then fork a child that runs
+ * test_child() and report how the child ended.
+ * Returns 0 only if the child exited with status 0.
+ */
+int test_main(void)
+{
+	int res, status;
+	pid_t pid;
+	sigset_t block;
+
+	if (attempt_io_submit(io_ctx, 0, NULL, 0))
+		return 1;
+
+	/*
+	 * Block SIGCHLD on top of the current mask. sigprocmask() replaces
+	 * the deprecated BSD sigblock()/siggetmask() pair.
+	 */
+	sigemptyset(&block);
+	sigaddset(&block, SIGCHLD);
+	res = sigprocmask(SIG_BLOCK, &block, NULL);
+	assert(res == 0);
+
+	/* flush everything so the child does not replay buffered output */
+	fflush(NULL);
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0)
+		test_child();
+
+	res = waitpid(pid, &status, 0);
+	if (res != pid) {
+		/* status is not valid if the wait itself failed */
+		printf("waitpid returned %d [%s] -- FAILED\n",
+			res, strerror(errno));
+		return 1;
+	}
+
+	if (WIFEXITED(status)) {
+		int failed = (WEXITSTATUS(status) != 0);
+		printf("child exited with status %d%s\n", WEXITSTATUS(status),
+			failed ? " -- FAILED" : "");
+		return failed;
+	}
+
+	/* anything else: failed */
+	if (WIFSIGNALED(status))
+		printf("child killed by signal %d -- FAILED.\n",
+			WTERMSIG(status));
+
+	return 1;
+}
diff --git a/tools/libaio/harness/cases/13.t b/tools/libaio/harness/cases/13.t
new file mode 100644
index 0000000000..5f18005b6a
--- /dev/null
+++ b/tools/libaio/harness/cases/13.t
@@ -0,0 +1,66 @@
+/* 13.t - uses testdir/rwfile
+- Submit multiple writes larger than aio-max-size (deadlocks on older
+ aio code)
+*/
+#include "aio_setup.h"
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+/*
+ * Submit IOS writes of SIZE bytes each to testdir/rwfile in one io_submit()
+ * call, then reap one completion per request. Returns 0 if every write
+ * completed with SIZE bytes, non-zero otherwise.
+ */
+int test_main(void)
+{
+#define SIZE	(1024 * 1024)
+#define IOS	8
+	struct iocb	iocbs[IOS];
+	struct iocb	*iocb_list[IOS];
+	char		*bufs[IOS];
+	int		rwfd;
+	int		status = 0, res;
+	int		i;
+
+	rwfd = open("testdir/rwfile", O_RDWR|O_CREAT|O_TRUNC, 0600);
+	assert(rwfd != -1);
+	res = ftruncate(rwfd, 0);
+	assert(res == 0);
+
+	/* request i writes a zeroed buffer at file offset i * SIZE */
+	for (i=0; i<IOS; i++) {
+		bufs[i] = malloc(SIZE);
+		assert(bufs[i] != NULL);
+		memset(bufs[i], 0, SIZE);
+
+		io_prep_pwrite(&iocbs[i], rwfd, bufs[i], SIZE, i * SIZE);
+		iocb_list[i] = &iocbs[i];
+	}
+
+	status |= attempt_io_submit(io_ctx, IOS, iocb_list, IOS);
+
+	for (i=0; i<IOS; i++) {
+		struct timespec ts = { .tv_sec = 30, .tv_nsec = 0 };
+		struct io_event event;
+		struct iocb *iocb;
+		long ev_res;
+
+		/*
+		 * Ask for at least one event: with min_nr == 0 the call only
+		 * polls and the 30 second timeout is never used.
+		 */
+		res = io_getevents(io_ctx, 1, 1, &event, &ts);
+		if (res != 1) {
+			status |= 1;
+			printf("io_getevents failed [%d] with res=%d [%s]\n",
+				i, res, (res < 0) ? strerror(-res) : "okay");
+			break;
+		}
+
+		/*
+		 * NOTE(review): event.res looks to be an unsigned field in
+		 * libaio.h; take a signed copy so negative errno values are
+		 * recognised below -- confirm against the header.
+		 */
+		ev_res = (long)event.res;
+		if (ev_res != SIZE)
+			status |= 1;
+
+		iocb = (void *)event.obj;
+		printf("event[%d]: write[%d] %s, returned: %ld [%s]\n",
+			i, (int)(iocb - &iocbs[0]),
+			(ev_res != SIZE) ? "failed" : "okay",
+			ev_res,
+			(ev_res < 0) ? strerror((int)-ev_res) : "okay"
+			);
+	}
+
+	res = ftruncate(rwfd, 0);
+	assert(res == 0);
+	res = close(rwfd);
+	assert(res == 0);
+	return status;
+}
+
diff --git a/tools/libaio/harness/cases/14.t b/tools/libaio/harness/cases/14.t
new file mode 100644
index 0000000000..514622b569
--- /dev/null
+++ b/tools/libaio/harness/cases/14.t
@@ -0,0 +1,90 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include "aio_setup.h"
+#include <sys/mman.h>
+
+/* parenthesised so the macro stays one value wherever it is expanded */
+#define SIZE	(768*1024*1024)
+
+//just submit an I/O
+
+/*
+ * Body of the forked child: set up a fresh aio context, submit `loop`
+ * alternating SIZE-byte writes and reads on testdir/rwfile without reaping
+ * any completion, then exit. Exits 0 if every io_submit() accepted its
+ * request, 1 otherwise. Never returns.
+ */
+int test_child(void)
+{
+	char *buf;
+	int rwfd;
+	int res;
+	long size;
+	struct iocb iocb;
+	struct iocb *iocbs[] = { &iocb };
+	int loop = 10;
+	int i;
+
+	aio_setup(1024);
+
+	size = SIZE;
+
+	printf("size = %ld\n", size);
+
+	rwfd = open("testdir/rwfile", O_RDWR);
+	assert(rwfd != -1);
+	res = ftruncate(rwfd, 0);
+	assert(res == 0);
+	buf = malloc(size);
+	assert(buf != NULL);
+
+	for (i=0; i<loop; i++) {
+		/* even iterations write, odd iterations read */
+		switch (i%2) {
+		case 0:
+			io_prep_pwrite(&iocb, rwfd, buf, size, 0);
+			break;
+		case 1:
+			io_prep_pread(&iocb, rwfd, buf, size, 0);
+			break;
+		}
+
+		res = io_submit(io_ctx, 1, iocbs);
+		if (res != 1) {
+			printf("child: submit: io_submit res=%d [%s]\n", res,
+				strerror(-res));
+			/* _exit() does not flush stdio; do it by hand */
+			fflush(stdout);
+			_exit(1);
+		}
+	}
+
+	res = ftruncate(rwfd, 0);
+	assert(res == 0);
+
+	/* without this the "size = ..." line is lost when stdout is buffered */
+	fflush(stdout);
+	_exit(0);
+}
+
+/* from 12.t */
+/*
+ * Check that io_ctx works in the parent, then fork a child that runs
+ * test_child() and report how the child ended.
+ * Returns 0 only if the child exited with status 0.
+ */
+int test_main(void)
+{
+	int res, status;
+	pid_t pid;
+	sigset_t block;
+
+	if (attempt_io_submit(io_ctx, 0, NULL, 0))
+		return 1;
+
+	/*
+	 * Block SIGCHLD on top of the current mask. sigprocmask() replaces
+	 * the deprecated BSD sigblock()/siggetmask() pair.
+	 */
+	sigemptyset(&block);
+	sigaddset(&block, SIGCHLD);
+	res = sigprocmask(SIG_BLOCK, &block, NULL);
+	assert(res == 0);
+
+	/* flush everything so the child does not replay buffered output */
+	fflush(NULL);
+	pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0)
+		test_child();
+
+	res = waitpid(pid, &status, 0);
+	if (res != pid) {
+		/* status is not valid if the wait itself failed */
+		printf("waitpid returned %d [%s] -- FAILED\n",
+			res, strerror(errno));
+		return 1;
+	}
+
+	if (WIFEXITED(status)) {
+		int failed = (WEXITSTATUS(status) != 0);
+		printf("child exited with status %d%s\n", WEXITSTATUS(status),
+			failed ? " -- FAILED" : "");
+		return failed;
+	}
+
+	/* anything else: failed */
+	if (WIFSIGNALED(status))
+		printf("child killed by signal %d -- FAILED.\n",
+			WTERMSIG(status));
+
+	return 1;
+}
diff --git a/tools/libaio/harness/cases/2.t b/tools/libaio/harness/cases/2.t
new file mode 100644
index 0000000000..3a0212d698
--- /dev/null
+++ b/tools/libaio/harness/cases/2.t
@@ -0,0 +1,41 @@
+/* 2.t
+- io_setup (#2)
+ - with invalid context pointer
+ - with maxevents <= 0
+ - with an already initialized ctxp
+*/
+
+/*
+ * Call io_setup(n, ctxp), print the call next to the expected and actual
+ * return values, and report whether they agree.
+ * Returns 0 when io_setup() returned `expect`, 1 otherwise.
+ */
+int attempt(int n, io_context_t *ctxp, int expect)
+{
+	int rc;
+	int mismatch;
+
+	/* announce the call first so it is visible even if io_setup() hangs */
+	printf("expect %3d: io_setup(%5d, %p) = ", expect, n, ctxp);
+	fflush(stdout);
+
+	rc = io_setup(n, ctxp);
+	mismatch = (rc != expect);
+	printf("%3d [%s]%s\n", rc, strerror(-rc),
+		mismatch ? " -- FAILED" : "");
+
+	return mismatch ? 1 : 0;
+}
+
+/*
+ * Run io_setup() through the attempt() helper with bad context pointers,
+ * non-positive event counts and an already-used context. Mismatches are
+ * OR-ed into status, which is returned (0 = all matched).
+ */
+int test_main(void)
+{
+ io_context_t ctx;
+ int status = 0;
+
+ ctx = NULL;
+ /* context pointer is KERNEL_RW_POINTER: -EFAULT expected for any n */
+ status |= attempt(-1000, KERNEL_RW_POINTER, -EFAULT);
+ status |= attempt( 1000, KERNEL_RW_POINTER, -EFAULT);
+ status |= attempt( 0, KERNEL_RW_POINTER, -EFAULT);
+ /* valid pointer but n <= 0: -EINVAL expected */
+ status |= attempt(-1000, &ctx, -EINVAL);
+ status |= attempt( -1, &ctx, -EINVAL);
+ status |= attempt( 0, &ctx, -EINVAL);
+ /* none of the failed calls may have written to ctx */
+ assert(ctx == NULL);
+ /* first valid setup succeeds; repeating it on the same ctx expects -EINVAL */
+ status |= attempt( 1, &ctx, 0);
+ status |= attempt( 1, &ctx, -EINVAL);
+
+ return status;
+}
+
diff --git a/tools/libaio/harness/cases/3.t b/tools/libaio/harness/cases/3.t
new file mode 100644
index 0000000000..7773d80f06
--- /dev/null
+++ b/tools/libaio/harness/cases/3.t
@@ -0,0 +1,25 @@
+/* 3.t
+- io_submit/io_getevents with invalid addresses (3.t)
+
+*/
+#include "aio_setup.h"
+
+/*
+ * Call io_submit() through attempt_io_submit() with a bad context, bad
+ * iocb-array pointers, arrays containing bad entries, and a negative count.
+ * Mismatches are OR-ed into status, which is returned (0 = all matched).
+ */
+int test_main(void)
+{
+ /* a and b are never prepared; only their addresses are used */
+ struct iocb a, b;
+ struct iocb *good_ios[] = { &a, &b };
+ /* first entry is NULL */
+ struct iocb *bad1_ios[] = { NULL, &b };
+ /* first entry is KERNEL_RW_POINTER */
+ struct iocb *bad2_ios[] = { KERNEL_RW_POINTER, &a };
+ int status = 0;
+
+ status |= attempt_io_submit(BAD_CTX, 1, good_ios, -EINVAL);
+ /* submitting zero requests expects 0 */
+ status |= attempt_io_submit( io_ctx, 0, good_ios, 0);
+ /* the array pointer itself is NULL or (void *)-1 */
+ status |= attempt_io_submit( io_ctx, 1, NULL, -EFAULT);
+ status |= attempt_io_submit( io_ctx, 1, (void *)-1, -EFAULT);
+ /* the array is readable but its first entry is a bad pointer */
+ status |= attempt_io_submit( io_ctx, 2, bad1_ios, -EFAULT);
+ status |= attempt_io_submit( io_ctx, 2, bad2_ios, -EFAULT);
+ /* negative request count */
+ status |= attempt_io_submit( io_ctx, -1, good_ios, -EINVAL);
+
+ return status;
+}
+
diff --git a/tools/libaio/harness/cases/4.t b/tools/libaio/harness/cases/4.t
new file mode 100644
index 0000000000..972b4f24b1
--- /dev/null
+++ b/tools/libaio/harness/cases/4.t
@@ -0,0 +1,72 @@
+/* 4.t
+- read of descriptor without read permission (4.t)
+- write to descriptor without write permission (4.t)
+- check that O_APPEND writes actually append
+
+*/
+#include "aio_setup.h"
+
+#define SIZE 512
+#define READ 'r'
+#define WRITE 'w'
+/*
+ * Prepare one read (rw == READ) or write (rw == WRITE) of `count` bytes at
+ * offset `pos`, run it through sync_submit(), and print the result next to
+ * the expected value.
+ * Returns 0 when the result equals `expect`, 1 otherwise.
+ */
+int attempt(int fd, void *buf, int count, long long pos, int rw, int expect)
+{
+	struct iocb request;
+	int got;
+	const char *text;
+
+	if (rw == READ)
+		io_prep_pread (&request, fd, buf, count, pos);
+	else if (rw == WRITE)
+		io_prep_pwrite(&request, fd, buf, count, pos);
+
+	printf("expect %3d: (%c), res = ", expect, rw);
+	fflush(stdout);
+
+	got = sync_submit(&request);
+
+	/* zero and negative results are decoded as errno values */
+	if (got <= 0)
+		text = strerror(-got);
+	else
+		text = "Success";
+	printf("%3d [%s]%s\n", got, text,
+		(got != expect) ? " -- FAILED" : "");
+
+	return (got != expect) ? 1 : 0;
+}
+
+/*
+ * Check permission handling, bad offsets, O_APPEND behaviour and bad buffer
+ * addresses using the attempt() helper. Mismatches are OR-ed into status,
+ * which is returned (0 = all matched).
+ */
+int test_main(void)
+{
+ char buf[SIZE];
+ int rofd, wofd, rwfd;
+ int status = 0, res;
+
+ memset(buf, 0, SIZE);
+
+ rofd = open("testdir/rofile", O_RDONLY); assert(rofd != -1);
+ wofd = open("testdir/wofile", O_WRONLY); assert(wofd != -1);
+ rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1);
+
+ /* wrong direction for the descriptor's open mode: -EBADF expected */
+ status |= attempt(rofd, buf, SIZE, 0, WRITE, -EBADF);
+ status |= attempt(wofd, buf, SIZE, 0, READ, -EBADF);
+ status |= attempt(rwfd, buf, SIZE, 0, WRITE, SIZE);
+ status |= attempt(rwfd, buf, SIZE, 0, READ, SIZE);
+ /* negative file offset: -EINVAL expected */
+ status |= attempt(rwfd, buf, SIZE, -1, READ, -EINVAL);
+ status |= attempt(rwfd, buf, SIZE, -1, WRITE, -EINVAL);
+
+ /* rwfd is reassigned here; the earlier descriptor is not closed */
+ rwfd = open("testdir/rwfile", O_RDWR|O_APPEND); assert(rwfd != -1);
+ res = ftruncate(rwfd, 0); assert(res == 0);
+ status |= attempt(rwfd, buf, SIZE, 0, READ, 0);
+ /* both writes name offset 0; the read-back below expects them end to end */
+ status |= attempt(rwfd, "1234", 4, 0, WRITE, 4);
+ status |= attempt(rwfd, "5678", 4, 0, WRITE, 4);
+ memset(buf, 0, SIZE);
+ status |= attempt(rwfd, buf, SIZE, 0, READ, 8);
+ printf("read after append: [%s]\n", buf);
+ assert(memcmp(buf, "12345678", 8) == 0);
+
+ /* buffer address is KERNEL_RW_POINTER: -EFAULT expected */
+ status |= attempt(rwfd, KERNEL_RW_POINTER, SIZE, 0, READ, -EFAULT);
+ status |= attempt(rwfd, KERNEL_RW_POINTER, SIZE, 0, WRITE, -EFAULT);
+
+ /* Some architectures map the 0 page. Ugh. */
+#if !defined(__ia64__)
+ status |= attempt(rwfd, NULL, SIZE, 0, WRITE, -EFAULT);
+#endif
+
+ return status;
+}
+
diff --git a/tools/libaio/harness/cases/5.t b/tools/libaio/harness/cases/5.t
new file mode 100644
index 0000000000..7669fd7006
--- /dev/null
+++ b/tools/libaio/harness/cases/5.t
@@ -0,0 +1,47 @@
+/* 5.t
+- Write from a mmap() of the same file. (5.t)
+*/
+#include "aio_setup.h"
+#include <sys/mman.h>
+
+/*
+ * Use a MAP_SHARED mapping of testdir/rwfile as the I/O buffer for reads
+ * and writes on that same file, under several mapping protections.
+ * Mismatches are OR-ed into status, which is returned (0 = all matched).
+ */
+int test_main(void)
+{
+ int page_size = getpagesize();
+#define SIZE 512
+ char *buf;
+ int rwfd;
+ int status = 0, res;
+
+ rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1);
+ res = ftruncate(rwfd, 512); assert(res == 0);
+
+ /* readable and writable mapping: write first, then read */
+ buf = mmap(0, page_size, PROT_READ|PROT_WRITE, MAP_SHARED, rwfd, 0);
+ assert(buf != (char *)-1);
+
+ status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, SIZE);
+ status |= attempt_rw(rwfd, buf, SIZE, 0, READ, SIZE);
+
+ /* fresh readable and writable mapping: read first, then write */
+ res = munmap(buf, page_size); assert(res == 0);
+ buf = mmap(0, page_size, PROT_READ|PROT_WRITE, MAP_SHARED, rwfd, 0);
+ assert(buf != (char *)-1);
+
+ status |= attempt_rw(rwfd, buf, SIZE, 0, READ, SIZE);
+ status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, SIZE);
+
+ /* read-only mapping: writing from it works, reading into it expects -EFAULT */
+ res = munmap(buf, page_size); assert(res == 0);
+ buf = mmap(0, page_size, PROT_READ, MAP_SHARED, rwfd, 0);
+ assert(buf != (char *)-1);
+
+ status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, SIZE);
+ status |= attempt_rw(rwfd, buf, SIZE, 0, READ, -EFAULT);
+
+ /* write-only mapping: reading into it works, writing from it expects -EFAULT */
+ res = munmap(buf, page_size); assert(res == 0);
+ buf = mmap(0, page_size, PROT_WRITE, MAP_SHARED, rwfd, 0);
+ assert(buf != (char *)-1);
+
+ status |= attempt_rw(rwfd, buf, SIZE, 0, READ, SIZE);
+ status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, -EFAULT);
+
+ return status;
+}
+
diff --git a/tools/libaio/harness/cases/6.t b/tools/libaio/harness/cases/6.t
new file mode 100644
index 0000000000..cea4b01c96
--- /dev/null
+++ b/tools/libaio/harness/cases/6.t
@@ -0,0 +1,57 @@
+/* 6.t
+- huge reads (pinned pages) (6.t)
+- huge writes (6.t)
+*/
+#include "aio_setup.h"
+#include <sys/mman.h>
+
+/*
+ * Return the number that follows "MemTotal:" in /proc/meminfo.
+ * Asserts if the file cannot be opened or no such line can be parsed.
+ */
+long getmemsize(void)
+{
+	static const char key[] = "MemTotal:";
+	char line[256];
+	long total;
+	int found = 0;
+	FILE *meminfo = fopen("/proc/meminfo", "r");
+
+	assert(meminfo != NULL);
+
+	/* scan line by line and stop at the first parsable MemTotal entry */
+	while (!found && fgets(line, 255, meminfo) != NULL) {
+		line[255] = 0;
+		if (memcmp(line, key, 9) != 0)
+			continue;
+		if (sscanf(line + 9, "%ld", &total) == 1)
+			found = 1;
+	}
+	fclose(meminfo);
+
+	assert(found != 0);
+	return total;
+}
+
+/*
+ * Issue one very large write and one very large read on testdir/rwfile,
+ * sized from the machine's memory total. Mismatches are OR-ed into status,
+ * which is returned (0 = both matched).
+ */
+int test_main(void)
+{
+ char *buf;
+ int rwfd;
+ int status = 0, res;
+ long size;
+
+ size = getmemsize();
+ printf("size = %ld\n", size);
+ assert(size >= (16 * 1024));
+ /* cap the value at 768 * 1024, then scale by 1024 */
+ /* NOTE(review): presumably kB converted to bytes, i.e. at most 768MB -- confirm */
+ if (size > (768 * 1024))
+ size = 768 * 1024;
+ size *= 1024;
+
+ rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1);
+ res = ftruncate(rwfd, 0); assert(res == 0);
+ buf = malloc(size); assert(buf != NULL);
+
+ /* the buffer is deliberately left uninitialised (memset disabled) */
+ //memset(buf, 0, size);
+ status |= attempt_rw(rwfd, buf, size, 0, WRITE, size);
+ status |= attempt_rw(rwfd, buf, size, 0, READ, size);
+
+ /* the file is left at full size (truncate disabled) */
+ //res = ftruncate(rwfd, 0); assert(res == 0);
+
+ return status;
+}
+
diff --git a/tools/libaio/harness/cases/7.t b/tools/libaio/harness/cases/7.t
new file mode 100644
index 0000000000..d2d6cbc653
--- /dev/null
+++ b/tools/libaio/harness/cases/7.t
@@ -0,0 +1,27 @@
+/* 7.t
+- Write overlapping the file size rlimit boundary: should be a short
+ write. (7.t)
+- Write at the file size rlimit boundary: should give EFBIG. (I think
+ the spec requires that you do NOT deliver SIGXFSZ in this case, where
+ you would do so for sync IO.) (7.t)
+- Special case: a write of zero bytes at or beyond the file size rlimit
+ boundary must return success. (7.t)
+*/
+
+#include <sys/resource.h>
+
+/*
+ * Set both the soft and the hard RLIMIT_FSIZE of this process to `limit`.
+ * Asserts if `limit` does not survive conversion to the rlimit field type
+ * or if setrlimit() fails.
+ */
+void SET_RLIMIT(long long limit)
+{
+	struct rlimit fsize;
+	int rc;
+
+	fsize.rlim_cur = limit;
+	fsize.rlim_max = limit;
+
+	/* the fields may be narrower than long long: catch truncation */
+	assert(fsize.rlim_cur == limit);
+	assert(fsize.rlim_max == limit);
+
+	rc = setrlimit(RLIMIT_FSIZE, &fsize);
+	assert(rc == 0);
+}
+
+#define LIMIT 8192
+#define FILENAME "testdir/rwfile"
+
+#include "common-7-8.h"
diff --git a/tools/libaio/harness/cases/8.t b/tools/libaio/harness/cases/8.t
new file mode 100644
index 0000000000..8a3d83ec94
--- /dev/null
+++ b/tools/libaio/harness/cases/8.t
@@ -0,0 +1,49 @@
+/* 8.t
+- Ditto for the above three tests at the offset maximum (largest
+ possible ext2/3 file size.) (8.t)
+ */
+#include <sys/vfs.h>
+
+#define EXT2_OLD_SUPER_MAGIC 0xEF51
+#define EXT2_SUPER_MAGIC 0xEF53
+
+/*
+ * Return the file offset limit to test against for the filesystem that
+ * holds `fd`. Only ext2-magic filesystems with a 4096-byte block size are
+ * known; anything else prints a message and exits with status 3.
+ */
+long long get_fs_limit(int fd)
+{
+	struct statfs s;
+	int res;
+	long long lim = 0;
+
+	res = fstatfs(fd, &s);
+	assert(res == 0);
+
+	switch(s.f_type) {
+	case EXT2_OLD_SUPER_MAGIC:
+	case EXT2_SUPER_MAGIC:
+#if 0
+		{
+			long long tmp;
+			tmp = s.f_bsize / 4;
+			/* 12 direct + indirect block + dind + tind */
+			lim = 12 + tmp + tmp * tmp + tmp * tmp * tmp;
+			lim *= s.f_bsize;
+			printf("limit(%ld) = %lld\n", (long)s.f_bsize, lim);
+		}
+#endif
+		switch(s.f_bsize) {
+		/*
+		 * 2^41 - 4096. The LL suffix keeps the constant a long long
+		 * on targets where long is 32 bits wide.
+		 */
+		case 4096:	lim = 2199023251456LL; break;
+		default:
+			printf("unknown ext2 blocksize %ld\n", (long)s.f_bsize);
+			exit(3);
+		}
+		break;
+	default:
+		printf("unknown filesystem 0x%08lx\n", (long)s.f_type);
+		exit(3);
+	}
+	return lim;
+}
+
+#define SET_RLIMIT(x) do ; while (0)
+#define LIMIT get_fs_limit(rwfd)
+#define FILENAME "testdir.ext2/rwfile"
+
+#include "common-7-8.h"
diff --git a/tools/libaio/harness/cases/aio_setup.h b/tools/libaio/harness/cases/aio_setup.h
new file mode 100644
index 0000000000..37c96189b2
--- /dev/null
+++ b/tools/libaio/harness/cases/aio_setup.h
@@ -0,0 +1,98 @@
+io_context_t io_ctx;
+#define BAD_CTX ((io_context_t)-1)
+
+/*
+ * Initialise the shared io_ctx with room for `n` events.
+ * On failure prints the error and exits with status 3.
+ */
+void aio_setup(int n)
+{
+	int res = io_queue_init(n, &io_ctx);
+	if (res != 0) {
+		/* name the function that is actually called above */
+		printf("io_queue_init(%d) returned %d (%s)\n",
+			n, res, strerror(-res));
+		exit(3);
+	}
+}
+
+/*
+ * Call io_submit(ctx, nr, ios), print the call next to the expected and
+ * actual return values, and report whether they agree.
+ * Returns 0 when io_submit() returned `expect`, 1 otherwise.
+ */
+int attempt_io_submit(io_context_t ctx, long nr, struct iocb *ios[], int expect)
+{
+	int got;
+	const char *text;
+
+	/* announce the call first so it is visible even if io_submit() hangs */
+	printf("expect %3d: io_submit(%10p, %3ld, %10p) = ", expect, ctx, nr, ios);
+	fflush(stdout);
+
+	got = io_submit(ctx, nr, ios);
+
+	/* only zero and negative results are decoded as errno values */
+	if (got <= 0)
+		text = strerror(-got);
+	else
+		text = "";
+	printf("%3d [%s]%s\n", got, text,
+		(got != expect) ? " -- FAILED" : "");
+
+	return (got != expect) ? 1 : 0;
+}
+
+/*
+ * Submit one iocb on io_ctx and wait up to 30 seconds for its completion.
+ * Returns the completion's res field (callers compare it with a byte count
+ * or a negative errno), the io_submit()/io_getevents() return value if one
+ * of those calls fails, or -ETIMEDOUT if no completion arrives in time.
+ */
+int sync_submit(struct iocb *iocb)
+{
+	struct io_event event;
+	struct iocb *iocbs[] = { iocb };
+	int res;
+
+	/* 30 second timeout should be enough */
+	struct timespec ts;
+	ts.tv_sec = 30;
+	ts.tv_nsec = 0;
+
+	res = io_submit(io_ctx, 1, iocbs);
+	if (res != 1) {
+		printf("sync_submit: io_submit res=%d [%s]\n", res, strerror(-res));
+		return res;
+	}
+
+	/*
+	 * Ask for at least one event: with min_nr == 0 the call only polls
+	 * and the timeout above is never used.
+	 */
+	res = io_getevents(io_ctx, 1, 1, &event, &ts);
+	if (res == 0) {
+		/* do not hand back 0: callers would read it as a 0-byte result */
+		printf("sync_submit: io_getevents timed out\n");
+		return -ETIMEDOUT;
+	}
+	if (res != 1) {
+		printf("sync_submit: io_getevents res=%d [%s]\n", res, strerror(-res));
+		return res;
+	}
+	return event.res;
+}
+
+#define SETUP aio_setup(1024)
+
+
+#define READ 'r'
+#define WRITE 'w'
+#define READ_SILENT 'R'
+#define WRITE_SILENT 'W'
+/*
+ * Perform one synchronous read or write of `count` bytes at offset `pos`
+ * and compare the result with `expect`.
+ *
+ * rw selects the operation: READ / WRITE print every result, while
+ * READ_SILENT / WRITE_SILENT print only when the result is unexpected.
+ * Returns 0 when the result equals `expect`, 1 otherwise (including an
+ * unknown rw code).
+ */
+int attempt_rw(int fd, void *buf, int count, long long pos, int rw, int expect)
+{
+	struct iocb iocb;
+	int res;
+	int silent = 0;
+
+	switch(rw) {
+	case READ_SILENT:
+		silent = 1;
+		/* fall through */
+	case READ:
+		io_prep_pread (&iocb, fd, buf, count, pos);
+		break;
+	case WRITE_SILENT:
+		silent = 1;
+		/* fall through */
+	case WRITE:
+		io_prep_pwrite(&iocb, fd, buf, count, pos);
+		break;
+	default:
+		/* never submit an iocb that was not prepared */
+		printf("attempt_rw: unknown rw code %d -- FAILED\n", rw);
+		return 1;
+	}
+
+	if (!silent) {
+		printf("expect %5d: (%c), res = ", expect, rw);
+		fflush(stdout);
+	}
+	res = sync_submit(&iocb);
+	if (!silent || res != expect) {
+		/* silent mode skipped the prefix above; print it now */
+		if (silent)
+			printf("expect %5d: (%c), res = ", expect, rw);
+		printf("%5d [%s]%s\n", res,
+			(res <= 0) ? strerror(-res) : "Success",
+			(res != expect) ? " -- FAILED" : "");
+	}
+
+	if (res != expect)
+		return 1;
+
+	return 0;
+}
+
diff --git a/tools/libaio/harness/cases/common-7-8.h b/tools/libaio/harness/cases/common-7-8.h
new file mode 100644
index 0000000000..3ec2bb439d
--- /dev/null
+++ b/tools/libaio/harness/cases/common-7-8.h
@@ -0,0 +1,37 @@
+/* common-7-8.h
+*/
+#include "aio_setup.h"
+
+#include <unistd.h>
+
+#define SIZE 512
+
+/*
+ * Shared body for the tests that include this header. The including file
+ * supplies FILENAME, LIMIT and SET_RLIMIT(). Reads and writes are attempted
+ * just below, across and at `limit`; mismatches are OR-ed into status,
+ * which is returned (0 = all matched).
+ */
+int test_main(void)
+{
+ char *buf;
+ int rwfd;
+ int status = 0, res;
+ long long limit;
+
+ rwfd = open(FILENAME, O_RDWR); assert(rwfd != -1);
+ res = ftruncate(rwfd, 0); assert(res == 0);
+ buf = malloc(SIZE); assert(buf != NULL);
+ memset(buf, 0, SIZE);
+
+ /* LIMIT may expand to an expression that uses rwfd, so evaluate it once here */
+ limit = LIMIT;
+
+ SET_RLIMIT(limit);
+
+ /* SIZE bytes ending exactly at limit: expected to transfer in full */
+ status |= attempt_rw(rwfd, buf, SIZE, limit-SIZE, WRITE, SIZE);
+ status |= attempt_rw(rwfd, buf, SIZE, limit-SIZE, READ, SIZE);
+
+ /* shifted by one byte so the request crosses limit: expect SIZE-1 */
+ status |= attempt_rw(rwfd, buf, SIZE, 1+limit-SIZE, WRITE, SIZE-1);
+ status |= attempt_rw(rwfd, buf, SIZE, 1+limit-SIZE, READ, SIZE-1);
+
+ /* starting at limit: write expects -EFBIG, read and zero-length write expect 0 */
+ status |= attempt_rw(rwfd, buf, SIZE, limit, WRITE, -EFBIG);
+ status |= attempt_rw(rwfd, buf, SIZE, limit, READ, 0);
+ status |= attempt_rw(rwfd, buf, 0, limit, WRITE, 0);
+
+ return status;
+}
+
diff --git a/tools/libaio/harness/main.c b/tools/libaio/harness/main.c
new file mode 100644
index 0000000000..74b2764620
--- /dev/null
+++ b/tools/libaio/harness/main.c
@@ -0,0 +1,39 @@
+#include <stdio.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <libaio.h>
+
+#if defined(__i386__)
+#define KERNEL_RW_POINTER ((void *)0xc0010000)
+#else
+//#warning Not really sure where kernel memory is. Guessing.
+#define KERNEL_RW_POINTER ((void *)0xffffffffc0010000)
+#endif
+
+
+char test_name[] = TEST_NAME;
+
+#include TEST_NAME
+
+/*
+ * Harness entry point: run the optional SETUP hook defined by the included
+ * test case, call its test_main(), print a PASSED/FAILED line and exit with
+ * 0 on pass or 1 on failure.
+ */
+int main(void)
+{
+	int failed;
+
+#if defined(SETUP)
+	SETUP;
+#endif
+
+	/* any non-zero result from the test case counts as a failure */
+	failed = (test_main() != 0);
+	printf("test %s completed %s.\n", test_name,
+		failed ? "FAILED" : "PASSED");
+	fflush(stdout);
+
+	return failed;
+}
diff --git a/tools/libaio/harness/runtests.sh b/tools/libaio/harness/runtests.sh
new file mode 100644
index 0000000000..d763d88b31
--- /dev/null
+++ b/tools/libaio/harness/runtests.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# Run every test program named on the command line, report each result,
+# and print pass/fail totals. Exits non-zero if any test failed.
+
+passes=0
+fails=0
+
+echo "Test run starting at" `date`
+
+while [ $# -ge 1 ] ; do
+	this_test=$1
+	shift
+	echo "Starting $this_test"
+	"$this_test" 2>&1
+	res=$?
+	# $(( )) is POSIX arithmetic; the old $[ ] form only works in bash
+	if [ $res -eq 0 ] ; then
+		str=""
+		passes=$((passes + 1))
+	else
+		str=" -- FAILED"
+		fails=$((fails + 1))
+	fi
+	echo "Completed $this_test with $res$str".
+done
+
+echo "Pass: $passes Fail: $fails"
+echo "Test run complete at" `date`
+
+# Let callers such as "make check" see failures through the exit status.
+[ "$fails" -eq 0 ]
diff --git a/tools/libaio/libaio.spec b/tools/libaio/libaio.spec
new file mode 100644
index 0000000000..1f16c91b07
--- /dev/null
+++ b/tools/libaio/libaio.spec
@@ -0,0 +1,177 @@
+Name: libaio
+Version: 0.3.104
+Release: 1
+Summary: Linux-native asynchronous I/O access library
+# "License" is the current name of the tag formerly spelled "Copyright".
+License: LGPL
+Group: System Environment/Libraries
+Source: %{name}-%{version}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-root
+# Fix ExclusiveArch as we implement this functionality on more architectures
+ExclusiveArch: i386 x86_64 ia64 s390 s390x ppc ppc64 ppc64pseries ppc64iseries alpha alphaev6
+
+%description
+The Linux-native asynchronous I/O facility ("async I/O", or "aio") has a
+richer API and capability set than the simple POSIX async I/O facility.
+This library, libaio, provides the Linux-native API for async I/O.
+The POSIX async I/O facility requires this library in order to provide
+kernel-accelerated async I/O capabilities, as do applications which
+require the Linux-native async I/O API.
+
+%package devel
+Summary: Development files for Linux-native asynchronous I/O access
+Group: Development/System
+Requires: libaio
+Provides: libaio.so.1
+
+%description devel
+This package provides header files to include and libraries to link with
+for the Linux-native asynchronous I/O facility ("async I/O", or "aio").
+
+%prep
+%setup
+
+%build
+make
+
+%install
+[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT
+
+make install prefix=$RPM_BUILD_ROOT/usr \
+ libdir=$RPM_BUILD_ROOT/%{_libdir} \
+ root=$RPM_BUILD_ROOT
+
+%clean
+[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT
+
+%post -p /sbin/ldconfig
+
+%postun -p /sbin/ldconfig
+
+%files
+%defattr(-,root,root)
+%attr(0755,root,root) %{_libdir}/libaio.so.*
+%doc COPYING TODO
+
+%files devel
+%defattr(-,root,root)
+%attr(0644,root,root) %{_includedir}/*
+%attr(0755,root,root) %{_libdir}/libaio.so
+%attr(0644,root,root) %{_libdir}/libaio.a
+
+%changelog
+* Fri Apr 1 2005 Jeff Moyer <jmoyer@redhat.com> - 0.3.104-1
+- Add Alpha architecture support. (Sergey Tikhonov <tsv@solvo.ru>)
+
+* Tue Jan 25 2005 Jeff Moyer <jmoyer@redhat.com> - 0.3.103-1
+- Fix SONAME breakage. In changing file names around, I also changed the
+ SONAME, which is a no no.
+
+* Thu Oct 14 2004 Jeff Moyer <jmoyer@redhat.com> - 0.3.102-1
+- S390 asm had a bug; I forgot to update the clobber list. Lucky for me,
+ newer compilers complain about such things.
+- Also update the s390 asm to look more like the new kernel variants.
+
+* Wed Oct 13 2004 Jeff Moyer <jmoyer@redhat.com> - 0.3.101-1
+- Revert syscall return values to be -ERRNO. This was an inadvertent bug
+ introduced when clobber lists changed.
+- add ppc64pseries and ppc64iseries to exclusivearch
+
+* Tue Sep 14 2004 Jeff Moyer <jmoyer@redhat.com> - 0.3.100-1
+- Switch around the tests for _PPC_ and _powerpc64_ so that the ppc64
+ platforms get the right padding.
+
+* Wed Jul 14 2004 Jeff Moyer <jmoyer@redhat.com> - 0.3.99-4
+- Ok, there was a race in moving the cvs module. Someone rebuilt from
+ the old cvs into fc3. *sigh* bumping rev.
+
+* Wed Jul 14 2004 Jeff Moyer <jmoyer@redhat.com> - 0.3.99-3
+- Actually provide libaio.so.1.
+
+* Tue Mar 30 2004 Jeff Moyer <jmoyer@redhat.com> - 0.3.99-2
+- Apparently the 0.3.93 patch was not meant for 0.3.96. Backed it out.
+
+* Tue Mar 30 2004 Jeff Moyer <jmoyer@redhat.com> - 0.3.99-1
+- Fix compat calls.
+- make library .so.1.0.0 and make symlinks properly.
+- Fix header file for inclusion in c++ code.
+
+* Thu Feb 26 2004 Jeff Moyer <jmoyer@redhat.com> 0.3.98-2
+- bah. fix version nr in changelog.
+
+* Thu Feb 26 2004 Jeff Moyer <jmoyer@redhat.com> 0.3.98-1
+- fix compiler warnings.
+
+* Thu Feb 26 2004 Jeff Moyer <jmoyer@redhat.com> 0.3.97-2
+- make srpm was using rpm to do a build. changed that to use rpmbuild if
+ it exists, and fallback to rpm if it doesn't.
+
+* Tue Feb 24 2004 Jeff Moyer <jmoyer@redhat.com> 0.3.97-1
+- Use libc syscall(2) instead of rolling our own calling mechanism. This
+ change is inspired due to a failure to build with newer gcc, since clobber
+ lists were wrong.
+- Add -fpic to the CFLAGS for all architectures. Should address bz #109457.
+- change a #include from <linux/types.h> to <sys/types.h>. Fixes a build
+ issue on s390.
+
+* Wed Jul 7 2003 Bill Nottingham <notting@redhat.com> 0.3.96-3
+- fix paths on lib64 arches
+
+* Wed Jun 18 2003 Michael K. Johnson <johnsonm@redhat.com> 0.3.96-2
+- optimization in io_getevents from Arjan van de Ven in 0.3.96-1
+- deal with ia64 in 0.3.96-2
+
+* Wed May 28 2003 Michael K. Johnson <johnsonm@redhat.com> 0.3.95-1
+- ppc bugfix from Julie DeWandel
+
+* Tue May 20 2003 Michael K. Johnson <johnsonm@redhat.com> 0.3.94-1
+- symbol versioning fix from Ulrich Drepper
+
+* Mon Jan 27 2003 Benjamin LaHaise <bcrl@redhat.com>
+- bump to 0.3.93-3 for rebuild.
+
+* Mon Dec 16 2002 Benjamin LaHaise <bcrl@redhat.com>
+- libaio 0.3.93 test release
+- add powerpc support from Gianni Tedesco <gianni@ecsc.co.uk>
+- add s/390 support from Arnd Bergmann <arnd@bergmann-dalldorf.de>
+
+* Fri Sep 12 2002 Benjamin LaHaise <bcrl@redhat.com>
+- libaio 0.3.92 test release
+- build on x86-64
+
+* Thu Sep 12 2002 Benjamin LaHaise <bcrl@redhat.com>
+- libaio 0.3.91 test release
+- build on ia64
+- remove libredhat-kernel from the .spec file
+
+* Thu Sep 5 2002 Benjamin LaHaise <bcrl@redhat.com>
+- libaio 0.3.90 test release
+
+* Mon Apr 29 2002 Benjamin LaHaise <bcrl@redhat.com>
+- add requires initscripts >= 6.47-1 to get boot time libredhat-kernel
+ linkage correct.
+- typo fix
+
+* Thu Apr 25 2002 Benjamin LaHaise <bcrl@redhat.com>
+- make /usr/lib/libredhat-kernel.so point to /lib/libredhat-kernel.so.1.0.0
+
+* Mon Apr 15 2002 Tim Powers <timp@redhat.com>
+- make the post scriptlet not use /bin/sh
+
+* Sat Apr 12 2002 Benjamin LaHaise <bcrl@redhat.com>
+- add /lib/libredhat-kernel* to %files.
+
+* Fri Apr 12 2002 Benjamin LaHaise <bcrl@redhat.com>
+- make the dummy install as /lib/libredhat-kernel.so.1.0.0 so
+ that ldconfig will link against it if no other is installed.
+
+* Tue Jan 22 2002 Benjamin LaHaise <bcrl@redhat.com>
+- add io_getevents
+
+* Tue Jan 22 2002 Michael K. Johnson <johnsonm@redhat.com>
+- Make linker happy with /usr/lib symlink for libredhat-kernel.so
+
+* Mon Jan 21 2002 Michael K. Johnson <johnsonm@redhat.com>
+- Added stub library
+
+* Sun Jan 20 2002 Michael K. Johnson <johnsonm@redhat.com>
+- Initial packaging
diff --git a/tools/libaio/man/aio.3 b/tools/libaio/man/aio.3
new file mode 100644
index 0000000000..6dc3c63a8f
--- /dev/null
+++ b/tools/libaio/man/aio.3
@@ -0,0 +1,315 @@
+.TH aio 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio \- Asynchronous IO
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.fi
+.SH DESCRIPTION
+The POSIX.1b standard defines a new set of I/O operations which can
+significantly reduce the time an application spends waiting at I/O. The
+new functions allow a program to initiate one or more I/O operations and
+then immediately resume normal work while the I/O operations are
+executed in parallel. This functionality is available if the
+.IR "unistd.h"
+file defines the symbol
+.B "_POSIX_ASYNCHRONOUS_IO"
+.
+
+These functions are part of the library with realtime functions named
+.IR "librt" .
+They are not actually part of the
+.IR "libc"
+binary.
+The implementation of these functions can be done using support in the
+kernel (if available) or using an implementation based on threads at
+userlevel. In the latter case it might be necessary to link applications
+with the thread library
+.IR "libpthread"
+in addition to
+.IR "librt"
+and
+.IR "libaio"
+.
+
+All AIO operations operate on files which were opened previously. There
+might be arbitrarily many operations running for one file. The
+asynchronous I/O operations are controlled using a data structure named
+.IR "struct aiocb" .
+It is defined in
+.IR "aio.h"
+ as follows.
+
+.nf
+struct aiocb
+{
+ int aio_fildes; /* File descriptor. */
+ int aio_lio_opcode; /* Operation to be performed. */
+ int aio_reqprio; /* Request priority offset. */
+ volatile void *aio_buf; /* Location of buffer. */
+ size_t aio_nbytes; /* Length of transfer. */
+ struct sigevent aio_sigevent; /* Signal number and value. */
+
+ /* Internal members. */
+ struct aiocb *__next_prio;
+ int __abs_prio;
+ int __policy;
+ int __error_code;
+ __ssize_t __return_value;
+
+#ifndef __USE_FILE_OFFSET64
+ __off_t aio_offset; /* File offset. */
+ char __pad[sizeof (__off64_t) - sizeof (__off_t)];
+#else
+ __off64_t aio_offset; /* File offset. */
+#endif
+ char __unused[32];
+};
+
+.fi
+The POSIX.1b standard mandates that the
+.IR "struct aiocb"
+structure
+contains at least the members described in the following table. There
+might be more elements which are used by the implementation, but
+depending upon these elements is not portable and is highly deprecated.
+
+.TP
+.IR "int aio_fildes"
+This element specifies the file descriptor to be used for the
+operation. It must be a legal descriptor, otherwise the operation will
+fail.
+
+The device on which the file is opened must allow the seek operation.
+I.e., it is not possible to use any of the AIO operations on devices
+like terminals where an
+.IR "lseek"
+ call would lead to an error.
+.TP
+.IR "off_t aio_offset"
+This element specifies the offset in the file at which the operation (input
+or output) is performed. Since the operations are carried out in arbitrary
+order and more than one operation for one file descriptor can be
+started, one cannot expect a current read/write position of the file
+descriptor.
+.TP
+.IR "volatile void *aio_buf"
+This is a pointer to the buffer with the data to be written or the place
+where the read data is stored.
+.TP
+.IR "size_t aio_nbytes"
+This element specifies the length of the buffer pointed to by
+.IR "aio_buf"
+.
+.TP
+.IR "int aio_reqprio"
+If the platform has defined
+.B "_POSIX_PRIORITIZED_IO"
+and
+.B "_POSIX_PRIORITY_SCHEDULING"
+, the AIO requests are
+processed based on the current scheduling priority. The
+.IR "aio_reqprio"
+element can then be used to lower the priority of the
+AIO operation.
+.TP
+.IR "struct sigevent aio_sigevent"
+This element specifies how the calling process is notified once the
+operation terminates. If the
+.IR "sigev_notify"
+element is
+.B "SIGEV_NONE"
+, no notification is sent. If it is
+.B "SIGEV_SIGNAL"
+,
+the signal determined by
+.IR "sigev_signo"
+is sent. Otherwise,
+.IR "sigev_notify"
+must be
+.BR "SIGEV_THREAD" .
+In this case, a thread
+is created which starts executing the function pointed to by
+.IR "sigev_notify_function"
+.
+.TP
+.IR "int aio_lio_opcode"
+This element is only used by the
+.IR "lio_listio"
+ and
+.IR "lio_listio64"
+ functions. Since these functions allow an
+arbitrary number of operations to start at once, and each operation can be
+input or output (or nothing), the information must be stored in the
+control block. The possible values are:
+.TP
+.B "LIO_READ"
+Start a read operation. Read from the file at position
+.IR "aio_offset"
+ and store the next
+.IR "aio_nbytes"
+ bytes in the
+buffer pointed to by
+.IR "aio_buf"
+.
+.TP
+.B "LIO_WRITE"
+Start a write operation. Write
+.IR "aio_nbytes"
+bytes starting at
+.IR "aio_buf"
+into the file starting at position
+.IR "aio_offset"
+.
+.TP
+.B "LIO_NOP"
+Do nothing for this control block. This value is useful sometimes when
+an array of
+.IR "struct aiocb"
+values contains holes, i.e., some of the
+values must not be handled although the whole array is presented to the
+.IR "lio_listio"
+function.
+
+When the sources are compiled using
+.B "_FILE_OFFSET_BITS == 64"
+on a
+32 bit machine, this type is in fact
+.IR "struct aiocb64"
+, since the LFS
+interface transparently replaces the
+.IR "struct aiocb"
+definition.
+.PP
+For use with the AIO functions defined in the LFS, there is a similar type
+defined which replaces the types of the appropriate members with larger
+types but otherwise is equivalent to
+.IR "struct aiocb" .
+Particularly,
+all member names are the same.
+
+.nf
+/* The same for the 64bit offsets. Please note that the members aio_fildes
+ to __return_value have to be the same in aiocb and aiocb64. */
+#ifdef __USE_LARGEFILE64
+struct aiocb64
+{
+ int aio_fildes; /* File descriptor. */
+ int aio_lio_opcode; /* Operation to be performed. */
+ int aio_reqprio; /* Request priority offset. */
+ volatile void *aio_buf; /* Location of buffer. */
+ size_t aio_nbytes; /* Length of transfer. */
+ struct sigevent aio_sigevent; /* Signal number and value. */
+
+ /* Internal members. */
+ struct aiocb *__next_prio;
+ int __abs_prio;
+ int __policy;
+ int __error_code;
+ __ssize_t __return_value;
+
+ __off64_t aio_offset; /* File offset. */
+ char __unused[32];
+};
+
+.fi
+.TP
+.IR "int aio_fildes"
+This element specifies the file descriptor which is used for the
+operation. It must be a legal descriptor since otherwise the operation
+fails for obvious reasons.
+The device on which the file is opened must allow the seek operation.
+I.e., it is not possible to use any of the AIO operations on devices
+like terminals where an
+.IR "lseek"
+ call would lead to an error.
+.TP
+.IR "off64_t aio_offset"
+This element specifies at which offset in the file the operation (input
+or output) is performed. Since the operations are carried out in arbitrary
+order and more than one operation for one file descriptor can be
+started, one cannot expect a current read/write position of the file
+descriptor.
+.TP
+.IR "volatile void *aio_buf"
+This is a pointer to the buffer with the data to be written or the place
+where the read data is stored.
+.TP
+.IR "size_t aio_nbytes"
+This element specifies the length of the buffer pointed to by
+.IR "aio_buf"
+.
+.TP
+.IR "int aio_reqprio"
+If for the platform
+.B "_POSIX_PRIORITIZED_IO"
+and
+.B "_POSIX_PRIORITY_SCHEDULING"
+are defined the AIO requests are
+processed based on the current scheduling priority. The
+.IR "aio_reqprio"
+element can then be used to lower the priority of the
+AIO operation.
+.TP
+.IR "struct sigevent aio_sigevent"
+This element specifies how the calling process is notified once the
+operation terminates. If the
+.IR "sigev_notify"
+element is
+.BR "SIGEV_NONE" ,
+no notification is sent. If it is
+.B "SIGEV_SIGNAL"
+,
+the signal determined by
+.IR "sigev_signo"
+is sent. Otherwise,
+.IR "sigev_notify"
+ must be
+.BR "SIGEV_THREAD" ,
+in which case a thread is created
+which starts executing the function pointed to by
+.IR "sigev_notify_function"
+.
+.TP
+.IR "int aio_lio_opcode"
+This element is only used by the
+.IR "lio_listio"
+and
+.IR "lio_listio64"
+functions. Since these functions allow an
+arbitrary number of operations to start at once, and since each operation can be
+input or output (or nothing), the information must be stored in the
+control block. See the description of
+.IR "struct aiocb"
+for a description
+of the possible values.
+.PP
+When the sources are compiled using
+.B "_FILE_OFFSET_BITS == 64"
+on a
+32 bit machine, this type is available under the name
+.IR "struct aiocb64"
+, since the LFS transparently replaces the old interface.
+.SH "RETURN VALUES"
+.SH ERRORS
+.SH "SEE ALSO"
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_cancel.3 b/tools/libaio/man/aio_cancel.3
new file mode 100644
index 0000000000..502c83c3da
--- /dev/null
+++ b/tools/libaio/man/aio_cancel.3
@@ -0,0 +1,137 @@
+.TH aio_cancel 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_cancel \- Cancel asynchronous I/O requests
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_cancel (int fildes, struct aiocb *aiocbp)"
+.fi
+.SH DESCRIPTION
+When one or more requests are asynchronously processed, it might be
+useful in some situations to cancel a selected operation, e.g., if it
+becomes obvious that the written data is no longer accurate and would
+have to be overwritten soon. As an example, assume an application, which
+writes data in files in a situation where new incoming data would have
+to be written in a file which will be updated by an enqueued request.
+The POSIX AIO implementation provides such a function, but this function
+is not capable of forcing the cancellation of the request. It is up to the
+implementation to decide whether it is possible to cancel the operation
+or not. Therefore using this function is merely a hint.
+.B "The libaio implementation does not implement the cancel operation in the"
+.B "POSIX libraries."
+.PP
+The
+.IR aio_cancel
+function can be used to cancel one or more
+outstanding requests. If the
+.IR aiocbp
+parameter is
+.IR NULL
+, the
+function tries to cancel all of the outstanding requests which would process
+the file descriptor
+.IR fildes
+(i.e., whose
+.IR aio_fildes
+member
+is
+.IR fildes
+). If
+.IR aiocbp " is not"
+.IR NULL
+,
+.IR aio_cancel
+attempts to cancel the specific request pointed to by
+.IR aiocbp.
+
+For requests which were successfully canceled, the normal notification
+about the termination of the request should take place. I.e., depending
+on the
+.IR "struct sigevent"
+object which controls this, nothing
+happens, a signal is sent or a thread is started. If the request cannot
+be canceled, it terminates the usual way after performing the operation.
+After a request is successfully canceled, a call to
+.IR aio_error
+with
+a reference to this request as the parameter will return
+.B ECANCELED
+and a call to
+.IR aio_return
+will return
+.IR -1.
+If the request wasn't canceled and is still running the error status is
+still
+.B EINPROGRESS.
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+, this
+function is in fact
+.IR aio_cancel64
+since the LFS interface
+transparently replaces the normal implementation.
+
+.SH "RETURN VALUES"
+.TP
+.B AIO_CANCELED
+If there were
+requests which haven't terminated and which were successfully canceled.
+.TP
+.B AIO_NOTCANCELED
+If there is one or more requests left which couldn't be canceled.
+In this case
+.IR aio_error
+must be used to find out which of the, perhaps multiple, requests (if
+.IR aiocbp
+is
+.IR NULL
+) weren't successfully canceled.
+.TP
+.B AIO_ALLDONE
+If all
+requests already terminated at the time
+.IR aio_cancel
+is called the
+return value is
+.BR AIO_ALLDONE .
+.SH ERRORS
+If an error occurred during the execution of
+.IR aio_cancel
+the
+function returns
+.IR -1
+and sets
+.IR errno
+to one of the following
+values.
+.TP
+.B EBADF
+The file descriptor
+.IR fildes
+is not valid.
+.TP
+.B ENOSYS
+.IR aio_cancel
+is not implemented.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_cancel64.3 b/tools/libaio/man/aio_cancel64.3
new file mode 100644
index 0000000000..ede775be5e
--- /dev/null
+++ b/tools/libaio/man/aio_cancel64.3
@@ -0,0 +1,50 @@
+.TH aio_cancel64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_cancel64 \- Cancel asynchronous I/O requests
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_cancel64 (int fildes, struct aiocb64 *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function is similar to
+.IR aio_cancel
+with the only difference
+that the argument is a reference to a variable of type
+.IR "struct aiocb64" .
+.
+
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+, this
+function is available under the name
+.IR aio_cancel
+and so
+transparently replaces the interface for small files on 32 bit
+machines.
+.SH "RETURN VALUES"
+See aio_cancel(3).
+.SH ERRORS
+See aio_cancel(3).
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_error.3 b/tools/libaio/man/aio_error.3
new file mode 100644
index 0000000000..12b82cf894
--- /dev/null
+++ b/tools/libaio/man/aio_error.3
@@ -0,0 +1,81 @@
+.TH aio_error 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_error \- Getting the Status of AIO Operations
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_error (const struct aiocb *aiocbp)"
+.fi
+.SH DESCRIPTION
+The function
+.IR aio_error
+determines the error state of the request described by the
+.IR "struct aiocb"
+variable pointed to by
+.I aiocbp
+.
+
+When the operation is performed truly asynchronously (as with
+.IR "aio_read"
+and
+.IR "aio_write"
+and with
+.IR "lio_listio"
+when the mode is
+.IR "LIO_NOWAIT"
+), one sometimes needs to know whether a
+specific request already terminated and if so, what the result was.
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+this function is in fact
+.IR "aio_error64"
+since the LFS interface transparently replaces the normal implementation.
+.SH "RETURN VALUES"
+If the request has not yet terminated the value returned is always
+.IR "EINPROGRESS" .
+Once the request has terminated the value
+.IR "aio_error"
+returns is either
+.I 0
+if the request completed successfully or it returns the value which would be stored in the
+.IR "errno"
+variable if the request would have been done using
+.IR "read"
+,
+.IR "write"
+, or
+.IR "fsync"
+.
+.SH ERRORS
+.TP
+.IR "ENOSYS"
+if it is not implemented. It
+could also return
+.TP
+.IR "EINVAL"
+if the
+.I aiocbp
+parameter does not
+refer to an asynchronous operation whose return status is not yet known.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_error64.3 b/tools/libaio/man/aio_error64.3
new file mode 100644
index 0000000000..3333161d9a
--- /dev/null
+++ b/tools/libaio/man/aio_error64.3
@@ -0,0 +1,64 @@
+.TH aio_error64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_error64 \- Return errors
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_error64 (const struct aiocb64 *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function is similar to
+.IR aio_error
+with the only difference
+that the argument is a reference to a variable of type
+.IR "struct aiocb64".
+.PP
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+this
+function is available under the name
+.IR aio_error
+and so
+transparently replaces the interface for small files on 32 bit
+machines.
+.SH "RETURN VALUES"
+If the request has not yet terminated the value returned is always
+.IR "EINPROGRESS" .
+Once the request has terminated the value
+.IR "aio_error"
+returns is either
+.I 0
+if the request completed successfully or it returns the value which would be stored in the
+.IR "errno"
+variable if the request would have been done using
+.IR "read"
+,
+.IR "write"
+, or
+.IR "fsync"
+.
+.SH ERRORS
+See
+.IR aio_error(3).
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_fsync.3 b/tools/libaio/man/aio_fsync.3
new file mode 100644
index 0000000000..637f0f63d4
--- /dev/null
+++ b/tools/libaio/man/aio_fsync.3
@@ -0,0 +1,139 @@
+.TH aio_fsync 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_fsync \- Synchronize a file's complete in-core state with that on disk
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_fsync (int op, struct aiocb *aiocbp)"
+.fi
+.SH DESCRIPTION
+.PP
+When dealing with asynchronous operations it is sometimes necessary to
+get into a consistent state. This would mean for AIO that one wants to
+know whether a certain request or a group of requests were processed.
+This could be done by waiting for the notification sent by the system
+after the operation terminated, but this sometimes would mean wasting
+resources (mainly computation time). Instead POSIX.1b defines two
+functions which will help with most kinds of consistency.
+.PP
+The
+.IR aio_fsync
+and
+.IR "aio_fsync64"
+functions are only available
+if the symbol
+.IR "_POSIX_SYNCHRONIZED_IO"
+is defined in
+.I unistd.h
+.
+
+Calling this function forces all I/O operations queued at the
+time of the function call and operating on the file descriptor
+.IR "aiocbp->aio_fildes"
+into the synchronized I/O completion state. The
+.IR "aio_fsync"
+function returns
+immediately but the notification through the method described in
+.IR "aiocbp->aio_sigevent"
+will happen only after all requests for this
+file descriptor have terminated and the file is synchronized. This also
+means that requests for this very same file descriptor which are queued
+after the synchronization request are not affected.
+
+If
+.IR "op"
+is
+.IR "O_DSYNC"
+the synchronization happens as with a call
+to
+.IR "fdatasync" .
+Otherwise
+.IR "op"
+should be
+.IR "O_SYNC"
+and
+the synchronization happens as with
+.IR "fsync"
+.
+
+As long as the synchronization has not happened, a call to
+.IR "aio_error"
+with the reference to the object pointed to by
+.IR "aiocbp"
+returns
+.IR "EINPROGRESS" .
+Once the synchronization is
+done
+.IR "aio_error"
+returns
+.IR 0
+if the synchronization was
+successful. Otherwise the value returned is the value to which the
+.IR "fsync"
+or
+.IR "fdatasync"
+function would have set the
+.IR "errno"
+variable. In this case nothing can be assumed about the
+consistency for the data written to this file descriptor.
+
+.SH "RETURN VALUES"
+The return value of this function is
+.IR 0
+if the request was
+successfully enqueued. Otherwise the return value is
+.IR -1
+and
+.IR "errno" " is set to indicate the error."
+.SH ERRORS
+.TP
+.B EAGAIN
+The request could not be enqueued due to temporary lack of resources.
+.TP
+.B EBADF
+The file descriptor
+.IR "aiocbp->aio_fildes"
+is not valid or not open
+for writing.
+.TP
+.B EINVAL
+The implementation does not support I/O synchronization or the
+.IR "op"
+parameter is other than
+.IR "O_DSYNC"
+and
+.IR "O_SYNC"
+.
+.TP
+.B ENOSYS
+This function is not implemented.
+.PP
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+ this
+function is in fact
+.IR "aio_fsync64"
+since the LFS interface
+transparently replaces the normal implementation.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_fsync64.3 b/tools/libaio/man/aio_fsync64.3
new file mode 100644
index 0000000000..5dce22dda9
--- /dev/null
+++ b/tools/libaio/man/aio_fsync64.3
@@ -0,0 +1,51 @@
+.TH aio_fsync64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_fsync64 \- Synchronize a file's complete in-core state with that on disk
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_fsync64 (int op, struct aiocb64 *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function is similar to
+.IR aio_fsync
+with the only difference
+that the argument is a reference to a variable of type
+.IR "struct aiocb64".
+
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+this
+function is available under the name
+.IR aio_fsync
+and so
+transparently replaces the interface for small files on 32 bit
+machines.
+.SH "RETURN VALUES"
+See
+.IR aio_fsync.
+.SH ERRORS
+See
+.IR aio_fsync.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_init.3 b/tools/libaio/man/aio_init.3
new file mode 100644
index 0000000000..3b0ec95a83
--- /dev/null
+++ b/tools/libaio/man/aio_init.3
@@ -0,0 +1,96 @@
+.TH aio_init 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_init \- How to optimize the AIO implementation
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "void aio_init (const struct aioinit *init)"
+.fi
+.SH DESCRIPTION
+
+The POSIX standard does not specify how the AIO functions are
+implemented. They could be system calls, but it is also possible to
+emulate them at userlevel.
+
+At the point of this writing, the available implementation is a userlevel
+implementation which uses threads for handling the enqueued requests.
+While this implementation requires making some decisions about
+limitations, hard limitations are something which is best avoided
+in the GNU C library. Therefore, the GNU C library provides a means
+for tuning the AIO implementation according to the individual use.
+
+.BI "struct aioinit"
+.PP
+This data type is used to pass the configuration or tunable parameters
+to the implementation. The program has to initialize the members of
+this struct and pass it to the implementation using the
+.IR aio_init
+function.
+.TP
+.B "int aio_threads"
+This member specifies the maximal number of threads which may be used
+at any one time.
+.TP
+.B "int aio_num"
+This number provides an estimate on the maximal number of simultaneously
+enqueued requests.
+.TP
+.B "int aio_locks"
+Unused.
+.TP
+.B "int aio_usedba"
+Unused.
+.TP
+.B "int aio_debug"
+Unused.
+.TP
+.B "int aio_numusers"
+Unused.
+.TP
+.B "int aio_reserved[2]"
+Unused.
+.PP
+This function must be called before any other AIO function. Calling it
+is completely voluntary, as it is only meant to help the AIO
+implementation perform better.
+
+Before calling the
+.IR aio_init
+function, the members of a variable of
+type
+.IR "struct aioinit"
+must be initialized. Then a reference to
+this variable is passed as the parameter to
+.IR aio_init
+which itself
+may or may not pay attention to the hints.
+
+It is an extension which follows a proposal from the SGI implementation in
+.IR "Irix 6" .
+It is not covered by POSIX.1b or Unix98.
+.SH "RETURN VALUES"
+The function has no return value.
+.SH ERRORS
+The function has no error cases defined.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_read.3 b/tools/libaio/man/aio_read.3
new file mode 100644
index 0000000000..5bcb6c8a11
--- /dev/null
+++ b/tools/libaio/man/aio_read.3
@@ -0,0 +1,146 @@
+.TH aio_read 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_read \- Initiate an asynchronous read operation
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_read (struct aiocb *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function initiates an asynchronous read operation. It
+immediately returns after the operation was enqueued or when an
+error was encountered.
+
+The first
+.IR "aiocbp->aio_nbytes"
+bytes of the file for which
+.IR "aiocbp->aio_fildes"
+is a descriptor are written to the buffer
+starting at
+.IR "aiocbp->aio_buf" .
+Reading starts at the absolute
+position
+.IR "aiocbp->aio_offset"
+in the file.
+
+If prioritized I/O is supported by the platform the
+.IR "aiocbp->aio_reqprio"
+value is used to adjust the priority before
+the request is actually enqueued.
+
+The calling process is notified about the termination of the read
+request according to the
+.IR "aiocbp->aio_sigevent"
+value.
+
+.SH "RETURN VALUES"
+When
+.IR "aio_read"
+returns, the return value is zero if no error
+occurred that can be found before the process is enqueued. If such an
+early error is found, the function returns
+.IR -1
+and sets
+.IR "errno".
+
+.PP
+If
+.IR "aio_read"
+returns zero, the current status of the request
+can be queried using
+.IR "aio_error"
+and
+.IR "aio_return"
+functions.
+As long as the value returned by
+.IR "aio_error"
+is
+.IR "EINPROGRESS"
+the operation has not yet completed. If
+.IR "aio_error"
+returns zero,
+the operation successfully terminated, otherwise the value is to be
+interpreted as an error code. If the function terminated, the result of
+the operation can be obtained using a call to
+.IR "aio_return" .
+The
+returned value is the same as an equivalent call to
+.IR "read"
+would
+have returned.
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+this
+function is in fact
+.IR "aio_read64"
+since the LFS interface transparently
+replaces the normal implementation.
+
+.SH ERRORS
+In the case of an early error:
+.TP
+.B EAGAIN
+The request was not enqueued due to (temporarily) exceeded resource
+limitations.
+.TP
+.B ENOSYS
+The
+.IR "aio_read"
+function is not implemented.
+.TP
+.B EBADF
+The
+.IR "aiocbp->aio_fildes"
+descriptor is not valid. This condition
+need not be recognized before enqueueing the request and so this error
+might also be signaled asynchronously.
+.TP
+.B EINVAL
+The
+.IR "aiocbp->aio_offset"
+or
+.IR "aiocbp->aio_reqprio"
+value is
+invalid. This condition need not be recognized before enqueueing the
+request and so this error might also be signaled asynchronously.
+
+.PP
+In the case of a normal return, possible error codes returned by
+.IR "aio_error"
+are:
+.TP
+.B EBADF
+The
+.IR "aiocbp->aio_fildes"
+descriptor is not valid.
+.TP
+.B ECANCELED
+The operation was canceled before the operation was finished.
+.TP
+.B EINVAL
+The
+.IR "aiocbp->aio_offset"
+value is invalid.
+.PP
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_read64.3 b/tools/libaio/man/aio_read64.3
new file mode 100644
index 0000000000..8e407a5591
--- /dev/null
+++ b/tools/libaio/man/aio_read64.3
@@ -0,0 +1,60 @@
+.TH aio_read64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_read64 \- Initiate an asynchronous read operation
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_read64 (struct aiocb64 *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function is similar to the
+.IR "aio_read"
+function. The only
+difference is that on
+.IR "32 bit"
+machines, the file descriptor should
+be opened in the large file mode. Internally,
+.IR "aio_read64"
+uses
+functionality equivalent to
+.IR "lseek64"
+to position the file descriptor correctly for the reading,
+as opposed to
+.IR "lseek"
+functionality used in
+.IR "aio_read".
+
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+, this
+function is available under the name
+.IR "aio_read"
+and so transparently
+replaces the interface for small files on 32 bit machines.
+.SH "RETURN VALUES"
+See
+.IR aio_read.
+.SH ERRORS
+See
+.IR aio_read.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_return.3 b/tools/libaio/man/aio_return.3
new file mode 100644
index 0000000000..1e3335fdb6
--- /dev/null
+++ b/tools/libaio/man/aio_return.3
@@ -0,0 +1,71 @@
+.TH aio_return 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_return \- Retrieve status of asynchronous I/O operation
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "ssize_t aio_return (const struct aiocb *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function can be used to retrieve the return status of the operation
+carried out by the request described in the variable pointed to by
+.IR aiocbp .
+As long as the error status of this request as returned
+by
+.IR aio_error
+is
+.IR EINPROGRESS
+the return of this function is
+undefined.
+
+Once the request is finished this function can be used exactly once to
+retrieve the return value. Following calls might lead to undefined
+behavior.
+When the sources are compiled with
+.B "_FILE_OFFSET_BITS == 64"
+this function is in fact
+.IR aio_return64
+since the LFS interface
+transparently replaces the normal implementation.
+.SH "RETURN VALUES"
+The return value itself is the value which would have been
+returned by the
+.IR read
+,
+.IR write
+, or
+.IR fsync
+call.
+.SH ERRORS
+The function can return
+.TP
+.B ENOSYS
+if it is not implemented.
+.TP
+.B EINVAL
+if the
+.IR aiocbp
+parameter does not
+refer to an asynchronous operation whose return status is not yet known.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_return64.3 b/tools/libaio/man/aio_return64.3
new file mode 100644
index 0000000000..7e78362b32
--- /dev/null
+++ b/tools/libaio/man/aio_return64.3
@@ -0,0 +1,51 @@
+.TH aio_return64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_return64 \- Retrieve status of asynchronous I/O operation
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "ssize_t aio_return64 (const struct aiocb64 *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function is similar to
+.IR "aio_return"
+with the only difference
+that the argument is a reference to a variable of type
+.IR "struct aiocb64".
+
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+this
+function is available under the name
+.IR "aio_return"
+and so
+transparently replaces the interface for small files on 32 bit
+machines.
+.SH "RETURN VALUES"
+See
+.IR aio_return.
+.SH ERRORS
+See
+.IR aio_return.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_suspend.3 b/tools/libaio/man/aio_suspend.3
new file mode 100644
index 0000000000..cae1b65691
--- /dev/null
+++ b/tools/libaio/man/aio_suspend.3
@@ -0,0 +1,123 @@
+.TH aio_suspend 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_suspend \- Wait until one or more requests of a specific set terminates.
+.SH SYNOPSIS
+.nf
+.B "#include <errno.h>"
+.sp
+.br
+.B "#include <aio.h>"
+.sp
+.br
+.BI "int aio_suspend (const struct aiocb *const list[], int nent, const struct timespec *timeout)"
+.fi
+.SH DESCRIPTION
+Another method of synchronization is to wait until one or more requests of a
+specific set terminated. This could be achieved by the
+.IR "aio_*"
+functions to notify the initiating process about the termination but in
+some situations this is not the ideal solution. In a program which
+constantly updates clients somehow connected to the server it is not
+always the best solution to go round robin since some connections might
+be slow. On the other hand letting the
+.IR "aio_*"
+function notify the
+caller might also be not the best solution since whenever the process
+works on preparing data for one client it makes no sense to be
+interrupted by a notification since the new client will not be handled
+before the current client is served. For situations like this
+.IR "aio_suspend"
+should be used.
+.PP
+When calling this function, the calling thread is suspended until at
+least one of the requests pointed to by the
+.IR "nent"
+elements of the
+array
+.IR "list"
+has completed. If any of the requests has already
+completed at the time
+.IR "aio_suspend"
+is called, the function returns
+immediately. Whether a request has terminated or not is determined by
+comparing the error status of the request with
+.IR "EINPROGRESS" .
+If
+an element of
+.IR "list"
+is
+.IR "NULL"
+, the entry is simply ignored.
+
+If no request has finished, the calling process is suspended. If
+.IR "timeout"
+is
+.IR "NULL"
+, the process is not woken until a request
+has finished. If
+.IR "timeout"
+is not
+.IR "NULL"
+, the process remains
+suspended at least as long as specified in
+.IR "timeout" .
+In this case,
+.IR "aio_suspend"
+returns with an error.
+.PP
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+this
+function is in fact
+.IR "aio_suspend64"
+since the LFS interface
+transparently replaces the normal implementation.
+.SH "RETURN VALUES"
+The return value of the function is
+.IR 0
+if one or more requests
+from the
+.IR "list"
+have terminated. Otherwise the function returns
+.IR -1
+and
+.IR "errno"
+is set.
+.SH ERRORS
+.TP
+.B EAGAIN
+None of the requests from the
+.IR "list"
+completed in the time specified
+by
+.IR "timeout" .
+.
+.TP
+.B EINTR
+A signal interrupted the
+.IR "aio_suspend"
+function. This signal might
+also be sent by the AIO implementation while signalling the termination
+of one of the requests.
+.TP
+.B ENOSYS
+The
+.IR "aio_suspend"
+function is not implemented.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_suspend64.3 b/tools/libaio/man/aio_suspend64.3
new file mode 100644
index 0000000000..2f289ecceb
--- /dev/null
+++ b/tools/libaio/man/aio_suspend64.3
@@ -0,0 +1,51 @@
+.TH aio_suspend64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_suspend64 \- Wait until one or more requests of a specific set terminates
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_suspend64 (const struct aiocb64 *const list[], int nent, const struct timespec *timeout)"
+.fi
+.SH DESCRIPTION
+This function is similar to
+.IR "aio_suspend"
+with the only difference
+that the argument is a reference to a variable of type
+.IR "struct aiocb64".
+
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+this
+function is available under the name
+.IR "aio_suspend"
+and so
+transparently replaces the interface for small files on 32 bit
+machines.
+.SH "RETURN VALUES"
+See
+.IR aio_suspend.
+.SH ERRORS
+See
+.IR aio_suspend.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_write(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_write.3 b/tools/libaio/man/aio_write.3
new file mode 100644
index 0000000000..7c0cfd0bf7
--- /dev/null
+++ b/tools/libaio/man/aio_write.3
@@ -0,0 +1,176 @@
+.TH aio_write 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_write \- Initiate an asynchronous write operation
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_write (struct aiocb * aiocbp);"
+.fi
+.SH DESCRIPTION
+This function initiates an asynchronous write operation. The function
+call immediately returns after the operation was enqueued or if before
+this happens an error was encountered.
+
+The first
+.IR "aiocbp->aio_nbytes"
+bytes from the buffer starting at
+.IR "aiocbp->aio_buf"
+are written to the file for which
+.IR "aiocbp->aio_fildes"
+is a descriptor, starting at the absolute
+position
+.IR "aiocbp->aio_offset"
+in the file.
+
+If prioritized I/O is supported by the platform, the
+.IR "aiocbp->aio_reqprio "
+value is used to adjust the priority before
+the request is actually enqueued.
+
+The calling process is notified about the termination of the write
+request according to the
+.IR "aiocbp->aio_sigevent"
+value.
+
+When
+.IR "aio_write"
+returns, the return value is zero if no error
+occurred that can be found before the process is enqueued. If such an
+early error is found the function returns
+.IR -1
+and sets
+.IR "errno"
+to one of the following values.
+
+.TP
+.B EAGAIN
+The request was not enqueued due to (temporarily) exceeded resource
+limitations.
+.TP
+.B ENOSYS
+The
+.IR "aio_write"
+function is not implemented.
+.TP
+.B EBADF
+The
+.IR "aiocbp->aio_fildes"
+descriptor is not valid. This condition
+may not be recognized before enqueueing the request, and so this error
+might also be signaled asynchronously.
+.TP
+.B EINVAL
+The
+.IR "aiocbp->aio_offset"
+or
+.IR "aiocbp->aio_reqprio"
+value is
+invalid. This condition may not be recognized before enqueueing the
+request and so this error might also be signaled asynchronously.
+.PP
+
+In the case
+.IR "aio_write"
+returns zero, the current status of the
+request can be queried using
+.IR "aio_error"
+and
+.IR "aio_return"
+functions. As long as the value returned by
+.IR "aio_error"
+is
+.IR "EINPROGRESS"
+the operation has not yet completed. If
+.IR "aio_error"
+returns zero, the operation successfully terminated,
+otherwise the value is to be interpreted as an error code. If the
+function terminated, the result of the operation can be obtained using a call
+to
+.IR "aio_return" .
+The returned value is the same as an equivalent
+call to
+.IR "write"
+would have returned. Possible error codes returned
+by
+.IR "aio_error"
+are:
+
+.TP
+.B EBADF
+The
+.IR "aiocbp->aio_fildes"
+descriptor is not valid.
+.TP
+.B ECANCELED
+The operation was canceled before the operation was finished.
+.TP
+.B EINVAL
+The
+.IR "aiocbp->aio_offset"
+value is invalid.
+.PP
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+, this
+function is in fact
+.IR "aio_write64"
+since the LFS interface transparently
+replaces the normal implementation.
+.SH "RETURN VALUES"
+When
+.IR "aio_write"
+returns, the return value is zero if no error
+occurred that can be found before the process is enqueued. If such an
+early error is found the function returns
+.IR -1
+and sets
+.IR "errno"
+to one of the following values.
+.SH ERRORS
+.TP
+.B EAGAIN
+The request was not enqueued due to (temporarily) exceeded resource
+limitations.
+.TP
+.B ENOSYS
+The
+.IR "aio_write"
+function is not implemented.
+.TP
+.B EBADF
+The
+.IR "aiocbp->aio_fildes"
+descriptor is not valid. This condition
+may not be recognized before enqueueing the request, and so this error
+might also be signaled asynchronously.
+.TP
+.B EINVAL
+The
+.IR "aiocbp->aio_offset"
+or
+.IR "aiocbp->aio_reqprio"
+value is
+invalid. This condition may not be recognized before enqueueing the
+request and so this error might also be signaled asynchronously.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write64(3),
+.BR errno(3),
diff --git a/tools/libaio/man/aio_write64.3 b/tools/libaio/man/aio_write64.3
new file mode 100644
index 0000000000..1080903aca
--- /dev/null
+++ b/tools/libaio/man/aio_write64.3
@@ -0,0 +1,61 @@
+.TH aio_write64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+aio_write64 \- Initiate an asynchronous write operation
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <aio.h>
+.sp
+.br
+.BI "int aio_write64 (struct aiocb64 *aiocbp)"
+.fi
+.SH DESCRIPTION
+This function is similar to the
+.IR "aio_write"
+function. The only
+difference is that on
+.IR "32 bit"
+machines the file descriptor should
+be opened in the large file mode. Internally
+.IR "aio_write64"
+uses
+functionality equivalent to
+.IR "lseek64"
+to position the file descriptor correctly for the writing,
+as opposed to
+.IR "lseek"
+functionality used in
+.IR "aio_write".
+
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+, this
+function is available under the name
+.IR "aio_write"
+and so transparently
+replaces the interface for small files on 32 bit machines.
+.SH "RETURN VALUES"
+See
+.IR aio_write.
+.SH ERRORS
+See
+.IR aio_write.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR errno(3),
diff --git a/tools/libaio/man/io.3 b/tools/libaio/man/io.3
new file mode 100644
index 0000000000..d910a689f5
--- /dev/null
+++ b/tools/libaio/man/io.3
@@ -0,0 +1,351 @@
+.TH io 3 2002-09-12 "Linux 2.4" "Linux IO"
+.SH NAME
+io \- Asynchronous IO
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <libaio.h>
+.sp
+.fi
+.SH DESCRIPTION
+The libaio library defines a new set of I/O operations which can
+significantly reduce the time an application spends waiting at I/O. The
+new functions allow a program to initiate one or more I/O operations and
+then immediately resume normal work while the I/O operations are
+executed in parallel.
+
+These functions are part of the library with realtime functions named
+.IR "libaio" .
+They are not actually part of the
+.IR "libc"
+binary.
+The implementation of these functions can be done using support in the
+kernel.
+
+All IO operations operate on files which were opened previously. There
+might be arbitrarily many operations running for one file. The
+asynchronous I/O operations are controlled using a data structure named
+.IR "struct iocb" .
+It is defined in
+.IR "libaio.h"
+as follows.
+
+.nf
+
+typedef struct io_context *io_context_t;
+
+typedef enum io_iocb_cmd {
+ IO_CMD_PREAD = 0,
+ IO_CMD_PWRITE = 1,
+
+ IO_CMD_FSYNC = 2,
+ IO_CMD_FDSYNC = 3,
+
+ IO_CMD_POLL = 5,
+ IO_CMD_NOOP = 6,
+} io_iocb_cmd_t;
+
+struct io_iocb_common {
+ void *buf;
+ unsigned __pad1;
+ long nbytes;
+ unsigned __pad2;
+ long long offset;
+ long long __pad3, __pad4;
+}; /* result code is the amount read or -'ve errno */
+
+
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+ union {
+ struct io_iocb_common c;
+ struct io_iocb_vector v;
+ struct io_iocb_poll poll;
+ struct io_iocb_sockaddr saddr;
+ } u;
+};
+
+
+.fi
+.TP
+.IR "int aio_fildes"
+This element specifies the file descriptor to be used for the
+operation. It must be a legal descriptor, otherwise the operation will
+fail.
+
+The device on which the file is opened must allow the seek operation.
+I.e., it is not possible to use any of the IO operations on devices
+like terminals where an
+.IR "lseek"
+call would lead to an error.
+.TP
+.IR "long long u.c.offset"
+This element specifies the offset in the file at which the operation (input
+or output) is performed. Since the operations are carried out in arbitrary
+order and more than one operation for one file descriptor can be
+started, one cannot expect a current read/write position of the file
+descriptor.
+.TP
+.IR "void *buf"
+This is a pointer to the buffer with the data to be written or the place
+where the read data is stored.
+.TP
+.IR "long u.c.nbytes"
+This element specifies the length of the buffer pointed to by
+.IR "buf" .
+.
+.TP
+.IR "short aio_reqprio"
+Is not currently used.
+.TP
+.B "IO_CMD_PREAD"
+Start a read operation. Read from the file at position
+.IR "u.c.offset"
+and store the next
+.IR "u.c.nbytes"
+bytes in the
+buffer pointed to by
+.IR "buf" .
+.
+.TP
+.B "IO_CMD_PWRITE"
+Start a write operation. Write
+.IR "u.c.nbytes"
+bytes starting at
+.IR "buf"
+into the file starting at position
+.IR "u.c.offset" .
+.
+.TP
+.B "IO_CMD_NOOP"
+Do nothing for this control block. This value is useful sometimes when
+an array of
+.IR "struct iocb"
+values contains holes, i.e., some of the
+values must not be handled although the whole array is presented to the
+.IR "io_submit"
+function.
+.TP
+.B "IO_CMD_FSYNC"
+.TP
+.B "IO_CMD_POLL"
+This is experimental.
+.SH EXAMPLE
+.nf
+/*
+ * Simplistic version of copy command using async i/o
+ *
+ * From: Stephen Hemminger <shemminger@osdl.org>
+ * Copy file by using a async I/O state machine.
+ * 1. Start read request
+ * 2. When read completes turn it into a write request
+ * 3. When write completes decrement counter and free resources
+ *
+ *
+ * Usage: aiocp file(s) desination
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <libaio.h>
+
+#define AIO_BLKSIZE (64*1024)
+#define AIO_MAXIO 32
+
+static int busy = 0; // # of I/O's in flight
+static int tocopy = 0; // # of blocks left to copy
+static int dstfd = -1; // destination file descriptor
+static const char *dstname = NULL;
+static const char *srcname = NULL;
+
+
+/* Fatal error handler */
+static void io_error(const char *func, int rc)
+{
+ if (rc == -ENOSYS)
+ fprintf(stderr, "AIO not in this kernel\n");
+ else if (rc < 0 && -rc < sys_nerr)
+ fprintf(stderr, "%s: %s\n", func, sys_errlist[-rc]);
+ else
+ fprintf(stderr, "%s: error %d\n", func, rc);
+
+ if (dstfd > 0)
+ close(dstfd);
+ if (dstname)
+ unlink(dstname);
+ exit(1);
+}
+
+/*
+ * Write complete callback.
+ * Adjust counts and free resources
+ */
+static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
+{
+ if (res2 != 0) {
+ io_error("aio write", res2);
+ }
+ if (res != iocb->u.c.nbytes) {
+ fprintf(stderr, "write missed bytes expect %d got %d\n", iocb->u.c.nbytes, res2);
+ exit(1);
+ }
+ --tocopy;
+ --busy;
+ free(iocb->u.c.buf);
+
+ memset(iocb, 0xff, sizeof(iocb)); // paranoia
+ free(iocb);
+ write(2, "w", 1);
+}
+
+/*
+ * Read complete callback.
+ * Change read iocb into a write iocb and start it.
+ */
+static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
+{
+ /* library needs accessors to look at iocb? */
+ int iosize = iocb->u.c.nbytes;
+ char *buf = iocb->u.c.buf;
+ off_t offset = iocb->u.c.offset;
+
+ if (res2 != 0)
+ io_error("aio read", res2);
+ if (res != iosize) {
+ fprintf(stderr, "read missing bytes expect %d got %d\n", iocb->u.c.nbytes, res);
+ exit(1);
+ }
+
+
+ /* turn read into write */
+ io_prep_pwrite(iocb, dstfd, buf, iosize, offset);
+ io_set_callback(iocb, wr_done);
+ if (1 != (res = io_submit(ctx, 1, &iocb)))
+ io_error("io_submit write", res);
+ write(2, "r", 1);
+}
+
+
+int main(int argc, char *const *argv)
+{
+ int srcfd;
+ struct stat st;
+ off_t length = 0, offset = 0;
+ io_context_t myctx;
+
+ if (argc != 3 || argv[1][0] == '-') {
+ fprintf(stderr, "Usage: aiocp SOURCE DEST");
+ exit(1);
+ }
+ if ((srcfd = open(srcname = argv[1], O_RDONLY)) < 0) {
+ perror(srcname);
+ exit(1);
+ }
+ if (fstat(srcfd, &st) < 0) {
+ perror("fstat");
+ exit(1);
+ }
+ length = st.st_size;
+
+ if ((dstfd = open(dstname = argv[2], O_WRONLY | O_CREAT, 0666)) < 0) {
+ close(srcfd);
+ perror(dstname);
+ exit(1);
+ }
+
+ /* initialize state machine */
+ memset(&myctx, 0, sizeof(myctx));
+ io_queue_init(AIO_MAXIO, &myctx);
+ tocopy = howmany(length, AIO_BLKSIZE);
+
+ while (tocopy > 0) {
+ int i, rc;
+ /* Submit as many reads as once as possible upto AIO_MAXIO */
+ int n = MIN(MIN(AIO_MAXIO - busy, AIO_MAXIO / 2),
+ howmany(length - offset, AIO_BLKSIZE));
+ if (n > 0) {
+ struct iocb *ioq[n];
+
+ for (i = 0; i < n; i++) {
+ struct iocb *io = (struct iocb *) malloc(sizeof(struct iocb));
+ int iosize = MIN(length - offset, AIO_BLKSIZE);
+ char *buf = (char *) malloc(iosize);
+
+ if (NULL == buf || NULL == io) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+
+ io_prep_pread(io, srcfd, buf, iosize, offset);
+ io_set_callback(io, rd_done);
+ ioq[i] = io;
+ offset += iosize;
+ }
+
+ rc = io_submit(myctx, n, ioq);
+ if (rc < 0)
+ io_error("io_submit", rc);
+
+ busy += n;
+ }
+
+ // Handle IO's that have completed
+ rc = io_queue_run(myctx);
+ if (rc < 0)
+ io_error("io_queue_run", rc);
+
+ // if we have maximum number of i/o's in flight
+ // then wait for one to complete
+ if (busy == AIO_MAXIO) {
+ rc = io_queue_wait(myctx, NULL);
+ if (rc < 0)
+ io_error("io_queue_wait", rc);
+ }
+
+ }
+
+ close(srcfd);
+ close(dstfd);
+ exit(0);
+}
+
+/*
+ * Results look like:
+ * [alanm@toolbox ~/MOT3]$ ../taio kernel-source-2.4.8-0.4g.ppc.rpm abc
+ * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww
+ * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr
+ * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww
+ * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww
+ * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr
+ * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww
+ * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr
+ * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww
+ * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw
+ */
+.fi
+.SH "SEE ALSO"
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_cancel.1 b/tools/libaio/man/io_cancel.1
new file mode 100644
index 0000000000..16e898a7de
--- /dev/null
+++ b/tools/libaio/man/io_cancel.1
@@ -0,0 +1,21 @@
+.\"/* sys_io_cancel:
+.\" * Attempts to cancel an iocb previously passed to io_submit. If
+.\" * the operation is successfully cancelled, the resulting event is
+.\" * copied into the memory pointed to by result without being placed
+.\" * into the completion queue and 0 is returned. May fail with
+.\" * -EFAULT if any of the data structures pointed to are invalid.
+.\" * May fail with -EINVAL if aio_context specified by ctx_id is
+.\" * invalid. May fail with -EAGAIN if the iocb specified was not
+.\" * cancelled. Will fail with -ENOSYS if not implemented.
+.\" */
+.\"
+.TH io_cancel 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_cancel \- cancel io requests
+.SH SYNOPSIS
+.B #include <errno.h>
+.br
+.B #include <libaio.h>
+.LP
+.BI "int io_cancel(io_context_t " ctx ", struct iocb *" iocb ", struct io_event *" result ");"
+
diff --git a/tools/libaio/man/io_cancel.3 b/tools/libaio/man/io_cancel.3
new file mode 100644
index 0000000000..9a16084a5b
--- /dev/null
+++ b/tools/libaio/man/io_cancel.3
@@ -0,0 +1,65 @@
+.TH io_cancel 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_cancel \- Cancel io requests
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <libaio.h>
+.sp
+.br
+.BI "int io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *result)"
+.br
+.sp
+struct iocb {
+ void *data; /* Return in the io completion event */
+ unsigned key; /* For use in identifying io requests */
+ short aio_lio_opcode;
+ short aio_reqprio; /* Not used */
+ int aio_fildes;
+};
+.fi
+.SH DESCRIPTION
+Attempts to cancel an iocb previously passed to io_submit. If
+the operation is successfully cancelled, the resulting event is
+copied into the memory pointed to by result without being placed
+into the completion queue.
+.PP
+When one or more requests are asynchronously processed, it might be
+useful in some situations to cancel a selected operation, e.g., if it
+becomes obvious that the written data is no longer accurate and would
+have to be overwritten soon. As an example, assume an application, which
+writes data in files in a situation where new incoming data would have
+to be written in a file which will be updated by an enqueued request.
+.SH "RETURN VALUES"
+0 is returned on success, otherwise returns errno.
+.SH ERRORS
+.TP
+.B EFAULT
+If any of the data structures pointed to are invalid.
+.TP
+.B EINVAL
+If aio_context specified by ctx is
+invalid.
+.TP
+.B EAGAIN
+If the iocb specified was not
+cancelled.
+.TP
+.B ENOSYS
+if not implemented.
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_destroy.1 b/tools/libaio/man/io_destroy.1
new file mode 100644
index 0000000000..177683b8e0
--- /dev/null
+++ b/tools/libaio/man/io_destroy.1
@@ -0,0 +1,17 @@
+.\"/* sys_io_destroy:
+.\" * Destroy the aio_context specified. May cancel any outstanding
+.\" * AIOs and block on completion. Will fail with -ENOSYS if not
+.\" * implemented. May fail with -EFAULT if the context pointed to
+.\" * is invalid.
+.\" */
+.\" libaio provides this as io_queue_release.
+.TH io_destroy 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_destroy \- destroy an io context
+.SH SYNOPSIS
+.B #include <errno.h>
+.br
+.B #include <libaio.h>
+.LP
+.BI "int io_destroy(io_context_t " ctx ");"
+
diff --git a/tools/libaio/man/io_fsync.3 b/tools/libaio/man/io_fsync.3
new file mode 100644
index 0000000000..53eb63d278
--- /dev/null
+++ b/tools/libaio/man/io_fsync.3
@@ -0,0 +1,82 @@
+.\" static inline int io_fsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)
+.\" {
+.\" io_prep_fsync(iocb, fd);
+.\" io_set_callback(iocb, cb);
+.\" return io_submit(ctx, 1, &iocb);
+.\" }
+.TH io_fsync 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_fsync \- Synchronize a file's complete in-core state with that on disk
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <libaio.h>
+.sp
+.br
+.BI "int io_fsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)"
+.sp
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.sp
+typedef void (*io_callback_t)(io_context_t ctx, struct iocb *iocb, long res, long res2);
+.sp
+.fi
+.SH DESCRIPTION
+When dealing with asynchronous operations it is sometimes necessary to
+get into a consistent state. This would mean for AIO that one wants to
+know whether a certain request or a group of requests were processed.
+This could be done by waiting for the notification sent by the system
+after the operation terminated, but this sometimes would mean wasting
+resources (mainly computation time).
+.PP
+Calling this function forces all I/O operations queued at the
+time of the function call and operating on the file descriptor
+.IR "iocb->aio_fildes"
+into the synchronized I/O completion state. The
+.IR "io_fsync"
+function returns
+immediately but the notification through the method described in
+.IR "io_callback"
+will happen only after all requests for this
+file descriptor have terminated and the file is synchronized. This also
+means that requests for this very same file descriptor which are queued
+after the synchronization request are not affected.
+.SH "RETURN VALUES"
+Returns 0 on success, otherwise returns errno.
+.SH ERRORS
+.TP
+.B EFAULT
+.I iocb
+referenced data outside of the program's accessible address space.
+.TP
+.B EINVAL
+.I ctx
+refers to an uninitialized aio context, or the iocb pointed to by
+.I iocb
+is improperly initialized.
+.TP
+.B EBADF
+The iocb contains a file descriptor that does not exist.
+.TP
+.B EINVAL
+The file specified in the iocb does not support the given io operation.
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_getevents(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_getevents.1 b/tools/libaio/man/io_getevents.1
new file mode 100644
index 0000000000..27730b9959
--- /dev/null
+++ b/tools/libaio/man/io_getevents.1
@@ -0,0 +1,29 @@
+.\"/* io_getevents:
+.\" * Attempts to read at least min_nr events and up to nr events from
+.\" * the completion queue for the aio_context specified by ctx_id. May
+.\" * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range,
+.\" * if nr is out of range, if when is out of range. May fail with
+.\" * -EFAULT if any of the memory specified to is invalid. May return
+.\" * 0 or < min_nr if no events are available and the timeout specified
+.\" * by when has elapsed, where when == NULL specifies an infinite
+.\" * timeout. Note that the timeout pointed to by when is relative and
+.\" * will be updated if not NULL and the operation blocks. Will fail
+.\" * with -ENOSYS if not implemented.
+.\" */
+.\"asmlinkage long sys_io_getevents(io_context_t ctx_id,
+.\" long min_nr,
+.\" long nr,
+.\" struct io_event *events,
+.\" struct timespec *timeout)
+.\"
+.TH io_getevents 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_getevents \- read resulting events from io requests
+.SH SYNOPSIS
+.B #include <errno.h>
+.br
+.B #include <libaio.h>
+.sp
+.BI "int io_getevents(io_context_t " ctx ", long " min_nr ", long " nr ", struct io_event *" events ", struct timespec *" timeout ");"
+
+
diff --git a/tools/libaio/man/io_getevents.3 b/tools/libaio/man/io_getevents.3
new file mode 100644
index 0000000000..8e9ddc866a
--- /dev/null
+++ b/tools/libaio/man/io_getevents.3
@@ -0,0 +1,79 @@
+.\"/* io_getevents:
+.\" * Attempts to read at least min_nr events and up to nr events from
+.\" * the completion queue for the aio_context specified by ctx_id. May
+.\" * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range,
+.\" * if nr is out of range, if when is out of range. May fail with
+.\" * -EFAULT if any of the memory specified to is invalid. May return
+.\" * 0 or < min_nr if no events are available and the timeout specified
+.\" * by when has elapsed, where when == NULL specifies an infinite
+.\" * timeout. Note that the timeout pointed to by when is relative and
+.\" * will be updated if not NULL and the operation blocks. Will fail
+.\" * with -ENOSYS if not implemented.
+.\" */
+.\"asmlinkage long sys_io_getevents(io_context_t ctx_id,
+.\" long min_nr,
+.\" long nr,
+.\" struct io_event *events,
+.\" struct timespec *timeout)
+.\"
+.TH io_getevents 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_getevents \- Read resulting events from io requests
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <libaio.h>
+.br
+.sp
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.sp
+struct io_event {
+ unsigned PADDED(data, __pad1);
+ unsigned PADDED(obj, __pad2);
+ unsigned PADDED(res, __pad3);
+ unsigned PADDED(res2, __pad4);
+};
+.sp
+.BI "int io_getevents(io_context_t " ctx ", long " min_nr ", long " nr ", struct io_event *" events ", struct timespec *" timeout ");"
+
+.fi
+.SH DESCRIPTION
+Attempts to read at least min_nr events and up to nr events from
+the completion queue for the aio_context specified by ctx.
+.SH "RETURN VALUES"
+May return
+0 if no events are available and the timeout specified
+by timeout has elapsed, where timeout == NULL specifies an infinite
+timeout. Note that the value pointed to by timeout is relative and
+will be updated if not NULL and the operation blocks. Will fail
+with ENOSYS if not implemented.
+.SH ERRORS
+.TP
+.B EINVAL
+if ctx is invalid, if min_nr is out of range,
+if nr is out of range, if timeout is out of range.
+.TP
+.B EFAULT
+if any of the memory specified to is invalid.
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_prep_fsync.3 b/tools/libaio/man/io_prep_fsync.3
new file mode 100644
index 0000000000..4cf935acaf
--- /dev/null
+++ b/tools/libaio/man/io_prep_fsync.3
@@ -0,0 +1,89 @@
+./" static inline void io_prep_fsync(struct iocb *iocb, int fd)
+./" {
+./" memset(iocb, 0, sizeof(*iocb));
+./" iocb->aio_fildes = fd;
+./" iocb->aio_lio_opcode = IO_CMD_FSYNC;
+./" iocb->aio_reqprio = 0;
+./" }
+.TH io_prep_fsync 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_prep_fsync \- Synchronize a file's complete in-core state with that on disk
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.sp
+.B #include <libaio.h>
+.br
+.sp
+.BI "static inline void io_prep_fsync(struct iocb *iocb, int fd)"
+.sp
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.sp
+.fi
+.SH DESCRIPTION
+This is an inline convenience function for setting up an iocb for an FSYNC request.
+.br
+The file for which
+.TP
+.IR "iocb->aio_fildes = fd"
+is a descriptor is set up with
+the command
+.TP
+.IR "iocb->aio_lio_opcode = IO_CMD_FSYNC"
+.
+.PP
+The io_prep_fsync() function shall set up an IO_CMD_FSYNC operation
+to asynchronously force all I/O
+operations associated with the file indicated by the file
+descriptor aio_fildes member of the iocb structure referenced by
+the iocb argument and queued at the time of the call to
+io_submit() to the synchronized I/O completion state. The function
+call shall return when the synchronization request has been
+initiated or queued to the file or device (even when the data
+cannot be synchronized immediately).
+
+All currently queued I/O operations shall be completed as if by a call
+to fsync(); that is, as defined for synchronized I/O file
+integrity completion. If the
+operation queued by io_prep_fsync() fails, then, as for fsync(),
+outstanding I/O operations are not guaranteed to have
+been completed.
+
+If io_prep_fsync() succeeds, then it is only the I/O that was queued
+at the time of the call to io_submit() that is guaranteed to be
+forced to the relevant completion state. The completion of
+subsequent I/O on the file descriptor is not guaranteed to be
+completed in a synchronized fashion.
+.PP
+This function returns immediately. To schedule the operation, the
+function
+.IR io_submit
+must be called.
+.PP
+Simultaneous asynchronous operations using the same iocb produce
+undefined results.
+.SH "RETURN VALUES"
+None
+.SH ERRORS
+None
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_prep_pread.3 b/tools/libaio/man/io_prep_pread.3
new file mode 100644
index 0000000000..5938aecc6b
--- /dev/null
+++ b/tools/libaio/man/io_prep_pread.3
@@ -0,0 +1,79 @@
+./" static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
+./" {
+./" memset(iocb, 0, sizeof(*iocb));
+./" iocb->aio_fildes = fd;
+./" iocb->aio_lio_opcode = IO_CMD_PREAD;
+./" iocb->aio_reqprio = 0;
+./" iocb->u.c.buf = buf;
+./" iocb->u.c.nbytes = count;
+./" iocb->u.c.offset = offset;
+./" }
+.TH io_prep_pread 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_prep_pread \- Set up asynchronous read
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.sp
+.br
+.B #include <libaio.h>
+.br
+.sp
+.BI "inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)"
+.br
+.sp
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.fi
+.SH DESCRIPTION
+.IR io_prep_pread
+is an inline convenience function designed to facilitate the initialization of
+the iocb for an asynchronous read operation.
+
+The first
+.TP
+.IR "iocb->u.c.nbytes = count"
+bytes of the file for which
+.TP
+.IR "iocb->aio_fildes = fd"
+is a descriptor are read into the buffer
+starting at
+.TP
+.IR "iocb->u.c.buf = buf"
+.
+.br
+Reading starts at the absolute position
+.TP
+.IR "iocb->u.c.offset = offset"
+in the file.
+.PP
+This function returns immediately. To schedule the operation, the
+function
+.IR io_submit
+must be called.
+.PP
+Simultaneous asynchronous operations using the same iocb produce
+undefined results.
+.SH "RETURN VALUES"
+None
+.SH ERRORS
+None
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_prep_pwrite.3 b/tools/libaio/man/io_prep_pwrite.3
new file mode 100644
index 0000000000..68b3500587
--- /dev/null
+++ b/tools/libaio/man/io_prep_pwrite.3
@@ -0,0 +1,77 @@
+./" static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
+./" {
+./" memset(iocb, 0, sizeof(*iocb));
+./" iocb->aio_fildes = fd;
+./" iocb->aio_lio_opcode = IO_CMD_PWRITE;
+./" iocb->aio_reqprio = 0;
+./" iocb->u.c.buf = buf;
+./" iocb->u.c.nbytes = count;
+./" iocb->u.c.offset = offset;
+./" }
+.TH io_prep_pwrite 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_prep_pwrite \- Set up iocb for asynchronous writes
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.sp
+.B #include <libaio.h>
+.br
+.sp
+.BI "inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)"
+.br
+.sp
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.fi
+.SH DESCRIPTION
+io_prep_pwrite is a convenience function for setting up parallel writes.
+
+The first
+.TP
+.IR "iocb->u.c.nbytes = count"
+bytes of the file for which
+.TP
+.IR "iocb->aio_fildes = fd"
+is a descriptor are written from the buffer
+starting at
+.TP
+.IR "iocb->u.c.buf = buf"
+.
+.br
+Writing starts at the absolute position
+.TP
+.IR "iocb->u.c.offset = offset"
+in the file.
+.PP
+This function returns immediately. To schedule the operation, the
+function
+.IR io_submit
+must be called.
+.PP
+Simultaneous asynchronous operations using the same iocb produce
+undefined results.
+.SH "RETURN VALUES"
+None
+.SH ERRORS
+None
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_queue_init.3 b/tools/libaio/man/io_queue_init.3
new file mode 100644
index 0000000000..317f631cfc
--- /dev/null
+++ b/tools/libaio/man/io_queue_init.3
@@ -0,0 +1,63 @@
+.TH io_queue_init 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_queue_init \- Initialize asynchronous io state machine
+
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.sp
+.B #include <libaio.h>
+.br
+.sp
+.BI "int io_queue_init(int maxevents, io_context_t *ctx );"
+.sp
+.fi
+.SH DESCRIPTION
+.B io_queue_init
+Attempts to create an aio context capable of receiving at least
+.IR maxevents
+events.
+.IR ctx
+must not point to an aio context that already exists and must be initialized
+to
+.IR 0
+before the call.
+If the operation is successful, *ctx is filled with the resulting handle.
+.SH "RETURN VALUES"
+On success,
+.B io_queue_init
+returns 0. Otherwise, -error is returned, where
+error is one of the Exxx values defined in the Errors section.
+.SH ERRORS
+.TP
+.B EFAULT
+.I ctx
+points outside of the program's accessible address space.
+.TP
+.B EINVAL
+.I maxevents
+is <= 0 or
+.IR ctx
+is an invalid memory location.
+.TP
+.B ENOSYS
+Not implemented
+.TP
+.B EAGAIN
+.IR "maxevents > max_aio_reqs"
+where max_aio_reqs is a tunable value.
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_queue_release.3 b/tools/libaio/man/io_queue_release.3
new file mode 100644
index 0000000000..06b9ec033d
--- /dev/null
+++ b/tools/libaio/man/io_queue_release.3
@@ -0,0 +1,48 @@
+.TH io_queue_release 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_queue_release \- Release the context associated with the userspace handle
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.B #include <libaio.h>
+.br
+.sp
+.BI "int io_queue_release(io_context_t ctx)"
+.fi
+.SH DESCRIPTION
+.B io_queue_release
+destroys the context associated with the userspace handle. May cancel any outstanding
+AIOs and block on completion.
+
+.B cts.
+.SH "RETURN VALUES"
+On success,
+.B io_queue_release
+returns 0. Otherwise, -error is returned, where
+error is one of the Exxx values defined in the Errors section.
+.SH ERRORS
+.TP
+.B EINVAL
+.I ctx
+refers to an uninitialized aio context, the iocb pointed to by
+.I iocbs
+contains an improperly initialized iocb,
+.TP
+.B ENOSYS
+Not implemented
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
+
diff --git a/tools/libaio/man/io_queue_run.3 b/tools/libaio/man/io_queue_run.3
new file mode 100644
index 0000000000..57dd417875
--- /dev/null
+++ b/tools/libaio/man/io_queue_run.3
@@ -0,0 +1,50 @@
+.TH io_queue_run 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_queue_run \- Handle completed io requests
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.sp
+.B #include <libaio.h>
+.br
+.sp
+.BI "int io_queue_run(io_context_t ctx );"
+.sp
+.fi
+.SH DESCRIPTION
+.B io_queue_run
+Attempts to read all the events from
+the completion queue for the aio_context specified by ctx.
+.SH "RETURN VALUES"
+May return
+0 if no events are available.
+Will fail with -ENOSYS if not implemented.
+.SH ERRORS
+.TP
+.B EFAULT
+.I iocbs
+referenced data outside of the program's accessible address space.
+.TP
+.B EINVAL
+.I ctx
+refers to an uninitialized aio context, the iocb pointed to by
+.I iocbs
+contains an improperly initialized iocb,
+.TP
+.B ENOSYS
+Not implemented
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_queue_wait.3 b/tools/libaio/man/io_queue_wait.3
new file mode 100644
index 0000000000..2306663eae
--- /dev/null
+++ b/tools/libaio/man/io_queue_wait.3
@@ -0,0 +1,56 @@
+.TH io_queue_wait 2 2002-09-03 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_queue_wait \- Wait for io requests to complete
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.sp
+.B #include <libaio.h>
+.br
+.sp
+.BI "int io_queue_wait(io_context_t ctx, const struct timespec *timeout);"
+.fi
+.SH DESCRIPTION
+Attempts to read an event from
+the completion queue for the aio_context specified by ctx_id.
+.SH "RETURN VALUES"
+May return
+0 if no events are available and the timeout specified
+by timeout has elapsed, where timeout == NULL specifies an infinite
+timeout. Note that the timeout pointed to by timeout is relative and
+will be updated if not NULL and the operation blocks. Will fail
+with -ENOSYS if not implemented.
+.PP
+On success,
+.B io_queue_wait
+returns 0. Otherwise, -error is returned, where
+error is one of the Exxx values defined in the Errors section.
+.SH ERRORS
+.TP
+.B EFAULT
+.I iocbs
+referenced data outside of the program's accessible address space.
+.TP
+.B EINVAL
+.I ctx
+refers to an uninitialized aio context, the iocb pointed to by
+.I iocbs
+contains an improperly initialized iocb,
+.TP
+.B ENOSYS
+Not implemented
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_set_callback(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_set_callback.3 b/tools/libaio/man/io_set_callback.3
new file mode 100644
index 0000000000..a8ca789eb2
--- /dev/null
+++ b/tools/libaio/man/io_set_callback.3
@@ -0,0 +1,44 @@
+./"static inline void io_set_callback(struct iocb *iocb, io_callback_t cb)
+.TH io_set_callback 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_set_callback \- Set up io completion callback function
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.sp
+.B #include <libaio.h>
+.br
+.sp
+.BI "static inline void io_set_callback(struct iocb *iocb, io_callback_t cb)"
+.sp
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.sp
+typedef void (*io_callback_t)(io_context_t ctx, struct iocb *iocb, long res, long res2);
+.sp
+.fi
+.SH DESCRIPTION
+The callback is not done if the caller uses raw events from
+io_getevents, only with the library helpers.
+.SH "RETURN VALUES"
+.SH ERRORS
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_submit(3),
+.BR errno(3)
diff --git a/tools/libaio/man/io_setup.1 b/tools/libaio/man/io_setup.1
new file mode 100644
index 0000000000..68690e1e73
--- /dev/null
+++ b/tools/libaio/man/io_setup.1
@@ -0,0 +1,15 @@
+./"/* sys_io_setup:
+./" * Create an aio_context capable of receiving at least nr_events.
+./" * ctxp must not point to an aio_context that already exists, and
+./" * must be initialized to 0 prior to the call. On successful
+./" * creation of the aio_context, *ctxp is filled in with the resulting
+./" * handle. May fail with -EINVAL if *ctxp is not initialized,
+./" * if the specified nr_events exceeds internal limits. May fail
+./" * with -EAGAIN if the specified nr_events exceeds the user's limit
+./" * of available events. May fail with -ENOMEM if insufficient kernel
+./" * resources are available. May fail with -EFAULT if an invalid
+./" * pointer is passed for ctxp. Will fail with -ENOSYS if not
+./" * implemented.
+./" */
+./" -- note: libaio is actually providing io_queue_init and io_queue_grow
+./" as separate functions. For now io_setup is the same as io_queue_grow.
diff --git a/tools/libaio/man/io_submit.1 b/tools/libaio/man/io_submit.1
new file mode 100644
index 0000000000..f66e80f1b5
--- /dev/null
+++ b/tools/libaio/man/io_submit.1
@@ -0,0 +1,109 @@
+.TH io_submit 2 2002-09-02 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_submit \- submit io requests
+.SH SYNOPSIS
+.B #include <errno.h>
+.br
+.B #include <libaio.h>
+.LP
+.BI "int io_submit(io_context_t " ctx ", long " nr ", struct iocb *" iocbs "[]);"
+.SH DESCRIPTION
+.B io_submit
+submits to the io_context
+.I ctx
+up to
+.I nr
+I/O requests pointed to by the vector
+.IR iocbs .
+
+The
+.B iocb
+structure is defined as something like
+.sp
+.RS
+.nf
+struct iocb {
+ void *data;
+.\" unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.fi
+.RE
+.sp
+.I data
+is an opaque pointer which will upon completion be returned in the
+.B io_event
+structure by
+.BR io_getevents (2).
+.\" and io_wait(2)
+Callers will typically use this to point directly or indirectly to a
+callback function.
+.sp
+.I aio_lio_opcode
+is the I/O operation requested. Callers will typically set this and the
+arguments to the I/O operation calling the
+.BR io_prep_ *(3)
+function corresponding to the operation.
+.sp
+.I aio_reqprio
+is the priority of the request. Higher values have more priority; the
+normal priority is 0.
+.sp
+.I aio_fildes
+is the file descriptor for the I/O operation.
+Callers will typically set this and the
+arguments to the I/O operation calling the
+.BR io_prep_ *(3)
+function corresponding to the operation.
+.sp
+The caller may not modify the contents or resubmit a submitted
+.B iocb
+structure until after the operation completes or is canceled.
+The implementation of
+.BR io_submit (2)
+is permitted to modify reserved fields of the
+.B iocb
+structure.
+.SH "RETURN VALUES"
+If able to submit at least one iocb,
+.B io_submit
+returns the number of iocbs submitted successfully. Otherwise,
+.RI - error
+is returned, where
+.I error
+is one of the Exxx values defined in the Errors section.
+.SH ERRORS
+.TP
+.B EFAULT
+.I iocbs
+referenced data outside of the program's accessible address space.
+.TP
+.B EINVAL
+.I nr
+is negative,
+.I ctx
+refers to an uninitialized aio context, the iocb pointed to by
+.IR iocbs [0]
+is improperly initialized or specifies an unsupported operation.
+.TP
+.B EBADF
+The iocb pointed to by
+.IR iocbs [0]
+contains a file descriptor that does not exist.
+.TP
+.B EAGAIN
+Insufficient resources were available to queue any operations.
+.SH "SEE ALSO"
+.BR io_setup (2),
+.BR io_destroy (2),
+.BR io_getevents (2),
+.\".BR io_wait (2),
+.BR io_prep_pread (3),
+.BR io_prep_pwrite (3),
+.BR io_prep_fsync (3),
+.BR io_prep_fdsync (3),
+.BR io_prep_noop (3),
+.BR io_cancel (2),
+.BR errno (3)
diff --git a/tools/libaio/man/io_submit.3 b/tools/libaio/man/io_submit.3
new file mode 100644
index 0000000000..b6966efd8b
--- /dev/null
+++ b/tools/libaio/man/io_submit.3
@@ -0,0 +1,135 @@
+./"/* sys_io_submit:
+./" * Queue the nr iocbs pointed to by iocbpp for processing. Returns
+./" * the number of iocbs queued. May return -EINVAL if the aio_context
+./" * specified by ctx_id is invalid, if nr is < 0, if the iocb at
+./" * *iocbpp[0] is not properly initialized, if the operation specified
+./" * is invalid for the file descriptor in the iocb. May fail with
+./" * -EFAULT if any of the data structures point to invalid data. May
+./" * fail with -EBADF if the file descriptor specified in the first
+./" * iocb is invalid. May fail with -EAGAIN if insufficient resources
+./" * are available to queue any iocbs. Will return 0 if nr is 0. Will
+./" * fail with -ENOSYS if not implemented.
+./" */
+.TH io_submit 2 2002-09-02 "Linux 2.4" "Linux AIO"
+.SH NAME
+io_submit \- Submit io requests
+.SH SYNOPSIS
+.nf
+.B #include <errno.h>
+.br
+.sp
+.B #include <libaio.h>
+.br
+.sp
+.BI "int io_submit(io_context_t " ctx ", long " nr ", struct iocb *" iocbs "[]);"
+.sp
+struct iocb {
+ void *data;
+ unsigned key;
+ short aio_lio_opcode;
+ short aio_reqprio;
+ int aio_fildes;
+};
+.fi
+.SH DESCRIPTION
+.B io_submit
+submits
+.I nr
+iocbs for processing for a given io context ctx.
+
+The
+.IR "io_submit"
+function can be used to enqueue an arbitrary
+number of read and write requests at one time. The requests can all be
+meant for the same file, all for different files or every solution in
+between.
+
+.IR "io_submit"
+gets the
+.IR "nr"
+requests from the array pointed to
+by
+.IR "iocbs"
+. The operation to be performed is determined by the
+.IR "aio_lio_opcode"
+member in each element of
+.IR "iocbs"
+. If this
+field is
+.B "IO_CMD_PREAD"
+a read operation is enqueued, similar to a call
+of
+.IR "io_prep_pread"
+for this element of the array (except that the way
+the termination is signalled is different, as we will see below). If
+the
+.IR "aio_lio_opcode"
+member is
+.B "IO_CMD_PWRITE"
+a write operation
+is enqueued. Otherwise the
+.IR "aio_lio_opcode"
+must be
+.B "IO_CMD_NOP"
+in which case this element of
+.IR "iocbs"
+is simply ignored. This
+``operation'' is useful in situations where one has a fixed array of
+.IR "struct iocb"
+elements from which only a few need to be handled at
+a time. Another situation is where the
+.IR "io_submit"
+call was
+canceled before all requests are processed and the remaining requests have to be reissued.
+
+The other members of each element of the array pointed to by
+.IR "iocbs"
+must have values suitable for the operation as described in
+the documentation for
+.IR "io_prep_pread"
+and
+.IR "io_prep_pwrite"
+above.
+
+The function returns immediately after
+having enqueued all the requests.
+On success,
+.B io_submit
+returns the number of iocbs submitted successfully. Otherwise, -error is returned, where
+error is one of the Exxx values defined in the Errors section.
+.PP
+If an error is detected, then the behavior is undefined.
+.PP
+Simultaneous asynchronous operations using the same iocb produce
+undefined results.
+.SH ERRORS
+.TP
+.B EFAULT
+.I iocbs
+referenced data outside of the program's accessible address space.
+.TP
+.B EINVAL
+.I ctx
+refers to an uninitialized aio context, the iocb pointed to by
+.I iocbs
+contains an improperly initialized iocb,
+.TP
+.B EBADF
+The iocb contains a file descriptor that does not exist.
+.TP
+.B EINVAL
+The file specified in the iocb does not support the given io operation.
+.SH "SEE ALSO"
+.BR io(3),
+.BR io_cancel(3),
+.BR io_fsync(3),
+.BR io_getevents(3),
+.BR io_prep_fsync(3),
+.BR io_prep_pread(3),
+.BR io_prep_pwrite(3),
+.BR io_queue_init(3),
+.BR io_queue_release(3),
+.BR io_queue_run(3),
+.BR io_queue_wait(3),
+.BR io_set_callback(3),
+.BR errno(3)
diff --git a/tools/libaio/man/lio_listio.3 b/tools/libaio/man/lio_listio.3
new file mode 100644
index 0000000000..9b5b5e4eb5
--- /dev/null
+++ b/tools/libaio/man/lio_listio.3
@@ -0,0 +1,229 @@
+.TH lio_listio 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+lio_listio \- List directed I/O
+.SH SYNOPSIS
+.B #include <errno.h>
+.br
+.B #include <libaio.h>
+.LP
+.BI "int lio_listio (int mode, struct aiocb *const list[], int nent, struct sigevent *sig)"
+.nf
+.SH DESCRIPTION
+
+Besides these functions with the more or less traditional interface,
+POSIX.1b also defines a function which can initiate more than one
+operation at a time, and which can handle freely mixed read and write
+operations. It is therefore similar to a combination of
+.IR readv
+and
+.IR "writev"
+.
+
+The
+.IR "lio_listio"
+function can be used to enqueue an arbitrary
+number of read and write requests at one time. The requests can all be
+meant for the same file, all for different files or every solution in
+between.
+
+.IR "lio_listio"
+gets the
+.IR "nent"
+requests from the array pointed to
+by
+.IR "list"
+. The operation to be performed is determined by the
+.IR "aio_lio_opcode"
+member in each element of
+.IR "list"
+. If this
+field is
+.B "LIO_READ"
+a read operation is enqueued, similar to a call
+of
+.IR "aio_read"
+for this element of the array (except that the way
+the termination is signalled is different, as we will see below). If
+the
+.IR "aio_lio_opcode"
+member is
+.B "LIO_WRITE"
+a write operation
+is enqueued. Otherwise the
+.IR "aio_lio_opcode"
+must be
+.B "LIO_NOP"
+in which case this element of
+.IR "list"
+is simply ignored. This
+``operation'' is useful in situations where one has a fixed array of
+.IR "struct aiocb"
+elements from which only a few need to be handled at
+a time. Another situation is where the
+.IR "lio_listio"
+call was
+canceled before all requests are processed and the remaining requests have to be reissued.
+
+The other members of each element of the array pointed to by
+.IR "list"
+must have values suitable for the operation as described in
+the documentation for
+.IR "aio_read"
+and
+.IR "aio_write"
+above.
+
+The
+.IR "mode"
+argument determines how
+.IR "lio_listio"
+behaves after
+having enqueued all the requests. If
+.IR "mode"
+is
+.B "LIO_WAIT"
+it
+waits until all requests terminated. Otherwise
+.IR "mode"
+must be
+.B "LIO_NOWAIT"
+and in this case the function returns immediately after
+having enqueued all the requests. In this case the caller gets a
+notification of the termination of all requests according to the
+.IR "sig"
+parameter. If
+.IR "sig"
+is
+.B "NULL"
+no notification is
+sent. Otherwise a signal is sent or a thread is started, just as
+described in the description for
+.IR "aio_read"
+or
+.IR "aio_write"
+.
+
+When the sources are compiled with
+.B "_FILE_OFFSET_BITS == 64"
+, this
+function is in fact
+.IR "lio_listio64"
+since the LFS interface
+transparently replaces the normal implementation.
+.SH "RETURN VALUES"
+If
+.IR "mode"
+is
+.B "LIO_WAIT"
+, the return value of
+.IR "lio_listio"
+is
+.IR 0
+when all requests completed successfully. Otherwise the
+function returns
+.IR -1
+and
+.IR "errno"
+is set accordingly. To find
+out which request or requests failed one has to use the
+.IR "aio_error"
+function on all the elements of the array
+.IR "list"
+.
+
+In case
+.IR "mode"
+is
+.B "LIO_NOWAIT"
+, the function returns
+.IR 0
+if
+all requests were enqueued correctly. The current state of the requests
+can be found using
+.IR "aio_error"
+and
+.IR "aio_return"
+as described
+above. If
+.IR "lio_listio"
+returns
+.IR -1
+in this mode, the
+global variable
+.IR "errno"
+is set accordingly. If a request did not
+yet terminate, a call to
+.IR "aio_error"
+returns
+.B "EINPROGRESS"
+. If
+the value is different, the request is finished and the error value (or
+
+.IR 0
+) is returned and the result of the operation can be retrieved
+using
+.IR "aio_return"
+.
+.SH ERRORS
+Possible values for
+.IR "errno"
+are:
+
+.TP
+.B EAGAIN
+The resources necessary to queue all the requests are not available at
+the moment. The error status for each element of
+.IR "list"
+must be
+checked to determine which request failed.
+
+Another reason could be that the system wide limit of AIO requests is
+exceeded. This cannot be the case for the implementation on GNU systems
+since no arbitrary limits exist.
+.TP
+.B EINVAL
+The
+.IR "mode"
+parameter is invalid or
+.IR "nent"
+is larger than
+.B "AIO_LISTIO_MAX"
+.
+.TP
+.B EIO
+One or more of the request's I/O operations failed. The error status of
+each request should be checked to determine which one failed.
+.TP
+.B ENOSYS
+The
+.IR "lio_listio"
+function is not supported.
+.PP
+
+If the
+.IR "mode"
+parameter is
+.B "LIO_NOWAIT"
+and the caller cancels
+a request, the error status for this request returned by
+.IR "aio_error"
+is
+.B "ECANCELED"
+.
+.SH "SEE ALSO"
+.BR aio(3),
+.BR aio_cancel(3),
+.BR aio_cancel64(3),
+.BR aio_error(3),
+.BR aio_error64(3),
+.BR aio_fsync(3),
+.BR aio_fsync64(3),
+.BR aio_init(3),
+.BR aio_read(3),
+.BR aio_read64(3),
+.BR aio_return(3),
+.BR aio_return64(3),
+.BR aio_suspend(3),
+.BR aio_suspend64(3),
+.BR aio_write(3),
+.BR aio_write64(3)
diff --git a/tools/libaio/man/lio_listio64.3 b/tools/libaio/man/lio_listio64.3
new file mode 100644
index 0000000000..97f69556c0
--- /dev/null
+++ b/tools/libaio/man/lio_listio64.3
@@ -0,0 +1,39 @@
+.TH lio_listio64 3 2002-09-12 "Linux 2.4" "Linux AIO"
+.SH NAME
+lio_listio64 \- List directed I/O
+.SH SYNOPSIS
+.B #include <errno.h>
+.br
+.B #include <libaio.h>
+.LP
+.BI "int lio_listio64 (int mode, struct aiocb *const list[], int nent, struct sigevent *sig)"
+.nf
+.SH DESCRIPTION
+This function is similar to the
+.IR "lio_listio"
+function. The only
+difference is that on
+.IR "32 bit"
+machines, the file descriptor should
+be opened in the large file mode. Internally,
+.IR "lio_listio64"
+uses
+functionality equivalent to
+.IR "lseek64"
+to position the file descriptor correctly for the reading or
+writing, as opposed to
+.IR "lseek"
+functionality used in
+.IR "lio_listio" .
+
+When the sources are compiled with
+.IR "_FILE_OFFSET_BITS == 64"
+, this
+function is available under the name
+.IR "lio_listio"
+and so
+transparently replaces the interface for small files on 32 bit
+machines.
+.SH "RETURN VALUES"
+.SH ERRORS
+.SH "SEE ALSO"
diff --git a/tools/libaio/src/Makefile b/tools/libaio/src/Makefile
new file mode 100644
index 0000000000..8d134cc005
--- /dev/null
+++ b/tools/libaio/src/Makefile
@@ -0,0 +1,64 @@
+# Build rules for libaio: a static archive (libaio.a) and a versioned
+# shared object ($(libname)), plus install and clean targets.
+
+# Installation layout; override prefix (or the individual directories) on
+# the make command line to install elsewhere.
+prefix=/usr
+includedir=$(prefix)/include
+libdir=$(prefix)/lib
+
+# Machine name as reported by uname -m, with i.86 (e.g. i686) rewritten
+# to i386.
+ARCH := $(shell uname -m | sed -e s/i.86/i386/)
+CFLAGS := -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
+# .os objects (linked into the shared library) are compiled with
+# SO_CFLAGS, .ol objects (archived into libaio.a) with L_CFLAGS; see the
+# two pattern rules below.
+SO_CFLAGS=-shared $(CFLAGS)
+L_CFLAGS=$(CFLAGS)
+LINK_FLAGS=
+
+# $(soname) is handed to the linker via -soname; $(libname) is the file
+# that is actually built and installed.
+soname=libaio.so.1
+minor=0
+micro=1
+libname=$(soname).$(minor).$(micro)
+all_targets += libaio.a $(libname)
+
+# all, install and clean name actions rather than files.  Declaring them
+# phony keeps them working even if a file with one of those names exists.
+.PHONY: all install clean
+
+all: $(all_targets)
+
+# libaio provided functions
+libaio_srcs := io_queue_init.c io_queue_release.c
+libaio_srcs += io_queue_wait.c io_queue_run.c
+
+# real syscalls
+libaio_srcs += io_getevents.c io_submit.c io_cancel.c
+libaio_srcs += io_setup.c io_destroy.c
+
+# internal functions
+libaio_srcs += raw_syscall.c
+
+# old symbols
+libaio_srcs += compat-0_1.c
+
+libaio_objs := $(patsubst %.c,%.ol,$(libaio_srcs))
+libaio_sobjs := $(patsubst %.c,%.os,$(libaio_srcs))
+
+# Every object, static or shared, is rebuilt when either header changes.
+$(libaio_objs) $(libaio_sobjs): libaio.h vsys_def.h
+
+%.os: %.c
+	$(CC) $(SO_CFLAGS) -c -o $@ $<
+
+%.ol: %.c
+	$(CC) $(L_CFLAGS) -c -o $@ $<
+
+
+# The old archive is removed first so that it is recreated from scratch
+# rather than updated in place.
+libaio.a: $(libaio_objs)
+	rm -f $@
+	ar r $@ $^
+	ranlib $@
+
+# libaio.map is a prerequisite so that a changed map relinks the library,
+# but it reaches the linker through --version-script; only the objects are
+# passed as link inputs, hence $(libaio_sobjs) rather than $^.
+$(libname): $(libaio_sobjs) libaio.map
+	$(CC) $(SO_CFLAGS) -Wl,--version-script=libaio.map -Wl,-soname=$(soname) -o $@ $(libaio_sobjs) $(LINK_FLAGS)
+
+# Installs the header, the static archive and the shared object, then
+# points the $(soname) and libaio.so symlinks at the installed library.
+install: $(all_targets)
+	install -D -m 644 libaio.h $(includedir)/libaio.h
+	install -D -m 644 libaio.a $(libdir)/libaio.a
+	install -D -m 755 $(libname) $(libdir)/$(libname)
+	ln -sf $(libname) $(libdir)/$(soname)
+	ln -sf $(libname) $(libdir)/libaio.so
+
+clean:
+	rm -f $(all_targets) $(libaio_objs) $(libaio_sobjs) $(soname).new
+	rm -f *.so* *.a *.o
diff --git a/tools/libaio/src/compat-0_1.c b/tools/libaio/src/compat-0_1.c
new file mode 100644
index 0000000000..136396f996
--- /dev/null
+++ b/tools/libaio/src/compat-0_1.c
@@ -0,0 +1,62 @@
+/* libaio Linux async I/O interface
+
+ compat-0_1.c : compatibility symbols for libaio 0.1.x-0.3.x
+
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <stdlib.h>
+#include <asm/errno.h>
+
+#include "libaio.h"
+#include "vsys_def.h"
+
+#include "syscall.h"
+
+
+/*
+ * ABI change: the old entry point takes no event argument.  Only partial
+ * compatibility is offered for now -- the event that the three-argument
+ * io_cancel() fills in lands in a local and is not passed back.
+ */
+SYMVER(compat0_1_io_cancel, io_cancel, 0.1);
+int compat0_1_io_cancel(io_context_t ctx, struct iocb *iocb)
+{
+	struct io_event discarded;
+	int rc;
+
+	/* FIXME: the old ABI would return the event on the completion queue */
+	rc = io_cancel(ctx, iocb, &discarded);
+	return rc;
+}
+
+/*
+ * Old-ABI io_queue_wait.  io_getevents() is handed a private copy of the
+ * caller's timespec (or NULL when none was supplied), never *when itself.
+ */
+SYMVER(compat0_1_io_queue_wait, io_queue_wait, 0.1);
+int compat0_1_io_queue_wait(io_context_t ctx, struct timespec *when)
+{
+	struct timespec copy;
+	struct timespec *tmo = NULL;
+
+	if (when) {
+		copy = *when;
+		tmo = &copy;
+	}
+	return io_getevents(ctx, 0, 0, NULL, tmo);
+}
+
+
+/*
+ * ABI change, with backwards compatibility: the old four-argument form
+ * (const timeout, no minimum count) is forwarded to the five-argument
+ * io_getevents() with 1 as the second argument and a writable copy of
+ * the timeout.
+ */
+SYMVER(compat0_1_io_getevents, io_getevents, 0.1);
+int compat0_1_io_getevents(io_context_t ctx_id, long nr,
+			   struct io_event *events,
+			   const struct timespec *const_timeout)
+{
+	struct timespec scratch;
+	struct timespec *tmo = NULL;
+
+	if (const_timeout) {
+		scratch = *const_timeout;
+		tmo = &scratch;
+	}
+	return io_getevents(ctx_id, 1, nr, events, tmo);
+}
+
diff --git a/tools/libaio/src/io_cancel.c b/tools/libaio/src/io_cancel.c
new file mode 100644
index 0000000000..2f0f5f4aa0
--- /dev/null
+++ b/tools/libaio/src/io_cancel.c
@@ -0,0 +1,23 @@
+/* io_cancel.c
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <libaio.h>
+#include "syscall.h"
+
+/*
+ * io_syscall3() (from "syscall.h") defines io_cancel_0_4(ctx, iocb, event) as
+ * a wrapper around the io_cancel system call.  DEFSYMVER presumably exports
+ * that function as io_cancel at symbol version 0.4 -- confirm in syscall.h.
+ */
+io_syscall3(int, io_cancel_0_4, io_cancel, io_context_t, ctx, struct iocb *, iocb, struct io_event *, event)
+DEFSYMVER(io_cancel_0_4, io_cancel, 0.4)
diff --git a/tools/libaio/src/io_destroy.c b/tools/libaio/src/io_destroy.c
new file mode 100644
index 0000000000..0ab6bd1743
--- /dev/null
+++ b/tools/libaio/src/io_destroy.c
@@ -0,0 +1,23 @@
+/* io_destroy
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <errno.h>
+#include <libaio.h>
+#include "syscall.h"
+
+/* Defines io_destroy(ctx) as a direct wrapper around the io_destroy system call. */
+io_syscall1(int, io_destroy, io_destroy, io_context_t, ctx)
diff --git a/tools/libaio/src/io_getevents.c b/tools/libaio/src/io_getevents.c
new file mode 100644
index 0000000000..5a0517402d
--- /dev/null
+++ b/tools/libaio/src/io_getevents.c
@@ -0,0 +1,57 @@
+/* io_getevents.c
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <libaio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <time.h>
+#include "syscall.h"
+
+/*
+ * Raw io_getevents system-call wrapper.  It is defined under a private name;
+ * io_getevents_0_4() in this file calls it after its own user-space check.
+ */
+io_syscall5(int, __io_getevents_0_4, io_getevents, io_context_t, ctx, long, min_nr, long, nr, struct io_event *, events, struct timespec *, timeout)
+
+/* Value expected in aio_ring.magic before the ring header is trusted. */
+#define AIO_RING_MAGIC 0xa10a10a1
+
+/* Ben will hate me for this */
+/*
+ * Header that io_getevents_0_4() reads by casting the io_context_t handle to
+ * this type.  Presumably mirrors the kernel's completion-ring header, so the
+ * field order and sizes must not be changed -- TODO confirm against the kernel.
+ */
+struct aio_ring {
+ unsigned id; /* kernel internal index number */
+ unsigned nr; /* number of io_events */
+ unsigned head; /* head == tail is treated as "no events" by io_getevents_0_4() */
+ unsigned tail;
+
+ unsigned magic; /* compared against AIO_RING_MAGIC */
+ unsigned compat_features;
+ unsigned incompat_features;
+ unsigned header_length; /* size of aio_ring */
+};
+
+/*
+ * io_getevents() as exported at version 0.4.
+ *
+ * Fast path: when ctx points at a header carrying AIO_RING_MAGIC, the caller
+ * supplied a zero timeout, and head == tail, report 0 events without making
+ * the system call.  Every other case goes to the raw system-call wrapper.
+ */
+int io_getevents_0_4(io_context_t ctx, long min_nr, long nr, struct io_event * events, struct timespec * timeout)
+{
+	struct aio_ring *hdr = (struct aio_ring *)ctx;
+
+	/* Tests run left to right, so nothing is dereferenced before its NULL check. */
+	if (hdr != NULL && hdr->magic == AIO_RING_MAGIC &&
+	    timeout != NULL && timeout->tv_sec == 0 && timeout->tv_nsec == 0 &&
+	    hdr->head == hdr->tail)
+		return 0;
+
+	return __io_getevents_0_4(ctx, min_nr, nr, events, timeout);
+}
+
+DEFSYMVER(io_getevents_0_4, io_getevents, 0.4)
diff --git a/tools/libaio/src/io_queue_init.c b/tools/libaio/src/io_queue_init.c
new file mode 100644
index 0000000000..563d1375a4
--- /dev/null
+++ b/tools/libaio/src/io_queue_init.c
@@ -0,0 +1,33 @@
+/* io_queue_init.c
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <libaio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+#include "syscall.h"
+
+/*
+ * Create an AIO context able to hold `maxevents` events.
+ * Returns -EINVAL when maxevents is not positive; otherwise clears *ctxp and
+ * returns whatever io_setup() returns.
+ */
+int io_queue_init(int maxevents, io_context_t *ctxp)
+{
+	if (maxevents <= 0)
+		return -EINVAL;
+
+	/* io_setup() is handed a cleared handle. */
+	*ctxp = NULL;
+	return io_setup(maxevents, ctxp);
+}
diff --git a/tools/libaio/src/io_queue_release.c b/tools/libaio/src/io_queue_release.c
new file mode 100644
index 0000000000..94bbb867a0
--- /dev/null
+++ b/tools/libaio/src/io_queue_release.c
@@ -0,0 +1,27 @@
+/* io_queue_release.c
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <libaio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+/* Release an AIO context; a thin alias that returns the result of io_destroy(). */
+int io_queue_release(io_context_t ctx)
+{
+	int status = io_destroy(ctx);
+
+	return status;
+}
diff --git a/tools/libaio/src/io_queue_run.c b/tools/libaio/src/io_queue_run.c
new file mode 100644
index 0000000000..e0132f4009
--- /dev/null
+++ b/tools/libaio/src/io_queue_run.c
@@ -0,0 +1,39 @@
+/* io_queue_run.c
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <libaio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <time.h>
+
+/*
+ * Drain completed events without waiting.  Events are fetched one at a time
+ * with a zero timeout; for each one the pointer stored in event.data is
+ * invoked as an io_callback_t with the originating iocb and both result
+ * words.  Returns the first io_getevents() result that is not 1.
+ */
+int io_queue_run(io_context_t ctx)
+{
+	static struct timespec no_wait = { 0, 0 };
+	struct io_event ev;
+	int rc;
+
+	/* FIXME: batch requests? */
+	for (;;) {
+		io_callback_t handler;
+
+		rc = io_getevents(ctx, 0, 1, &ev, &no_wait);
+		if (rc != 1)
+			break;
+
+		handler = (io_callback_t)ev.data;
+		handler(ctx, ev.obj, ev.res, ev.res2);
+	}
+
+	return rc;
+}
diff --git a/tools/libaio/src/io_queue_wait.c b/tools/libaio/src/io_queue_wait.c
new file mode 100644
index 0000000000..538d2f3b7b
--- /dev/null
+++ b/tools/libaio/src/io_queue_wait.c
@@ -0,0 +1,31 @@
+/* io_queue_wait.c
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#define NO_SYSCALL_ERRNO
+#include <sys/types.h>
+#include <libaio.h>
+#include <errno.h>
+#include "syscall.h"
+
+struct timespec;
+
+/*
+ * io_queue_wait() as exported at version 0.4: an io_getevents() call that
+ * requests no events (min_nr = 0, nr = 0, no buffer) with the given timeout.
+ */
+int io_queue_wait_0_4(io_context_t ctx, struct timespec *timeout)
+{
+	struct io_event *const no_buffer = NULL;
+	const long none = 0;
+
+	return io_getevents(ctx, none, none, no_buffer, timeout);
+}
+DEFSYMVER(io_queue_wait_0_4, io_queue_wait, 0.4)
diff --git a/tools/libaio/src/io_setup.c b/tools/libaio/src/io_setup.c
new file mode 100644
index 0000000000..4ba1afc993
--- /dev/null
+++ b/tools/libaio/src/io_setup.c
@@ -0,0 +1,23 @@
+/* io_setup
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <errno.h>
+#include <libaio.h>
+#include "syscall.h"
+
+/* Defines io_setup(maxevents, ctxp) as a direct wrapper around the io_setup system call. */
+io_syscall2(int, io_setup, io_setup, int, maxevents, io_context_t *, ctxp)
diff --git a/tools/libaio/src/io_submit.c b/tools/libaio/src/io_submit.c
new file mode 100644
index 0000000000..e22ba54960
--- /dev/null
+++ b/tools/libaio/src/io_submit.c
@@ -0,0 +1,23 @@
+/* io_submit
+ libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <errno.h>
+#include <libaio.h>
+#include "syscall.h"
+
+/* Defines io_submit(ctx, nr, iocbs) as a direct wrapper around the io_submit system call. */
+io_syscall3(int, io_submit, io_submit, io_context_t, ctx, long, nr, struct iocb **, iocbs)
diff --git a/tools/libaio/src/libaio.h b/tools/libaio/src/libaio.h
new file mode 100644
index 0000000000..657460128a
--- /dev/null
+++ b/tools/libaio/src/libaio.h
@@ -0,0 +1,222 @@
+/* /usr/include/libaio.h
+ *
+ * Copyright 2000,2001,2002 Red Hat, Inc.
+ *
+ * Written by Benjamin LaHaise <bcrl@redhat.com>
+ *
+ * libaio Linux async I/O interface
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __LIBAIO_H
+#define __LIBAIO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <string.h>
+
+struct timespec;
+struct sockaddr;
+struct iovec;
+struct iocb;
+
+typedef struct io_context *io_context_t;
+
+/*
+ * Operation codes stored in iocb.aio_lio_opcode (the io_prep_*() helpers in
+ * this header set them).  Values are assigned explicitly; 4 is not used.
+ */
+typedef enum io_iocb_cmd {
+ IO_CMD_PREAD = 0,
+ IO_CMD_PWRITE = 1,
+
+ IO_CMD_FSYNC = 2,
+ IO_CMD_FDSYNC = 3,
+
+ IO_CMD_POLL = 5,
+ IO_CMD_NOOP = 6,
+} io_iocb_cmd_t;
+
+/*
+ * Per-architecture field declarators used by the structures below.
+ *   PADDED(x, y)    declares field x plus a pad word y
+ *   PADDEDptr(x, y) declares pointer field x, padded only on 32-bit targets
+ *   PADDEDul(x, y)  declares `unsigned long x`, padded only on 32-bit targets
+ * Little-endian targets put the pad after the field, big-endian targets put
+ * it before.  Presumably this keeps every such field in a fixed 64-bit slot
+ * shared with the kernel -- TODO confirm.  Unknown targets fail the build.
+ */
+#if defined(__i386__) /* little endian, 32 bits */
+#define PADDED(x, y) x; unsigned y
+#define PADDEDptr(x, y) x; unsigned y
+#define PADDEDul(x, y) unsigned long x; unsigned y
+#elif defined(__ia64__) || defined(__x86_64__) || defined(__alpha__)
+#define PADDED(x, y) x, y
+#define PADDEDptr(x, y) x
+#define PADDEDul(x, y) unsigned long x
+#elif defined(__powerpc64__) /* big endian, 64 bits */
+#define PADDED(x, y) unsigned y; x
+#define PADDEDptr(x,y) x
+#define PADDEDul(x, y) unsigned long x
+#elif defined(__PPC__) /* big endian, 32 bits */
+#define PADDED(x, y) unsigned y; x
+#define PADDEDptr(x, y) unsigned y; x
+#define PADDEDul(x, y) unsigned y; unsigned long x
+#elif defined(__s390x__) /* big endian, 64 bits */
+#define PADDED(x, y) unsigned y; x
+#define PADDEDptr(x,y) x
+#define PADDEDul(x, y) unsigned long x
+#elif defined(__s390__) /* big endian, 32 bits */
+#define PADDED(x, y) unsigned y; x
+#define PADDEDptr(x, y) unsigned y; x
+#define PADDEDul(x, y) unsigned y; unsigned long x
+#else
+#error endian?
+#endif
+
+/*
+ * Per-operation payloads; each one is a member of the union `u` inside
+ * struct iocb.
+ */
+
+/* Payload written by io_prep_poll(): the requested event mask. */
+struct io_iocb_poll {
+ PADDED(int events, __pad1);
+}; /* result code is the set of result flags or -'ve errno */
+
+/* Socket-address payload: address pointer plus its length. */
+struct io_iocb_sockaddr {
+ struct sockaddr *addr;
+ int len;
+}; /* result code is the length of the sockaddr, or -'ve errno */
+
+/* Payload written by io_prep_pread()/io_prep_pwrite(): buffer, byte count, file offset. */
+struct io_iocb_common {
+ PADDEDptr(void *buf, __pad1);
+ PADDEDul(nbytes, __pad2);
+ long long offset;
+ long long __pad3, __pad4;
+}; /* result code is the amount read or -'ve errno */
+
+/* Vectored payload: iovec array, element count, file offset. */
+struct io_iocb_vector {
+ const struct iovec *vec;
+ int nr;
+ long long offset;
+}; /* result code is the amount read or -'ve errno */
+
+/*
+ * One asynchronous I/O request, as passed to io_submit() and io_cancel().
+ * The io_prep_*() helpers zero the whole structure before filling it in;
+ * io_set_callback() stores a callback pointer in `data`.
+ */
+struct iocb {
+ PADDEDptr(void *data, __pad1); /* Return in the io completion event */
+ PADDED(unsigned key, __pad2); /* For use in identifying io requests */
+
+ short aio_lio_opcode; /* one of the IO_CMD_* values */
+ short aio_reqprio; /* set to 0 by every io_prep_*() helper */
+ int aio_fildes; /* file descriptor the request applies to */
+
+ /* Which member is meaningful depends on the operation being prepared. */
+ union {
+ struct io_iocb_common c;
+ struct io_iocb_vector v;
+ struct io_iocb_poll poll;
+ struct io_iocb_sockaddr saddr;
+ } u;
+};
+
+/*
+ * One completion record, filled in through io_getevents()/io_cancel().
+ * io_queue_run() treats `data` as the io_callback_t to invoke and passes it
+ * `obj`, `res` and `res2`.
+ */
+struct io_event {
+ PADDEDptr(void *data, __pad1);
+ PADDEDptr(struct iocb *obj, __pad2);
+ PADDEDul(res, __pad3);
+ PADDEDul(res2, __pad4);
+};
+
+#undef PADDED
+#undef PADDEDptr
+#undef PADDEDul
+
+/*
+ * Completion callback type.  io_queue_run() calls it with the context, the
+ * iocb taken from the event's `obj`, and the event's `res` and `res2`.
+ */
+typedef void (*io_callback_t)(io_context_t ctx, struct iocb *iocb, long res, long res2);
+
+/* library wrappers */
+extern int io_queue_init(int maxevents, io_context_t *ctxp);
+/*extern int io_queue_grow(io_context_t ctx, int new_maxevents);*/
+extern int io_queue_release(io_context_t ctx);
+/*extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);*/
+extern int io_queue_run(io_context_t ctx);
+
+/* Actual syscalls */
+extern int io_setup(int maxevents, io_context_t *ctxp);
+extern int io_destroy(io_context_t ctx);
+extern int io_submit(io_context_t ctx, long nr, struct iocb *ios[]);
+extern int io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *evt);
+extern int io_getevents(io_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout);
+
+
+/* Store the completion callback in iocb->data, converted to a void pointer. */
+static inline void io_set_callback(struct iocb *iocb, io_callback_t cb)
+{
+	void *opaque = (void *)cb;
+
+	iocb->data = opaque;
+}
+
+/*
+ * Zero *iocb and fill it in as an IO_CMD_PREAD request: `count` bytes into
+ * `buf` from descriptor `fd` at file position `offset`.
+ */
+static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
+{
+	memset(iocb, 0, sizeof(struct iocb));
+
+	/* request header */
+	iocb->aio_lio_opcode = IO_CMD_PREAD;
+	iocb->aio_reqprio = 0;
+	iocb->aio_fildes = fd;
+
+	/* transfer description */
+	iocb->u.c.offset = offset;
+	iocb->u.c.nbytes = count;
+	iocb->u.c.buf = buf;
+}
+
+/*
+ * Zero *iocb and fill it in as an IO_CMD_PWRITE request: `count` bytes from
+ * `buf` to descriptor `fd` at file position `offset`.
+ */
+static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
+{
+	memset(iocb, 0, sizeof(struct iocb));
+
+	/* request header */
+	iocb->aio_lio_opcode = IO_CMD_PWRITE;
+	iocb->aio_reqprio = 0;
+	iocb->aio_fildes = fd;
+
+	/* transfer description */
+	iocb->u.c.offset = offset;
+	iocb->u.c.nbytes = count;
+	iocb->u.c.buf = buf;
+}
+
+/* Zero *iocb and fill it in as an IO_CMD_POLL request on `fd` for `events`. */
+static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
+{
+	memset(iocb, 0, sizeof(struct iocb));
+
+	iocb->aio_lio_opcode = IO_CMD_POLL;
+	iocb->aio_reqprio = 0;
+	iocb->aio_fildes = fd;
+	iocb->u.poll.events = events;
+}
+
+/*
+ * Prepare a poll request on `fd`, attach callback `cb`, and submit it as a
+ * one-element batch.  Returns the io_submit() result.
+ */
+static inline int io_poll(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd, int events)
+{
+	struct iocb *batch[1];
+
+	io_prep_poll(iocb, fd, events);
+	io_set_callback(iocb, cb);
+
+	batch[0] = iocb;
+	return io_submit(ctx, 1, batch);
+}
+
+/* Zero *iocb and fill it in as an IO_CMD_FSYNC request on `fd`. */
+static inline void io_prep_fsync(struct iocb *iocb, int fd)
+{
+	memset(iocb, 0, sizeof(struct iocb));
+
+	iocb->aio_lio_opcode = IO_CMD_FSYNC;
+	iocb->aio_reqprio = 0;
+	iocb->aio_fildes = fd;
+}
+
+/*
+ * Prepare an fsync request on `fd`, attach callback `cb`, and submit it as a
+ * one-element batch.  Returns the io_submit() result.
+ */
+static inline int io_fsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)
+{
+	struct iocb *batch[1];
+
+	io_prep_fsync(iocb, fd);
+	io_set_callback(iocb, cb);
+
+	batch[0] = iocb;
+	return io_submit(ctx, 1, batch);
+}
+
+/* Zero *iocb and fill it in as an IO_CMD_FDSYNC request on `fd`. */
+static inline void io_prep_fdsync(struct iocb *iocb, int fd)
+{
+	memset(iocb, 0, sizeof(struct iocb));
+
+	iocb->aio_lio_opcode = IO_CMD_FDSYNC;
+	iocb->aio_reqprio = 0;
+	iocb->aio_fildes = fd;
+}
+
+/*
+ * Prepare an fdsync request on `fd`, attach callback `cb`, and submit it as
+ * a one-element batch.  Returns the io_submit() result.
+ */
+static inline int io_fdsync(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd)
+{
+	struct iocb *batch[1];
+
+	io_prep_fdsync(iocb, fd);
+	io_set_callback(iocb, cb);
+
+	batch[0] = iocb;
+	return io_submit(ctx, 1, batch);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __LIBAIO_H */
diff --git a/tools/libaio/src/libaio.map b/tools/libaio/src/libaio.map
new file mode 100644
index 0000000000..dc37725960
--- /dev/null
+++ b/tools/libaio/src/libaio.map
@@ -0,0 +1,22 @@
+# Symbols exported at version LIBAIO_0.1; every other symbol is made local.
+LIBAIO_0.1 {
+ global:
+ io_queue_init;
+ io_queue_run;
+ io_queue_wait;
+ io_queue_release;
+ io_cancel;
+ io_submit;
+ io_getevents;
+ local:
+ *;
+
+};
+
+# LIBAIO_0.4 inherits from LIBAIO_0.1.  io_cancel, io_getevents and
+# io_queue_wait appear in both nodes: the C sources bind a separate
+# implementation to each version through SYMVER (0.1) and DEFSYMVER (0.4).
+LIBAIO_0.4 {
+ global:
+ io_setup;
+ io_destroy;
+ io_cancel;
+ io_getevents;
+ io_queue_wait;
+} LIBAIO_0.1;
diff --git a/tools/libaio/src/raw_syscall.c b/tools/libaio/src/raw_syscall.c
new file mode 100644
index 0000000000..3c8d7fa6d9
--- /dev/null
+++ b/tools/libaio/src/raw_syscall.c
@@ -0,0 +1,18 @@
+#include "syscall.h"
+
+/*
+ * ia64 only: emit the global assembly routine __ia64_aio_raw_syscall.
+ * It copies r32 into r15, executes `break 0x100000`, and returns through b0.
+ * NOTE(review): the ";;" string has no trailing "\n", so it is concatenated
+ * with the br.ret line that follows; there is also an .endp without a
+ * matching .proc -- confirm both are intended.
+ */
+#if defined(__ia64__)
+/* based on code from glibc by Jes Sorensen */
+__asm__(".text\n"
+ ".globl __ia64_aio_raw_syscall\n"
+ "__ia64_aio_raw_syscall:\n"
+ "alloc r2=ar.pfs,1,0,8,0\n"
+ "mov r15=r32\n"
+ "break 0x100000\n"
+ ";;"
+ "br.ret.sptk.few b0\n"
+ ".size __ia64_aio_raw_syscall, . - __ia64_aio_raw_syscall\n"
+ ".endp __ia64_aio_raw_syscall"
+);
+#endif
+
+;
diff --git a/tools/libaio/src/syscall-alpha.h b/tools/libaio/src/syscall-alpha.h
new file mode 100644
index 0000000000..467b74f07e
--- /dev/null
+++ b/tools/libaio/src/syscall-alpha.h
@@ -0,0 +1,209 @@
+#define __NR_io_setup 398
+#define __NR_io_destroy 399
+#define __NR_io_getevents 400
+#define __NR_io_submit 401
+#define __NR_io_cancel 402
+
+/*
+ * Shared pieces for the inline_syscallN() macros:
+ *   inline_syscall_r0_asm            - register binding for _sc_0 (empty here)
+ *   inline_syscall_r0_out_constraint - output constraint used for _sc_0
+ *   inline_syscall_clobbers          - registers and "memory" listed as
+ *                                      clobbered by every variant
+ */
+#define inline_syscall_r0_asm
+#define inline_syscall_r0_out_constraint "=v"
+
+#define inline_syscall_clobbers \
+ "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", \
+ "$22", "$23", "$24", "$25", "$27", "$28", "memory"
+
+/*
+ * Issue `callsys` with no arguments.  The expansion site must declare
+ * _sc_ret and _sc_err (INLINE_SYSCALL1 does).  The call number goes in and
+ * the result comes back through _sc_0; $19 is copied into _sc_err.  The
+ * unused argument registers $16-$18, $20 and $21 are listed as clobbered.
+ */
+#define inline_syscall0(name, args...) \
+{ \
+ register long _sc_0 inline_syscall_r0_asm; \
+ register long _sc_19 __asm__("$19"); \
+ \
+ _sc_0 = name; \
+ __asm__ __volatile__ \
+ ("callsys # %0 %1 <= %2" \
+ : inline_syscall_r0_out_constraint (_sc_0), \
+ "=r"(_sc_19) \
+ : "0"(_sc_0) \
+ : inline_syscall_clobbers, \
+ "$16", "$17", "$18", "$20", "$21"); \
+ _sc_ret = _sc_0, _sc_err = _sc_19; \
+}
+
+/*
+ * Issue `callsys` with one argument, passed in $16 as an in/out operand.
+ * Result goes to _sc_ret and $19 to _sc_err (both declared by the expansion
+ * site); $17, $18, $20 and $21 are listed as clobbered.
+ */
+#define inline_syscall1(name,arg1) \
+{ \
+ register long _sc_0 inline_syscall_r0_asm; \
+ register long _sc_16 __asm__("$16"); \
+ register long _sc_19 __asm__("$19"); \
+ \
+ _sc_0 = name; \
+ _sc_16 = (long) (arg1); \
+ __asm__ __volatile__ \
+ ("callsys # %0 %1 <= %2 %3" \
+ : inline_syscall_r0_out_constraint (_sc_0), \
+ "=r"(_sc_19), "=r"(_sc_16) \
+ : "0"(_sc_0), "2"(_sc_16) \
+ : inline_syscall_clobbers, \
+ "$17", "$18", "$20", "$21"); \
+ _sc_ret = _sc_0, _sc_err = _sc_19; \
+}
+
+/*
+ * Issue `callsys` with two arguments, passed in $16 and $17 as in/out
+ * operands.  Result goes to _sc_ret and $19 to _sc_err; $18, $20 and $21 are
+ * listed as clobbered.
+ */
+#define inline_syscall2(name,arg1,arg2) \
+{ \
+ register long _sc_0 inline_syscall_r0_asm; \
+ register long _sc_16 __asm__("$16"); \
+ register long _sc_17 __asm__("$17"); \
+ register long _sc_19 __asm__("$19"); \
+ \
+ _sc_0 = name; \
+ _sc_16 = (long) (arg1); \
+ _sc_17 = (long) (arg2); \
+ __asm__ __volatile__ \
+ ("callsys # %0 %1 <= %2 %3 %4" \
+ : inline_syscall_r0_out_constraint (_sc_0), \
+ "=r"(_sc_19), "=r"(_sc_16), "=r"(_sc_17) \
+ : "0"(_sc_0), "2"(_sc_16), "3"(_sc_17) \
+ : inline_syscall_clobbers, \
+ "$18", "$20", "$21"); \
+ _sc_ret = _sc_0, _sc_err = _sc_19; \
+}
+
+/*
+ * Issue `callsys` with three arguments, passed in $16-$18 as in/out
+ * operands.  Result goes to _sc_ret and $19 to _sc_err; $20 and $21 are
+ * listed as clobbered.
+ */
+#define inline_syscall3(name,arg1,arg2,arg3) \
+{ \
+ register long _sc_0 inline_syscall_r0_asm; \
+ register long _sc_16 __asm__("$16"); \
+ register long _sc_17 __asm__("$17"); \
+ register long _sc_18 __asm__("$18"); \
+ register long _sc_19 __asm__("$19"); \
+ \
+ _sc_0 = name; \
+ _sc_16 = (long) (arg1); \
+ _sc_17 = (long) (arg2); \
+ _sc_18 = (long) (arg3); \
+ __asm__ __volatile__ \
+ ("callsys # %0 %1 <= %2 %3 %4 %5" \
+ : inline_syscall_r0_out_constraint (_sc_0), \
+ "=r"(_sc_19), "=r"(_sc_16), "=r"(_sc_17), \
+ "=r"(_sc_18) \
+ : "0"(_sc_0), "2"(_sc_16), "3"(_sc_17), \
+ "4"(_sc_18) \
+ : inline_syscall_clobbers, "$20", "$21"); \
+ _sc_ret = _sc_0, _sc_err = _sc_19; \
+}
+
+/*
+ * Issue `callsys` with four arguments in $16-$19.  $19 is both the fourth
+ * argument on entry and the source of _sc_err on exit, so it is tied to
+ * output operand 1.  $20 and $21 are listed as clobbered.
+ */
+#define inline_syscall4(name,arg1,arg2,arg3,arg4) \
+{ \
+ register long _sc_0 inline_syscall_r0_asm; \
+ register long _sc_16 __asm__("$16"); \
+ register long _sc_17 __asm__("$17"); \
+ register long _sc_18 __asm__("$18"); \
+ register long _sc_19 __asm__("$19"); \
+ \
+ _sc_0 = name; \
+ _sc_16 = (long) (arg1); \
+ _sc_17 = (long) (arg2); \
+ _sc_18 = (long) (arg3); \
+ _sc_19 = (long) (arg4); \
+ __asm__ __volatile__ \
+ ("callsys # %0 %1 <= %2 %3 %4 %5 %6" \
+ : inline_syscall_r0_out_constraint (_sc_0), \
+ "=r"(_sc_19), "=r"(_sc_16), "=r"(_sc_17), \
+ "=r"(_sc_18) \
+ : "0"(_sc_0), "2"(_sc_16), "3"(_sc_17), \
+ "4"(_sc_18), "1"(_sc_19) \
+ : inline_syscall_clobbers, "$20", "$21"); \
+ _sc_ret = _sc_0, _sc_err = _sc_19; \
+}
+
+/*
+ * Issue `callsys` with five arguments in $16-$20.  $19 doubles as the fourth
+ * argument on entry and the source of _sc_err on exit.  $21 is listed as
+ * clobbered.
+ */
+#define inline_syscall5(name,arg1,arg2,arg3,arg4,arg5) \
+{ \
+ register long _sc_0 inline_syscall_r0_asm; \
+ register long _sc_16 __asm__("$16"); \
+ register long _sc_17 __asm__("$17"); \
+ register long _sc_18 __asm__("$18"); \
+ register long _sc_19 __asm__("$19"); \
+ register long _sc_20 __asm__("$20"); \
+ \
+ _sc_0 = name; \
+ _sc_16 = (long) (arg1); \
+ _sc_17 = (long) (arg2); \
+ _sc_18 = (long) (arg3); \
+ _sc_19 = (long) (arg4); \
+ _sc_20 = (long) (arg5); \
+ __asm__ __volatile__ \
+ ("callsys # %0 %1 <= %2 %3 %4 %5 %6 %7" \
+ : inline_syscall_r0_out_constraint (_sc_0), \
+ "=r"(_sc_19), "=r"(_sc_16), "=r"(_sc_17), \
+ "=r"(_sc_18), "=r"(_sc_20) \
+ : "0"(_sc_0), "2"(_sc_16), "3"(_sc_17), \
+ "4"(_sc_18), "1"(_sc_19), "5"(_sc_20) \
+ : inline_syscall_clobbers, "$21"); \
+ _sc_ret = _sc_0, _sc_err = _sc_19; \
+}
+
+/*
+ * Issue `callsys` with six arguments in $16-$21.  $19 doubles as the fourth
+ * argument on entry and the source of _sc_err on exit.  Only the common
+ * clobber list applies, since every argument register is an operand.
+ */
+#define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) \
+{ \
+ register long _sc_0 inline_syscall_r0_asm; \
+ register long _sc_16 __asm__("$16"); \
+ register long _sc_17 __asm__("$17"); \
+ register long _sc_18 __asm__("$18"); \
+ register long _sc_19 __asm__("$19"); \
+ register long _sc_20 __asm__("$20"); \
+ register long _sc_21 __asm__("$21"); \
+ \
+ _sc_0 = name; \
+ _sc_16 = (long) (arg1); \
+ _sc_17 = (long) (arg2); \
+ _sc_18 = (long) (arg3); \
+ _sc_19 = (long) (arg4); \
+ _sc_20 = (long) (arg5); \
+ _sc_21 = (long) (arg6); \
+ __asm__ __volatile__ \
+ ("callsys # %0 %1 <= %2 %3 %4 %5 %6 %7 %8" \
+ : inline_syscall_r0_out_constraint (_sc_0), \
+ "=r"(_sc_19), "=r"(_sc_16), "=r"(_sc_17), \
+ "=r"(_sc_18), "=r"(_sc_20), "=r"(_sc_21) \
+ : "0"(_sc_0), "2"(_sc_16), "3"(_sc_17), "4"(_sc_18), \
+ "1"(_sc_19), "5"(_sc_20), "6"(_sc_21) \
+ : inline_syscall_clobbers); \
+ _sc_ret = _sc_0, _sc_err = _sc_19; \
+}
+
+/*
+ * Statement expression that performs system call __NR_<name> through
+ * inline_syscall<nr>() and yields its result as a long.  When the error
+ * indicator is non-zero the result is negated before being yielded.
+ */
+#define INLINE_SYSCALL1(name, nr, args...)			\
+({								\
+	long _sc_ret, _sc_err;					\
+	inline_syscall##nr(__NR_##name, args);			\
+	(_sc_err != 0) ? -(_sc_ret) : _sc_ret;			\
+})
+
+/*
+ * io_syscallN(type, fname, sname, type1, arg1, ...): define a function
+ * `type fname(type1 arg1, ...)` whose body performs system call __NR_<sname>
+ * with N arguments through INLINE_SYSCALL1 and returns the result cast to
+ * `type`.
+ */
+#define io_syscall1(type,fname,sname,type1,arg1) \
+type fname(type1 arg1) \
+{ \
+ return (type)INLINE_SYSCALL1(sname, 1, arg1); \
+}
+
+#define io_syscall2(type,fname,sname,type1,arg1,type2,arg2) \
+type fname(type1 arg1,type2 arg2) \
+{ \
+ return (type)INLINE_SYSCALL1(sname, 2, arg1, arg2); \
+}
+
+#define io_syscall3(type,fname,sname,type1,arg1,type2,arg2,type3,arg3) \
+type fname(type1 arg1,type2 arg2,type3 arg3) \
+{ \
+ return (type)INLINE_SYSCALL1(sname, 3, arg1, arg2, arg3); \
+}
+
+#define io_syscall4(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+type fname (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+{ \
+ return (type)INLINE_SYSCALL1(sname, 4, arg1, arg2, arg3, arg4); \
+}
+
+#define io_syscall5(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+type fname (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \
+{ \
+ return (type)INLINE_SYSCALL1(sname, 5, arg1, arg2, arg3, arg4, arg5);\
+}
diff --git a/tools/libaio/src/syscall-i386.h b/tools/libaio/src/syscall-i386.h
new file mode 100644
index 0000000000..9576975a19
--- /dev/null
+++ b/tools/libaio/src/syscall-i386.h
@@ -0,0 +1,72 @@
+#define __NR_io_setup 245
+#define __NR_io_destroy 246
+#define __NR_io_getevents 247
+#define __NR_io_submit 248
+#define __NR_io_cancel 249
+
+/*
+ * Define `type fname(type1 arg1)` that issues system call __NR_<sname> via
+ * `int $0x80` and returns the value left in %eax.  arg1 is loaded into %edi
+ * and swapped into %ebx around the trap, so %ebx holds its original value
+ * again afterwards (presumably because %ebx is reserved in PIC code).
+ * NOTE(review): no "memory" clobber is declared -- confirm that is acceptable.
+ */
+#define io_syscall1(type,fname,sname,type1,arg1) \
+type fname(type1 arg1) \
+{ \
+long __res; \
+__asm__ volatile ("xchgl %%edi,%%ebx\n" \
+ "int $0x80\n" \
+ "xchgl %%edi,%%ebx" \
+ : "=a" (__res) \
+ : "0" (__NR_##sname),"D" ((long)(arg1))); \
+return __res; \
+}
+
+/*
+ * Two-argument form: arg1 travels in %edi (swapped into %ebx around the
+ * trap), arg2 in %ecx.  Returns the value left in %eax by `int $0x80`.
+ */
+#define io_syscall2(type,fname,sname,type1,arg1,type2,arg2) \
+type fname(type1 arg1,type2 arg2) \
+{ \
+long __res; \
+__asm__ volatile ("xchgl %%edi,%%ebx\n" \
+ "int $0x80\n" \
+ "xchgl %%edi,%%ebx" \
+ : "=a" (__res) \
+ : "0" (__NR_##sname),"D" ((long)(arg1)),"c" ((long)(arg2))); \
+return __res; \
+}
+
+/*
+ * Three-argument form: arg1 in %edi (swapped into %ebx around the trap),
+ * arg2 in %ecx, arg3 in %edx.  Returns the value left in %eax.
+ */
+#define io_syscall3(type,fname,sname,type1,arg1,type2,arg2,type3,arg3) \
+type fname(type1 arg1,type2 arg2,type3 arg3) \
+{ \
+long __res; \
+__asm__ volatile ("xchgl %%edi,%%ebx\n" \
+ "int $0x80\n" \
+ "xchgl %%edi,%%ebx" \
+ : "=a" (__res) \
+ : "0" (__NR_##sname),"D" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3))); \
+return __res; \
+}
+
+/*
+ * Four-argument form: arg1 in %edi (swapped into %ebx around the trap),
+ * arg2 in %ecx, arg3 in %edx, arg4 in %esi.  Returns the value left in %eax.
+ */
+#define io_syscall4(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+type fname (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+{ \
+long __res; \
+__asm__ volatile ("xchgl %%edi,%%ebx\n" \
+ "int $0x80\n" \
+ "xchgl %%edi,%%ebx" \
+ : "=a" (__res) \
+ : "0" (__NR_##sname),"D" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3)),"S" ((long)(arg4))); \
+return __res; \
+}
+
+/*
+ * Five-argument form.  %edi carries arg5 here, so the xchgl trick of the
+ * shorter forms is not used: %ebx is saved into the memory operand `tmp`
+ * (%7), loaded with arg1 (%2), and restored from `tmp` after the trap.
+ * arg2..arg4 travel in %ecx, %edx and %esi.  Returns the value left in %eax.
+ * NOTE(review): `tmp` is written by the asm but is declared as an input
+ * ("m") operand, and no "memory" clobber is given -- confirm against the
+ * GCC extended-asm rules.
+ */
+#define io_syscall5(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+type fname (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \
+{ \
+long __res; \
+long tmp; \
+__asm__ volatile ("movl %%ebx,%7\n" \
+ "movl %2,%%ebx\n" \
+ "int $0x80\n" \
+ "movl %7,%%ebx" \
+ : "=a" (__res) \
+ : "0" (__NR_##sname),"rm" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \
+ "m" (tmp)); \
+return __res; \
+}
diff --git a/tools/libaio/src/syscall-ia64.h b/tools/libaio/src/syscall-ia64.h
new file mode 100644
index 0000000000..2f6a01a4a9
--- /dev/null
+++ b/tools/libaio/src/syscall-ia64.h
@@ -0,0 +1,44 @@
+#define __NR_io_setup 1238
+#define __NR_io_destroy 1239
+#define __NR_io_getevents 1240
+#define __NR_io_submit 1241
+#define __NR_io_cancel 1242
+
+/*
+ * Emit a global assembly routine named <fname> that loads the system call
+ * number __NR_<sname> into r15 and executes `break 0x100000`.  When r10 is
+ * -1 afterwards, r8 is replaced by 0 - r8 before returning through b0.
+ * SYMSTR is not defined in this header; presumably it stringifies its
+ * argument after macro expansion -- confirm in syscall.h.
+ */
+#define __ia64_raw_syscall(fname, sname) \
+ __asm__ (".text\n" \
+ ".globl " SYMSTR(fname) "\n" \
+ SYMSTR(fname) ":\n" \
+ " mov r15=" SYMSTR( __NR_ ## sname ) "\n" \
+ " break 0x100000\n" \
+ " ;;\n" \
+ " cmp.eq p6,p0=-1,r10\n" \
+ " ;;\n" \
+ " (p6) sub r8=0,r8\n" \
+ " br.ret.sptk.few b0\n" \
+ ".size " SYMSTR(fname) ", . - " SYMSTR(fname) "\n" \
+ ".endp " SYMSTR(fname) "\n" \
+ );
+
+/*
+ * Zero-argument variant: declare `type name(void)` and emit its assembly
+ * body.  Unlike io_syscall1..5 this macro takes a single name, which serves
+ * as both the function symbol and the __NR_ suffix, so it is passed twice:
+ * __ia64_raw_syscall() requires (fname, sname), and calling it with one
+ * argument made every use of io_syscall0 fail to preprocess.
+ */
+#define io_syscall0(type, name) \
+	extern type name(void); \
+	__ia64_raw_syscall(name, name);
+
+/*
+ * io_syscallN(type, fname, sname, type1, arg1, ...): declare the C prototype
+ * `type fname(type1 arg1, ...)` and emit the assembly body for it with
+ * __ia64_raw_syscall(fname, sname).  The same assembly stub is emitted for
+ * every N; the argument list only shapes the prototype.
+ */
+#define io_syscall1(type, fname, sname, type1, arg1) \
+ extern type fname(type1 arg1); \
+ __ia64_raw_syscall(fname, sname);
+
+#define io_syscall2(type, fname, sname, type1, arg1, type2, arg2) \
+ extern type fname(type1 arg1, type2 arg2); \
+ __ia64_raw_syscall(fname, sname);
+
+#define io_syscall3(type, fname, sname, type1, arg1, type2, arg2, type3, arg3) \
+ extern type fname(type1 arg1, type2 arg2, type3 arg3); \
+ __ia64_raw_syscall(fname, sname);
+
+#define io_syscall4(type, fname, sname, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \
+ extern type fname(type1 arg1, type2 arg2, type3 arg3, type4 arg4); \
+ __ia64_raw_syscall(fname, sname);
+
+#define io_syscall5(type, fname, sname, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \
+ extern type fname(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5); \
+ __ia64_raw_syscall(fname, sname);
diff --git a/tools/libaio/src/syscall-ppc.h b/tools/libaio/src/syscall-ppc.h
new file mode 100644
index 0000000000..ca70dd2092
--- /dev/null
+++ b/tools/libaio/src/syscall-ppc.h
@@ -0,0 +1,94 @@
+#define __NR_io_setup 227
+#define __NR_io_destroy 228
+#define __NR_io_getevents 229
+#define __NR_io_submit 230
+#define __NR_io_cancel 231
+
+/* On powerpc a system call basically clobbers the same registers like a
+ * function call, with the exception of LR (which is needed for the
+ * "sc; bnslr" sequence) and CR (where only CR0.SO is clobbered to signal
+ * an error return status).
+ */
+
+/*
+ * Function-body template used by the io_syscallN() macros in this header.
+ * __sc_loadargs_<nr> puts the call number in r0 and the arguments in r3
+ * onwards; after `sc`, `mfcr` copies the condition register into r0.
+ * If bit 0x10000000 of that value is set, the body returns r3 negated (as an
+ * int); otherwise it returns r3 cast to `type`.  The expansion ends in a
+ * `return` without a semicolon, which the caller supplies.
+ */
+#define __syscall_nr(nr, type, name, args...) \
+ unsigned long __sc_ret, __sc_err; \
+ { \
+ register unsigned long __sc_0 __asm__ ("r0"); \
+ register unsigned long __sc_3 __asm__ ("r3"); \
+ register unsigned long __sc_4 __asm__ ("r4"); \
+ register unsigned long __sc_5 __asm__ ("r5"); \
+ register unsigned long __sc_6 __asm__ ("r6"); \
+ register unsigned long __sc_7 __asm__ ("r7"); \
+ register unsigned long __sc_8 __asm__ ("r8"); \
+ \
+ __sc_loadargs_##nr(name, args); \
+ __asm__ __volatile__ \
+ ("sc \n\t" \
+ "mfcr %0 " \
+ : "=&r" (__sc_0), \
+ "=&r" (__sc_3), "=&r" (__sc_4), \
+ "=&r" (__sc_5), "=&r" (__sc_6), \
+ "=&r" (__sc_7), "=&r" (__sc_8) \
+ : __sc_asm_input_##nr \
+ : "cr0", "ctr", "memory", \
+ "r9", "r10","r11", "r12"); \
+ __sc_ret = __sc_3; \
+ __sc_err = __sc_0; \
+ } \
+ if (__sc_err & 0x10000000) return -((int)__sc_ret); \
+ return (type) __sc_ret
+
+/*
+ * __sc_loadargs_N(name, arg1..argN): statements that load the call number
+ * __NR_<name> into __sc_0 and argument k into the register variable bound to
+ * r(k+2).  Each level expands the previous one and adds one assignment.
+ */
+#define __sc_loadargs_0(name, dummy...) \
+ __sc_0 = __NR_##name
+#define __sc_loadargs_1(name, arg1) \
+ __sc_loadargs_0(name); \
+ __sc_3 = (unsigned long) (arg1)
+#define __sc_loadargs_2(name, arg1, arg2) \
+ __sc_loadargs_1(name, arg1); \
+ __sc_4 = (unsigned long) (arg2)
+#define __sc_loadargs_3(name, arg1, arg2, arg3) \
+ __sc_loadargs_2(name, arg1, arg2); \
+ __sc_5 = (unsigned long) (arg3)
+#define __sc_loadargs_4(name, arg1, arg2, arg3, arg4) \
+ __sc_loadargs_3(name, arg1, arg2, arg3); \
+ __sc_6 = (unsigned long) (arg4)
+#define __sc_loadargs_5(name, arg1, arg2, arg3, arg4, arg5) \
+ __sc_loadargs_4(name, arg1, arg2, arg3, arg4); \
+ __sc_7 = (unsigned long) (arg5)
+
+/*
+ * __sc_asm_input_N: asm input-operand list for an N-argument call.  Each
+ * entry ties a register variable to the output operand of the same index in
+ * __syscall_nr ("0" is __sc_0, "1" is __sc_3, and so on).
+ */
+#define __sc_asm_input_0 "0" (__sc_0)
+#define __sc_asm_input_1 __sc_asm_input_0, "1" (__sc_3)
+#define __sc_asm_input_2 __sc_asm_input_1, "2" (__sc_4)
+#define __sc_asm_input_3 __sc_asm_input_2, "3" (__sc_5)
+#define __sc_asm_input_4 __sc_asm_input_3, "4" (__sc_6)
+#define __sc_asm_input_5 __sc_asm_input_4, "5" (__sc_7)
+
+/*
+ * io_syscallN(type, fname, sname, type1, arg1, ...): define a function
+ * `type fname(type1 arg1, ...)` whose body is __syscall_nr(N, type, sname,
+ * args); the trailing `;` completes that macro's final return statement.
+ */
+#define io_syscall1(type,fname,sname,type1,arg1) \
+type fname(type1 arg1) \
+{ \
+ __syscall_nr(1, type, sname, arg1); \
+}
+
+#define io_syscall2(type,fname,sname,type1,arg1,type2,arg2) \
+type fname(type1 arg1, type2 arg2) \
+{ \
+ __syscall_nr(2, type, sname, arg1, arg2); \
+}
+
+#define io_syscall3(type,fname,sname,type1,arg1,type2,arg2,type3,arg3) \
+type fname(type1 arg1, type2 arg2, type3 arg3) \
+{ \
+ __syscall_nr(3, type, sname, arg1, arg2, arg3); \
+}
+
+#define io_syscall4(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+type fname(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+{ \
+ __syscall_nr(4, type, sname, arg1, arg2, arg3, arg4); \
+}
+
+#define io_syscall5(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \
+type fname(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
+{ \
+ __syscall_nr(5, type, sname, arg1, arg2, arg3, arg4, arg5); \
+}
diff --git a/tools/libaio/src/syscall-s390.h b/tools/libaio/src/syscall-s390.h
new file mode 100644
index 0000000000..3ec5ee34ee
--- /dev/null
+++ b/tools/libaio/src/syscall-s390.h
@@ -0,0 +1,131 @@
+#define __NR_io_setup 243
+#define __NR_io_destroy 244
+#define __NR_io_getevents 245
+#define __NR_io_submit 246
+#define __NR_io_cancel 247
+
+#define io_svc_clobber "1", "cc", "memory" /* clobber list shared by the io_syscallN asm statements below: register 1 (written by the "la %%r1" fallback path), the condition code, and memory */
+
+#define io_syscall1(type,fname,sname,type1,arg1) /* defines type fname(type1 arg1): one-argument svc wrapper for syscall number __NR_<sname> */ \
+type fname(type1 arg1) { \
+ register type1 __arg1 asm("2") = arg1; /* the argument is pinned to register 2 */ \
+ register long __svcres asm("2"); /* the result is read back from the same register 2 */ \
+ long __res; \
+ __asm__ __volatile__ ( \
+ " .if %1 < 256\n" /* number below 256: encode it directly in the svc instruction */ \
+ " svc %b1\n" \
+ " .else\n" /* otherwise: put the number in r1 and issue svc 0 */ \
+ " la %%r1,%1\n" \
+ " svc 0\n" /* was ".svc 0", which is not an instruction; now identical to io_syscall2..5 */ \
+ " .endif" \
+ : "=d" (__svcres) \
+ : "i" (__NR_##sname), /* operand %1: the syscall number as an immediate */ \
+ "0" (__arg1) /* matching constraint: shares operand 0's register */ \
+ : io_svc_clobber ); \
+ __res = __svcres; \
+ return (type) __res; /* raw result converted to the declared return type */ \
+}
+
+#define io_syscall2(type,fname,sname,type1,arg1,type2,arg2) /* defines type fname(type1 arg1, type2 arg2): two-argument svc wrapper for __NR_<sname> */ \
+type fname(type1 arg1, type2 arg2) { \
+ register type1 __arg1 asm("2") = arg1; /* arguments are pinned to registers 2 and 3 */ \
+ register type2 __arg2 asm("3") = arg2; \
+ register long __svcres asm("2"); /* result is read back from register 2 */ \
+ long __res; \
+ __asm__ __volatile__ ( \
+ " .if %1 < 256\n" /* number below 256: encode it directly in the svc instruction */ \
+ " svc %b1\n" \
+ " .else\n" /* otherwise: number in r1, then svc 0 */ \
+ " la %%r1,%1\n" \
+ " svc 0\n" \
+ " .endif" \
+ : "=d" (__svcres) \
+ : "i" (__NR_##sname), /* operand %1: the syscall number as an immediate */ \
+ "0" (__arg1), /* matching constraint: shares operand 0's register */ \
+ "d" (__arg2) \
+ : io_svc_clobber ); \
+ __res = __svcres; \
+ return (type) __res; \
+}
+
+#define io_syscall3(type,fname,sname,type1,arg1,type2,arg2, \
+ type3,arg3) /* defines a three-argument svc wrapper fname() for __NR_<sname> */ \
+type fname(type1 arg1, type2 arg2, type3 arg3) { \
+ register type1 __arg1 asm("2") = arg1; /* arguments are pinned to registers 2..4 */ \
+ register type2 __arg2 asm("3") = arg2; \
+ register type3 __arg3 asm("4") = arg3; \
+ register long __svcres asm("2"); /* result is read back from register 2 */ \
+ long __res; \
+ __asm__ __volatile__ ( \
+ " .if %1 < 256\n" /* number below 256: encode it directly in the svc instruction */ \
+ " svc %b1\n" \
+ " .else\n" /* otherwise: number in r1, then svc 0 */ \
+ " la %%r1,%1\n" \
+ " svc 0\n" \
+ " .endif" \
+ : "=d" (__svcres) \
+ : "i" (__NR_##sname), /* operand %1: the syscall number as an immediate */ \
+ "0" (__arg1), /* matching constraint: shares operand 0's register */ \
+ "d" (__arg2), \
+ "d" (__arg3) \
+ : io_svc_clobber ); \
+ __res = __svcres; \
+ return (type) __res; \
+}
+
+#define io_syscall4(type,fname,sname,type1,arg1,type2,arg2, \
+ type3,arg3,type4,arg4) /* defines a four-argument svc wrapper fname() for __NR_<sname> */ \
+type fname(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ register type1 __arg1 asm("2") = arg1; /* arguments are pinned to registers 2..5 */ \
+ register type2 __arg2 asm("3") = arg2; \
+ register type3 __arg3 asm("4") = arg3; \
+ register type4 __arg4 asm("5") = arg4; \
+ register long __svcres asm("2"); /* result is read back from register 2 */ \
+ long __res; \
+ __asm__ __volatile__ ( \
+ " .if %1 < 256\n" /* number below 256: encode it directly in the svc instruction */ \
+ " svc %b1\n" \
+ " .else\n" /* otherwise: number in r1, then svc 0 */ \
+ " la %%r1,%1\n" \
+ " svc 0\n" \
+ " .endif" \
+ : "=d" (__svcres) \
+ : "i" (__NR_##sname), /* operand %1: the syscall number as an immediate */ \
+ "0" (__arg1), /* matching constraint: shares operand 0's register */ \
+ "d" (__arg2), \
+ "d" (__arg3), \
+ "d" (__arg4) \
+ : io_svc_clobber ); \
+ __res = __svcres; \
+ return (type) __res; \
+}
+
+#define io_syscall5(type,fname,sname,type1,arg1,type2,arg2, \
+ type3,arg3,type4,arg4,type5,arg5) /* defines a five-argument svc wrapper fname() for __NR_<sname> */ \
+type fname(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ register type1 __arg1 asm("2") = arg1; /* arguments are pinned to registers 2..6 */ \
+ register type2 __arg2 asm("3") = arg2; \
+ register type3 __arg3 asm("4") = arg3; \
+ register type4 __arg4 asm("5") = arg4; \
+ register type5 __arg5 asm("6") = arg5; \
+ register long __svcres asm("2"); /* result is read back from register 2 */ \
+ long __res; \
+ __asm__ __volatile__ ( \
+ " .if %1 < 256\n" /* number below 256: encode it directly in the svc instruction */ \
+ " svc %b1\n" \
+ " .else\n" /* otherwise: number in r1, then svc 0 */ \
+ " la %%r1,%1\n" \
+ " svc 0\n" \
+ " .endif" \
+ : "=d" (__svcres) \
+ : "i" (__NR_##sname), /* operand %1: the syscall number as an immediate */ \
+ "0" (__arg1), /* matching constraint: shares operand 0's register */ \
+ "d" (__arg2), \
+ "d" (__arg3), \
+ "d" (__arg4), \
+ "d" (__arg5) \
+ : io_svc_clobber ); \
+ __res = __svcres; \
+ return (type) __res; \
+}
diff --git a/tools/libaio/src/syscall-x86_64.h b/tools/libaio/src/syscall-x86_64.h
new file mode 100644
index 0000000000..9361856723
--- /dev/null
+++ b/tools/libaio/src/syscall-x86_64.h
@@ -0,0 +1,63 @@
+#define __NR_io_setup 206
+#define __NR_io_destroy 207
+#define __NR_io_getevents 208
+#define __NR_io_submit 209
+#define __NR_io_cancel 210
+
+#define __syscall_clobber "r11","rcx","memory" /* clobber list used by every io_syscallN below. NOTE(review): r11/rcx are presumably listed because the syscall instruction itself overwrites them - confirm */
+#define __syscall "syscall" /* instruction text pasted into each asm template below */
+
+#define io_syscall1(type,fname,sname,type1,arg1) /* defines type fname(type1 arg1): one-argument syscall wrapper for __NR_<sname> */ \
+type fname(type1 arg1) \
+{ \
+long __res; \
+__asm__ volatile (__syscall \
+ : "=a" (__res) /* result comes back in the "a" register */ \
+ : "0" (__NR_##sname),"D" ((long)(arg1)) : __syscall_clobber ); /* "0": the number enters in the same register as the result; "D": arg1 */ \
+return __res; /* raw long result, implicitly converted to type */ \
+}
+
+#define io_syscall2(type,fname,sname,type1,arg1,type2,arg2) /* defines a two-argument syscall wrapper fname() for __NR_<sname> */ \
+type fname(type1 arg1,type2 arg2) \
+{ \
+long __res; \
+__asm__ volatile (__syscall \
+ : "=a" (__res) /* result comes back in the "a" register */ \
+ : "0" (__NR_##sname),"D" ((long)(arg1)),"S" ((long)(arg2)) : __syscall_clobber ); /* number shares the result register; arg1 via "D", arg2 via "S" */ \
+return __res; /* raw long result, implicitly converted to type */ \
+}
+
+#define io_syscall3(type,fname,sname,type1,arg1,type2,arg2,type3,arg3) /* defines a three-argument syscall wrapper fname() for __NR_<sname> */ \
+type fname(type1 arg1,type2 arg2,type3 arg3) \
+{ \
+long __res; \
+__asm__ volatile (__syscall \
+ : "=a" (__res) /* result comes back in the "a" register */ \
+ : "0" (__NR_##sname),"D" ((long)(arg1)),"S" ((long)(arg2)), /* number shares the result register; arg1 via "D", arg2 via "S" */ \
+ "d" ((long)(arg3)) : __syscall_clobber); /* arg3 via "d" */ \
+return __res; /* raw long result, implicitly converted to type */ \
+}
+
+#define io_syscall4(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4) /* defines a four-argument syscall wrapper fname() for __NR_<sname> */ \
+type fname (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+{ \
+long __res; \
+__asm__ volatile ("movq %5,%%r10 ;" __syscall /* operand %5 is arg4; it is copied into r10 by hand before the syscall */ \
+ : "=a" (__res) \
+ : "0" (__NR_##sname),"D" ((long)(arg1)),"S" ((long)(arg2)), /* operands %1..%3 */ \
+ "d" ((long)(arg3)),"g" ((long)(arg4)) : __syscall_clobber,"r10" ); /* operands %4, %5; r10 is added to the clobbers because the template writes it */ \
+return __res; /* raw long result, implicitly converted to type */ \
+}
+
+#define io_syscall5(type,fname,sname,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) /* defines a five-argument syscall wrapper fname() for __NR_<sname> */ \
+type fname (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \
+{ \
+long __res; \
+__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall /* operands %5 (arg4) and %6 (arg5) are copied into r10 and r8 by hand */ \
+ : "=a" (__res) \
+ : "0" (__NR_##sname),"D" ((long)(arg1)),"S" ((long)(arg2)), /* operands %1..%3 */ \
+ "d" ((long)(arg3)),"g" ((long)(arg4)),"g" ((long)(arg5)) : /* operands %4..%6 */ \
+ __syscall_clobber,"r8","r10" ); /* r8 and r10 are added to the clobbers because the template writes them */ \
+return __res; /* raw long result, implicitly converted to type */ \
+}
diff --git a/tools/libaio/src/syscall.h b/tools/libaio/src/syscall.h
new file mode 100644
index 0000000000..0283825817
--- /dev/null
+++ b/tools/libaio/src/syscall.h
@@ -0,0 +1,27 @@
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#define _SYMSTR(str) #str /* stringify the argument exactly as written */
+#define SYMSTR(str) _SYMSTR(str) /* extra level so that macro arguments are expanded before being stringified */
+
+#define SYMVER(compat_sym, orig_sym, ver_sym) /* emits the assembler directive: .symver compat_sym,orig_sym@LIBAIO_ver_sym */ \
+ __asm__(".symver " SYMSTR(compat_sym) "," SYMSTR(orig_sym) "@LIBAIO_" SYMSTR(ver_sym));
+
+#define DEFSYMVER(compat_sym, orig_sym, ver_sym) /* same as SYMVER but with "@@" instead of "@" before the version name */ \
+ __asm__(".symver " SYMSTR(compat_sym) "," SYMSTR(orig_sym) "@@LIBAIO_" SYMSTR(ver_sym));
+
+#if defined(__i386__) /* pick exactly one per-architecture header, keyed on the compiler's predefined architecture macro */
+#include "syscall-i386.h"
+#elif defined(__x86_64__)
+#include "syscall-x86_64.h"
+#elif defined(__ia64__)
+#include "syscall-ia64.h"
+#elif defined(__PPC__)
+#include "syscall-ppc.h"
+#elif defined(__s390__)
+#include "syscall-s390.h"
+#elif defined(__alpha__)
+#include "syscall-alpha.h"
+#else
+#error "add syscall-arch.h" /* unsupported architecture: fail the build until a matching syscall-<arch>.h is added */
+#endif
diff --git a/tools/libaio/src/vsys_def.h b/tools/libaio/src/vsys_def.h
new file mode 100644
index 0000000000..13d032e330
--- /dev/null
+++ b/tools/libaio/src/vsys_def.h
@@ -0,0 +1,24 @@
+/* libaio Linux async I/O interface
+ Copyright 2002 Red Hat, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+extern int vsys_io_setup(unsigned nr_reqs, io_context_t *ctxp);
+extern int vsys_io_destroy(io_context_t ctx);
+extern int vsys_io_submit(io_context_t ctx, long nr, struct iocb *iocbs[]);
+extern int vsys_io_cancel(io_context_t ctx, struct iocb *iocb);
+extern int vsys_io_wait(io_context_t ctx, struct iocb *iocb, const struct timespec *when);
+extern int vsys_io_getevents(io_context_t ctx_id, long nr, struct io_event *events, const struct timespec *timeout);
+
diff --git a/tools/misc/xend b/tools/misc/xend
index cd35438090..e9bd8e18c7 100644
--- a/tools/misc/xend
+++ b/tools/misc/xend
@@ -92,6 +92,10 @@ def start_xenstored():
def start_consoled():
if os.fork() == 0:
os.execvp('xenconsoled', ['xenconsoled'])
+
+def start_blktapctrl():  # launch the blktapctrl daemon, same pattern as start_consoled()
+ if os.fork() == 0:  # only the child process takes this branch
+ os.execvp('blktapctrl', ['blktapctrl'])  # replace the child with blktapctrl, located via PATH
def main():
try:
@@ -106,16 +110,19 @@ def main():
elif sys.argv[1] == 'start':
start_xenstored()
start_consoled()
+ start_blktapctrl()
return daemon.start()
elif sys.argv[1] == 'trace_start':
start_xenstored()
start_consoled()
+ start_blktapctrl()
return daemon.start(trace=1)
elif sys.argv[1] == 'stop':
return daemon.stop()
elif sys.argv[1] == 'restart':
start_xenstored()
start_consoled()
+ start_blktapctrl()
return daemon.stop() or daemon.start()
elif sys.argv[1] == 'status':
return daemon.status()
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index e9b21c7ce5..14f9f4311a 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1701,6 +1701,7 @@ def addControllerClass(device_class, cls):
from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, usbif
+from xen.xend.server.BlktapController import BlktapController
addControllerClass('vbd', blkif.BlkifController)
addControllerClass('vif', netif.NetifController)
addControllerClass('vtpm', tpmif.TPMifController)
@@ -1708,3 +1709,4 @@ addControllerClass('pci', pciif.PciController)
addControllerClass('ioports', iopif.IOPortsController)
addControllerClass('irq', irqif.IRQController)
addControllerClass('usb', usbif.UsbifController)
+addControllerClass('tap', BlktapController)
diff --git a/tools/python/xen/xend/server/BlktapController.py b/tools/python/xen/xend/server/BlktapController.py
new file mode 100644
index 0000000000..062769a061
--- /dev/null
+++ b/tools/python/xen/xend/server/BlktapController.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2005, XenSource Ltd.
+
+
+from xen.xend.server.blkif import BlkifController
+
+
+class BlktapController(BlkifController):  # controller for 'tap' devices; inherits everything from BlkifController except frontendRoot
+ def __init__(self, vm):
+ BlkifController.__init__(self, vm)  # no extra state: just delegate to the base class
+
+ def frontendRoot(self):
+ """@see DevController#frontendRoot"""
+
+ return "%s/device/vbd" % self.vm.getDomainPath()  # frontend entries are placed under the domain's device/vbd path
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index e5d0273465..4af00f458d 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -479,7 +479,13 @@ def configure_disks(config_devs, vals):
"""Create the config for disks (virtual block devices).
"""
for (uname, dev, mode, backend) in vals.disk:
- config_vbd = ['vbd',
+
+ if uname.startswith('tap:'):
+ cls = 'tap'
+ else:
+ cls = 'vbd'
+
+ config_vbd = [cls,
['uname', uname],
['dev', dev ],
['mode', mode ] ]
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index 791c18eacd..f34ad0947e 100644
--- a/tools/python/xen/xm/main.py
+++ b/tools/python/xen/xm/main.py
@@ -994,7 +994,13 @@ def xm_block_attach(args):
arg_check(args, 'block-attach', 4, 5)
dom = args[0]
- vbd = ['vbd',
+
+ if args[1].startswith('tap:'):
+ cls = 'tap'
+ else:
+ cls = 'vbd'
+
+ vbd = [cls,
['uname', args[1]],
['dev', args[2]],
['mode', args[3]]]
diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile
index d6b143e1c6..c8a6a483d8 100644
--- a/tools/xenstore/Makefile
+++ b/tools/xenstore/Makefile
@@ -35,7 +35,7 @@ XENSTORED_Linux = xenstored_linux.o
XENSTORED_OBJS += $(XENSTORED_$(OS))
.PHONY: all
-all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls
+all: libxenstore.so libxenstore.a xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls
test_interleaved_transactions: test_interleaved_transactions.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@
@@ -90,6 +90,9 @@ talloc_test.o: talloc.c
libxenstore.so: xs.opic xs_lib.opic
$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so -shared -o $@ $^ -lpthread
+libxenstore.a: xs.o xs_lib.o # static archive of the plain objects; archiving libxenstore.so would put a shared object inside the .a
+	$(AR) rcs $@ $^
+
.PHONY: clean
clean: testsuite-clean
rm -f *.o *.opic *.so
@@ -172,7 +175,7 @@ install: all
$(INSTALL_PROG) xenstore-control $(DESTDIR)/usr/bin
$(INSTALL_PROG) xenstore-ls $(DESTDIR)/usr/bin
$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_LIBS) libxenstore.so $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxenstore.* $(DESTDIR)/usr/$(LIBDIR)
$(INSTALL_DATA) xs.h $(DESTDIR)/usr/include
$(INSTALL_DATA) xs_lib.h $(DESTDIR)/usr/include