From 7556663ceb36533b6dd84135c310854915ae3470 Mon Sep 17 00:00:00 2001 From: "akw27@arcadians.cl.cam.ac.uk" Date: Thu, 9 Jun 2005 09:41:04 +0000 Subject: bitkeeper revision 1.1695 (42a80eb0iCQq47kS2MSpOLTFyng5Cg) Parallax fixes/updates: - all data access now through the async lookup code. - added synchronous wrappers for tools to use. - added end-to-end crc32 on all image blocks - various other fixes/cleanups/test stubs. Signed-off-by: andrew.warfield@cl.cam.ac.uk --- tools/blktap/Makefile | 40 +-- tools/blktap/blockstore.c | 1 - tools/blktap/parallax-threaded.c | 654 --------------------------------------- tools/blktap/parallax-threaded.h | 24 -- tools/blktap/parallax.c | 6 +- tools/blktap/requests-async.c | 202 +++++++++--- tools/blktap/requests-async.h | 14 +- tools/blktap/snaplog.c | 1 + tools/blktap/vdi_fill.c | 7 +- tools/blktap/vdi_unittest.c | 184 +++++++++++ tools/blktap/vdi_validate.c | 21 +- 11 files changed, 400 insertions(+), 754 deletions(-) delete mode 100644 tools/blktap/parallax-threaded.c delete mode 100644 tools/blktap/parallax-threaded.h create mode 100644 tools/blktap/vdi_unittest.c (limited to 'tools') diff --git a/tools/blktap/Makefile b/tools/blktap/Makefile index 9807e05387..a0c197aedb 100644 --- a/tools/blktap/Makefile +++ b/tools/blktap/Makefile @@ -15,6 +15,8 @@ INSTALL_DIR = $(INSTALL) -d -m0755 INCLUDES += +LIBS := -lpthread -lz + SRCS := SRCS += blktaplib.c @@ -24,10 +26,8 @@ PLX_SRCS += radix.c PLX_SRCS += snaplog.c PLX_SRCS += blockstore.c PLX_SRCS += block-async.c -PLXT_SRCS := $(PLX_SRCS) -PLXT_SRCS += parallax-threaded.c -VDI_SRCS := $(PLX_SRCS) PLX_SRCS += requests-async.c +VDI_SRCS := $(PLX_SRCS) PLX_SRCS += parallax.c VDI_TOOLS := @@ -58,7 +58,7 @@ IBINS = blkdump parallax $(VDI_TOOLS) LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) -all: mk-symlinks blkdump $(VDI_TOOLS) parallax parallax-threaded blockstored +all: mk-symlinks blkdump $(VDI_TOOLS) parallax blockstored $(MAKE) $(LIB) LINUX_ROOT := 
$(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) @@ -80,7 +80,7 @@ install: all $(INSTALL_PROG) $(IBINS) $(DESTDIR)/$(BLKTAP_INSTALL_DIR) clean: - rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump $(VDI_TOOLS) parallax parallax-threaded + rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump $(VDI_TOOLS) parallax rpm: all rm -rf staging @@ -96,45 +96,45 @@ libblktap.so: libblktap.so.$(MAJOR): ln -sf libblktap.so.$(MAJOR).$(MINOR) $@ libblktap.so.$(MAJOR).$(MINOR): $(OBJS) - $(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -lpthread -lz + $(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ $(LIBS) blkdump: $(LIB) $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. -l blktap blkdump.c parallax: $(LIB) $(PLX_SRCS) - $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L. -lblktap -lpthread $(PLX_SRCS) - -parallax-threaded: $(LIB) $(PLXT_SRCS) - $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L. -lpthread -lblktap $(PLXT_SRCS) + $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L. -lblktap $(LIBS) $(PLX_SRCS) vdi_list: $(LIB) vdi_list.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c $(LIBS) $(VDI_SRCS) vdi_create: $(LIB) vdi_create.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_create vdi_create.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_create vdi_create.c $(LIBS) $(VDI_SRCS) vdi_snap: $(LIB) vdi_snap.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_snap vdi_snap.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_snap vdi_snap.c $(LIBS) $(VDI_SRCS) vdi_snap_list: $(LIB) vdi_snap_list.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_snap_list vdi_snap_list.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_snap_list vdi_snap_list.c $(LIBS) $(VDI_SRCS) vdi_snap_delete: $(LIB) vdi_snap_delete.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_snap_delete vdi_snap_delete.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_snap_delete vdi_snap_delete.c $(LIBS) $(VDI_SRCS) vdi_tree: $(LIB) vdi_tree.c 
$(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_tree vdi_tree.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_tree vdi_tree.c $(LIBS) $(VDI_SRCS) vdi_fill: $(LIB) vdi_fill.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_fill vdi_fill.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_fill vdi_fill.c $(LIBS) $(VDI_SRCS) vdi_validate: $(LIB) vdi_validate.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c -lpthread $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c $(LIBS) $(VDI_SRCS) + +vdi_unittest: $(LIB) vdi_unittest.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_unittest vdi_unittest.c $(LIBS) $(VDI_SRCS) blockstored: blockstored.c - $(CC) $(CFLAGS) -g3 -o blockstored -lpthread blockstored.c + $(CC) $(CFLAGS) -g3 -o blockstored $(LIBS) blockstored.c bstest: bstest.c blockstore.c - $(CC) $(CFLAGS) -g3 -o bstest bstest.c -lpthread blockstore.c + $(CC) $(CFLAGS) -g3 -o bstest bstest.c $(LIBS) blockstore.c .PHONY: TAGS clean install mk-symlinks rpm TAGS: diff --git a/tools/blktap/blockstore.c b/tools/blktap/blockstore.c index a9dde6e461..e15ddcd760 100644 --- a/tools/blktap/blockstore.c +++ b/tools/blktap/blockstore.c @@ -17,7 +17,6 @@ #include #include "blockstore.h" #include -#include "parallax-threaded.h" //#define BLOCKSTORE_REMOTE //#define BSDEBUG diff --git a/tools/blktap/parallax-threaded.c b/tools/blktap/parallax-threaded.c deleted file mode 100644 index 4944474fc7..0000000000 --- a/tools/blktap/parallax-threaded.c +++ /dev/null @@ -1,654 +0,0 @@ -/************************************************************************** - * - * parallax.c - * - * The Parallax Storage Server - * - */ - - -#include -#include -#include -#include -#include "blktaplib.h" -#include "blockstore.h" -#include "vdi.h" -#include "parallax-threaded.h" - -#define PARALLAX_DEV 61440 - - -#if 0 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) -#else -#define DPRINTF(_f, _a...) 
((void)0) -#endif - -/* ------[ session records ]----------------------------------------------- */ - -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) - -#define VDI_HASHSZ 16 -#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1)) - -typedef struct blkif { - domid_t domid; - unsigned int handle; - enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; - vdi_t *vdi_hash[VDI_HASHSZ]; - struct blkif *hash_next; -} blkif_t; - -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) -{ - if ( handle != 0 ) - printf("blktap/parallax don't currently support non-0 dev handles!\n"); - - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif != NULL) && - ((blkif->domid != domid) || (blkif->handle != handle)) ) - blkif = blkif->hash_next; - return blkif; -} - -vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device) -{ - vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)]; - - while ((vdi != NULL) && (vdi->vdevice != device)) - vdi = vdi->next; - - return vdi; -} - -/* ------[ control message handling ]-------------------------------------- */ - -void blkif_create(blkif_be_create_t *create) -{ - domid_t domid = create->domid; - unsigned int handle = create->blkif_handle; - blkif_t **pblkif, *blkif; - - DPRINTF("parallax (blkif_create): create is %p\n", create); - - if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL ) - { - DPRINTF("Could not create blkif: out of memory\n"); - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->status = DISCONNECTED; -/* - spin_lock_init(&blkif->vbd_lock); - spin_lock_init(&blkif->blk_ring_lock); - atomic_set(&blkif->refcnt, 0); -*/ - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif != NULL ) - { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - { - DPRINTF("Could not 
create blkif: already exists\n"); - create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; - free(blkif); - return; - } - pblkif = &(*pblkif)->hash_next; - } - - blkif->hash_next = *pblkif; - *pblkif = blkif; - - DPRINTF("Successfully created blkif\n"); - create->status = BLKIF_BE_STATUS_OKAY; -} - -void blkif_destroy(blkif_be_destroy_t *destroy) -{ - domid_t domid = destroy->domid; - unsigned int handle = destroy->blkif_handle; - blkif_t **pblkif, *blkif; - - DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif = *pblkif) != NULL ) - { - if ( (blkif->domid == domid) && (blkif->handle == handle) ) - { - if ( blkif->status != DISCONNECTED ) - goto still_connected; - goto destroy; - } - pblkif = &blkif->hash_next; - } - - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - - still_connected: - destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - return; - - destroy: - *pblkif = blkif->hash_next; - /* destroy_all_vbds(blkif); */ - free(blkif); - destroy->status = BLKIF_BE_STATUS_OKAY; -} - -void vbd_create(blkif_be_vbd_create_t *create) -{ - blkif_t *blkif; - vdi_t *vdi, **vdip; - blkif_vdev_t vdevice = create->vdevice; - - DPRINTF("parallax (vbd_create): create=%p\n", create); - - blkif = blkif_find_by_handle(create->domid, create->blkif_handle); - if ( blkif == NULL ) - { - DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", - create->domid, create->blkif_handle); - create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } - - /* VDI identifier is in grow->extent.sector_start */ - DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", - (unsigned long)create->dev_handle); - - vdi = vdi_get(create->dev_handle); - if (vdi == NULL) - { - printf("parallax (vbd_create): VDI %lx not found.\n", - (unsigned long)create->dev_handle); - create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; - return; - } - - vdi->next = NULL; - vdi->vdevice = vdevice; - vdip = 
&blkif->vdi_hash[VDI_HASH(vdevice)]; - while (*vdip != NULL) - vdip = &(*vdip)->next; - *vdip = vdi; - - DPRINTF("vbd_grow: happy return!\n"); - create->status = BLKIF_BE_STATUS_OKAY; -} - -int parallax_control(control_msg_t *msg) -{ - domid_t domid; - int ret; - - DPRINTF("parallax_control: msg is %p\n", msg); - - if (msg->type != CMSG_BLKIF_BE) - { - printf("Unexpected control message (%d)\n", msg->type); - return 0; - } - - switch(msg->subtype) - { - case CMSG_BLKIF_BE_CREATE: - if ( msg->length != sizeof(blkif_be_create_t) ) - goto parse_error; - blkif_create((blkif_be_create_t *)msg->msg); - break; - - case CMSG_BLKIF_BE_DESTROY: - if ( msg->length != sizeof(blkif_be_destroy_t) ) - goto parse_error; - blkif_destroy((blkif_be_destroy_t *)msg->msg); - break; - - case CMSG_BLKIF_BE_VBD_CREATE: - if ( msg->length != sizeof(blkif_be_vbd_create_t) ) - goto parse_error; - vbd_create((blkif_be_vbd_create_t *)msg->msg); - break; - } - return 0; -parse_error: - printf("Bad control message!\n"); - return 0; - -} - -int parallax_probe(blkif_request_t *req, blkif_t *blkif) -{ - blkif_response_t *rsp; - vdisk_t *img_info; - vdi_t *vdi; - int i, nr_vdis = 0; - - DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); - - /* We expect one buffer only. */ - if ( req->nr_segments != 1 ) - goto err; - - /* Make sure the buffer is page-sized. 
*/ - if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || - (blkif_last_sect (req->frame_and_sects[0]) != 7) ) - goto err; - - /* fill the list of devices */ - for (i=0; ivdi_hash[i]; - while (vdi) { - img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); - img_info[nr_vdis].device = vdi->vdevice; - img_info[nr_vdis].info = 0; - /* The -2 here accounts for the LSB in the radix tree */ - img_info[nr_vdis].capacity = - ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT); - nr_vdis++; - vdi = vdi->next; - } - } - - - rsp = (blkif_response_t *)req; - rsp->id = req->id; - rsp->operation = BLKIF_OP_PROBE; - rsp->status = nr_vdis; /* number of disks */ - - DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis); - return BLKTAP_RESPOND; -err: - rsp = (blkif_response_t *)req; - rsp->id = req->id; - rsp->operation = BLKIF_OP_PROBE; - rsp->status = BLKIF_RSP_ERROR; - - DPRINTF("parallax_probe: send error response\n"); - return BLKTAP_RESPOND; -} - -typedef struct { - blkif_request_t *req; - int count; - pthread_mutex_t mutex; -} pending_t; - -#define MAX_REQUESTS 64 -pending_t pending_list[MAX_REQUESTS]; - -typedef struct { - vdi_t *vdi; - blkif_request_t *req; - int segment; - pending_t *pent; -} readseg_params_t; - -#define DISPATCH_SIZE 1024UL -#define DISPATCH_MASK (DISPATCH_SIZE-1) -readseg_params_t dispatch_list[DISPATCH_SIZE]; -unsigned long dprod = 0, dcons = 0; -pthread_mutex_t dispatch_mutex; -pthread_cond_t dispatch_cond; - -void *read_segment(void *param) -{ - readseg_params_t *p; - u64 vblock, gblock, sector; - char *dpage, *spage; - unsigned long size, start, offset; - blkif_response_t *rsp; - int tid; - -unsigned long dc, dp; - -#ifdef NOTHREADS -#else - /* Set this thread's tid. 
*/ - tid = *(int *)param; - free(param); - - pthread_setspecific(tid_key, (void *)tid); - - printf("My tid is %d.\n", (int)pthread_getspecific(tid_key)); -start: - pthread_mutex_lock(&dispatch_mutex); - while (dprod == dcons) - pthread_cond_wait(&dispatch_cond, &dispatch_mutex); - - if (dprod == dcons) { - /* unnecessary wakeup. */ - pthread_mutex_unlock(&dispatch_mutex); - goto start; - } -#endif -dc = dcons; -dp = dprod; - - p = &dispatch_list[dcons & DISPATCH_MASK]; - dcons++; -#ifdef NOTHREADS -#else - pthread_mutex_unlock(&dispatch_mutex); -#endif - dpage = (char *)MMAP_VADDR(ID_TO_IDX(p->req->id), p->segment); - - /* Round the requested segment to a block address. */ - - sector = p->req->sector_number + (8*p->segment); - vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; - - /* Get that block from the store. */ - - gblock = vdi_lookup_block(p->vdi, vblock, NULL); - - /* Calculate read size and offset within the read block. */ - - offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE; - size = ( blkif_last_sect (p->req->frame_and_sects[p->segment]) - - blkif_first_sect(p->req->frame_and_sects[p->segment]) + 1 - ) << SECTOR_SHIFT; - start = blkif_first_sect(p->req->frame_and_sects[p->segment]) - << SECTOR_SHIFT; - - /* If the block does not exist in the store, return zeros. */ - /* Otherwise, copy that region to the guest page. 
*/ - -// printf(" : (%p, %d, %d) (%d) [c:%lu,p:%lu]\n", -// p->req, ID_TO_IDX(p->req->id), p->segment, -// p->pent->count, dc, dp); - - DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), " - "vblock %llx, gblock %llx, " - "size %lx\n", - sector, blkif_first_sect(p->req->frame_and_sects[p->segment]), - blkif_last_sect (p->req->frame_and_sects[p->segment]), - vblock, gblock, size); - - if ( gblock == 0 ) { - - memset(dpage + start, '\0', size); - - } else { - - spage = readblock(gblock); - - if (spage == NULL) { - printf("Error reading gblock from store: %Ld\n", gblock); - goto err; - } - - memcpy(dpage + start, spage + offset, size); - - freeblock(spage); - } - - - /* Done the read. Now update the pending record. */ - - pthread_mutex_lock(&p->pent->mutex); - p->pent->count--; - - if (p->pent->count == 0) { - -// printf("FINISH: (%d, %d)\n", ID_TO_IDX(p->req->id), p->segment); - rsp = (blkif_response_t *)p->req; - rsp->id = p->req->id; - rsp->operation = BLKIF_OP_READ; - rsp->status = BLKIF_RSP_OKAY; - - blktap_inject_response(rsp); - } - - pthread_mutex_unlock(&p->pent->mutex); - -#ifdef NOTHREADS - return NULL; -#else - goto start; -#endif - -err: - printf("I am screwed!\n"); -#ifdef NOTHREADS - return NULL; -#else - goto start; -#endif -} - - -int parallax_read(blkif_request_t *req, blkif_t *blkif) -{ - blkif_response_t *rsp; - unsigned long size, offset, start; - u64 sector; - u64 vblock, gblock; - vdi_t *vdi; - int i; - char *dpage, *spage; - pending_t *pent; - readseg_params_t *params; - - vdi = blkif_get_vdi(blkif, req->device); - - if ( vdi == NULL ) - goto err; - -// printf("START : (%p, %d, %d)\n", req, ID_TO_IDX(req->id), req->nr_segments); - - pent = &pending_list[ID_TO_IDX(req->id)]; - pent->count = req->nr_segments; - pent->req = req; - pthread_mutex_init(&pent->mutex, NULL); - - - for (i = 0; i < req->nr_segments; i++) { - pthread_t tid; - int ret; - - params = &dispatch_list[dprod & DISPATCH_MASK]; - params->pent = pent; - params->vdi = vdi; - params->req = 
req; - params->segment = i; - wmb(); - dprod++; - - pthread_mutex_lock(&dispatch_mutex); - pthread_cond_signal(&dispatch_cond); - pthread_mutex_unlock(&dispatch_mutex); -#ifdef NOTHREADS - read_segment(NULL); -#endif - - } - - - - - return BLKTAP_STOLEN; - -err: - rsp = (blkif_response_t *)req; - rsp->id = req->id; - rsp->operation = BLKIF_OP_READ; - rsp->status = BLKIF_RSP_ERROR; - - return BLKTAP_RESPOND; -} - -int parallax_write(blkif_request_t *req, blkif_t *blkif) -{ - blkif_response_t *rsp; - u64 sector; - int i, writable = 0; - u64 vblock, gblock; - char *spage; - unsigned long size, offset, start; - vdi_t *vdi; - - vdi = blkif_get_vdi(blkif, req->device); - - if ( vdi == NULL ) - goto err; - - for (i = 0; i < req->nr_segments; i++) { - - spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); - - /* Round the requested segment to a block address. */ - - sector = req->sector_number + (8*i); - vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; - - /* Get that block from the store. */ - - gblock = vdi_lookup_block(vdi, vblock, &writable); - - /* Calculate read size and offset within the read block. */ - - offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE; - size = ( blkif_last_sect (req->frame_and_sects[i]) - - blkif_first_sect(req->frame_and_sects[i]) + 1 - ) << SECTOR_SHIFT; - start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; - - DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld), " - "vblock %llx, gblock %llx, " - "size %lx\n", - sector, blkif_first_sect(req->frame_and_sects[i]), - blkif_last_sect (req->frame_and_sects[i]), - vblock, gblock, size); - - /* XXX: For now we just freak out if they try to write a */ - /* non block-sized, block-aligned page. */ - - if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) { - printf("]\n] STRANGE WRITE!\n]\n"); - goto err; - } - - if (( gblock == 0 ) || ( writable == 0 )) { - - gblock = allocblock(spage); - vdi_update_block(vdi, vblock, gblock); - - } else { - - /* write-in-place, no need to change mappings. 
*/ - writeblock(gblock, spage); - - } - - } - - rsp = (blkif_response_t *)req; - rsp->id = req->id; - rsp->operation = BLKIF_OP_WRITE; - rsp->status = BLKIF_RSP_OKAY; - - return BLKTAP_RESPOND; -err: - rsp = (blkif_response_t *)req; - rsp->id = req->id; - rsp->operation = BLKIF_OP_WRITE; - rsp->status = BLKIF_RSP_ERROR; - - return BLKTAP_RESPOND; -} - -int parallax_request(blkif_request_t *req) -{ - blkif_response_t *rsp; - domid_t dom = ID_TO_DOM(req->id); - blkif_t *blkif = blkif_find_by_handle(dom, 0); - - //DPRINTF("parallax_request: req=%p, dom=%d, blkif=%p\n", req, dom, blkif); - - if (blkif == NULL) - goto err; - - if ( req->operation == BLKIF_OP_PROBE ) { - - return parallax_probe(req, blkif); - - } else if ( req->operation == BLKIF_OP_READ ) { - - return parallax_read(req, blkif); - - } else if ( req->operation == BLKIF_OP_WRITE ) { - - return parallax_write(req, blkif); - - } else { - /* Unknown operation */ - goto err; - } - -err: - rsp = (blkif_response_t *)req; - rsp->id = req->id; - rsp->operation = req->operation; - rsp->status = BLKIF_RSP_ERROR; - return BLKTAP_RESPOND; -} - -void __init_parallax(void) -{ - memset(blkif_hash, 0, sizeof(blkif_hash)); -} - - - -int main(int argc, char *argv[]) -{ - pthread_t read_pool[READ_POOL_SIZE]; - int i, tid=0; - - DPRINTF("parallax: starting.\n"); - __init_blockstore(); - DPRINTF("parallax: initialized blockstore...\n"); - __init_vdi(); - DPRINTF("parallax: initialized vdi registry etc...\n"); - __init_parallax(); - DPRINTF("parallax: initialized local stuff..\n"); - - - pthread_mutex_init(&dispatch_mutex, NULL); - pthread_cond_init(&dispatch_cond, NULL); - - pthread_key_create(&tid_key, NULL); - tid = 0; - -#ifdef NOTHREADS -#else - for (i=0; i < READ_POOL_SIZE; i++) { - int ret, *t; - t = (int *)malloc(sizeof(int)); - *t = tid++; - ret = pthread_create(&read_pool[i], NULL, read_segment, t); - if (ret != 0) printf("Error starting thread %d\n", i); - } -#endif - - pthread_setspecific(tid_key, (void *)tid); - - 
printf("*My tid is %d.\n", (int)pthread_getspecific(tid_key)); - - blktap_register_ctrl_hook("parallax_control", parallax_control); - blktap_register_request_hook("parallax_request", parallax_request); - DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); - blktap_listen(); - - return 0; -} diff --git a/tools/blktap/parallax-threaded.h b/tools/blktap/parallax-threaded.h deleted file mode 100644 index de39609fcc..0000000000 --- a/tools/blktap/parallax-threaded.h +++ /dev/null @@ -1,24 +0,0 @@ -/************************************************************************** - * - * parallax-threaded.h - * - * a few thread-specific defines - * - */ - -#ifndef __PARALLAX_THREADED_H__ -#define __PARALLAX_THREADED_H__ - -#if 0 -/* Turn off threading. */ -#define NOTHREADS -#endif - -//#define READ_POOL_SIZE 128 -#define READ_POOL_SIZE 8 - -/* per-thread identifier */ -pthread_key_t tid_key; - -#endif /* __PARALLAX_THREADED_H__ */ - diff --git a/tools/blktap/parallax.c b/tools/blktap/parallax.c index 3f59834f12..46cdec4496 100644 --- a/tools/blktap/parallax.c +++ b/tools/blktap/parallax.c @@ -427,7 +427,7 @@ int parallax_read(blkif_request_t *req, blkif_t *blkif) p->vblock = vblock; /* dbg */ /* Get that block from the store. */ - async_read(vdi, vblock, read_cb, (void *)p); + vdi_read(vdi, vblock, read_cb, (void *)p); } return BLKTAP_STOLEN; @@ -534,7 +534,7 @@ int parallax_write(blkif_request_t *req, blkif_t *blkif) p->vblock = vblock; /* dbg */ /* Issue the write to the store. 
*/ - async_write(vdi, vblock, spage, write_cb, (void *)p); + vdi_write(vdi, vblock, spage, write_cb, (void *)p); } return BLKTAP_STOLEN; @@ -595,7 +595,7 @@ int main(int argc, char *argv[]) DPRINTF("parallax: starting.\n"); __init_blockstore(); DPRINTF("parallax: initialized blockstore...\n"); - init_block_async(); + init_block_async(); DPRINTF("parallax: initialized async blocks...\n"); __init_vdi(); DPRINTF("parallax: initialized vdi registry etc...\n"); diff --git a/tools/blktap/requests-async.c b/tools/blktap/requests-async.c index f68ae76db4..76083c7cc1 100755 --- a/tools/blktap/requests-async.c +++ b/tools/blktap/requests-async.c @@ -6,8 +6,11 @@ #include #include #include +#include #include #include +#include +#include /* for crc32() */ #include "requests-async.h" #include "vdi.h" #include "radix.h" @@ -23,6 +26,10 @@ #define DPRINTF(_f, _a...) ((void)0) #endif +struct block_info { + u32 crc; + u32 unused; +}; struct io_req { enum { IO_OP_READ, IO_OP_WRITE } op; @@ -34,16 +41,17 @@ struct io_req { struct radix_lock *lock; /* internal stuff: */ - struct io_ret retval;/* holds the return while we unlock. */ - char *block; /* the block to write */ - radix_tree_node radix[3]; - u64 radix_addr[3]; + struct io_ret retval;/* holds the return while we unlock. 
*/ + char *block; /* the block to write */ + radix_tree_node radix[3]; + u64 radix_addr[3]; + struct block_info bi; }; void clear_w_bits(radix_tree_node node) { int i; - for (i=0; iradix[0] = req->radix[1] = req->radix[2] = NULL; + if (req == NULL) return ERR_NOMEM; - if (req == NULL) {perror("req was NULL in async_read"); return(-1); } - + req->radix[0] = req->radix[1] = req->radix[2] = NULL; req->op = IO_OP_READ; req->root = vdi->radix_root; req->lock = vdi->radix_lock; @@ -138,26 +148,35 @@ int async_read(vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param) } -int async_write(vdi_t *vdi, u64 vaddr, char *block, - io_cb_t cb, void *param) +int vdi_write(vdi_t *vdi, u64 vaddr, char *block, + io_cb_t cb, void *param) { struct io_req *req; + if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR; + /* Every second line in the bottom-level radix tree is used to */ + /* store crc32 values etc. We shift the vadder here to achied this. */ + vaddr <<= 1; req = (struct io_req *)malloc(sizeof (struct io_req)); + if (req == NULL) return ERR_NOMEM; + req->radix[0] = req->radix[1] = req->radix[2] = NULL; - - if (req == NULL) {perror("req was NULL in async_write"); return(-1); } - - req->op = IO_OP_WRITE; - req->root = vdi->radix_root; - req->lock = vdi->radix_lock; - req->vaddr = vaddr; - req->block = block; - req->cb = cb; - req->param = param; + req->op = IO_OP_WRITE; + req->root = vdi->radix_root; + req->lock = vdi->radix_lock; + req->vaddr = vaddr; + req->block = block; + /* Todo: add a pseodoheader to the block to include some location */ + /* information in the CRC as well. 
*/ + req->bi.crc = (u32) crc32(0L, Z_NULL, 0); + req->bi.crc = (u32) crc32(req->bi.crc, block, BLOCK_SIZE); + req->bi.unused = 0xdeadbeef; + + req->cb = cb; + req->param = param; req->radix_addr[L1] = getid(req->root); /* for consistency */ - req->state = WRITE_LOCKED; + req->state = WRITE_LOCKED; block_wlock(req->lock, L1_IDX(vaddr), write_cb, req); @@ -165,7 +184,7 @@ int async_write(vdi_t *vdi, u64 vaddr, char *block, return 0; } -void read_cb(struct io_ret ret, void *param) +static void read_cb(struct io_ret ret, void *param) { struct io_req *req = (struct io_req *)param; radix_tree_node node; @@ -219,12 +238,16 @@ void read_cb(struct io_ret ret, void *param) break; case READ_L3: - + { + struct block_info *bi; + DPRINTF("READ_L3\n"); block = IO_BLOCK(ret); if (block == NULL) goto fail; node = (radix_tree_node) block; idx = getid( node[L3_IDX(req->vaddr)] ); + bi = (struct block_info *) &node[L3_IDX(req->vaddr) + 1]; + req->bi = *bi; free(block); if ( idx == ZERO ) { req->state = RETURN_ZERO; @@ -234,16 +257,47 @@ void read_cb(struct io_ret ret, void *param) block_read(idx, read_cb, req); } break; - + } case READ_DATA: - + { + u32 crc; + DPRINTF("READ_DATA\n"); - if (IO_BLOCK(ret) == NULL) goto fail; + block = IO_BLOCK(ret); + if (block == NULL) goto fail; + + /* crc check */ + crc = (u32) crc32(0L, Z_NULL, 0); + crc = (u32) crc32(crc, block, BLOCK_SIZE); + if (crc != req->bi.crc) { + /* TODO: add a retry loop here. */ + /* Do this after the cache is added -- make sure to */ + /* invalidate the bad page before reissuing the read. 
*/ + + warn("Bad CRC on vaddr (%Lu:%d)\n", req->vaddr, req->bi.unused); +#ifdef PRINT_BADCRC_PAGES + { + int j; + for (j=0; jretval = ret; req->state = READ_UNLOCKED; block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); break; - + } case READ_UNLOCKED: { struct io_ret r; @@ -293,12 +347,13 @@ void read_cb(struct io_ret ret, void *param) } -void write_cb(struct io_ret r, void *param) +static void write_cb(struct io_ret r, void *param) { struct io_req *req = (struct io_req *)param; radix_tree_node node; u64 a, addr; void *req_param; + struct block_info *bi; switch(req->state) { @@ -383,6 +438,19 @@ void write_cb(struct io_ret r, void *param) } break; + case WRITE_DATA: + + DPRINTF("WRITE_DATA\n"); + /* The L3 radix points to the correct block, we just need to */ + /* update the crc. */ + if (IO_INT(r) < 0) goto fail; + bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; + req->bi.unused = 101; + *bi = req->bi; + req->state = WRITE_L3; + block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); + break; + /* L3 Zero Path: */ case ALLOC_DATA_L3z: @@ -391,6 +459,9 @@ void write_cb(struct io_ret r, void *param) addr = IO_ADDR(r); a = writable(addr); req->radix[L3][L3_IDX(req->vaddr)] = a; + bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; + req->bi.unused = 102; + *bi = req->bi; req->state = WRITE_L3_L3z; block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); break; @@ -398,11 +469,14 @@ void write_cb(struct io_ret r, void *param) /* L3 Fault Path: */ case ALLOC_DATA_L3f: - + DPRINTF("ALLOC_DATA_L3f\n"); addr = IO_ADDR(r); a = writable(addr); req->radix[L3][L3_IDX(req->vaddr)] = a; + bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; + req->bi.unused = 103; + *bi = req->bi; req->state = WRITE_L3_L3f; block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); break; @@ -416,6 +490,9 @@ void write_cb(struct io_ret r, void *param) a = writable(addr); req->radix[L3] = newblock(); 
req->radix[L3][L3_IDX(req->vaddr)] = a; + bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; + req->bi.unused = 104; + *bi = req->bi; req->state = ALLOC_L3_L2z; block_alloc( (char*)req->radix[L3], write_cb, req ); break; @@ -452,6 +529,9 @@ void write_cb(struct io_ret r, void *param) addr = IO_ADDR(r); a = writable(addr); req->radix[L3][L3_IDX(req->vaddr)] = a; + bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; + req->bi.unused = 105; + *bi = req->bi; req->state = ALLOC_L3_L2f; block_alloc( (char*)req->radix[L3], write_cb, req ); break; @@ -475,10 +555,13 @@ void write_cb(struct io_ret r, void *param) a = writable(addr); req->radix[L3] = newblock(); req->radix[L3][L3_IDX(req->vaddr)] = a; + bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; + req->bi.unused = 106; + *bi = req->bi; req->state = ALLOC_L3_L1z; block_alloc( (char*)req->radix[L3], write_cb, req ); break; - + case ALLOC_L3_L1z: DPRINTF("ALLOC_L3_L1z\n"); @@ -546,6 +629,9 @@ void write_cb(struct io_ret r, void *param) addr = IO_ADDR(r); a = writable(addr); req->radix[L3][L3_IDX(req->vaddr)] = a; + bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; + req->bi.unused = 107; + *bi = req->bi; req->state = ALLOC_L3_L1f; block_alloc( (char*)req->radix[L3], write_cb, req ); break; @@ -570,7 +656,7 @@ void write_cb(struct io_ret r, void *param) block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req); break; - case WRITE_DATA: + case WRITE_L3: case WRITE_L3_L3z: case WRITE_L3_L3f: case WRITE_L2_L2z: @@ -590,8 +676,8 @@ void write_cb(struct io_ret r, void *param) } case WRITE_UNLOCKED: { - struct io_ret r; - io_cb_t cb; + struct io_ret r; + io_cb_t cb; DPRINTF("WRITE_UNLOCKED!\n"); req_param = req->param; r = req->retval; @@ -612,13 +698,57 @@ void write_cb(struct io_ret r, void *param) { struct io_ret r; io_cb_t cb; + int i; + DPRINTF("asyn_write had a read error mid-way.\n"); req_param = req->param; cb = req->cb; r.type = IO_INT_T; r.u.i 
= -1; + /* free any saved node vals. */ + for (i=0; i<3; i++) + if (req->radix[i] != 0) free(req->radix[i]); free(req); cb(r, req_param); } } +char *vdi_read_s(vdi_t *vdi, u64 vaddr) +{ + pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER; + char *block = NULL; + int ret; + + void reads_cb(struct io_ret r, void *param) + { + block = IO_BLOCK(r); + pthread_mutex_unlock((pthread_mutex_t *)param); + } + + pthread_mutex_lock(&m); + ret = vdi_read(vdi, vaddr, reads_cb, &m); + + if (ret == 0) pthread_mutex_lock(&m); + + return block; +} + + +int vdi_write_s(vdi_t *vdi, u64 vaddr, char *block) +{ + pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER; + int ret, result; + + void writes_cb(struct io_ret r, void *param) + { + result = IO_INT(r); + pthread_mutex_unlock((pthread_mutex_t *)param); + } + + pthread_mutex_lock(&m); + ret = vdi_write(vdi, vaddr, block, writes_cb, &m); + + if (ret == 0) pthread_mutex_lock(&m); + + return result; +} diff --git a/tools/blktap/requests-async.h b/tools/blktap/requests-async.h index 503a543b7e..451f211bd5 100755 --- a/tools/blktap/requests-async.h +++ b/tools/blktap/requests-async.h @@ -13,7 +13,17 @@ #define readonly(x) ((u64)((x) << 1)) */ -int async_read (vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param); -int async_write(vdi_t *vdi, u64 vaddr, char *block, io_cb_t cb, void *param); +#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */ +#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x)) + +int vdi_read (vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param); +int vdi_write(vdi_t *vdi, u64 vaddr, char *block, io_cb_t cb, void *param); +/* synchronous versions: */ +char *vdi_read_s (vdi_t *vdi, u64 vaddr); +int vdi_write_s(vdi_t *vdi, u64 vaddr, char *block); + +#define ERR_BAD_VADDR -1 +#define ERR_NOMEM -2 + #endif //_REQUESTSASYNC_H_ diff --git a/tools/blktap/snaplog.c b/tools/blktap/snaplog.c index 5c030e3b5b..072497fe72 100644 --- a/tools/blktap/snaplog.c +++ b/tools/blktap/snaplog.c @@ -97,6 +97,7 @@ int __snap_block_create(snap_id_t 
*parent_id, snap_id_t *fork_id, new_id->index = 0; new_id->block = allocblock(blk); + freeblock(blk); if (new_id->block == 0) return -1; diff --git a/tools/blktap/vdi_fill.c b/tools/blktap/vdi_fill.c index 7e3eacc3f3..61025862f7 100644 --- a/tools/blktap/vdi_fill.c +++ b/tools/blktap/vdi_fill.c @@ -16,6 +16,7 @@ #include #include "blockstore.h" #include "radix.h" +#include "requests-async.h" #include "vdi.h" int main(int argc, char *argv[]) @@ -30,6 +31,7 @@ int main(int argc, char *argv[]) u64 vblock = 0, count=0; __init_blockstore(); + init_block_async(); __init_vdi(); if ( argc < 3 ) { @@ -64,10 +66,7 @@ int main(int argc, char *argv[]) printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE); printf(" "); while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) { - u64 gblock = 0; - - gblock = allocblock(spage); - vdi_update_block(vdi, vblock, gblock); + vdi_write_s(vdi, vblock, spage); vblock++; if ((vblock % 512) == 0) diff --git a/tools/blktap/vdi_unittest.c b/tools/blktap/vdi_unittest.c new file mode 100644 index 0000000000..77ecc833be --- /dev/null +++ b/tools/blktap/vdi_unittest.c @@ -0,0 +1,184 @@ +/************************************************************************** + * + * vdi_unittest.c + * + * Run a small test workload to ensure that data access through a vdi + * is (at least superficially) correct. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "requests-async.h" +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +#define TEST_PAGES 32 +static char *zero_page; +static char pages[TEST_PAGES][BLOCK_SIZE]; +static int next_page = 0; + /* fill_test_pages: fills every entry of pages[] with values from random() and points zero_page at a block obtained from newblock() (presumably zero-filled, since it is later used as the expected content of unwritten blocks; confirm). NOTE(review): page is a long pointer but the inner loop count is BLOCK_SIZE/4, so where sizeof(long) is 8 each pass stores twice BLOCK_SIZE bytes and runs past pages[i]; BLOCK_SIZE/sizeof(*page) would stay in bounds. */ +void fill_test_pages(void) +{ + int i, j; + long *page; + + for (i=0; i< TEST_PAGES; i++) { + page = (unsigned long *)pages[i]; + for (j=0; j<(BLOCK_SIZE/4); j++) { + page[j] = random(); + } + } + + zero_page = newblock(); +} + /* make_vaddr: packs three indices into one address, with L1 in the high bits followed by 9 bits each for L2 and L3. The inputs are not masked, so an L2 or L3 value above 511 would overlap the neighbouring field. */ +inline u64 make_vaddr(u64 L1, u64 L2, u64 L3) +{ + u64 ret = L1; + + ret = (ret << 9) | L2; + ret = (ret << 9) | L3; + + return ret; +} + /* touch_block: writes the next unused entry of pages[] at (L1, L2, L3) with vdi_write_s(), reads it back with vdi_read_s() and prints a message if the read returns NULL or different data. The return value of vdi_write_s() is ignored. NOTE(review): next_page is never checked against TEST_PAGES, and the mismatch path returns without freeblock(rpage). */ +void touch_block(vdi_t *vdi, u64 L1, u64 L2, u64 L3) +{ + u64 vaddr; + char *page = pages[next_page++]; + char *rpage = NULL; + + printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3); + + vaddr = make_vaddr(L1, L2, L3); + vdi_write_s(vdi, vaddr, page); + rpage = vdi_read_s(vdi, vaddr); + + if (rpage == NULL) + { + printf( "read %Lu returned NULL\n", vaddr); + return; + } + + if (memcmp(page, rpage, BLOCK_SIZE) != 0) + { + printf( "read %Lu returned a different page\n", vaddr); + return; + } + + freeblock(rpage); +} + /* test_block: reads the block at (L1, L2, L3) with vdi_read_s() and compares it with the expected page, printing a message if the read returns NULL or different data. NOTE(review): the mismatch path returns without freeblock(rpage). */ +void test_block(vdi_t *vdi, u64 L1, u64 L2, u64 L3, char *page) +{ + u64 vaddr; + char *rpage = NULL; + + printf("TEST (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3); + + vaddr = make_vaddr(L1, L2, L3); + rpage = vdi_read_s(vdi, vaddr); + + if (rpage == NULL) + { + printf( "read %Lu returned NULL\n", vaddr); + return; + } + + if (memcmp(page, rpage, BLOCK_SIZE) != 0) + { + printf( "read %Lu returned a different page\n", vaddr); + return; + } + + freeblock(rpage); +} + /* coverage_test: runs a fixed sequence of touch_block() and test_block() calls against vdi, with one vdi_snapshot() in the middle. i, j and k record the value of next_page just before a touch_block() call, i.e. the pages[] entry that call writes, so the same data can be re-checked after the snapshot. The local vaddr is unused. */ +void coverage_test(vdi_t *vdi) +{ + u64 vaddr; + int i, j, k; + + /* Do a series of writes and reads to test all paths through the + * async radix code. The radix request code will dump CRC warnings + * if there are data problems here as well.
+ */ + + /* L1 Zero */ + touch_block(vdi, 0, 0, 0); + + /* L2 Zero */ + i = next_page; + touch_block(vdi, 0, 1, 0); + + /* L3 Zero */ + j = next_page; + touch_block(vdi, 0, 0, 1); + k = next_page; + touch_block(vdi, 0, 1, 1); + + /* Direct write */ + touch_block(vdi, 0, 0, 0); + + vdi_snapshot(vdi); + + /* L1 fault */ + touch_block(vdi, 0, 0, 0); + /* test the read-only branches that should have been copied over. */ + test_block(vdi, 0, 1, 0, pages[i]); + test_block(vdi, 0, 0, 1, pages[j]); + + /* L2 fault */ + touch_block(vdi, 0, 1, 0); + test_block(vdi, 0, 1, 1, pages[k]); + + /* L3 fault */ + touch_block(vdi, 0, 0, 1); + + /* read - L1 zero */ + test_block(vdi, 1, 0, 0, zero_page); + + /* read - L2 zero */ + test_block(vdi, 0, 2, 0, zero_page); + + /* read - L3 zero */ + test_block(vdi, 0, 0, 2, zero_page); +} + /* main: initialises the blockstore, the async block layer and the vdi layer, creates a new VDI, then runs fill_test_pages() and coverage_test() on it. Exits with -1 if the VDI cannot be created, otherwise returns 0. NOTE(review): id, fd, st, tot_size, spage, dpage, vblock and count are declared but not used in this function. NOTE(review): freeblock(vdi) is also called on the path where vdi is NULL; confirm freeblock() accepts NULL or drop that call. */ +int main(int argc, char *argv[]) +{ + vdi_t *vdi; + u64 id; + int fd; + struct stat st; + u64 tot_size; + char spage[BLOCK_SIZE]; + char *dpage; + u64 vblock = 0, count=0; + + __init_blockstore(); + init_block_async(); + __init_vdi(); + + vdi = vdi_create( NULL, "UNIT TEST VDI"); + + if ( vdi == NULL ) { + printf("Failed to create VDI!\n"); + freeblock(vdi); + exit(-1); + } + + fill_test_pages(); + coverage_test(vdi); + + freeblock(vdi); + + return (0); +} diff --git a/tools/blktap/vdi_validate.c b/tools/blktap/vdi_validate.c index a2468615a0..de7a62d3e9 100644 --- a/tools/blktap/vdi_validate.c +++ b/tools/blktap/vdi_validate.c @@ -18,6 +18,7 @@ #include "blockstore.h" #include "radix.h" #include "vdi.h" +#include "requests-async.h" int main(int argc, char *argv[]) { @@ -31,6 +32,7 @@ int main(int argc, char *argv[]) u64 vblock = 0, count=0; __init_blockstore(); + init_block_async(); __init_vdi(); if ( argc < 3 ) { @@ -64,17 +66,14 @@ int main(int argc, char *argv[]) printf(" "); while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) { - u64 gblock = 0; - - gblock = vdi_lookup_block(vdi, vblock, NULL); - - if (gblock == 0) { + + dpage = vdi_read_s(vdi,
vblock); + + if (dpage == NULL) { printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock); exit(0); } - - dpage = readblock(gblock); - + if (memcmp(spage, dpage, BLOCK_SIZE) != 0) { printf("\n\nblocks don't match! (%Ld)\n", vblock); exit(0); @@ -83,8 +82,10 @@ int main(int argc, char *argv[]) freeblock(dpage); vblock++; - printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock); - fflush(stdout); + if ((vblock % 1024) == 0) { + printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock); + fflush(stdout); + } } printf("\n"); -- cgit v1.2.3