aboutsummaryrefslogtreecommitdiffstats
path: root/extras/mini-os/blkfront.c
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2008-01-17 16:41:46 +0000
committerKeir Fraser <keir.fraser@citrix.com>2008-01-17 16:41:46 +0000
commit9d220744e67413bc17df9fbfcebe33ad663207a6 (patch)
tree63238167f2c070d0e044f8c29f70d01a4b02ee9c /extras/mini-os/blkfront.c
parent86f69d44aecaf5e72f86e5686d1a2fcb8f7fc7ba (diff)
downloadxen-9d220744e67413bc17df9fbfcebe33ad663207a6.tar.gz
xen-9d220744e67413bc17df9fbfcebe33ad663207a6.tar.bz2
xen-9d220744e67413bc17df9fbfcebe33ad663207a6.zip
minios: add blkfront
Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
Diffstat (limited to 'extras/mini-os/blkfront.c')
-rw-r--r--extras/mini-os/blkfront.c392
1 file changed, 392 insertions, 0 deletions
diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c
new file mode 100644
index 0000000000..eda17bafe5
--- /dev/null
+++ b/extras/mini-os/blkfront.c
@@ -0,0 +1,392 @@
+/* Minimal block driver for Mini-OS.
+ * Copyright (c) 2007-2008 Samuel Thibault.
+ * Based on netfront.c.
+ */
+
+#include <os.h>
+#include <xenbus.h>
+#include <events.h>
+#include <errno.h>
+#include <xen/io/blkif.h>
+#include <gnttab.h>
+#include <xmalloc.h>
+#include <time.h>
+#include <blkfront.h>
+#include <lib.h>
+#include <fcntl.h>
+
/* Note: we generally don't need to disable IRQs since we hardly do anything in
 * the interrupt handler. */

/* Note: we really suppose non-preemptive threads. */

/* Threads waiting for a free ring slot or for completed responses sleep on
 * this queue; blkfront_handler() wakes them on every event. */
DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);




/* Number of entries the shared ring holds in one page (a power of two). */
#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
/* Grant reference 0 is never handed out, so it can mark "no grant held". */
#define GRANT_INVALID_REF 0
+
+
/* A granted data page: the page itself and the grant reference covering it.
 * NOTE(review): not referenced anywhere in this file — presumably kept for
 * future use or API symmetry with netfront; confirm before removing. */
struct blk_buffer {
    void* page;
    grant_ref_t gref;
};
+
/* Per-device frontend state, created by init_blkfront() and torn down by
 * shutdown_blkfront(). */
struct blkfront_dev {
    struct blkif_front_ring ring;     /* front half of the shared request ring */
    grant_ref_t ring_ref;             /* grant covering the shared ring page */
    evtchn_port_t evtchn, local_port; /* inter-domain port and our bound port */
    blkif_vdev_t handle;              /* vdev number parsed from the node name */

    char *nodename;                   /* our xenstore dir, strdup'd (we free) */
    char *backend;                    /* backend xenstore dir, from xenbus_read */
    unsigned sector_size;             /* bytes per sector (backend "sector-size") */
    unsigned sectors;                 /* size in sectors (int-limited, see FIXME) */
    int mode;                         /* O_RDWR or O_RDONLY per backend "mode" */
    int barrier;                      /* backend advertises feature-barrier */
    int flush;                        /* backend advertises feature-flush-cache */
};
+
+static inline int xenblk_rxidx(RING_IDX idx)
+{
+ return idx & (BLK_RING_SIZE - 1);
+}
+
/* Event-channel interrupt handler: do no work here, just wake the waiters;
 * response processing happens in blkfront_aio_poll() in thread context. */
void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
    wake_up(&blkfront_queue);
}
+
+struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
+{
+ xenbus_transaction_t xbt;
+ char* err;
+ char* message=NULL;
+ struct blkif_sring *s;
+ int retry=0;
+ char* msg;
+ char* c;
+
+ struct blkfront_dev *dev;
+
+ ASSERT(!strncmp(nodename, "/local/domain/", 14));
+ nodename = strchr(nodename + 14, '/') + 1;
+
+ char path[strlen(nodename) + 1 + 10 + 1];
+
+ printk("******************* BLKFRONT for %s **********\n\n\n", nodename);
+
+ dev = malloc(sizeof(*dev));
+ dev->nodename = strdup(nodename);
+
+ s = (struct blkif_sring*) alloc_page();
+ memset(s,0,PAGE_SIZE);
+
+
+ SHARED_RING_INIT(s);
+ FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);
+
+ dev->ring_ref = gnttab_grant_access(0,virt_to_mfn(s),0);
+
+ evtchn_alloc_unbound_t op;
+ op.dom = DOMID_SELF;
+ snprintf(path, sizeof(path), "%s/backend-id", nodename);
+ op.remote_dom = xenbus_read_integer(path);
+ HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
+ clear_evtchn(op.port); /* Without, handler gets invoked now! */
+ dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
+ dev->evtchn=op.port;
+
+ // FIXME: proper frees on failures
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ printk("starting transaction\n");
+ }
+
+ err = xenbus_printf(xbt, nodename, "ring-ref","%u",
+ dev->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, nodename,
+ "event-channel", "%u", dev->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, nodename, "state", "%u",
+ 4); /* connected */
+
+
+ err = xenbus_transaction_end(xbt, 0, &retry);
+ if (retry) {
+ goto again;
+ printk("completing transaction\n");
+ }
+
+ goto done;
+
+abort_transaction:
+ xenbus_transaction_end(xbt, 1, &retry);
+ return NULL;
+
+done:
+
+ snprintf(path, sizeof(path), "%s/backend", nodename);
+ msg = xenbus_read(XBT_NIL, path, &dev->backend);
+ if (msg) {
+ printk("Error %s when reading the backend path %s\n", msg, path);
+ return NULL;
+ }
+
+ printk("backend at %s\n", dev->backend);
+
+ dev->handle = simple_strtoul(strrchr(nodename, '/')+1, NULL, 0);
+
+ {
+ char path[strlen(dev->backend) + 1 + 19 + 1];
+ snprintf(path, sizeof(path), "%s/mode", dev->backend);
+ msg = xenbus_read(XBT_NIL, path, &c);
+ if (msg) {
+ printk("Error %s when reading the mode\n", msg);
+ return NULL;
+ }
+ if (*c == 'w')
+ *mode = dev->mode = O_RDWR;
+ else
+ *mode = dev->mode = O_RDONLY;
+ free(c);
+
+ snprintf(path, sizeof(path), "%s/state", dev->backend);
+
+ xenbus_watch_path(XBT_NIL, path);
+
+ xenbus_wait_for_value(path,"4");
+
+ xenbus_unwatch_path(XBT_NIL, path);
+
+ snprintf(path, sizeof(path), "%s/sectors", dev->backend);
+ // FIXME: read_integer returns an int, so disk size limited to 1TB for now
+ *sectors = dev->sectors = xenbus_read_integer(path);
+
+ snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
+ *sector_size = dev->sector_size = xenbus_read_integer(path);
+
+ snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
+ dev->barrier = xenbus_read_integer(path);
+
+ snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
+ dev->flush = xenbus_read_integer(path);
+ }
+
+ printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+ printk("**************************\n");
+
+ return dev;
+}
+
/* Gracefully disconnect from the backend: drain all in-flight requests, walk
 * the XenbusState handshake (5 = Closing, 6 = Closed) in lockstep with the
 * backend, then release the event channel and the device state.
 * NOTE(review): 'err' is assigned but never checked or freed — confirm the
 * ownership rules of xenbus_printf's returned error string before fixing. */
void shutdown_blkfront(struct blkfront_dev *dev)
{
    char* err;
    char *nodename = dev->nodename;

    /* Room for "<backend>/state" + NUL. */
    char path[strlen(dev->backend) + 1 + 5 + 1];

    /* Make sure nothing is left on the ring before tearing it down. */
    blkfront_sync(dev);

    printk("close blk: backend at %s\n",dev->backend);

    snprintf(path, sizeof(path), "%s/state", dev->backend);
    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
    xenbus_wait_for_value(path,"5");

    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closed */
    xenbus_wait_for_value(path,"6");

    unbind_evtchn(dev->local_port);

    free(nodename);
    free(dev->backend);
    free(dev);
}
+
/* Block the calling thread until the request ring has a free slot.
 * First polls for completed responses (which frees slots) and only sleeps
 * when the ring is genuinely full.  IRQs are masked across the
 * check-then-add_waiter sequence so a wakeup from blkfront_handler() cannot
 * slip in between the RING_FULL test and going to sleep. */
static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
        unsigned long flags;
        DEFINE_WAIT(w);
        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);
            if (!RING_FULL(&dev->ring))
                break;
            /* Really no slot, go to sleep. */
            add_waiter(w, blkfront_queue);
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }
}
+
/* Issue an aio */
/* Queue the asynchronous transfer described by aiocbp (read when write == 0,
 * write when write == 1) on the ring and kick the backend if needed.
 * Completion is delivered via aiocbp->aio_cb from blkfront_aio_poll(); the
 * buffer must remain valid and granted until then.  Offset, byte count and
 * buffer address must all be multiples of the sector size. */
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
{
    struct blkfront_dev *dev = aiocbp->aio_dev;
    struct blkif_request *req;
    RING_IDX i;
    int notify;
    int n, j;
    uintptr_t start, end;

    // Can't io at non-sector-aligned location
    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
    // Can't io non-sector-sized amounts
    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
    // Can't io non-sector-aligned buffer
    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));

    /* Number of pages the buffer spans: round start down and end up. */
    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
    n = (end - start) / PAGE_SIZE;

    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
     * so max 44KB can't happen */
    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);

    blkfront_wait_slot(dev);
    i = dev->ring.req_prod_pvt;
    req = RING_GET_REQUEST(&dev->ring, i);

    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
    req->nr_segments = n;
    req->handle = dev->handle;
    /* The aiocb pointer itself is the request id, recovered in poll(). */
    req->id = (uintptr_t) aiocbp;
    req->sector_number = aiocbp->aio_offset / dev->sector_size;

    /* Grant the backend access to every page of the buffer.  'write' is
     * passed as the read-only flag: on a disk write the backend only reads
     * our pages.  NOTE(review): backend assumed to be domain 0 here. */
    for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
        aiocbp->gref[j] = req->seg[j].gref =
            gnttab_grant_access(0, virt_to_mfn(data), write);
        req->seg[j].first_sect = 0;
        req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
    }
    /* Trim the first and last segment to the buffer's actual extent. */
    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;

    dev->ring.req_prod_pvt = i + 1;

    /* Make the request visible before publishing the producer index. */
    wmb();
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);

    if(notify) notify_remote_via_evtchn(dev->evtchn);
}
+
/* Convenience wrapper: issue aiocbp as an asynchronous write. */
void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 1);
}
+
/* Convenience wrapper: issue aiocbp as an asynchronous read. */
void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
    blkfront_aio(aiocbp, 0);
}
+
/* Consume all pending responses from the ring: end the grants for each
 * completed read/write and invoke its completion callback (which frees the
 * aiocb).  Barrier/flush responses are simply discarded.  Returns the number
 * of responses consumed.  Called from thread context (wait loops), not from
 * the interrupt handler. */
int blkfront_aio_poll(struct blkfront_dev *dev)
{
    RING_IDX rp, cons;
    struct blkif_response *rsp;

moretodo:
    rp = dev->ring.sring->rsp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */
    cons = dev->ring.rsp_cons;

    int nr_consumed = 0;
    while ((cons != rp))
    {
        rsp = RING_GET_RESPONSE(&dev->ring, cons);

        switch (rsp->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        {
            /* The request id is the aiocb pointer stashed by blkfront_aio. */
            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
            /* Revoke the grant on every page the buffer spanned. */
            int n = (aiocbp->aio_nbytes + PAGE_SIZE - 1) / PAGE_SIZE, j;
            for (j = 0; j < n; j++)
                gnttab_end_access(aiocbp->gref[j]);

            /* Nota: callback frees aiocbp itself */
            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
            break;
        }
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
            break;
        default:
            printk("unrecognized block operation %d response\n", rsp->operation);
            break;
        }

        nr_consumed++;
        ++cons;
    }
    dev->ring.rsp_cons = cons;

    /* Re-check for responses that raced in while we were consuming; this
     * also re-arms backend notification. */
    int more;
    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
    if (more) goto moretodo;

    return nr_consumed;
}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
+{
+ int i;
+ struct blkif_request *req;
+ int notify;
+
+ blkfront_wait_slot(dev);
+ i = dev->ring.req_prod_pvt;
+ req = RING_GET_REQUEST(&dev->ring, i);
+ req->operation = op;
+ dev->ring.req_prod_pvt = i + 1;
+ wmb();
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+ if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
/* Flush the device: issue a barrier or cache-flush request if the backend
 * advertises one, then wait until every slot of the ring is free again, i.e.
 * all outstanding requests (including the one just pushed) have completed.
 * Uses the same IRQ-masked check-then-sleep pattern as blkfront_wait_slot. */
void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;

    if (dev->barrier == 1)
        blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);

    if (dev->flush == 1)
        blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);

    /* Note: This won't finish if another thread enqueues requests. */
    local_irq_save(flags);
    DEFINE_WAIT(w);
    while (1) {
        blkfront_aio_poll(dev);
        /* Ring entirely free <=> nothing left in flight. */
        if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
            break;

        add_waiter(w, blkfront_queue);
        local_irq_restore(flags);
        schedule();
        local_irq_save(flags);
    }
    remove_waiter(w);
    local_irq_restore(flags);
}