aboutsummaryrefslogtreecommitdiffstats
path: root/xenolinux-2.4.21-sparse/arch/xeno/drivers/block/xl_block.c
diff options
context:
space:
mode:
Diffstat (limited to 'xenolinux-2.4.21-sparse/arch/xeno/drivers/block/xl_block.c')
-rw-r--r--xenolinux-2.4.21-sparse/arch/xeno/drivers/block/xl_block.c589
1 files changed, 589 insertions, 0 deletions
diff --git a/xenolinux-2.4.21-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.21-sparse/arch/xeno/drivers/block/xl_block.c
new file mode 100644
index 0000000000..d3e09eb52c
--- /dev/null
+++ b/xenolinux-2.4.21-sparse/arch/xeno/drivers/block/xl_block.c
@@ -0,0 +1,589 @@
+/******************************************************************************
+ * xl_block.c
+ *
+ * Xenolinux virtual block-device driver.
+ *
+ */
+
+#include "xl_block.h"
+#include <linux/blk.h>
+#include <linux/cdrom.h>
+
+typedef unsigned char byte; /* from linux/ide.h */
+
+#define XLBLK_RESPONSE_IRQ _EVENT_BLK_RESP
+#define DEBUG_IRQ _EVENT_DEBUG
+
+static blk_ring_t *blk_ring;
+static unsigned int resp_cons; /* Response consumer for comms ring. */
+static unsigned int req_prod; /* Private request producer. */
+static xen_disk_info_t xlblk_disk_info;
+static int xlblk_control_msg_pending;
+
+#define RING_FULL (BLK_RING_INC(req_prod) == resp_cons)
+
+/*
+ * Request queues with outstanding work, but ring is currently full.
+ * We need no special lock here, as we always access this with the
+ * io_request_lock held. We only need a small maximum list.
+ */
+#define MAX_PENDING 8
+static request_queue_t *pending_queues[MAX_PENDING];
+static int nr_pending;
+
+static kdev_t sg_dev;
+static int sg_operation = -1;
+static unsigned long sg_next_sect;
+#define DISABLE_SCATTERGATHER() (sg_operation = -1)
+
+static inline void signal_requests_to_xen(void)
+{
+ DISABLE_SCATTERGATHER();
+ blk_ring->req_prod = req_prod;
+ HYPERVISOR_block_io_op();
+}
+
+/* Convert from a XenoLinux major device to the Xen-level 'physical' device */
+inline unsigned short xldev_to_physdev(kdev_t xldev)
+{
+ unsigned short physdev = 0;
+
+ switch ( MAJOR(xldev) )
+ {
+ case XLIDE_MAJOR_0:
+ physdev = XENDEV_IDE + (0*XLIDE_DEVS_PER_MAJOR) +
+ (MINOR(xldev) >> XLIDE_PARTN_SHIFT);
+ break;
+
+ case XLIDE_MAJOR_1:
+ physdev = XENDEV_IDE + (1*XLIDE_DEVS_PER_MAJOR) +
+ (MINOR(xldev) >> XLIDE_PARTN_SHIFT);
+ break;
+
+ case XLSCSI_MAJOR:
+ physdev = XENDEV_SCSI + (MINOR(xldev) >> XLSCSI_PARTN_SHIFT);
+ break;
+
+ case XLVIRT_MAJOR:
+ physdev = XENDEV_VIRTUAL + (MINOR(xldev) >> XLVIRT_PARTN_SHIFT);
+ break;
+ }
+
+ if ( physdev == 0 ) BUG();
+
+ return physdev;
+}
+
+
+static inline struct gendisk *xldev_to_gendisk(kdev_t xldev)
+{
+ struct gendisk *gd = NULL;
+
+ switch ( MAJOR(xldev) )
+ {
+ case XLIDE_MAJOR_0:
+ gd = xlide_gendisk[0];
+ break;
+
+ case XLIDE_MAJOR_1:
+ gd = xlide_gendisk[1];
+ break;
+
+ case XLSCSI_MAJOR:
+ gd = xlscsi_gendisk;
+ break;
+
+ case XLVIRT_MAJOR:
+ gd = xlsegment_gendisk;
+ break;
+ }
+
+ if ( gd == NULL ) BUG();
+
+ return gd;
+}
+
+
+static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
+{
+ struct gendisk *gd = xldev_to_gendisk(xldev);
+ return (xl_disk_t *)gd->real_devices +
+ (MINOR(xldev) >> PARTN_SHIFT(xldev));
+}
+
+
+int xenolinux_block_open(struct inode *inode, struct file *filep)
+{
+ xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+ disk->usage++;
+ DPRINTK("xenolinux_block_open\n");
+ return 0;
+}
+
+
+int xenolinux_block_release(struct inode *inode, struct file *filep)
+{
+ xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+ disk->usage--;
+ DPRINTK("xenolinux_block_release\n");
+ return 0;
+}
+
+/*
+ * handle ioctl calls
+ *
+ * individual ioctls are defined in /usr/include/linux/fs.h
+ */
+
+int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument)
+{
+ kdev_t dev = inode->i_rdev;
+ struct hd_geometry *geo = (struct hd_geometry *)argument;
+ struct gendisk *gd;
+ struct hd_struct *part;
+
+ DPRINTK("xenolinux_block_ioctl\n");
+
+ /* check permissions */
+ if (!capable(CAP_SYS_ADMIN)) return -EPERM;
+ if (!inode) return -EINVAL;
+
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+ command, (long) argument, dev);
+
+ gd = xldev_to_gendisk(dev);
+ part = &gd->part[MINOR(dev)];
+
+ switch ( command )
+ {
+ case BLKGETSIZE:
+ DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
+ return put_user(part->nr_sects, (unsigned long *) argument);
+
+ case BLKRRPART: /* re-read partition table */
+ DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
+ return xenolinux_block_revalidate(dev);
+
+ case BLKSSZGET:
+ switch ( MAJOR(dev) )
+ {
+ case XLIDE_MAJOR_0:
+ DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET,
+ xlide_hwsect(MINOR(dev)));
+ return xlide_hwsect(MINOR(dev));
+
+ case XLSCSI_MAJOR:
+ DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET,
+ xlscsi_hwsect(MINOR(dev)));
+ return xlscsi_hwsect(MINOR(dev));
+
+ case XLVIRT_MAJOR:
+ DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET,
+ xlsegment_hwsect(MINOR(dev)));
+ return xlsegment_hwsect(MINOR(dev));
+
+ default:
+ printk(KERN_ALERT "BLKSSZGET ioctl() on bogus disk!\n");
+ return 0;
+ }
+
+ case BLKBSZGET: /* get block size */
+ DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
+ break;
+
+ case BLKBSZSET: /* set block size */
+ DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
+ break;
+
+ case BLKRASET: /* set read-ahead */
+ DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
+ break;
+
+ case BLKRAGET: /* get read-ahead */
+ DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET);
+ break;
+
+ case HDIO_GETGEO:
+ /* note: these values are complete garbage */
+ DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
+ if (!argument) return -EINVAL;
+ if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+ if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
+ if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
+ if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
+ return 0;
+
+ case HDIO_GETGEO_BIG:
+ /* note: these values are complete garbage */
+ DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
+ if (!argument) return -EINVAL;
+ if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+ if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
+ if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
+ if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
+ return 0;
+
+ case CDROMMULTISESSION:
+ printk("FIXME: support multisession CDs later\n");
+ memset((struct cdrom_multisession *)argument, 0,
+ sizeof(struct cdrom_multisession));
+ return 0;
+
+ default:
+ printk("ioctl %08x not supported by xl_block\n", command);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+int xenolinux_block_check(kdev_t dev)
+{
+ DPRINTK("xenolinux_block_check\n");
+ return 0;
+}
+
+int xenolinux_block_revalidate(kdev_t dev)
+{
+ struct gendisk *gd = xldev_to_gendisk(dev);
+ xl_disk_t *disk = xldev_to_xldisk(dev);
+ unsigned long flags;
+ int i, partn_shift = PARTN_SHIFT(dev);
+ int xdev = dev & XENDEV_IDX_MASK;
+
+ DPRINTK("xenolinux_block_revalidate: %d %d %d\n",
+ dev, xdev, XENDEV_IDX_MASK);
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ if ( disk->usage > 1 )
+ {
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return -EBUSY;
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ for ( i = 0; i < (1 << partn_shift); i++ )
+ {
+ invalidate_device(xdev + i, 1);
+ gd->part[xdev + i].start_sect = 0;
+ gd->part[xdev + i].nr_sects = 0;
+ }
+
+ grok_partitions(gd, MINOR(dev) >> partn_shift,
+ 1 << partn_shift, disk->capacity);
+
+ return 0;
+}
+
+
+/*
+ * hypervisor_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: XEN_BLOCK_{READ,WRITE,PROBE*,SEG*}
+ * buffer: buffer to read/write into. this should be a
+ * virtual address in the guest os.
+ */
+static int hypervisor_request(unsigned long id,
+ int operation,
+ char * buffer,
+ unsigned long sector_number,
+ unsigned short nr_sectors,
+ kdev_t device)
+{
+ unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
+ kdev_t phys_device = (kdev_t) 0;
+ struct gendisk *gd;
+ blk_ring_req_entry_t *req;
+ struct buffer_head *bh;
+
+ if ( nr_sectors >= (1<<9) ) BUG();
+ if ( (buffer_ma & ((1<<9)-1)) != 0 ) BUG();
+
+ switch ( operation )
+ {
+ case XEN_BLOCK_SEG_CREATE:
+ case XEN_BLOCK_SEG_DELETE:
+ case XEN_BLOCK_PROBE_BLK:
+ case XEN_BLOCK_PROBE_SEG:
+ if ( RING_FULL ) return 1;
+ phys_device = (kdev_t) 0;
+ sector_number = 0;
+ DISABLE_SCATTERGATHER();
+ break;
+
+ case XEN_BLOCK_READ:
+ case XEN_BLOCK_WRITE:
+ phys_device = xldev_to_physdev(device);
+ gd = xldev_to_gendisk(device);
+
+ sector_number += gd->part[MINOR(device)].start_sect;
+ if ( (sg_operation == operation) &&
+ (sg_dev == phys_device) &&
+ (sg_next_sect == sector_number) )
+ {
+ req = &blk_ring->ring[(req_prod-1)&(BLK_RING_SIZE-1)].req;
+ bh = (struct buffer_head *)id;
+ bh->b_reqnext = (struct buffer_head *)req->id;
+ req->id = id;
+ req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
+ if ( ++req->nr_segments < MAX_BLK_SEGS )
+ sg_next_sect += nr_sectors;
+ else
+ DISABLE_SCATTERGATHER();
+ return 0;
+ }
+ else if ( RING_FULL )
+ {
+ return 1;
+ }
+ else
+ {
+ sg_operation = operation;
+ sg_dev = phys_device;
+ sg_next_sect = sector_number + nr_sectors;
+ }
+ break;
+
+ default:
+ panic("unknown op %d\n", operation);
+ }
+
+ /* Fill out a communications ring structure. */
+ req = &blk_ring->ring[req_prod].req;
+ req->id = id;
+ req->operation = operation;
+ req->sector_number = sector_number;
+ req->device = phys_device;
+ req->nr_segments = 1;
+ req->buffer_and_sects[0] = buffer_ma | nr_sectors;
+ req_prod = BLK_RING_INC(req_prod);
+
+ return 0;
+}
+
+
+/*
+ * do_xlblk_request
+ * read a block; request is in a request queue
+ */
+void do_xlblk_request(request_queue_t *rq)
+{
+ struct request *req;
+ struct buffer_head *bh, *next_bh;
+ int rw, nsect, full, queued = 0;
+
+ DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME);
+
+ while ( !rq->plugged && !list_empty(&rq->queue_head))
+ {
+ if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
+ goto out;
+
+ DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
+ req, req->cmd, req->sector,
+ req->current_nr_sectors, req->nr_sectors, req->bh);
+
+ rw = req->cmd;
+ if ( rw == READA ) rw = READ;
+ if ((rw != READ) && (rw != WRITE))
+ panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
+
+ req->errors = 0;
+
+ bh = req->bh;
+ while ( bh != NULL )
+ {
+ next_bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+
+ full = hypervisor_request(
+ (unsigned long)bh,
+ (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
+ bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
+
+ if ( full )
+ {
+ bh->b_reqnext = next_bh;
+ pending_queues[nr_pending++] = rq;
+ if ( nr_pending >= MAX_PENDING ) BUG();
+ goto out;
+ }
+
+ queued++;
+
+ /* Dequeue the buffer head from the request. */
+ nsect = bh->b_size >> 9;
+ bh = req->bh = next_bh;
+
+ if ( bh != NULL )
+ {
+ /* There's another buffer head to do. Update the request. */
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector = req->hard_sector;
+ req->nr_sectors = req->hard_nr_sectors;
+ req->current_nr_sectors = bh->b_size >> 9;
+ req->buffer = bh->b_data;
+ }
+ else
+ {
+ /* That was the last buffer head. Finalise the request. */
+ if ( end_that_request_first(req, 1, "XenBlk") ) BUG();
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+ }
+ }
+ }
+
+ out:
+ if ( queued != 0 ) signal_requests_to_xen();
+}
+
+
+static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ int i;
+ unsigned long flags;
+ struct buffer_head *bh, *next_bh;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ for ( i = resp_cons;
+ i != blk_ring->resp_prod;
+ i = BLK_RING_INC(i) )
+ {
+ blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp;
+ switch (bret->operation)
+ {
+ case XEN_BLOCK_READ:
+ case XEN_BLOCK_WRITE:
+ if ( bret->status )
+ printk(KERN_ALERT "Bad return from blkdev data request: %lx\n",
+ bret->status);
+ for ( bh = (struct buffer_head *)bret->id;
+ bh != NULL;
+ bh = next_bh )
+ {
+ next_bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ bh->b_end_io(bh, !bret->status);
+ }
+ break;
+
+ case XEN_BLOCK_SEG_CREATE:
+ case XEN_BLOCK_SEG_DELETE:
+ case XEN_BLOCK_PROBE_SEG:
+ case XEN_BLOCK_PROBE_BLK:
+ if ( bret->status )
+ printk(KERN_ALERT "Bad return from blkdev control request\n");
+ xlblk_control_msg_pending = 0;
+ break;
+
+ default:
+ BUG();
+ }
+ }
+
+ resp_cons = i;
+
+ /* We kick pending request queues if the ring is reasonably empty. */
+ if ( (nr_pending != 0) &&
+ (((req_prod - resp_cons) & (BLK_RING_SIZE - 1)) <
+ (BLK_RING_SIZE >> 1)) )
+ {
+ /* Attempt to drain the queue, but bail if the ring becomes full. */
+ while ( nr_pending != 0 )
+ {
+ do_xlblk_request(pending_queues[--nr_pending]);
+ if ( RING_FULL ) break;
+ }
+ }
+
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+/* Send a synchronous message to Xen. */
+int xenolinux_control_msg(int operation, char *buffer, int size)
+{
+ unsigned long flags;
+ char *aligned_buf;
+
+ /* We copy from an aligned buffer, as interface needs sector alignment. */
+ aligned_buf = (char *)get_free_page(GFP_KERNEL);
+ if ( aligned_buf == NULL ) BUG();
+ memcpy(aligned_buf, buffer, size);
+
+ xlblk_control_msg_pending = 1;
+ spin_lock_irqsave(&io_request_lock, flags);
+ /* Note that size gets rounded up to a sector-sized boundary. */
+ if ( hypervisor_request(0, operation, aligned_buf, 0, (size+511)/512, 0) )
+ return -EAGAIN;
+ signal_requests_to_xen();
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ while ( xlblk_control_msg_pending ) barrier();
+
+ memcpy(buffer, aligned_buf, size);
+ free_page((unsigned long)aligned_buf);
+
+ return 0;
+}
+
+
+int __init xlblk_init(void)
+{
+ int error;
+
+ xlblk_control_msg_pending = 0;
+ nr_pending = 0;
+
+ /* This mapping was created early at boot time. */
+ blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
+ blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+
+ error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int,
+ SA_SAMPLE_RANDOM, "xlblk-response", NULL);
+ if ( error )
+ {
+ printk(KERN_ALERT "Could not allocate receive interrupt\n");
+ goto fail;
+ }
+
+ /* Probe for disk information. */
+ memset(&xlblk_disk_info, 0, sizeof(xlblk_disk_info));
+ error = xenolinux_control_msg(XEN_BLOCK_PROBE_BLK,
+ (char *)&xlblk_disk_info,
+ sizeof(xen_disk_info_t));
+ if ( error )
+ {
+ printk(KERN_ALERT "Could not probe disks (%d)\n", error);
+ free_irq(XLBLK_RESPONSE_IRQ, NULL);
+ goto fail;
+ }
+
+ /* Pass the information to our fake IDE and SCSI susbystems. */
+ xlide_init(&xlblk_disk_info);
+ xlscsi_init(&xlblk_disk_info);
+
+ return 0;
+
+ fail:
+ return error;
+}
+
+static void __exit xlblk_cleanup(void)
+{
+ xlide_cleanup();
+ xlscsi_cleanup();
+ free_irq(XLBLK_RESPONSE_IRQ, NULL);
+}
+
+
+#ifdef MODULE
+module_init(xlblk_init);
+module_exit(xlblk_cleanup);
+#endif