aboutsummaryrefslogtreecommitdiffstats
path: root/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
diff options
context:
space:
mode:
Diffstat (limited to 'old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c')
-rw-r--r--old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c1251
1 files changed, 1251 insertions, 0 deletions
diff --git a/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c b/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
new file mode 100644
index 0000000000..cd1cb7ca9c
--- /dev/null
+++ b/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
@@ -0,0 +1,1251 @@
+/*
+ * linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/locks.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <linux/blk.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have protect against block device array operations, and
+ * the do_request() side is casually still unsafe. The kernel lock protects
+ * this part currently.).
+ *
+ * there is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ * *request_fn
+ * *current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024 byte
+ * sectors:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ * then 512 bytes is assumed.
+ * else
+ * sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+/*
+ * How many reqeusts do we allocate per queue,
+ * and how many do we "batch" on freeing them?
+ */
+static int queue_nr_requests, batch_requests;
+
+static inline int get_max_sectors(kdev_t dev)
+{
+ if (!max_sectors[MAJOR(dev)])
+ return MAX_SECTORS;
+ return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+ struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+ if (bdev->queue)
+ return bdev->queue(dev);
+ else
+ return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+ struct list_head *head = &list->free;
+ struct request *rq;
+ int i = 0;
+
+ while (!list_empty(head)) {
+ rq = list_entry(head->next, struct request, queue);
+ list_del(&rq->queue);
+ kmem_cache_free(request_cachep, rq);
+ i++;
+ };
+
+ if (i != list->count)
+ printk("request list leak!\n");
+
+ list->count = 0;
+ return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q: the request queue to be released
+ *
+ * Description:
+ * blk_cleanup_queue is the pair to blk_init_queue(). It should
+ * be called when a request queue is being released; typically
+ * when a block device is being de-registered. Currently, its
+ * primary task it to free all the &struct request structures that
+ * were allocated to the queue.
+ * Caveat:
+ * Hopefully the low level driver will have finished any
+ * outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+ int count = queue_nr_requests;
+
+ count -= __blk_cleanup_queue(&q->rq[READ]);
+ count -= __blk_cleanup_queue(&q->rq[WRITE]);
+
+ if (count)
+ printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+
+ memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q: The queue which this applies to.
+ * @active: A flag indication where the head of the queue is active.
+ *
+ * Description:
+ * The driver for a block device may choose to leave the currently active
+ * request on the request queue, removing it only when it has completed.
+ * The queue handling routines assume this by default for safety reasons
+ * and will not involve the head of the request queue in any merging or
+ * reordering of requests when the queue is unplugged (and thus may be
+ * working on this particular request).
+ *
+ * If a driver removes requests from the queue before processing them, then
+ * it may indicate that it does so, there by allowing the head of the queue
+ * to be involved in merging and reordering. This is done be calling
+ * blk_queue_headactive() with an @active flag of %0.
+ *
+ * If a driver processes several requests at once, it must remove them (or
+ * at least all but one of them) from the request queue.
+ *
+ * When a queue is plugged the head will be assumed to be inactive.
+ **/
+
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+ q->head_active = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q: the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ * The normal way for &struct buffer_heads to be passed to a device
+ * driver is for them to be collected into requests on a request
+ * queue, and then to allow the device driver to select requests
+ * off that queue when it is ready. This works well for many block
+ * devices. However some block devices (typically virtual devices
+ * such as md or lvm) do not benefit from the processing on the
+ * request queue, and are served best by having the requests passed
+ * directly to them. This can be achieved by providing a function
+ * to blk_queue_make_request().
+ *
+ * Caveat:
+ * The driver that does this *must* be able to deal appropriately
+ * with buffers in "highmemory", either by calling bh_kmap() to get
+ * a kernel mapping, to by calling create_bounce() to create a
+ * buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+ q->make_request_fn = mfn;
+}
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+ if (req->nr_segments < max_segments) {
+ req->nr_segments++;
+ return 1;
+ }
+ return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (bh->b_data + bh->b_size == req->bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+ struct request *next, int max_segments)
+{
+ int total_segments = req->nr_segments + next->nr_segments;
+
+ if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+ total_segments--;
+
+ if (total_segments > max_segments)
+ return 0;
+
+ req->nr_segments = total_segments;
+ return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+ /*
+ * no need to replug device
+ */
+ if (!list_empty(&q->queue_head) || q->plugged)
+ return;
+
+ q->plugged = 1;
+ queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+ if (q->plugged) {
+ q->plugged = 0;
+ if (!list_empty(&q->queue_head))
+ q->request_fn(q);
+ }
+}
+
+void generic_unplug_device(void *data)
+{
+ request_queue_t *q = (request_queue_t *) data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ __generic_unplug_device(q);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+static void blk_init_free_list(request_queue_t *q)
+{
+ struct request *rq;
+ int i;
+
+ INIT_LIST_HEAD(&q->rq[READ].free);
+ INIT_LIST_HEAD(&q->rq[WRITE].free);
+ q->rq[READ].count = 0;
+ q->rq[WRITE].count = 0;
+
+ /*
+ * Divide requests in half between read and write
+ */
+ for (i = 0; i < queue_nr_requests; i++) {
+ rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
+ if (rq == NULL) {
+ /* We'll get a `leaked requests' message from blk_cleanup_queue */
+ printk(KERN_EMERG "blk_init_free_list: error allocating requests\n");
+ break;
+ }
+ memset(rq, 0, sizeof(struct request));
+ rq->rq_status = RQ_INACTIVE;
+ list_add(&rq->queue, &q->rq[i&1].free);
+ q->rq[i&1].count++;
+ }
+
+ init_waitqueue_head(&q->wait_for_request);
+ spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue - prepare a request queue for use with a block device
+ * @q: The &request_queue_t to be initialised
+ * @rfn: The function to be called to process requests that have been
+ * placed on the queue.
+ *
+ * Description:
+ * If a block device wishes to use the standard request handling procedures,
+ * which sorts requests and coalesces adjacent requests, then it must
+ * call blk_init_queue(). The function @rfn will be called when there
+ * are requests on the queue that need to be processed. If the device
+ * supports plugging, then @rfn may not be called immediately when requests
+ * are available on the queue, but may be called at some time later instead.
+ * Plugged queues are generally unplugged when a buffer belonging to one
+ * of the requests on the queue is needed, or due to memory pressure.
+ *
+ * @rfn is not required, or even expected, to remove all requests off the
+ * queue, but only as many as it can handle at a time. If it does leave
+ * requests on the queue, it is responsible for arranging that the requests
+ * get dealt with eventually.
+ *
+ * A global spin lock $io_request_lock must be held while manipulating the
+ * requests on the request queue.
+ *
+ * The request on the head of the queue is by default assumed to be
+ * potentially active, and it is not considered for re-ordering or merging
+ * whenever the given queue is unplugged. This behaviour can be changed with
+ * blk_queue_headactive().
+ *
+ * Note:
+ * blk_init_queue() must be paired with a blk_cleanup_queue() call
+ * when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+ INIT_LIST_HEAD(&q->queue_head);
+ elevator_init(&q->elevator, ELEVATOR_LINUS);
+ blk_init_free_list(q);
+ q->request_fn = rfn;
+ q->back_merge_fn = ll_back_merge_fn;
+ q->front_merge_fn = ll_front_merge_fn;
+ q->merge_requests_fn = ll_merge_requests_fn;
+ q->make_request_fn = __make_request;
+ q->plug_tq.sync = 0;
+ q->plug_tq.routine = &generic_unplug_device;
+ q->plug_tq.data = q;
+ q->plugged = 0;
+ /*
+ * These booleans describe the queue properties. We set the
+ * default (and most common) values here. Other drivers can
+ * use the appropriate functions to alter the queue properties.
+ * as appropriate.
+ */
+ q->plug_device_fn = generic_plug_device;
+ q->head_active = 1;
+}
+
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in.
+ */
+static inline struct request *get_request(request_queue_t *q, int rw)
+{
+ struct request *rq = NULL;
+ struct request_list *rl = q->rq + rw;
+
+ if (!list_empty(&rl->free)) {
+ rq = blkdev_free_rq(&rl->free);
+ list_del(&rq->queue);
+ rl->count--;
+ rq->rq_status = RQ_ACTIVE;
+ rq->special = NULL;
+ rq->q = q;
+ }
+
+ return rq;
+}
+
+/*
+ * No available requests for this queue, unplug the device.
+ */
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+ register struct request *rq;
+ DECLARE_WAITQUEUE(wait, current);
+
+ generic_unplug_device(q);
+ add_wait_queue(&q->wait_for_request, &wait);
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (q->rq[rw].count < batch_requests)
+ schedule();
+ spin_lock_irq(&io_request_lock);
+ rq = get_request(q,rw);
+ spin_unlock_irq(&io_request_lock);
+ } while (rq == NULL);
+ remove_wait_queue(&q->wait_for_request, &wait);
+ current->state = TASK_RUNNING;
+ return rq;
+}
+
+static inline struct request *get_request_wait(request_queue_t *q, int rw)
+{
+ register struct request *rq;
+
+ spin_lock_irq(&io_request_lock);
+ rq = get_request(q, rw);
+ spin_unlock_irq(&io_request_lock);
+ if (rq)
+ return rq;
+ return __get_request_wait(q, rw);
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return 0;
+ return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return;
+ if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+ else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+ unsigned long nr_sectors, int new_io)
+{
+ unsigned int major = MAJOR(dev);
+ unsigned int index;
+
+ index = disk_index(dev);
+ if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+ return;
+
+ kstat.dk_drive[major][index] += new_io;
+ if (rw == READ) {
+ kstat.dk_drive_rio[major][index] += new_io;
+ kstat.dk_drive_rblk[major][index] += nr_sectors;
+ } else if (rw == WRITE) {
+ kstat.dk_drive_wio[major][index] += new_io;
+ kstat.dk_drive_wblk[major][index] += nr_sectors;
+ } else
+ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+}
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+ struct list_head *insert_here)
+{
+ drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+ if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+ spin_unlock_irq(&io_request_lock);
+ printk("list_empty(&q->queue_head) is %d\n",
+ list_empty(&q->queue_head));
+ BUG();
+ }
+
+ /*
+ * elevator indicated where it wants this request to be
+ * inserted at elevator_merge time
+ */
+ list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+inline void blkdev_release_request(struct request *req)
+{
+ request_queue_t *q = req->q;
+ int rw = req->cmd;
+
+ req->rq_status = RQ_INACTIVE;
+ req->q = NULL;
+
+ /*
+ * Request may not have originated from ll_rw_blk. if not,
+ * assume it has free buffers and check waiters
+ */
+ if (q) {
+ list_add(&req->queue, &q->rq[rw].free);
+ if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request))
+ wake_up(&q->wait_for_request);
+ }
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct request *next;
+
+ next = blkdev_next_request(req);
+ if (req->sector + req->nr_sectors != next->sector)
+ return;
+ if (req->cmd != next->cmd
+ || req->rq_dev != next->rq_dev
+ || req->nr_sectors + next->nr_sectors > max_sectors
+ || next->waiting)
+ return;
+ /*
+ * If we are not allowed to merge these requests, then
+ * return. If we are allowed to merge, then the count
+ * will have been updated to the appropriate number,
+ * and we shouldn't do it here too.
+ */
+ if (!q->merge_requests_fn(q, req, next, max_segments))
+ return;
+
+ q->elevator.elevator_merge_req_fn(req, next);
+ req->bhtail->b_reqnext = next->bh;
+ req->bhtail = next->bhtail;
+ req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+ list_del(&next->queue);
+ blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ if (&req->queue == q->queue_head.prev)
+ return;
+ attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+ struct list_head * head,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct list_head * prev;
+
+ prev = req->queue.prev;
+ if (head == prev)
+ return;
+ attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+ struct buffer_head * bh)
+{
+ unsigned int sector, count;
+ int max_segments = MAX_SEGMENTS;
+ struct request * req, *freereq = NULL;
+ int rw_ahead, max_sectors, el_ret;
+ struct list_head *head, *insert_here;
+ int latency;
+ elevator_t *elevator = &q->elevator;
+
+ count = bh->b_size >> 9;
+ sector = bh->b_rsector;
+
+ rw_ahead = 0; /* normal case; gets changed below for READA */
+ switch (rw) {
+ case READA:
+ rw_ahead = 1;
+ rw = READ; /* drop into READ */
+ case READ:
+ case WRITE:
+ latency = elevator_request_latency(elevator, rw);
+ break;
+ default:
+ BUG();
+ goto end_io;
+ }
+
+ /* We'd better have a real physical mapping!
+ Check this bit only if the buffer was dirty and just locked
+ down by us so at this point flushpage will block and
+ won't clear the mapped bit under us. */
+ if (!buffer_mapped(bh))
+ BUG();
+
+ /*
+ * Temporary solution - in 2.5 this will be done by the lowlevel
+ * driver. Create a bounce buffer if the buffer data points into
+ * high memory - keep the original buffer otherwise.
+ */
+#if CONFIG_HIGHMEM
+ bh = create_bounce(rw, bh);
+#endif
+
+/* look for a free request. */
+ /*
+ * Try to coalesce the new request with old requests
+ */
+ max_sectors = get_max_sectors(bh->b_rdev);
+
+again:
+ req = NULL;
+ head = &q->queue_head;
+ /*
+ * Now we acquire the request spinlock, we have to be mega careful
+ * not to schedule or do something nonatomic
+ */
+ spin_lock_irq(&io_request_lock);
+
+ insert_here = head->prev;
+ if (list_empty(head)) {
+ q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+ goto get_rq;
+ } else if (q->head_active && !q->plugged)
+ head = head->next;
+
+ el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+ switch (el_ret) {
+
+ case ELEVATOR_BACK_MERGE:
+ if (!q->back_merge_fn(q, req, bh, max_segments))
+ break;
+ elevator->elevator_merge_cleanup_fn(q, req, count);
+ req->bhtail->b_reqnext = bh;
+ req->bhtail = bh;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ attempt_back_merge(q, req, max_sectors, max_segments);
+ goto out;
+
+ case ELEVATOR_FRONT_MERGE:
+ if (!q->front_merge_fn(q, req, bh, max_segments))
+ break;
+ elevator->elevator_merge_cleanup_fn(q, req, count);
+ bh->b_reqnext = req->bh;
+ req->bh = bh;
+ req->buffer = bh->b_data;
+ req->current_nr_sectors = count;
+ req->sector = req->hard_sector = sector;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ attempt_front_merge(q, head, req, max_sectors, max_segments);
+ goto out;
+
+ /*
+ * elevator says don't/can't merge. get new request
+ */
+ case ELEVATOR_NO_MERGE:
+ /*
+ * use elevator hints as to where to insert the
+ * request. if no hints, just add it to the back
+ * of the queue
+ */
+ if (req)
+ insert_here = &req->queue;
+ break;
+
+ default:
+ printk("elevator returned crap (%d)\n", el_ret);
+ BUG();
+ }
+
+ /*
+ * Grab a free request from the freelist - if that is empty, check
+ * if we are doing read ahead and abort instead of blocking for
+ * a free slot.
+ */
+get_rq:
+ if (freereq) {
+ req = freereq;
+ freereq = NULL;
+ } else if ((req = get_request(q, rw)) == NULL) {
+ spin_unlock_irq(&io_request_lock);
+ if (rw_ahead)
+ goto end_io;
+
+ freereq = __get_request_wait(q, rw);
+ goto again;
+ }
+
+/* fill up the request-info, and add it to the queue */
+ req->elevator_sequence = latency;
+ req->cmd = rw;
+ req->errors = 0;
+ req->hard_sector = req->sector = sector;
+ req->hard_nr_sectors = req->nr_sectors = count;
+ req->current_nr_sectors = count;
+ req->nr_segments = 1; /* Always 1 for a new request. */
+ req->nr_hw_segments = 1; /* Always 1 for a new request. */
+ req->buffer = bh->b_data;
+ req->waiting = NULL;
+ req->bh = bh;
+ req->bhtail = bh;
+ req->rq_dev = bh->b_rdev;
+ blk_started_io(count);
+
+ add_request(q, req, insert_here);
+out:
+ if (freereq)
+ blkdev_release_request(freereq);
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to it's device driver for I/O
+ * @rw: READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh: The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value. The
+ * %READ and %WRITE options are (hopefully) obvious in meaning. The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status. The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) else where.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled. BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector. So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+ int major = MAJOR(bh->b_rdev);
+ int minorsize = 0;
+ request_queue_t *q;
+
+ if (!bh->b_end_io)
+ BUG();
+
+ /* Test device size, when known. */
+ if (blk_size[major])
+ minorsize = blk_size[major][MINOR(bh->b_rdev)];
+ if (minorsize) {
+ unsigned long maxsector = (minorsize << 1) + 1;
+ unsigned long sector = bh->b_rsector;
+ unsigned int count = bh->b_size >> 9;
+
+ if (maxsector < count || maxsector - count < sector) {
+ /* Yecch */
+ bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
+
+ /* This may well happen - the kernel calls bread()
+ without checking the size of the device, e.g.,
+ when mounting a device. */
+ printk(KERN_INFO
+ "attempt to access beyond end of device\n");
+ printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+ kdevname(bh->b_rdev), rw,
+ (sector + count)>>1, minorsize);
+
+ /* Yecch again */
+ bh->b_end_io(bh, 0);
+ return;
+ }
+ }
+
+ /*
+ * Resolve the mapping until finished. (drivers are
+ * still free to implement/resolve their own stacking
+ * by explicitly returning 0)
+ */
+ /* NOTE: we don't repeat the blk_size check for each new device.
+ * Stacking drivers are expected to know what they are doing.
+ */
+ do {
+ q = blk_get_queue(bh->b_rdev);
+ if (!q) {
+ printk(KERN_ERR
+ "generic_make_request: Trying to access "
+ "nonexistent block-device %s (%ld)\n",
+ kdevname(bh->b_rdev), bh->b_rsector);
+ buffer_IO_error(bh);
+ break;
+ }
+ } while (q->make_request_fn(q, rw, bh));
+}
+
+
+/**
+ * submit_bh: submit a buffer_head to the block device later for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+ int count = bh->b_size >> 9;
+
+ if (!test_bit(BH_Lock, &bh->b_state))
+ BUG();
+
+ set_bit(BH_Req, &bh->b_state);
+
+ /*
+ * First step, 'identity mapping' - RAID or LVM might
+ * further remap this.
+ */
+ bh->b_rdev = bh->b_dev;
+ bh->b_rsector = bh->b_blocknr * count;
+
+ generic_make_request(rw, bh);
+
+ switch (rw) {
+ case WRITE:
+ kstat.pgpgout += count;
+ break;
+ default:
+ kstat.pgpgin += count;
+ break;
+ }
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality that is not in
+ * generic_make_request() that is relevant to buffers in the buffer
+ * cache or page cache. In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing read request. Further it marks
+ * as clean buffers that are processed for writing (the buffer cache
+ * wont assume that they are actually clean until the buffer gets
+ * unlocked).
+ *
+ * ll_rw_block sets b_end_io to simple completion handler that marks
+ * the buffer up-to-date (if approriate), unlocks the buffer and wakes
+ * any waiters. As client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ * All of the buffers must be for the same device, and must also be
+ * of the current approved size for the device. */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+ unsigned int major;
+ int correct_size;
+ int i;
+
+ if (!nr)
+ return;
+
+ major = MAJOR(bhs[0]->b_dev);
+
+ /* Determine correct block size for this device. */
+ correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+ /* Verify requested block sizes. */
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+ if (bh->b_size % correct_size) {
+ printk(KERN_NOTICE "ll_rw_block: device %s: "
+ "only %d-char blocks implemented (%u)\n",
+ kdevname(bhs[0]->b_dev),
+ correct_size, bh->b_size);
+ goto sorry;
+ }
+ }
+
+ if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+ printk(KERN_NOTICE "Can't write to read-only device %s\n",
+ kdevname(bhs[0]->b_dev));
+ goto sorry;
+ }
+
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+ /* Only one thread can actually submit the I/O. */
+ if (test_and_set_bit(BH_Lock, &bh->b_state))
+ continue;
+
+ /* We have the buffer lock */
+ atomic_inc(&bh->b_count);
+ bh->b_end_io = end_buffer_io_sync;
+
+ switch(rw) {
+ case WRITE:
+ if (!atomic_set_buffer_clean(bh))
+ /* Hmmph! Nothing to write */
+ goto end_io;
+ __mark_buffer_clean(bh);
+ break;
+
+ case READA:
+ case READ:
+ if (buffer_uptodate(bh))
+ /* Hmmph! Already have it */
+ goto end_io;
+ break;
+ default:
+ BUG();
+ end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ continue;
+ }
+
+ submit_bh(rw, bh);
+ }
+ return;
+
+sorry:
+ /* Make sure we don't get infinite dirty retries.. */
+ for (i = 0; i < nr; i++)
+ mark_buffer_clean(bhs[i]);
+}
+
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req: the request being processed
+ * @uptodate: 0 for I/O error
+ * @name: the name printed for an I/O error
+ *
+ * Description:
+ * Ends I/O on the first buffer attached to @req, and sets it up
+ * for the next buffer_head (if any) in the cluster.
+ *
+ * Return:
+ * 0 - we are done with this request, call end_that_request_last()
+ * 1 - still buffers pending for this request
+ *
+ * Caveat:
+ * Drivers implementing their own end_request handling must call
+ * blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+ struct buffer_head * bh;
+ int nsect;
+
+ req->errors = 0;
+ if (!uptodate)
+ printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+ kdevname(req->rq_dev), name, req->sector);
+
+ if ((bh = req->bh) != NULL) {
+ nsect = bh->b_size >> 9;
+ blk_finished_io(nsect);
+ req->bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector = req->hard_sector;
+ req->nr_sectors = req->hard_nr_sectors;
+
+ req->current_nr_sectors = bh->b_size >> 9;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("end_request: buffer-list destroyed\n");
+ }
+ req->buffer = bh->b_data;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void end_that_request_last(struct request *req)
+{
+ if (req->waiting != NULL)
+ complete(req->waiting);
+
+ blkdev_release_request(req);
+}
+
+#define MB(kb) ((kb) << 10)
+
+int __init blk_dev_init(void)
+{
+ struct blk_dev_struct *dev;
+ int total_ram;
+
+ request_cachep = kmem_cache_create("blkdev_requests",
+ sizeof(struct request),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ if (!request_cachep)
+ panic("Can't create request pool slab cache\n");
+
+ for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+ dev->queue = NULL;
+
+ memset(ro_bits,0,sizeof(ro_bits));
+ memset(max_readahead, 0, sizeof(max_readahead));
+ memset(max_sectors, 0, sizeof(max_sectors));
+
+ total_ram = nr_free_pages() << (PAGE_SHIFT - 10);
+
+ /*
+ * Free request slots per queue.
+ * (Half for reads, half for writes)
+ */
+ queue_nr_requests = 64;
+ if (total_ram > MB(32))
+ queue_nr_requests = 128;
+
+ /*
+ * Batch frees according to queue length
+ */
+ batch_requests = queue_nr_requests/4;
+ printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests);
+
+#ifdef CONFIG_AMIGA_Z2RAM
+ z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+ stram_device_init();
+#endif
+#ifdef CONFIG_BLK_DEV_RAM
+ rd_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+ isp16_init();
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
+ ide_init(); /* this MUST precede hd_init */
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
+ hd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+ ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+ xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+ mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+ { extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+ swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+ swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+ amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+ atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+ floppy_init();
+#else
+#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */
+ outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+ cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+ acsi_init();
+#endif
+#ifdef CONFIG_MCD
+ mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+ mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+ sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+ aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+ sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+ gscd_init();
+#endif
+#ifdef CONFIG_CM206
+ cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+ optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+ sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+ ap_init();
+#endif
+#ifdef CONFIG_DDV
+ ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+ mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+ dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+ tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+ xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+ jsfd_init();
+#endif
+
+#ifdef CONFIG_XENOLINUX_BLOCK
+ xlblk_init();
+#endif
+
+ return 0;
+};
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(generic_unplug_device);