diff options
Diffstat (limited to 'old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c')
-rw-r--r-- | old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c | 1251 |
1 files changed, 1251 insertions, 0 deletions
diff --git a/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c b/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c new file mode 100644 index 0000000000..cd1cb7ca9c --- /dev/null +++ b/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c @@ -0,0 +1,1251 @@ +/* + * linux/drivers/block/ll_rw_blk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE + * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> + * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 + */ + +/* + * This handles all read/write requests to block devices + */ +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/config.h> +#include <linux/locks.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/smp_lock.h> +#include <linux/completion.h> + +#include <asm/system.h> +#include <asm/io.h> +#include <linux/blk.h> +#include <linux/highmem.h> +#include <linux/slab.h> +#include <linux/module.h> + +/* + * MAC Floppy IWM hooks + */ + +#ifdef CONFIG_MAC_FLOPPY_IWM +extern int mac_floppy_init(void); +#endif + +/* + * For the allocated request tables + */ +static kmem_cache_t *request_cachep; + +/* + * The "disk" task queue is used to start the actual requests + * after a plug + */ +DECLARE_TASK_QUEUE(tq_disk); + +/* + * Protect the request list against multiple users.. + * + * With this spinlock the Linux block IO subsystem is 100% SMP threaded + * from the IRQ event side, and almost 100% SMP threaded from the syscall + * side (we still have protect against block device array operations, and + * the do_request() side is casually still unsafe. The kernel lock protects + * this part currently.). + * + * there is a fair chance that things will work just OK if these functions + * are called with no global kernel lock held ... + */ +spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; + +/* This specifies how many sectors to read ahead on the disk. */ + +int read_ahead[MAX_BLKDEV]; + +/* blk_dev_struct is: + * *request_fn + * *current_request + */ +struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ + +/* + * blk_size contains the size of all block-devices in units of 1024 byte + * sectors: + * + * blk_size[MAJOR][MINOR] + * + * if (!blk_size[MAJOR]) then no minor size checking is done. + */ +int * blk_size[MAX_BLKDEV]; + +/* + * blksize_size contains the size of all block-devices: + * + * blksize_size[MAJOR][MINOR] + * + * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. + */ +int * blksize_size[MAX_BLKDEV]; + +/* + * hardsect_size contains the size of the hardware sector of a device. + * + * hardsect_size[MAJOR][MINOR] + * + * if (!hardsect_size[MAJOR]) + * then 512 bytes is assumed. + * else + * sector_size is hardsect_size[MAJOR][MINOR] + * This is currently set by some scsi devices and read by the msdos fs driver. + * Other uses may appear later. + */ +int * hardsect_size[MAX_BLKDEV]; + +/* + * The following tunes the read-ahead algorithm in mm/filemap.c + */ +int * max_readahead[MAX_BLKDEV]; + +/* + * Max number of sectors per request + */ +int * max_sectors[MAX_BLKDEV]; + +/* + * How many reqeusts do we allocate per queue, + * and how many do we "batch" on freeing them? + */ +static int queue_nr_requests, batch_requests; + +static inline int get_max_sectors(kdev_t dev) +{ + if (!max_sectors[MAJOR(dev)]) + return MAX_SECTORS; + return max_sectors[MAJOR(dev)][MINOR(dev)]; +} + +inline request_queue_t *blk_get_queue(kdev_t dev) +{ + struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); + + if (bdev->queue) + return bdev->queue(dev); + else + return &blk_dev[MAJOR(dev)].request_queue; +} + +static int __blk_cleanup_queue(struct request_list *list) +{ + struct list_head *head = &list->free; + struct request *rq; + int i = 0; + + while (!list_empty(head)) { + rq = list_entry(head->next, struct request, queue); + list_del(&rq->queue); + kmem_cache_free(request_cachep, rq); + i++; + }; + + if (i != list->count) + printk("request list leak!\n"); + + list->count = 0; + return i; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. Currently, its + * primary task it to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... + **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(&q->rq[READ]); + count -= __blk_cleanup_queue(&q->rq[WRITE]); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + memset(q, 0, sizeof(*q)); +} + +/** + * blk_queue_headactive - indicate whether head of request queue may be active + * @q: The queue which this applies to. + * @active: A flag indication where the head of the queue is active. + * + * Description: + * The driver for a block device may choose to leave the currently active + * request on the request queue, removing it only when it has completed. + * The queue handling routines assume this by default for safety reasons + * and will not involve the head of the request queue in any merging or + * reordering of requests when the queue is unplugged (and thus may be + * working on this particular request). + * + * If a driver removes requests from the queue before processing them, then + * it may indicate that it does so, there by allowing the head of the queue + * to be involved in merging and reordering. This is done be calling + * blk_queue_headactive() with an @active flag of %0. + * + * If a driver processes several requests at once, it must remove them (or + * at least all but one of them) from the request queue. + * + * When a queue is plugged the head will be assumed to be inactive. + **/ + +void blk_queue_headactive(request_queue_t * q, int active) +{ + q->head_active = active; +} + +/** + * blk_queue_make_request - define an alternate make_request function for a device + * @q: the request queue for the device to be affected + * @mfn: the alternate make_request function + * + * Description: + * The normal way for &struct buffer_heads to be passed to a device + * driver is for them to be collected into requests on a request + * queue, and then to allow the device driver to select requests + * off that queue when it is ready. This works well for many block + * devices. However some block devices (typically virtual devices + * such as md or lvm) do not benefit from the processing on the + * request queue, and are served best by having the requests passed + * directly to them. This can be achieved by providing a function + * to blk_queue_make_request(). + * + * Caveat: + * The driver that does this *must* be able to deal appropriately + * with buffers in "highmemory", either by calling bh_kmap() to get + * a kernel mapping, to by calling create_bounce() to create a + * buffer in normal memory. + **/ + +void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) +{ + q->make_request_fn = mfn; +} + +static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +{ + if (req->nr_segments < max_segments) { + req->nr_segments++; + return 1; + } + return 0; +} + +static int ll_back_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_front_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (bh->b_data + bh->b_size == req->bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_merge_requests_fn(request_queue_t *q, struct request *req, + struct request *next, int max_segments) +{ + int total_segments = req->nr_segments + next->nr_segments; + + if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + total_segments--; + + if (total_segments > max_segments) + return 0; + + req->nr_segments = total_segments; + return 1; +} + +/* + * "plug" the device if there are no outstanding requests: this will + * force the transfer to start only after we have put all the requests + * on the list. + * + * This is called with interrupts off and no requests on the queue. + * (and with the request spinlock acquired) + */ +static void generic_plug_device(request_queue_t *q, kdev_t dev) +{ + /* + * no need to replug device + */ + if (!list_empty(&q->queue_head) || q->plugged) + return; + + q->plugged = 1; + queue_task(&q->plug_tq, &tq_disk); +} + +/* + * remove the plug and let it rip.. + */ +static inline void __generic_unplug_device(request_queue_t *q) +{ + if (q->plugged) { + q->plugged = 0; + if (!list_empty(&q->queue_head)) + q->request_fn(q); + } +} + +void generic_unplug_device(void *data) +{ + request_queue_t *q = (request_queue_t *) data; + unsigned long flags; + + spin_lock_irqsave(&io_request_lock, flags); + __generic_unplug_device(q); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +static void blk_init_free_list(request_queue_t *q) +{ + struct request *rq; + int i; + + INIT_LIST_HEAD(&q->rq[READ].free); + INIT_LIST_HEAD(&q->rq[WRITE].free); + q->rq[READ].count = 0; + q->rq[WRITE].count = 0; + + /* + * Divide requests in half between read and write + */ + for (i = 0; i < queue_nr_requests; i++) { + rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); + if (rq == NULL) { + /* We'll get a `leaked requests' message from blk_cleanup_queue */ + printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); + break; + } + memset(rq, 0, sizeof(struct request)); + rq->rq_status = RQ_INACTIVE; + list_add(&rq->queue, &q->rq[i&1].free); + q->rq[i&1].count++; + } + + init_waitqueue_head(&q->wait_for_request); + spin_lock_init(&q->queue_lock); +} + +static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); + +/** + * blk_init_queue - prepare a request queue for use with a block device + * @q: The &request_queue_t to be initialised + * @rfn: The function to be called to process requests that have been + * placed on the queue. + * + * Description: + * If a block device wishes to use the standard request handling procedures, + * which sorts requests and coalesces adjacent requests, then it must + * call blk_init_queue(). The function @rfn will be called when there + * are requests on the queue that need to be processed. If the device + * supports plugging, then @rfn may not be called immediately when requests + * are available on the queue, but may be called at some time later instead. + * Plugged queues are generally unplugged when a buffer belonging to one + * of the requests on the queue is needed, or due to memory pressure. + * + * @rfn is not required, or even expected, to remove all requests off the + * queue, but only as many as it can handle at a time. If it does leave + * requests on the queue, it is responsible for arranging that the requests + * get dealt with eventually. + * + * A global spin lock $io_request_lock must be held while manipulating the + * requests on the request queue. + * + * The request on the head of the queue is by default assumed to be + * potentially active, and it is not considered for re-ordering or merging + * whenever the given queue is unplugged. This behaviour can be changed with + * blk_queue_headactive(). + * + * Note: + * blk_init_queue() must be paired with a blk_cleanup_queue() call + * when the block device is deactivated (such as at module unload). + **/ +void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +{ + INIT_LIST_HEAD(&q->queue_head); + elevator_init(&q->elevator, ELEVATOR_LINUS); + blk_init_free_list(q); + q->request_fn = rfn; + q->back_merge_fn = ll_back_merge_fn; + q->front_merge_fn = ll_front_merge_fn; + q->merge_requests_fn = ll_merge_requests_fn; + q->make_request_fn = __make_request; + q->plug_tq.sync = 0; + q->plug_tq.routine = &generic_unplug_device; + q->plug_tq.data = q; + q->plugged = 0; + /* + * These booleans describe the queue properties. We set the + * default (and most common) values here. Other drivers can + * use the appropriate functions to alter the queue properties. + * as appropriate. + */ + q->plug_device_fn = generic_plug_device; + q->head_active = 1; +} + +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); +/* + * Get a free request. io_request_lock must be held and interrupts + * disabled on the way in. + */ +static inline struct request *get_request(request_queue_t *q, int rw) +{ + struct request *rq = NULL; + struct request_list *rl = q->rq + rw; + + if (!list_empty(&rl->free)) { + rq = blkdev_free_rq(&rl->free); + list_del(&rq->queue); + rl->count--; + rq->rq_status = RQ_ACTIVE; + rq->special = NULL; + rq->q = q; + } + + return rq; +} + +/* + * No available requests for this queue, unplug the device. + */ +static struct request *__get_request_wait(request_queue_t *q, int rw) +{ + register struct request *rq; + DECLARE_WAITQUEUE(wait, current); + + generic_unplug_device(q); + add_wait_queue(&q->wait_for_request, &wait); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + if (q->rq[rw].count < batch_requests) + schedule(); + spin_lock_irq(&io_request_lock); + rq = get_request(q,rw); + spin_unlock_irq(&io_request_lock); + } while (rq == NULL); + remove_wait_queue(&q->wait_for_request, &wait); + current->state = TASK_RUNNING; + return rq; +} + +static inline struct request *get_request_wait(request_queue_t *q, int rw) +{ + register struct request *rq; + + spin_lock_irq(&io_request_lock); + rq = get_request(q, rw); + spin_unlock_irq(&io_request_lock); + if (rq) + return rq; + return __get_request_wait(q, rw); +} + +/* RO fail safe mechanism */ + +static long ro_bits[MAX_BLKDEV][8]; + +int is_read_only(kdev_t dev) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return 0; + return ro_bits[major][minor >> 5] & (1 << (minor & 31)); +} + +void set_device_ro(kdev_t dev,int flag) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return; + if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31); + else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); +} + +inline void drive_stat_acct (kdev_t dev, int rw, + unsigned long nr_sectors, int new_io) +{ + unsigned int major = MAJOR(dev); + unsigned int index; + + index = disk_index(dev); + if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR)) + return; + + kstat.dk_drive[major][index] += new_io; + if (rw == READ) { + kstat.dk_drive_rio[major][index] += new_io; + kstat.dk_drive_rblk[major][index] += nr_sectors; + } else if (rw == WRITE) { + kstat.dk_drive_wio[major][index] += new_io; + kstat.dk_drive_wblk[major][index] += nr_sectors; + } else + printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); +} + +/* + * add-request adds a request to the linked list. + * io_request_lock is held and interrupts disabled, as we muck with the + * request queue list. + * + * By this point, req->cmd is always either READ/WRITE, never READA, + * which is important for drive_stat_acct() above. + */ +static inline void add_request(request_queue_t * q, struct request * req, + struct list_head *insert_here) +{ + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); + + if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + spin_unlock_irq(&io_request_lock); + printk("list_empty(&q->queue_head) is %d\n", + list_empty(&q->queue_head)); + BUG(); + } + + /* + * elevator indicated where it wants this request to be + * inserted at elevator_merge time + */ + list_add(&req->queue, insert_here); +} + +/* + * Must be called with io_request_lock held and interrupts disabled + */ +inline void blkdev_release_request(struct request *req) +{ + request_queue_t *q = req->q; + int rw = req->cmd; + + req->rq_status = RQ_INACTIVE; + req->q = NULL; + + /* + * Request may not have originated from ll_rw_blk. if not, + * assume it has free buffers and check waiters + */ + if (q) { + list_add(&req->queue, &q->rq[rw].free); + if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request)) + wake_up(&q->wait_for_request); + } +} + +/* + * Has to be called with the request spinlock acquired + */ +static void attempt_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + struct request *next; + + next = blkdev_next_request(req); + if (req->sector + req->nr_sectors != next->sector) + return; + if (req->cmd != next->cmd + || req->rq_dev != next->rq_dev + || req->nr_sectors + next->nr_sectors > max_sectors + || next->waiting) + return; + /* + * If we are not allowed to merge these requests, then + * return. If we are allowed to merge, then the count + * will have been updated to the appropriate number, + * and we shouldn't do it here too. + */ + if (!q->merge_requests_fn(q, req, next, max_segments)) + return; + + q->elevator.elevator_merge_req_fn(req, next); + req->bhtail->b_reqnext = next->bh; + req->bhtail = next->bhtail; + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + list_del(&next->queue); + blkdev_release_request(next); +} + +static inline void attempt_back_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + if (&req->queue == q->queue_head.prev) + return; + attempt_merge(q, req, max_sectors, max_segments); +} + +static inline void attempt_front_merge(request_queue_t * q, + struct list_head * head, + struct request *req, + int max_sectors, + int max_segments) +{ + struct list_head * prev; + + prev = req->queue.prev; + if (head == prev) + return; + attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); +} + +static int __make_request(request_queue_t * q, int rw, + struct buffer_head * bh) +{ + unsigned int sector, count; + int max_segments = MAX_SEGMENTS; + struct request * req, *freereq = NULL; + int rw_ahead, max_sectors, el_ret; + struct list_head *head, *insert_here; + int latency; + elevator_t *elevator = &q->elevator; + + count = bh->b_size >> 9; + sector = bh->b_rsector; + + rw_ahead = 0; /* normal case; gets changed below for READA */ + switch (rw) { + case READA: + rw_ahead = 1; + rw = READ; /* drop into READ */ + case READ: + case WRITE: + latency = elevator_request_latency(elevator, rw); + break; + default: + BUG(); + goto end_io; + } + + /* We'd better have a real physical mapping! + Check this bit only if the buffer was dirty and just locked + down by us so at this point flushpage will block and + won't clear the mapped bit under us. */ + if (!buffer_mapped(bh)) + BUG(); + + /* + * Temporary solution - in 2.5 this will be done by the lowlevel + * driver. Create a bounce buffer if the buffer data points into + * high memory - keep the original buffer otherwise. + */ +#if CONFIG_HIGHMEM + bh = create_bounce(rw, bh); +#endif + +/* look for a free request. */ + /* + * Try to coalesce the new request with old requests + */ + max_sectors = get_max_sectors(bh->b_rdev); + +again: + req = NULL; + head = &q->queue_head; + /* + * Now we acquire the request spinlock, we have to be mega careful + * not to schedule or do something nonatomic + */ + spin_lock_irq(&io_request_lock); + + insert_here = head->prev; + if (list_empty(head)) { + q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + goto get_rq; + } else if (q->head_active && !q->plugged) + head = head->next; + + el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + switch (el_ret) { + + case ELEVATOR_BACK_MERGE: + if (!q->back_merge_fn(q, req, bh, max_segments)) + break; + elevator->elevator_merge_cleanup_fn(q, req, count); + req->bhtail->b_reqnext = bh; + req->bhtail = bh; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + attempt_back_merge(q, req, max_sectors, max_segments); + goto out; + + case ELEVATOR_FRONT_MERGE: + if (!q->front_merge_fn(q, req, bh, max_segments)) + break; + elevator->elevator_merge_cleanup_fn(q, req, count); + bh->b_reqnext = req->bh; + req->bh = bh; + req->buffer = bh->b_data; + req->current_nr_sectors = count; + req->sector = req->hard_sector = sector; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + attempt_front_merge(q, head, req, max_sectors, max_segments); + goto out; + + /* + * elevator says don't/can't merge. get new request + */ + case ELEVATOR_NO_MERGE: + /* + * use elevator hints as to where to insert the + * request. if no hints, just add it to the back + * of the queue + */ + if (req) + insert_here = &req->queue; + break; + + default: + printk("elevator returned crap (%d)\n", el_ret); + BUG(); + } + + /* + * Grab a free request from the freelist - if that is empty, check + * if we are doing read ahead and abort instead of blocking for + * a free slot. + */ +get_rq: + if (freereq) { + req = freereq; + freereq = NULL; + } else if ((req = get_request(q, rw)) == NULL) { + spin_unlock_irq(&io_request_lock); + if (rw_ahead) + goto end_io; + + freereq = __get_request_wait(q, rw); + goto again; + } + +/* fill up the request-info, and add it to the queue */ + req->elevator_sequence = latency; + req->cmd = rw; + req->errors = 0; + req->hard_sector = req->sector = sector; + req->hard_nr_sectors = req->nr_sectors = count; + req->current_nr_sectors = count; + req->nr_segments = 1; /* Always 1 for a new request. */ + req->nr_hw_segments = 1; /* Always 1 for a new request. */ + req->buffer = bh->b_data; + req->waiting = NULL; + req->bh = bh; + req->bhtail = bh; + req->rq_dev = bh->b_rdev; + blk_started_io(count); + + add_request(q, req, insert_here); +out: + if (freereq) + blkdev_release_request(freereq); + spin_unlock_irq(&io_request_lock); + return 0; +end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + return 0; +} + +/** + * generic_make_request: hand a buffer head to it's device driver for I/O + * @rw: READ, WRITE, or READA - what sort of I/O is desired. + * @bh: The buffer head describing the location in memory and on the device. + * + * generic_make_request() is used to make I/O requests of block + * devices. It is passed a &struct buffer_head and a &rw value. The + * %READ and %WRITE options are (hopefully) obvious in meaning. The + * %READA value means that a read is required, but that the driver is + * free to fail the request if, for example, it cannot get needed + * resources immediately. + * + * generic_make_request() does not return any status. The + * success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the bh->b_end_io + * function described (one day) else where. + * + * The caller of generic_make_request must make sure that b_page, + * b_addr, b_size are set to describe the memory buffer, that b_rdev + * and b_rsector are set to describe the device address, and the + * b_end_io and optionally b_private are set to describe how + * completion notification should be signaled. BH_Mapped should also + * be set (to confirm that b_dev and b_blocknr are valid). + * + * generic_make_request and the drivers it calls may use b_reqnext, + * and may change b_rdev and b_rsector. So the values of these fields + * should NOT be depended on after the call to generic_make_request. + * Because of this, the caller should record the device address + * information in b_dev and b_blocknr. + * + * Apart from those fields mentioned above, no other fields, and in + * particular, no other flags, are changed by generic_make_request or + * any lower level drivers. + * */ +void generic_make_request (int rw, struct buffer_head * bh) +{ + int major = MAJOR(bh->b_rdev); + int minorsize = 0; + request_queue_t *q; + + if (!bh->b_end_io) + BUG(); + + /* Test device size, when known. */ + if (blk_size[major]) + minorsize = blk_size[major][MINOR(bh->b_rdev)]; + if (minorsize) { + unsigned long maxsector = (minorsize << 1) + 1; + unsigned long sector = bh->b_rsector; + unsigned int count = bh->b_size >> 9; + + if (maxsector < count || maxsector - count < sector) { + /* Yecch */ + bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); + + /* This may well happen - the kernel calls bread() + without checking the size of the device, e.g., + when mounting a device. */ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", + kdevname(bh->b_rdev), rw, + (sector + count)>>1, minorsize); + + /* Yecch again */ + bh->b_end_io(bh, 0); + return; + } + } + + /* + * Resolve the mapping until finished. (drivers are + * still free to implement/resolve their own stacking + * by explicitly returning 0) + */ + /* NOTE: we don't repeat the blk_size check for each new device. + * Stacking drivers are expected to know what they are doing. + */ + do { + q = blk_get_queue(bh->b_rdev); + if (!q) { + printk(KERN_ERR + "generic_make_request: Trying to access " + "nonexistent block-device %s (%ld)\n", + kdevname(bh->b_rdev), bh->b_rsector); + buffer_IO_error(bh); + break; + } + } while (q->make_request_fn(q, rw, bh)); +} + + +/** + * submit_bh: submit a buffer_head to the block device later for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + * submit_bh() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. + * + * The extra functionality provided by submit_bh is to determine + * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. + * This is is appropriate for IO requests that come from the buffer + * cache and page cache which (currently) always use aligned blocks. + */ +void submit_bh(int rw, struct buffer_head * bh) +{ + int count = bh->b_size >> 9; + + if (!test_bit(BH_Lock, &bh->b_state)) + BUG(); + + set_bit(BH_Req, &bh->b_state); + + /* + * First step, 'identity mapping' - RAID or LVM might + * further remap this. + */ + bh->b_rdev = bh->b_dev; + bh->b_rsector = bh->b_blocknr * count; + + generic_make_request(rw, bh); + + switch (rw) { + case WRITE: + kstat.pgpgout += count; + break; + default: + kstat.pgpgin += count; + break; + } +} + +/** + * ll_rw_block: low-level access to block devices + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) + * @nr: number of &struct buffer_heads in the array + * @bhs: array of pointers to &struct buffer_head + * + * ll_rw_block() takes an array of pointers to &struct buffer_heads, + * and requests an I/O operation on them, either a %READ or a %WRITE. + * The third %READA option is described in the documentation for + * generic_make_request() which ll_rw_block() calls. + * + * This function provides extra functionality that is not in + * generic_make_request() that is relevant to buffers in the buffer + * cache or page cache. In particular it drops any buffer that it + * cannot get a lock on (with the BH_Lock state bit), any buffer that + * appears to be clean when doing a write request, and any buffer that + * appears to be up-to-date when doing read request. Further it marks + * as clean buffers that are processed for writing (the buffer cache + * wont assume that they are actually clean until the buffer gets + * unlocked). + * + * ll_rw_block sets b_end_io to simple completion handler that marks + * the buffer up-to-date (if approriate), unlocks the buffer and wakes + * any waiters. As client that needs a more interesting completion + * routine should call submit_bh() (or generic_make_request()) + * directly. + * + * Caveat: + * All of the buffers must be for the same device, and must also be + * of the current approved size for the device. */ + +void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) +{ + unsigned int major; + int correct_size; + int i; + + if (!nr) + return; + + major = MAJOR(bhs[0]->b_dev); + + /* Determine correct block size for this device. */ + correct_size = get_hardsect_size(bhs[0]->b_dev); + + /* Verify requested block sizes. */ + for (i = 0; i < nr; i++) { + struct buffer_head *bh = bhs[i]; + if (bh->b_size % correct_size) { + printk(KERN_NOTICE "ll_rw_block: device %s: " + "only %d-char blocks implemented (%u)\n", + kdevname(bhs[0]->b_dev), + correct_size, bh->b_size); + goto sorry; + } + } + + if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) { + printk(KERN_NOTICE "Can't write to read-only device %s\n", + kdevname(bhs[0]->b_dev)); + goto sorry; + } + + for (i = 0; i < nr; i++) { + struct buffer_head *bh = bhs[i]; + + /* Only one thread can actually submit the I/O. */ + if (test_and_set_bit(BH_Lock, &bh->b_state)) + continue; + + /* We have the buffer lock */ + atomic_inc(&bh->b_count); + bh->b_end_io = end_buffer_io_sync; + + switch(rw) { + case WRITE: + if (!atomic_set_buffer_clean(bh)) + /* Hmmph! Nothing to write */ + goto end_io; + __mark_buffer_clean(bh); + break; + + case READA: + case READ: + if (buffer_uptodate(bh)) + /* Hmmph! Already have it */ + goto end_io; + break; + default: + BUG(); + end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + continue; + } + + submit_bh(rw, bh); + } + return; + +sorry: + /* Make sure we don't get infinite dirty retries.. */ + for (i = 0; i < nr; i++) + mark_buffer_clean(bhs[i]); +} + +#ifdef CONFIG_STRAM_SWAP +extern int stram_device_init (void); +#endif + + +/** + * end_that_request_first - end I/O on one buffer. + * @req: the request being processed + * @uptodate: 0 for I/O error + * @name: the name printed for an I/O error + * + * Description: + * Ends I/O on the first buffer attached to @req, and sets it up + * for the next buffer_head (if any) in the cluster. + * + * Return: + * 0 - we are done with this request, call end_that_request_last() + * 1 - still buffers pending for this request + * + * Caveat: + * Drivers implementing their own end_request handling must call + * blk_finished_io() appropriately. + **/ + +int end_that_request_first (struct request *req, int uptodate, char *name) +{ + struct buffer_head * bh; + int nsect; + + req->errors = 0; + if (!uptodate) + printk("end_request: I/O error, dev %s (%s), sector %lu\n", + kdevname(req->rq_dev), name, req->sector); + + if ((bh = req->bh) != NULL) { + nsect = bh->b_size >> 9; + blk_finished_io(nsect); + req->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, uptodate); + if ((bh = req->bh) != NULL) { + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + + req->current_nr_sectors = bh->b_size >> 9; + if (req->nr_sectors < req->current_nr_sectors) { + req->nr_sectors = req->current_nr_sectors; + printk("end_request: buffer-list destroyed\n"); + } + req->buffer = bh->b_data; + return 1; + } + } + return 0; +} + +void end_that_request_last(struct request *req) +{ + if (req->waiting != NULL) + complete(req->waiting); + + blkdev_release_request(req); +} + +#define MB(kb) ((kb) << 10) + +int __init blk_dev_init(void) +{ + struct blk_dev_struct *dev; + int total_ram; + + request_cachep = kmem_cache_create("blkdev_requests", + sizeof(struct request), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + + if (!request_cachep) + panic("Can't create request pool slab cache\n"); + + for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) + dev->queue = NULL; + + memset(ro_bits,0,sizeof(ro_bits)); + memset(max_readahead, 0, sizeof(max_readahead)); + memset(max_sectors, 0, sizeof(max_sectors)); + + total_ram = nr_free_pages() << (PAGE_SHIFT - 10); + + /* + * Free request slots per queue. + * (Half for reads, half for writes) + */ + queue_nr_requests = 64; + if (total_ram > MB(32)) + queue_nr_requests = 128; + + /* + * Batch frees according to queue length + */ + batch_requests = queue_nr_requests/4; + printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); + +#ifdef CONFIG_AMIGA_Z2RAM + z2_init(); +#endif +#ifdef CONFIG_STRAM_SWAP + stram_device_init(); +#endif +#ifdef CONFIG_BLK_DEV_RAM + rd_init(); +#endif +#ifdef CONFIG_ISP16_CDI + isp16_init(); +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) + ide_init(); /* this MUST precede hd_init */ +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) + hd_init(); +#endif +#ifdef CONFIG_BLK_DEV_PS2 + ps2esdi_init(); +#endif +#ifdef CONFIG_BLK_DEV_XD + xd_init(); +#endif +#ifdef CONFIG_BLK_DEV_MFM + mfm_init(); +#endif +#ifdef CONFIG_PARIDE + { extern void paride_init(void); paride_init(); }; +#endif +#ifdef CONFIG_MAC_FLOPPY + swim3_init(); +#endif +#ifdef CONFIG_BLK_DEV_SWIM_IOP + swimiop_init(); +#endif +#ifdef CONFIG_AMIGA_FLOPPY + amiga_floppy_init(); +#endif +#ifdef CONFIG_ATARI_FLOPPY + atari_floppy_init(); +#endif +#ifdef CONFIG_BLK_DEV_FD + floppy_init(); +#else +#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */ + outb_p(0xc, 0x3f2); +#endif +#endif +#ifdef CONFIG_CDU31A + cdu31a_init(); +#endif +#ifdef CONFIG_ATARI_ACSI + acsi_init(); +#endif +#ifdef CONFIG_MCD + mcd_init(); +#endif +#ifdef CONFIG_MCDX + mcdx_init(); +#endif +#ifdef CONFIG_SBPCD + sbpcd_init(); +#endif +#ifdef CONFIG_AZTCD + aztcd_init(); +#endif +#ifdef CONFIG_CDU535 + sony535_init(); +#endif +#ifdef CONFIG_GSCD + gscd_init(); +#endif +#ifdef CONFIG_CM206 + cm206_init(); +#endif +#ifdef CONFIG_OPTCD + optcd_init(); +#endif +#ifdef CONFIG_SJCD + sjcd_init(); +#endif +#ifdef CONFIG_APBLOCK + ap_init(); +#endif +#ifdef CONFIG_DDV + ddv_init(); +#endif +#ifdef CONFIG_MDISK + mdisk_init(); +#endif +#ifdef CONFIG_DASD + dasd_init(); +#endif +#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) + tapeblock_init(); +#endif +#ifdef CONFIG_BLK_DEV_XPRAM + xpram_init(); +#endif + +#ifdef CONFIG_SUN_JSFLASH + jsfd_init(); +#endif + +#ifdef CONFIG_XENOLINUX_BLOCK + xlblk_init(); +#endif + + return 0; +}; + +EXPORT_SYMBOL(io_request_lock); +EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(end_that_request_last); +EXPORT_SYMBOL(blk_init_queue); +EXPORT_SYMBOL(blk_get_queue); +EXPORT_SYMBOL(blk_cleanup_queue); +EXPORT_SYMBOL(blk_queue_headactive); +EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(generic_make_request); +EXPORT_SYMBOL(blkdev_release_request); +EXPORT_SYMBOL(generic_unplug_device); |