diff options
author | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2003-02-24 16:55:07 +0000 |
---|---|---|
committer | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2003-02-24 16:55:07 +0000 |
commit | a48212cb65e09669ed243581556529681cebba0a (patch) | |
tree | a58f47e4764f343db87eba48d17ce9b2ddbf8047 /old/xenolinux-2.4.16-sparse/drivers/block | |
parent | 96ce9e11d148a721557d48ed5a8ca7857a7bc937 (diff) | |
download | xen-a48212cb65e09669ed243581556529681cebba0a.tar.gz xen-a48212cb65e09669ed243581556529681cebba0a.tar.bz2 xen-a48212cb65e09669ed243581556529681cebba0a.zip |
bitkeeper revision 1.93 (3e5a4e6bkPheUp3x1uufN2MS3LAB7A)
Latest and Greatest version of XenoLinux based on the Linux-2.4.21-pre4
kernel.
Diffstat (limited to 'old/xenolinux-2.4.16-sparse/drivers/block')
-rw-r--r-- | old/xenolinux-2.4.16-sparse/drivers/block/Config.in | 51 | ||||
-rw-r--r-- | old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c | 1251 | ||||
-rw-r--r-- | old/xenolinux-2.4.16-sparse/drivers/block/rd.c | 1009 |
3 files changed, 2311 insertions, 0 deletions
diff --git a/old/xenolinux-2.4.16-sparse/drivers/block/Config.in b/old/xenolinux-2.4.16-sparse/drivers/block/Config.in new file mode 100644 index 0000000000..716774fe74 --- /dev/null +++ b/old/xenolinux-2.4.16-sparse/drivers/block/Config.in @@ -0,0 +1,51 @@ +# +# Block device driver configuration +# +mainmenu_option next_comment +comment 'Block devices' + +tristate 'Normal PC floppy disk support' CONFIG_BLK_DEV_FD +if [ "$CONFIG_AMIGA" = "y" ]; then + tristate 'Amiga floppy support' CONFIG_AMIGA_FLOPPY +fi +if [ "$CONFIG_ATARI" = "y" ]; then + tristate 'Atari floppy support' CONFIG_ATARI_FLOPPY +fi +if [ "$CONFIG_MAC" = "y" ]; then + dep_bool 'Macintosh IIfx/Quadra 900/Quadra 950 floppy support (EXPERIMENTAL)' CONFIG_BLK_DEV_SWIM_IOP $CONFIG_EXPERIMENTAL +fi +if [ "$CONFIG_MCA" = "y" ]; then + tristate 'PS/2 ESDI hard disk support' CONFIG_BLK_DEV_PS2 +fi +if [ "$CONFIG_ZORRO" = "y" ]; then + tristate 'Amiga Zorro II ramdisk support' CONFIG_AMIGA_Z2RAM +fi +if [ "$CONFIG_ATARI" = "y" ]; then + tristate 'Atari ACSI support' CONFIG_ATARI_ACSI + if [ "$CONFIG_ATARI_ACSI" != "n" ]; then + comment 'Some devices (e.g. CD jukebox) support multiple LUNs' + bool ' Probe all LUNs on each ACSI device' CONFIG_ACSI_MULTI_LUN + tristate ' Atari SLM laser printer support' CONFIG_ATARI_SLM + fi +fi +dep_tristate 'XT hard disk support' CONFIG_BLK_DEV_XD $CONFIG_ISA +dep_tristate 'Parallel port IDE device support' CONFIG_PARIDE $CONFIG_PARPORT +if [ "$CONFIG_PARIDE" = "y" -o "$CONFIG_PARIDE" = "m" ]; then + source drivers/block/paride/Config.in +fi +dep_tristate 'Compaq SMART2 support' CONFIG_BLK_CPQ_DA $CONFIG_PCI +dep_tristate 'Compaq Smart Array 5xxx support' CONFIG_BLK_CPQ_CISS_DA $CONFIG_PCI +dep_tristate 'Mylex DAC960/DAC1100 PCI RAID Controller support' CONFIG_BLK_DEV_DAC960 $CONFIG_PCI + +tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET + +tristate 'RAM disk support' CONFIG_BLK_DEV_RAM +if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then + int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 +fi +dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM + +bool 'XenoLinux virtual block device support' CONFIG_XENOLINUX_BLOCK + +endmenu diff --git a/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c b/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c new file mode 100644 index 0000000000..cd1cb7ca9c --- /dev/null +++ b/old/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c @@ -0,0 +1,1251 @@ +/* + * linux/drivers/block/ll_rw_blk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE + * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> + * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 + */ + +/* + * This handles all read/write requests to block devices + */ +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/config.h> +#include <linux/locks.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/smp_lock.h> +#include <linux/completion.h> + +#include <asm/system.h> +#include <asm/io.h> +#include <linux/blk.h> +#include <linux/highmem.h> +#include <linux/slab.h> +#include <linux/module.h> + +/* + * MAC Floppy IWM hooks + */ + +#ifdef CONFIG_MAC_FLOPPY_IWM +extern int mac_floppy_init(void); +#endif + +/* + * For the allocated request tables + */ +static kmem_cache_t *request_cachep; + +/* + * The "disk" task queue is used to start the actual requests + * after a plug + */ +DECLARE_TASK_QUEUE(tq_disk); + +/* + * Protect the request list against multiple users.. + * + * With this spinlock the Linux block IO subsystem is 100% SMP threaded + * from the IRQ event side, and almost 100% SMP threaded from the syscall + * side (we still have protect against block device array operations, and + * the do_request() side is casually still unsafe. The kernel lock protects + * this part currently.). + * + * there is a fair chance that things will work just OK if these functions + * are called with no global kernel lock held ... + */ +spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; + +/* This specifies how many sectors to read ahead on the disk. */ + +int read_ahead[MAX_BLKDEV]; + +/* blk_dev_struct is: + * *request_fn + * *current_request + */ +struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ + +/* + * blk_size contains the size of all block-devices in units of 1024 byte + * sectors: + * + * blk_size[MAJOR][MINOR] + * + * if (!blk_size[MAJOR]) then no minor size checking is done. + */ +int * blk_size[MAX_BLKDEV]; + +/* + * blksize_size contains the size of all block-devices: + * + * blksize_size[MAJOR][MINOR] + * + * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. + */ +int * blksize_size[MAX_BLKDEV]; + +/* + * hardsect_size contains the size of the hardware sector of a device. + * + * hardsect_size[MAJOR][MINOR] + * + * if (!hardsect_size[MAJOR]) + * then 512 bytes is assumed. + * else + * sector_size is hardsect_size[MAJOR][MINOR] + * This is currently set by some scsi devices and read by the msdos fs driver. + * Other uses may appear later. + */ +int * hardsect_size[MAX_BLKDEV]; + +/* + * The following tunes the read-ahead algorithm in mm/filemap.c + */ +int * max_readahead[MAX_BLKDEV]; + +/* + * Max number of sectors per request + */ +int * max_sectors[MAX_BLKDEV]; + +/* + * How many reqeusts do we allocate per queue, + * and how many do we "batch" on freeing them? + */ +static int queue_nr_requests, batch_requests; + +static inline int get_max_sectors(kdev_t dev) +{ + if (!max_sectors[MAJOR(dev)]) + return MAX_SECTORS; + return max_sectors[MAJOR(dev)][MINOR(dev)]; +} + +inline request_queue_t *blk_get_queue(kdev_t dev) +{ + struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); + + if (bdev->queue) + return bdev->queue(dev); + else + return &blk_dev[MAJOR(dev)].request_queue; +} + +static int __blk_cleanup_queue(struct request_list *list) +{ + struct list_head *head = &list->free; + struct request *rq; + int i = 0; + + while (!list_empty(head)) { + rq = list_entry(head->next, struct request, queue); + list_del(&rq->queue); + kmem_cache_free(request_cachep, rq); + i++; + }; + + if (i != list->count) + printk("request list leak!\n"); + + list->count = 0; + return i; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. Currently, its + * primary task it to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... + **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(&q->rq[READ]); + count -= __blk_cleanup_queue(&q->rq[WRITE]); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + memset(q, 0, sizeof(*q)); +} + +/** + * blk_queue_headactive - indicate whether head of request queue may be active + * @q: The queue which this applies to. + * @active: A flag indication where the head of the queue is active. + * + * Description: + * The driver for a block device may choose to leave the currently active + * request on the request queue, removing it only when it has completed. + * The queue handling routines assume this by default for safety reasons + * and will not involve the head of the request queue in any merging or + * reordering of requests when the queue is unplugged (and thus may be + * working on this particular request). + * + * If a driver removes requests from the queue before processing them, then + * it may indicate that it does so, there by allowing the head of the queue + * to be involved in merging and reordering. This is done be calling + * blk_queue_headactive() with an @active flag of %0. + * + * If a driver processes several requests at once, it must remove them (or + * at least all but one of them) from the request queue. + * + * When a queue is plugged the head will be assumed to be inactive. + **/ + +void blk_queue_headactive(request_queue_t * q, int active) +{ + q->head_active = active; +} + +/** + * blk_queue_make_request - define an alternate make_request function for a device + * @q: the request queue for the device to be affected + * @mfn: the alternate make_request function + * + * Description: + * The normal way for &struct buffer_heads to be passed to a device + * driver is for them to be collected into requests on a request + * queue, and then to allow the device driver to select requests + * off that queue when it is ready. This works well for many block + * devices. However some block devices (typically virtual devices + * such as md or lvm) do not benefit from the processing on the + * request queue, and are served best by having the requests passed + * directly to them. This can be achieved by providing a function + * to blk_queue_make_request(). + * + * Caveat: + * The driver that does this *must* be able to deal appropriately + * with buffers in "highmemory", either by calling bh_kmap() to get + * a kernel mapping, to by calling create_bounce() to create a + * buffer in normal memory. + **/ + +void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) +{ + q->make_request_fn = mfn; +} + +static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +{ + if (req->nr_segments < max_segments) { + req->nr_segments++; + return 1; + } + return 0; +} + +static int ll_back_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_front_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (bh->b_data + bh->b_size == req->bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_merge_requests_fn(request_queue_t *q, struct request *req, + struct request *next, int max_segments) +{ + int total_segments = req->nr_segments + next->nr_segments; + + if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + total_segments--; + + if (total_segments > max_segments) + return 0; + + req->nr_segments = total_segments; + return 1; +} + +/* + * "plug" the device if there are no outstanding requests: this will + * force the transfer to start only after we have put all the requests + * on the list. + * + * This is called with interrupts off and no requests on the queue. + * (and with the request spinlock acquired) + */ +static void generic_plug_device(request_queue_t *q, kdev_t dev) +{ + /* + * no need to replug device + */ + if (!list_empty(&q->queue_head) || q->plugged) + return; + + q->plugged = 1; + queue_task(&q->plug_tq, &tq_disk); +} + +/* + * remove the plug and let it rip.. + */ +static inline void __generic_unplug_device(request_queue_t *q) +{ + if (q->plugged) { + q->plugged = 0; + if (!list_empty(&q->queue_head)) + q->request_fn(q); + } +} + +void generic_unplug_device(void *data) +{ + request_queue_t *q = (request_queue_t *) data; + unsigned long flags; + + spin_lock_irqsave(&io_request_lock, flags); + __generic_unplug_device(q); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +static void blk_init_free_list(request_queue_t *q) +{ + struct request *rq; + int i; + + INIT_LIST_HEAD(&q->rq[READ].free); + INIT_LIST_HEAD(&q->rq[WRITE].free); + q->rq[READ].count = 0; + q->rq[WRITE].count = 0; + + /* + * Divide requests in half between read and write + */ + for (i = 0; i < queue_nr_requests; i++) { + rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); + if (rq == NULL) { + /* We'll get a `leaked requests' message from blk_cleanup_queue */ + printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); + break; + } + memset(rq, 0, sizeof(struct request)); + rq->rq_status = RQ_INACTIVE; + list_add(&rq->queue, &q->rq[i&1].free); + q->rq[i&1].count++; + } + + init_waitqueue_head(&q->wait_for_request); + spin_lock_init(&q->queue_lock); +} + +static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); + +/** + * blk_init_queue - prepare a request queue for use with a block device + * @q: The &request_queue_t to be initialised + * @rfn: The function to be called to process requests that have been + * placed on the queue. + * + * Description: + * If a block device wishes to use the standard request handling procedures, + * which sorts requests and coalesces adjacent requests, then it must + * call blk_init_queue(). The function @rfn will be called when there + * are requests on the queue that need to be processed. If the device + * supports plugging, then @rfn may not be called immediately when requests + * are available on the queue, but may be called at some time later instead. + * Plugged queues are generally unplugged when a buffer belonging to one + * of the requests on the queue is needed, or due to memory pressure. + * + * @rfn is not required, or even expected, to remove all requests off the + * queue, but only as many as it can handle at a time. If it does leave + * requests on the queue, it is responsible for arranging that the requests + * get dealt with eventually. + * + * A global spin lock $io_request_lock must be held while manipulating the + * requests on the request queue. + * + * The request on the head of the queue is by default assumed to be + * potentially active, and it is not considered for re-ordering or merging + * whenever the given queue is unplugged. This behaviour can be changed with + * blk_queue_headactive(). + * + * Note: + * blk_init_queue() must be paired with a blk_cleanup_queue() call + * when the block device is deactivated (such as at module unload). + **/ +void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +{ + INIT_LIST_HEAD(&q->queue_head); + elevator_init(&q->elevator, ELEVATOR_LINUS); + blk_init_free_list(q); + q->request_fn = rfn; + q->back_merge_fn = ll_back_merge_fn; + q->front_merge_fn = ll_front_merge_fn; + q->merge_requests_fn = ll_merge_requests_fn; + q->make_request_fn = __make_request; + q->plug_tq.sync = 0; + q->plug_tq.routine = &generic_unplug_device; + q->plug_tq.data = q; + q->plugged = 0; + /* + * These booleans describe the queue properties. We set the + * default (and most common) values here. Other drivers can + * use the appropriate functions to alter the queue properties. + * as appropriate. + */ + q->plug_device_fn = generic_plug_device; + q->head_active = 1; +} + +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); +/* + * Get a free request. io_request_lock must be held and interrupts + * disabled on the way in. + */ +static inline struct request *get_request(request_queue_t *q, int rw) +{ + struct request *rq = NULL; + struct request_list *rl = q->rq + rw; + + if (!list_empty(&rl->free)) { + rq = blkdev_free_rq(&rl->free); + list_del(&rq->queue); + rl->count--; + rq->rq_status = RQ_ACTIVE; + rq->special = NULL; + rq->q = q; + } + + return rq; +} + +/* + * No available requests for this queue, unplug the device. + */ +static struct request *__get_request_wait(request_queue_t *q, int rw) +{ + register struct request *rq; + DECLARE_WAITQUEUE(wait, current); + + generic_unplug_device(q); + add_wait_queue(&q->wait_for_request, &wait); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + if (q->rq[rw].count < batch_requests) + schedule(); + spin_lock_irq(&io_request_lock); + rq = get_request(q,rw); + spin_unlock_irq(&io_request_lock); + } while (rq == NULL); + remove_wait_queue(&q->wait_for_request, &wait); + current->state = TASK_RUNNING; + return rq; +} + +static inline struct request *get_request_wait(request_queue_t *q, int rw) +{ + register struct request *rq; + + spin_lock_irq(&io_request_lock); + rq = get_request(q, rw); + spin_unlock_irq(&io_request_lock); + if (rq) + return rq; + return __get_request_wait(q, rw); +} + +/* RO fail safe mechanism */ + +static long ro_bits[MAX_BLKDEV][8]; + +int is_read_only(kdev_t dev) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return 0; + return ro_bits[major][minor >> 5] & (1 << (minor & 31)); +} + +void set_device_ro(kdev_t dev,int flag) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return; + if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31); + else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); +} + +inline void drive_stat_acct (kdev_t dev, int rw, + unsigned long nr_sectors, int new_io) +{ + unsigned int major = MAJOR(dev); + unsigned int index; + + index = disk_index(dev); + if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR)) + return; + + kstat.dk_drive[major][index] += new_io; + if (rw == READ) { + kstat.dk_drive_rio[major][index] += new_io; + kstat.dk_drive_rblk[major][index] += nr_sectors; + } else if (rw == WRITE) { + kstat.dk_drive_wio[major][index] += new_io; + kstat.dk_drive_wblk[major][index] += nr_sectors; + } else + printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); +} + +/* + * add-request adds a request to the linked list. + * io_request_lock is held and interrupts disabled, as we muck with the + * request queue list. + * + * By this point, req->cmd is always either READ/WRITE, never READA, + * which is important for drive_stat_acct() above. + */ +static inline void add_request(request_queue_t * q, struct request * req, + struct list_head *insert_here) +{ + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); + + if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + spin_unlock_irq(&io_request_lock); + printk("list_empty(&q->queue_head) is %d\n", + list_empty(&q->queue_head)); + BUG(); + } + + /* + * elevator indicated where it wants this request to be + * inserted at elevator_merge time + */ + list_add(&req->queue, insert_here); +} + +/* + * Must be called with io_request_lock held and interrupts disabled + */ +inline void blkdev_release_request(struct request *req) +{ + request_queue_t *q = req->q; + int rw = req->cmd; + + req->rq_status = RQ_INACTIVE; + req->q = NULL; + + /* + * Request may not have originated from ll_rw_blk. if not, + * assume it has free buffers and check waiters + */ + if (q) { + list_add(&req->queue, &q->rq[rw].free); + if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request)) + wake_up(&q->wait_for_request); + } +} + +/* + * Has to be called with the request spinlock acquired + */ +static void attempt_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + struct request *next; + + next = blkdev_next_request(req); + if (req->sector + req->nr_sectors != next->sector) + return; + if (req->cmd != next->cmd + || req->rq_dev != next->rq_dev + || req->nr_sectors + next->nr_sectors > max_sectors + || next->waiting) + return; + /* + * If we are not allowed to merge these requests, then + * return. If we are allowed to merge, then the count + * will have been updated to the appropriate number, + * and we shouldn't do it here too. + */ + if (!q->merge_requests_fn(q, req, next, max_segments)) + return; + + q->elevator.elevator_merge_req_fn(req, next); + req->bhtail->b_reqnext = next->bh; + req->bhtail = next->bhtail; + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + list_del(&next->queue); + blkdev_release_request(next); +} + +static inline void attempt_back_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + if (&req->queue == q->queue_head.prev) + return; + attempt_merge(q, req, max_sectors, max_segments); +} + +static inline void attempt_front_merge(request_queue_t * q, + struct list_head * head, + struct request *req, + int max_sectors, + int max_segments) +{ + struct list_head * prev; + + prev = req->queue.prev; + if (head == prev) + return; + attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); +} + +static int __make_request(request_queue_t * q, int rw, + struct buffer_head * bh) +{ + unsigned int sector, count; + int max_segments = MAX_SEGMENTS; + struct request * req, *freereq = NULL; + int rw_ahead, max_sectors, el_ret; + struct list_head *head, *insert_here; + int latency; + elevator_t *elevator = &q->elevator; + + count = bh->b_size >> 9; + sector = bh->b_rsector; + + rw_ahead = 0; /* normal case; gets changed below for READA */ + switch (rw) { + case READA: + rw_ahead = 1; + rw = READ; /* drop into READ */ + case READ: + case WRITE: + latency = elevator_request_latency(elevator, rw); + break; + default: + BUG(); + goto end_io; + } + + /* We'd better have a real physical mapping! + Check this bit only if the buffer was dirty and just locked + down by us so at this point flushpage will block and + won't clear the mapped bit under us. */ + if (!buffer_mapped(bh)) + BUG(); + + /* + * Temporary solution - in 2.5 this will be done by the lowlevel + * driver. Create a bounce buffer if the buffer data points into + * high memory - keep the original buffer otherwise. + */ +#if CONFIG_HIGHMEM + bh = create_bounce(rw, bh); +#endif + +/* look for a free request. */ + /* + * Try to coalesce the new request with old requests + */ + max_sectors = get_max_sectors(bh->b_rdev); + +again: + req = NULL; + head = &q->queue_head; + /* + * Now we acquire the request spinlock, we have to be mega careful + * not to schedule or do something nonatomic + */ + spin_lock_irq(&io_request_lock); + + insert_here = head->prev; + if (list_empty(head)) { + q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + goto get_rq; + } else if (q->head_active && !q->plugged) + head = head->next; + + el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + switch (el_ret) { + + case ELEVATOR_BACK_MERGE: + if (!q->back_merge_fn(q, req, bh, max_segments)) + break; + elevator->elevator_merge_cleanup_fn(q, req, count); + req->bhtail->b_reqnext = bh; + req->bhtail = bh; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + attempt_back_merge(q, req, max_sectors, max_segments); + goto out; + + case ELEVATOR_FRONT_MERGE: + if (!q->front_merge_fn(q, req, bh, max_segments)) + break; + elevator->elevator_merge_cleanup_fn(q, req, count); + bh->b_reqnext = req->bh; + req->bh = bh; + req->buffer = bh->b_data; + req->current_nr_sectors = count; + req->sector = req->hard_sector = sector; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + attempt_front_merge(q, head, req, max_sectors, max_segments); + goto out; + + /* + * elevator says don't/can't merge. get new request + */ + case ELEVATOR_NO_MERGE: + /* + * use elevator hints as to where to insert the + * request. if no hints, just add it to the back + * of the queue + */ + if (req) + insert_here = &req->queue; + break; + + default: + printk("elevator returned crap (%d)\n", el_ret); + BUG(); + } + + /* + * Grab a free request from the freelist - if that is empty, check + * if we are doing read ahead and abort instead of blocking for + * a free slot. + */ +get_rq: + if (freereq) { + req = freereq; + freereq = NULL; + } else if ((req = get_request(q, rw)) == NULL) { + spin_unlock_irq(&io_request_lock); + if (rw_ahead) + goto end_io; + + freereq = __get_request_wait(q, rw); + goto again; + } + +/* fill up the request-info, and add it to the queue */ + req->elevator_sequence = latency; + req->cmd = rw; + req->errors = 0; + req->hard_sector = req->sector = sector; + req->hard_nr_sectors = req->nr_sectors = count; + req->current_nr_sectors = count; + req->nr_segments = 1; /* Always 1 for a new request. */ + req->nr_hw_segments = 1; /* Always 1 for a new request. */ + req->buffer = bh->b_data; + req->waiting = NULL; + req->bh = bh; + req->bhtail = bh; + req->rq_dev = bh->b_rdev; + blk_started_io(count); + + add_request(q, req, insert_here); +out: + if (freereq) + blkdev_release_request(freereq); + spin_unlock_irq(&io_request_lock); + return 0; +end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + return 0; +} + +/** + * generic_make_request: hand a buffer head to it's device driver for I/O + * @rw: READ, WRITE, or READA - what sort of I/O is desired. + * @bh: The buffer head describing the location in memory and on the device. + * + * generic_make_request() is used to make I/O requests of block + * devices. It is passed a &struct buffer_head and a &rw value. The + * %READ and %WRITE options are (hopefully) obvious in meaning. The + * %READA value means that a read is required, but that the driver is + * free to fail the request if, for example, it cannot get needed + * resources immediately. + * + * generic_make_request() does not return any status. The + * success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the bh->b_end_io + * function described (one day) else where. + * + * The caller of generic_make_request must make sure that b_page, + * b_addr, b_size are set to describe the memory buffer, that b_rdev + * and b_rsector are set to describe the device address, and the + * b_end_io and optionally b_private are set to describe how + * completion notification should be signaled. BH_Mapped should also + * be set (to confirm that b_dev and b_blocknr are valid). + * + * generic_make_request and the drivers it calls may use b_reqnext, + * and may change b_rdev and b_rsector. So the values of these fields + * should NOT be depended on after the call to generic_make_request. + * Because of this, the caller should record the device address + * information in b_dev and b_blocknr. + * + * Apart from those fields mentioned above, no other fields, and in + * particular, no other flags, are changed by generic_make_request or + * any lower level drivers. + * */ +void generic_make_request (int rw, struct buffer_head * bh) +{ + int major = MAJOR(bh->b_rdev); + int minorsize = 0; + request_queue_t *q; + + if (!bh->b_end_io) + BUG(); + + /* Test device size, when known. */ + if (blk_size[major]) + minorsize = blk_size[major][MINOR(bh->b_rdev)]; + if (minorsize) { + unsigned long maxsector = (minorsize << 1) + 1; + unsigned long sector = bh->b_rsector; + unsigned int count = bh->b_size >> 9; + + if (maxsector < count || maxsector - count < sector) { + /* Yecch */ + bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); + + /* This may well happen - the kernel calls bread() + without checking the size of the device, e.g., + when mounting a device. */ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", + kdevname(bh->b_rdev), rw, + (sector + count)>>1, minorsize); + + /* Yecch again */ + bh->b_end_io(bh, 0); + return; + } + } + + /* + * Resolve the mapping until finished. (drivers are + * still free to implement/resolve their own stacking + * by explicitly returning 0) + */ + /* NOTE: we don't repeat the blk_size check for each new device. + * Stacking drivers are expected to know what they are doing. + */ + do { + q = blk_get_queue(bh->b_rdev); + if (!q) { + printk(KERN_ERR + "generic_make_request: Trying to access " + "nonexistent block-device %s (%ld)\n", + kdevname(bh->b_rdev), bh->b_rsector); + buffer_IO_error(bh); + break; + } + } while (q->make_request_fn(q, rw, bh)); +} + + +/** + * submit_bh: submit a buffer_head to the block device later for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + * submit_bh() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. + * + * The extra functionality provided by submit_bh is to determine + * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. + * This is is appropriate for IO requests that come from the buffer + * cache and page cache which (currently) always use aligned blocks. + */ +void submit_bh(int rw, struct buffer_head * bh) +{ + int count = bh->b_size >> 9; + + if (!test_bit(BH_Lock, &bh->b_state)) + BUG(); + + set_bit(BH_Req, &bh->b_state); + + /* + * First step, 'identity mapping' - RAID or LVM might + * further remap this. + */ + bh->b_rdev = bh->b_dev; + bh->b_rsector = bh->b_blocknr * count; + + generic_make_request(rw, bh); + + switch (rw) { + case WRITE: + kstat.pgpgout += count; + break; + default: + kstat.pgpgin += count; + break; + } +} + +/** + * ll_rw_block: low-level access to block devices + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) + * @nr: number of &struct buffer_heads in the array + * @bhs: array of pointers to &struct buffer_head + * + * ll_rw_block() takes an array of pointers to &struct buffer_heads, + * and requests an I/O operation on them, either a %READ or a %WRITE. + * The third %READA option is described in the documentation for + * generic_make_request() which ll_rw_block() calls. + * + * This function provides extra functionality that is not in + * generic_make_request() that is relevant to buffers in the buffer + * cache or page cache. In particular it drops any buffer that it + * cannot get a lock on (with the BH_Lock state bit), any buffer that + * appears to be clean when doing a write request, and any buffer that + * appears to be up-to-date when doing read request. Further it marks + * as clean buffers that are processed for writing (the buffer cache + * wont assume that they are actually clean until the buffer gets + * unlocked). + * + * ll_rw_block sets b_end_io to simple completion handler that marks + * the buffer up-to-date (if approriate), unlocks the buffer and wakes + * any waiters. As client that needs a more interesting completion + * routine should call submit_bh() (or generic_make_request()) + * directly. + * + * Caveat: + * All of the buffers must be for the same device, and must also be + * of the current approved size for the device. */ + +void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) +{ + unsigned int major; + int correct_size; + int i; + + if (!nr) + return; + + major = MAJOR(bhs[0]->b_dev); + + /* Determine correct block size for this device. */ + correct_size = get_hardsect_size(bhs[0]->b_dev); + + /* Verify requested block sizes. */ + for (i = 0; i < nr; i++) { + struct buffer_head *bh = bhs[i]; + if (bh->b_size % correct_size) { + printk(KERN_NOTICE "ll_rw_block: device %s: " + "only %d-char blocks implemented (%u)\n", + kdevname(bhs[0]->b_dev), + correct_size, bh->b_size); + goto sorry; + } + } + + if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) { + printk(KERN_NOTICE "Can't write to read-only device %s\n", + kdevname(bhs[0]->b_dev)); + goto sorry; + } + + for (i = 0; i < nr; i++) { + struct buffer_head *bh = bhs[i]; + + /* Only one thread can actually submit the I/O. */ + if (test_and_set_bit(BH_Lock, &bh->b_state)) + continue; + + /* We have the buffer lock */ + atomic_inc(&bh->b_count); + bh->b_end_io = end_buffer_io_sync; + + switch(rw) { + case WRITE: + if (!atomic_set_buffer_clean(bh)) + /* Hmmph! Nothing to write */ + goto end_io; + __mark_buffer_clean(bh); + break; + + case READA: + case READ: + if (buffer_uptodate(bh)) + /* Hmmph! Already have it */ + goto end_io; + break; + default: + BUG(); + end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + continue; + } + + submit_bh(rw, bh); + } + return; + +sorry: + /* Make sure we don't get infinite dirty retries.. */ + for (i = 0; i < nr; i++) + mark_buffer_clean(bhs[i]); +} + +#ifdef CONFIG_STRAM_SWAP +extern int stram_device_init (void); +#endif + + +/** + * end_that_request_first - end I/O on one buffer. + * @req: the request being processed + * @uptodate: 0 for I/O error + * @name: the name printed for an I/O error + * + * Description: + * Ends I/O on the first buffer attached to @req, and sets it up + * for the next buffer_head (if any) in the cluster. + * + * Return: + * 0 - we are done with this request, call end_that_request_last() + * 1 - still buffers pending for this request + * + * Caveat: + * Drivers implementing their own end_request handling must call + * blk_finished_io() appropriately. + **/ + +int end_that_request_first (struct request *req, int uptodate, char *name) +{ + struct buffer_head * bh; + int nsect; + + req->errors = 0; + if (!uptodate) + printk("end_request: I/O error, dev %s (%s), sector %lu\n", + kdevname(req->rq_dev), name, req->sector); + + if ((bh = req->bh) != NULL) { + nsect = bh->b_size >> 9; + blk_finished_io(nsect); + req->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, uptodate); + if ((bh = req->bh) != NULL) { + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + + req->current_nr_sectors = bh->b_size >> 9; + if (req->nr_sectors < req->current_nr_sectors) { + req->nr_sectors = req->current_nr_sectors; + printk("end_request: buffer-list destroyed\n"); + } + req->buffer = bh->b_data; + return 1; + } + } + return 0; +} + +void end_that_request_last(struct request *req) +{ + if (req->waiting != NULL) + complete(req->waiting); + + blkdev_release_request(req); +} + +#define MB(kb) ((kb) << 10) + +int __init blk_dev_init(void) +{ + struct blk_dev_struct *dev; + int total_ram; + + request_cachep = kmem_cache_create("blkdev_requests", + sizeof(struct request), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + + if (!request_cachep) + panic("Can't create request pool slab cache\n"); + + for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) + dev->queue = NULL; + + memset(ro_bits,0,sizeof(ro_bits)); + memset(max_readahead, 0, sizeof(max_readahead)); + memset(max_sectors, 0, sizeof(max_sectors)); + + total_ram = nr_free_pages() << (PAGE_SHIFT - 10); + + /* + * Free request slots per queue. + * (Half for reads, half for writes) + */ + queue_nr_requests = 64; + if (total_ram > MB(32)) + queue_nr_requests = 128; + + /* + * Batch frees according to queue length + */ + batch_requests = queue_nr_requests/4; + printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); + +#ifdef CONFIG_AMIGA_Z2RAM + z2_init(); +#endif +#ifdef CONFIG_STRAM_SWAP + stram_device_init(); +#endif +#ifdef CONFIG_BLK_DEV_RAM + rd_init(); +#endif +#ifdef CONFIG_ISP16_CDI + isp16_init(); +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) + ide_init(); /* this MUST precede hd_init */ +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) + hd_init(); +#endif +#ifdef CONFIG_BLK_DEV_PS2 + ps2esdi_init(); +#endif +#ifdef CONFIG_BLK_DEV_XD + xd_init(); +#endif +#ifdef CONFIG_BLK_DEV_MFM + mfm_init(); +#endif +#ifdef CONFIG_PARIDE + { extern void paride_init(void); paride_init(); }; +#endif +#ifdef CONFIG_MAC_FLOPPY + swim3_init(); +#endif +#ifdef CONFIG_BLK_DEV_SWIM_IOP + swimiop_init(); +#endif +#ifdef CONFIG_AMIGA_FLOPPY + amiga_floppy_init(); +#endif +#ifdef CONFIG_ATARI_FLOPPY + atari_floppy_init(); +#endif +#ifdef CONFIG_BLK_DEV_FD + floppy_init(); +#else +#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */ + outb_p(0xc, 0x3f2); +#endif +#endif +#ifdef CONFIG_CDU31A + cdu31a_init(); +#endif +#ifdef CONFIG_ATARI_ACSI + acsi_init(); +#endif +#ifdef CONFIG_MCD + mcd_init(); +#endif +#ifdef CONFIG_MCDX + mcdx_init(); +#endif +#ifdef CONFIG_SBPCD + sbpcd_init(); +#endif +#ifdef CONFIG_AZTCD + aztcd_init(); +#endif +#ifdef CONFIG_CDU535 + sony535_init(); +#endif +#ifdef CONFIG_GSCD + gscd_init(); +#endif +#ifdef CONFIG_CM206 + cm206_init(); +#endif +#ifdef CONFIG_OPTCD + optcd_init(); +#endif +#ifdef CONFIG_SJCD + sjcd_init(); +#endif +#ifdef CONFIG_APBLOCK + ap_init(); +#endif +#ifdef CONFIG_DDV + ddv_init(); +#endif +#ifdef CONFIG_MDISK + mdisk_init(); +#endif +#ifdef CONFIG_DASD + dasd_init(); +#endif +#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) + tapeblock_init(); +#endif +#ifdef CONFIG_BLK_DEV_XPRAM + xpram_init(); +#endif + +#ifdef CONFIG_SUN_JSFLASH + jsfd_init(); +#endif + +#ifdef CONFIG_XENOLINUX_BLOCK + xlblk_init(); +#endif + + return 0; +}; + +EXPORT_SYMBOL(io_request_lock); +EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(end_that_request_last); +EXPORT_SYMBOL(blk_init_queue); +EXPORT_SYMBOL(blk_get_queue); +EXPORT_SYMBOL(blk_cleanup_queue); +EXPORT_SYMBOL(blk_queue_headactive); +EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(generic_make_request); +EXPORT_SYMBOL(blkdev_release_request); +EXPORT_SYMBOL(generic_unplug_device); diff --git a/old/xenolinux-2.4.16-sparse/drivers/block/rd.c b/old/xenolinux-2.4.16-sparse/drivers/block/rd.c new file mode 100644 index 0000000000..ffcc9d3825 --- /dev/null +++ b/old/xenolinux-2.4.16-sparse/drivers/block/rd.c @@ -0,0 +1,1009 @@ +/* + * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. + * + * (C) Chad Page, Theodore Ts'o, et. al, 1995. + * + * This RAM disk is designed to have filesystems created on it and mounted + * just like a regular floppy disk. + * + * It also does something suggested by Linus: use the buffer cache as the + * RAM disk data. This makes it possible to dynamically allocate the RAM disk + * buffer - with some consequences I have to deal with as I write this. + * + * This code is based on the original ramdisk.c, written mostly by + * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by + * Chad Page to use the buffer cache to store the RAM disk data in + * 1995; Theodore then took over the driver again, and cleaned it up + * for inclusion in the mainline kernel. + * + * The original CRAMDISK code was written by Richard Lyons, and + * adapted by Chad Page to use the new RAM disk interface. Theodore + * Ts'o rewrote it so that both the compressed RAM disk loader and the + * kernel decompressor uses the same inflate.c codebase. The RAM disk + * loader now also loads into a dynamic (buffer cache based) RAM disk, + * not the old static RAM disk. Support for the old static RAM disk has + * been completely removed. + * + * Loadable module support added by Tom Dyas. + * + * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): + * Cosmetic changes in #ifdef MODULE, code movement, etc. + * When the RAM disk module is removed, free the protected buffers + * Default RAM disk size changed to 2.88 MB + * + * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 + * + * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) + * - Chad Page + * + * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 + * + * Make block size and block size shift for RAM disks a global macro + * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/minix_fs.h> +#include <linux/ext2_fs.h> +#include <linux/romfs_fs.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/hdreg.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/slab.h> +#include <linux/ioctl.h> +#include <linux/fd.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/devfs_fs_kernel.h> +#include <linux/smp_lock.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/byteorder.h> + +extern void wait_for_keypress(void); + +/* + * 35 has been officially registered as the RAMDISK major number, but + * so is the original MAJOR number of 1. We're using 1 in + * include/linux/major.h for now + */ +#define MAJOR_NR RAMDISK_MAJOR +#include <linux/blk.h> +#include <linux/blkpg.h> + +/* The RAM disk size is now a parameter */ +#define NUM_RAMDISKS 16 /* This cannot be overridden (yet) */ + +#ifndef MODULE +/* We don't have to load RAM disks or gunzip them in a module. */ +#define RD_LOADER +#define BUILD_CRAMDISK + +void rd_load(void); +static int crd_load(struct file *fp, struct file *outfp); + +#ifdef CONFIG_BLK_DEV_INITRD +static int initrd_users; +#endif +#endif + +/* Various static variables go here. Most are used only in the RAM disk code. + */ + +static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ +static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ +static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ +static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static devfs_handle_t devfs_handle; +static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */ + +/* + * Parameters for the boot-loading of the RAM disk. These are set by + * init/main.c (from arguments to the kernel command line) or from the + * architecture-specific setup routine (from the stored boot sector + * information). + */ +int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ +/* + * It would be very desiderable to have a soft-blocksize (that in the case + * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because + * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of + * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages + * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only + * 1 page will be protected. Depending on the size of the ramdisk you + * may want to change the ramdisk blocksize to achieve a better or worse MM + * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that + * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). + */ +int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */ + +#ifndef MODULE + +int rd_doload; /* 1 = load RAM disk, 0 = don't load */ +int rd_prompt = 1; /* 1 = prompt for RAM disk, 0 = don't prompt */ +int rd_image_start; /* starting block # of image */ +#ifdef CONFIG_BLK_DEV_INITRD +unsigned long initrd_start, initrd_end; +int mount_initrd = 1; /* zero if initrd should not be mounted */ +int initrd_below_start_ok; + +static int __init no_initrd(char *str) +{ + mount_initrd = 0; + return 1; +} + +__setup("noinitrd", no_initrd); + +#endif + +static int __init ramdisk_start_setup(char *str) +{ + rd_image_start = simple_strtol(str,NULL,0); + return 1; +} + +static int __init load_ramdisk(char *str) +{ + rd_doload = simple_strtol(str,NULL,0) & 3; + return 1; +} + +static int __init prompt_ramdisk(char *str) +{ + rd_prompt = simple_strtol(str,NULL,0) & 1; + return 1; +} + +static int __init ramdisk_size(char *str) +{ + rd_size = simple_strtol(str,NULL,0); + return 1; +} + +static int __init ramdisk_size2(char *str) +{ + return ramdisk_size(str); +} + +static int __init ramdisk_blocksize(char *str) +{ + rd_blocksize = simple_strtol(str,NULL,0); + return 1; +} + +__setup("ramdisk_start=", ramdisk_start_setup); +__setup("load_ramdisk=", load_ramdisk); +__setup("prompt_ramdisk=", prompt_ramdisk); +__setup("ramdisk=", ramdisk_size); +__setup("ramdisk_size=", ramdisk_size2); +__setup("ramdisk_blocksize=", ramdisk_blocksize); + +#endif + +/* + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * aops copied from ramfs. + */ +static int ramdisk_readpage(struct file *file, struct page * page) +{ + if (!Page_Uptodate(page)) { + memset(kmap(page), 0, PAGE_CACHE_SIZE); + kunmap(page); + flush_dcache_page(page); + SetPageUptodate(page); + } + UnlockPage(page); + return 0; +} + +static int ramdisk_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + if (!Page_Uptodate(page)) { + void *addr = page_address(page); + memset(addr, 0, PAGE_CACHE_SIZE); + flush_dcache_page(page); + SetPageUptodate(page); + } + SetPageDirty(page); + return 0; +} + +static int ramdisk_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + return 0; +} + +static struct address_space_operations ramdisk_aops = { + readpage: ramdisk_readpage, + writepage: fail_writepage, + prepare_write: ramdisk_prepare_write, + commit_write: ramdisk_commit_write, +}; + +static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor) +{ + struct address_space * mapping; + unsigned long index; + int offset, size, err; + + err = -EIO; + err = 0; + mapping = rd_bdev[minor]->bd_inode->i_mapping; + + index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9); + offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK; + size = sbh->b_size; + + do { + int count; + struct page ** hash; + struct page * page; + char * src, * dst; + int unlock = 0; + + count = PAGE_CACHE_SIZE - offset; + if (count > size) + count = size; + size -= count; + + hash = page_hash(mapping, index); + page = __find_get_page(mapping, index, hash); + if (!page) { + page = grab_cache_page(mapping, index); + err = -ENOMEM; + if (!page) + goto out; + err = 0; + + if (!Page_Uptodate(page)) { + memset(kmap(page), 0, PAGE_CACHE_SIZE); + kunmap(page); + SetPageUptodate(page); + } + + unlock = 1; + } + + index++; + + if (rw == READ) { + src = kmap(page); + src += offset; + dst = bh_kmap(sbh); + } else { + dst = kmap(page); + dst += offset; + src = bh_kmap(sbh); + } + offset = 0; + + memcpy(dst, src, count); + + kunmap(page); + bh_kunmap(sbh); + + if (rw == READ) { + flush_dcache_page(page); + } else { + SetPageDirty(page); + } + if (unlock) + UnlockPage(page); + __free_page(page); + } while (size); + + out: + return err; +} + +/* + * Basically, my strategy here is to set up a buffer-head which can't be + * deleted, and make that my Ramdisk. If the request is outside of the + * allocated size, we must get rid of it... + * + * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support + * + */ +static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +{ + unsigned int minor; + unsigned long offset, len; + + minor = MINOR(sbh->b_rdev); + + if (minor >= NUM_RAMDISKS) + goto fail; + + + offset = sbh->b_rsector << 9; + len = sbh->b_size; + + if ((offset + len) > rd_length[minor]) + goto fail; + + if (rw==READA) + rw=READ; + if ((rw != READ) && (rw != WRITE)) { + printk(KERN_INFO "RAMDISK: bad command: %d\n", rw); + goto fail; + } + + if (rd_blkdev_pagecache_IO(rw, sbh, minor)) + goto fail; + + sbh->b_end_io(sbh,1); + return 0; + fail: + sbh->b_end_io(sbh,0); + return 0; +} + +static int rd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + int error = -EINVAL; + unsigned int minor; + + if (!inode || !inode->i_rdev) + goto out; + + minor = MINOR(inode->i_rdev); + + switch (cmd) { + case BLKFLSBUF: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + /* special: we want to release the ramdisk memory, + it's not like with the other blockdevices where + this ioctl only flushes away the buffer cache. */ + error = -EBUSY; + down(&inode->i_bdev->bd_sem); + if (inode->i_bdev->bd_openers <= 2) { + truncate_inode_pages(inode->i_mapping, 0); + error = 0; + } + up(&inode->i_bdev->bd_sem); + break; + case BLKGETSIZE: /* Return device size */ + if (!arg) + break; + error = put_user(rd_kbsize[minor] << 1, (unsigned long *) arg); + break; + case BLKGETSIZE64: + error = put_user((u64)rd_kbsize[minor]<<10, (u64*)arg); + break; + case BLKROSET: + case BLKROGET: + case BLKSSZGET: + error = blk_ioctl(inode->i_rdev, cmd, arg); + }; +out: + return error; +} + + +#ifdef CONFIG_BLK_DEV_INITRD + +static ssize_t initrd_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + int left; + + left = initrd_end - initrd_start - *ppos; + if (count > left) count = left; + if (count == 0) return 0; + copy_to_user(buf, (char *)initrd_start + *ppos, count); + *ppos += count; + return count; +} + + +static int initrd_release(struct inode *inode,struct file *file) +{ + extern void free_initrd_mem(unsigned long, unsigned long); + + lock_kernel(); + if (!--initrd_users) { + free_initrd_mem(initrd_start, initrd_end); + initrd_start = 0; + } + unlock_kernel(); + blkdev_put(inode->i_bdev, BDEV_FILE); + return 0; +} + + +static struct file_operations initrd_fops = { + read: initrd_read, + release: initrd_release, +}; + +#endif + + +static int rd_open(struct inode * inode, struct file * filp) +{ + int unit = DEVICE_NR(inode->i_rdev); + +#ifdef CONFIG_BLK_DEV_INITRD + if (unit == INITRD_MINOR) { + if (!initrd_start) return -ENODEV; + initrd_users++; + filp->f_op = &initrd_fops; + return 0; + } +#endif + + if (unit >= NUM_RAMDISKS) + return -ENXIO; + + /* + * Immunize device against invalidate_buffers() and prune_icache(). + */ + if (rd_bdev[unit] == NULL) { + rd_bdev[unit] = bdget(kdev_t_to_nr(inode->i_rdev)); + rd_bdev[unit]->bd_openers++; + rd_bdev[unit]->bd_inode->i_mapping->a_ops = &ramdisk_aops; + } + + return 0; +} + +static struct block_device_operations rd_bd_op = { + owner: THIS_MODULE, + open: rd_open, + ioctl: rd_ioctl, +}; + +#ifdef MODULE +/* Before freeing the module, invalidate all of the protected buffers! */ +static void __exit rd_cleanup (void) +{ + int i; + + for (i = 0 ; i < NUM_RAMDISKS; i++) { + struct block_device *bdev = rd_bdev[i]; + rd_bdev[i] = NULL; + if (bdev) + blkdev_put(bdev, BDEV_FILE); + destroy_buffers(MKDEV(MAJOR_NR, i)); + } + + devfs_unregister (devfs_handle); + unregister_blkdev( MAJOR_NR, "ramdisk" ); + hardsect_size[MAJOR_NR] = NULL; + blksize_size[MAJOR_NR] = NULL; + blk_size[MAJOR_NR] = NULL; +} +#endif + +/* This is the registration and initialization section of the RAM disk driver */ +int __init rd_init (void) +{ + int i; + + if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || + (rd_blocksize & (rd_blocksize-1))) + { + printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", + rd_blocksize); + rd_blocksize = BLOCK_SIZE; + } + + if (register_blkdev(MAJOR_NR, "ramdisk", &rd_bd_op)) { + printk("RAMDISK: Could not get major %d", MAJOR_NR); + return -EIO; + } + + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), &rd_make_request); + + for (i = 0; i < NUM_RAMDISKS; i++) { + /* rd_size is given in kB */ + rd_length[i] = rd_size << 10; + rd_hardsec[i] = rd_blocksize; + rd_blocksizes[i] = rd_blocksize; + rd_kbsize[i] = rd_size; + } + devfs_handle = devfs_mk_dir (NULL, "rd", NULL); + devfs_register_series (devfs_handle, "%u", NUM_RAMDISKS, + DEVFS_FL_DEFAULT, MAJOR_NR, 0, + S_IFBLK | S_IRUSR | S_IWUSR, + &rd_bd_op, NULL); + + for (i = 0; i < NUM_RAMDISKS; i++) + register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &rd_bd_op, rd_size<<1); + +#ifdef CONFIG_BLK_DEV_INITRD + /* We ought to separate initrd operations here */ + register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &rd_bd_op, rd_size<<1); +#endif + + hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ + blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ + blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ + + /* rd_size is given in kB */ + printk("RAMDISK driver initialized: " + "%d RAM disks of %dK size %d blocksize\n", + NUM_RAMDISKS, rd_size, rd_blocksize); + + return 0; +} + +#ifdef MODULE +module_init(rd_init); +module_exit(rd_cleanup); +#endif + +/* loadable module support */ +MODULE_PARM (rd_size, "1i"); +MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); +MODULE_PARM (rd_blocksize, "i"); +MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); + +MODULE_LICENSE("GPL"); + +/* End of non-loading portions of the RAM disk driver */ + +#ifdef RD_LOADER +/* + * This routine tries to find a RAM disk image to load, and returns the + * number of blocks to read for a non-compressed image, 0 if the image + * is a compressed image, and -1 if an image with the right magic + * numbers could not be found. + * + * We currently check for the following magic numbers: + * minix + * ext2 + * romfs + * gzip + */ +static int __init +identify_ramdisk_image(kdev_t device, struct file *fp, int start_block) +{ + const int size = 512; + struct minix_super_block *minixsb; + struct ext2_super_block *ext2sb; + struct romfs_super_block *romfsb; + int nblocks = -1; + unsigned char *buf; + + buf = kmalloc(size, GFP_KERNEL); + if (buf == 0) + return -1; + + minixsb = (struct minix_super_block *) buf; + ext2sb = (struct ext2_super_block *) buf; + romfsb = (struct romfs_super_block *) buf; + memset(buf, 0xe5, size); + + /* + * Read block 0 to test for gzipped kernel + */ + if (fp->f_op->llseek) + fp->f_op->llseek(fp, start_block * BLOCK_SIZE, 0); + fp->f_pos = start_block * BLOCK_SIZE; + + fp->f_op->read(fp, buf, size, &fp->f_pos); + + /* + * If it matches the gzip magic numbers, return -1 + */ + if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { + printk(KERN_NOTICE + "RAMDISK: Compressed image found at block %d\n", + start_block); + nblocks = 0; + goto done; + } + + /* romfs is at block zero too */ + if (romfsb->word0 == ROMSB_WORD0 && + romfsb->word1 == ROMSB_WORD1) { + printk(KERN_NOTICE + "RAMDISK: romfs filesystem found at block %d\n", + start_block); + nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS; + goto done; + } + + /* + * Read block 1 to test for minix and ext2 superblock + */ + if (fp->f_op->llseek) + fp->f_op->llseek(fp, (start_block+1) * BLOCK_SIZE, 0); + fp->f_pos = (start_block+1) * BLOCK_SIZE; + + fp->f_op->read(fp, buf, size, &fp->f_pos); + + /* Try minix */ + if (minixsb->s_magic == MINIX_SUPER_MAGIC || + minixsb->s_magic == MINIX_SUPER_MAGIC2) { + printk(KERN_NOTICE + "RAMDISK: Minix filesystem found at block %d\n", + start_block); + nblocks = minixsb->s_nzones << minixsb->s_log_zone_size; + goto done; + } + + /* Try ext2 */ + if (ext2sb->s_magic == cpu_to_le16(EXT2_SUPER_MAGIC)) { + printk(KERN_NOTICE + "RAMDISK: ext2 filesystem found at block %d\n", + start_block); + nblocks = le32_to_cpu(ext2sb->s_blocks_count); + goto done; + } + + printk(KERN_NOTICE + "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n", + start_block); + +done: + if (fp->f_op->llseek) + fp->f_op->llseek(fp, start_block * BLOCK_SIZE, 0); + fp->f_pos = start_block * BLOCK_SIZE; + + kfree(buf); + return nblocks; +} + +/* + * This routine loads in the RAM disk image. + */ +static void __init rd_load_image(kdev_t device, int offset, int unit) +{ + struct inode *inode, *out_inode; + struct file infile, outfile; + struct dentry in_dentry, out_dentry; + mm_segment_t fs; + kdev_t ram_device; + int nblocks, i; + char *buf; + unsigned short rotate = 0; + unsigned short devblocks = 0; +#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) && !defined(CONFIG_XENO) + char rotator[4] = { '|' , '/' , '-' , '\\' }; +#endif + ram_device = MKDEV(MAJOR_NR, unit); + + if ((inode = get_empty_inode()) == NULL) + return; + memset(&infile, 0, sizeof(infile)); + memset(&in_dentry, 0, sizeof(in_dentry)); + infile.f_mode = 1; /* read only */ + infile.f_dentry = &in_dentry; + in_dentry.d_inode = inode; + infile.f_op = &def_blk_fops; + init_special_inode(inode, S_IFBLK | S_IRUSR, kdev_t_to_nr(device)); + + if ((out_inode = get_empty_inode()) == NULL) + goto free_inode; + memset(&outfile, 0, sizeof(outfile)); + memset(&out_dentry, 0, sizeof(out_dentry)); + outfile.f_mode = 3; /* read/write */ + outfile.f_dentry = &out_dentry; + out_dentry.d_inode = out_inode; + outfile.f_op = &def_blk_fops; + init_special_inode(out_inode, S_IFBLK | S_IRUSR | S_IWUSR, kdev_t_to_nr(ram_device)); + + if (blkdev_open(inode, &infile) != 0) { + iput(out_inode); + goto free_inode; + } + if (blkdev_open(out_inode, &outfile) != 0) + goto free_inodes; + + fs = get_fs(); + set_fs(KERNEL_DS); + + nblocks = identify_ramdisk_image(device, &infile, offset); + if (nblocks < 0) + goto done; + + if (nblocks == 0) { +#ifdef BUILD_CRAMDISK + if (crd_load(&infile, &outfile) == 0) + goto successful_load; +#else + printk(KERN_NOTICE + "RAMDISK: Kernel does not support compressed " + "RAM disk images\n"); +#endif + goto done; + } + + /* + * NOTE NOTE: nblocks suppose that the blocksize is BLOCK_SIZE, so + * rd_load_image will work only with filesystem BLOCK_SIZE wide! + * So make sure to use 1k blocksize while generating ext2fs + * ramdisk-images. + */ + if (nblocks > (rd_length[unit] >> BLOCK_SIZE_BITS)) { + printk("RAMDISK: image too big! (%d/%ld blocks)\n", + nblocks, rd_length[unit] >> BLOCK_SIZE_BITS); + goto done; + } + + /* + * OK, time to copy in the data + */ + buf = kmalloc(BLOCK_SIZE, GFP_KERNEL); + if (buf == 0) { + printk(KERN_ERR "RAMDISK: could not allocate buffer\n"); + goto done; + } + + if (blk_size[MAJOR(device)]) + devblocks = blk_size[MAJOR(device)][MINOR(device)]; + +#ifdef CONFIG_BLK_DEV_INITRD + if (MAJOR(device) == MAJOR_NR && MINOR(device) == INITRD_MINOR) + devblocks = nblocks; +#endif + + if (devblocks == 0) { + printk(KERN_ERR "RAMDISK: could not determine device size\n"); + goto done; + } + + printk(KERN_NOTICE "RAMDISK: Loading %d blocks [%d disk%s] into ram disk... ", + nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); + for (i=0; i < nblocks; i++) { + if (i && (i % devblocks == 0)) { + printk("done disk #%d.\n", i/devblocks); + rotate = 0; + if (infile.f_op->release(inode, &infile) != 0) { + printk("Error closing the disk.\n"); + goto noclose_input; + } + printk("Please insert disk #%d and press ENTER\n", i/devblocks+1); + wait_for_keypress(); + if (blkdev_open(inode, &infile) != 0) { + printk("Error opening disk.\n"); + goto noclose_input; + } + infile.f_pos = 0; + printk("Loading disk #%d... ", i/devblocks+1); + } + infile.f_op->read(&infile, buf, BLOCK_SIZE, &infile.f_pos); + outfile.f_op->write(&outfile, buf, BLOCK_SIZE, &outfile.f_pos); +#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) && !defined(CONFIG_XENO) + if (!(i % 16)) { + printk("%c\b", rotator[rotate & 0x3]); + rotate++; + } +#endif + } + printk("done.\n"); + kfree(buf); + +successful_load: + ROOT_DEV = MKDEV(MAJOR_NR, unit); + if (ROOT_DEVICE_NAME != NULL) strcpy (ROOT_DEVICE_NAME, "rd/0"); + +done: + infile.f_op->release(inode, &infile); +noclose_input: + blkdev_close(out_inode, &outfile); + iput(inode); + iput(out_inode); + set_fs(fs); + return; +free_inodes: /* free inodes on error */ + iput(out_inode); + infile.f_op->release(inode, &infile); +free_inode: + iput(inode); +} + +#ifdef CONFIG_MAC_FLOPPY +int swim3_fd_eject(int devnum); +#endif + +static void __init rd_load_disk(int n) +{ + + if (rd_doload == 0) + return; + + if (MAJOR(ROOT_DEV) != FLOPPY_MAJOR +#ifdef CONFIG_BLK_DEV_INITRD + && MAJOR(real_root_dev) != FLOPPY_MAJOR +#endif + ) + return; + + if (rd_prompt) { +#ifdef CONFIG_BLK_DEV_FD + floppy_eject(); +#endif +#ifdef CONFIG_MAC_FLOPPY + if(MAJOR(ROOT_DEV) == FLOPPY_MAJOR) + swim3_fd_eject(MINOR(ROOT_DEV)); + else if(MAJOR(real_root_dev) == FLOPPY_MAJOR) + swim3_fd_eject(MINOR(real_root_dev)); +#endif + printk(KERN_NOTICE + "VFS: Insert root floppy disk to be loaded into RAM disk and press ENTER\n"); + wait_for_keypress(); + } + + rd_load_image(ROOT_DEV,rd_image_start, n); + +} + +void __init rd_load(void) +{ + rd_load_disk(0); +} + +void __init rd_load_secondary(void) +{ + rd_load_disk(1); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void __init initrd_load(void) +{ + rd_load_image(MKDEV(MAJOR_NR, INITRD_MINOR),rd_image_start,0); +} +#endif + +#endif /* RD_LOADER */ + +#ifdef BUILD_CRAMDISK + +/* + * gzip declarations + */ + +#define OF(args) args + +#ifndef memzero +#define memzero(s, n) memset ((s), 0, (n)) +#endif + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +#define INBUFSIZ 4096 +#define WSIZE 0x8000 /* window size--must be a power of two, and */ + /* at least 32K for zip's deflate method */ + +static uch *inbuf; +static uch *window; + +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ +static int exit_code; +static long bytes_out; +static struct file *crd_infp, *crd_outfp; + +#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) + +/* Diagnostic functions (stubbed out) */ +#define Assert(cond,msg) +#define Trace(x) +#define Tracev(x) +#define Tracevv(x) +#define Tracec(c,x) +#define Tracecv(c,x) + +#define STATIC static + +static int fill_inbuf(void); +static void flush_window(void); +static void *malloc(int size); +static void free(void *where); +static void error(char *m); +static void gzip_mark(void **); +static void gzip_release(void **); + +#include "../../lib/inflate.c" + +static void __init *malloc(int size) +{ + return kmalloc(size, GFP_KERNEL); +} + +static void __init free(void *where) +{ + kfree(where); +} + +static void __init gzip_mark(void **ptr) +{ +} + +static void __init gzip_release(void **ptr) +{ +} + + +/* =========================================================================== + * Fill the input buffer. This is called only when the buffer is empty + * and at least one byte is really needed. + */ +static int __init fill_inbuf(void) +{ + if (exit_code) return -1; + + insize = crd_infp->f_op->read(crd_infp, inbuf, INBUFSIZ, + &crd_infp->f_pos); + if (insize == 0) return -1; + + inptr = 1; + + return inbuf[0]; +} + +/* =========================================================================== + * Write the output window window[0..outcnt-1] and update crc and bytes_out. + * (Used for the decompressed data only.) + */ +static void __init flush_window(void) +{ + ulg c = crc; /* temporary variable */ + unsigned n; + uch *in, ch; + + crd_outfp->f_op->write(crd_outfp, window, outcnt, &crd_outfp->f_pos); + in = window; + for (n = 0; n < outcnt; n++) { + ch = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + outcnt = 0; +} + +static void __init error(char *x) +{ + printk(KERN_ERR "%s", x); + exit_code = 1; +} + +static int __init +crd_load(struct file * fp, struct file *outfp) +{ + int result; + + insize = 0; /* valid bytes in inbuf */ + inptr = 0; /* index of next byte to be processed in inbuf */ + outcnt = 0; /* bytes in output buffer */ + exit_code = 0; + bytes_out = 0; + crc = (ulg)0xffffffffL; /* shift register contents */ + + crd_infp = fp; + crd_outfp = outfp; + inbuf = kmalloc(INBUFSIZ, GFP_KERNEL); + if (inbuf == 0) { + printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n"); + return -1; + } + window = kmalloc(WSIZE, GFP_KERNEL); + if (window == 0) { + printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n"); + kfree(inbuf); + return -1; + } + makecrc(); + result = gunzip(); + kfree(inbuf); + kfree(window); + return result; +} + +#endif /* BUILD_CRAMDISK */ + |