3 files changed, 4929 insertions, 0 deletions
diff --git a/xenolinux-2.4.23-sparse/drivers/block/ll_rw_blk.c b/xenolinux-2.4.23-sparse/drivers/block/ll_rw_blk.c
new file mode 100644
index 0000000000..bca30ae493
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/drivers/block/ll_rw_blk.c
@@ -0,0 +1,1646 @@
+/*
+ *  linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/locks.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+#include <linux/bootmem.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <linux/blk.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have protect against block device array operations, and
+ * the do_request() side is casually still unsafe. The kernel lock protects
+ * this part currently.).
+ *
+ * there is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ *	*request_fn
+ *	*current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024 byte
+ * sectors:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ *		then 512 bytes is assumed.
+ * else
+ *		sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+unsigned long blk_max_low_pfn, blk_max_pfn;
+int blk_nohighio = 0;
+
+int block_dump = 0;
+
+static struct timer_list writeback_timer;
+
+static inline int get_max_sectors(kdev_t dev)
+{
+	if (!max_sectors[MAJOR(dev)])
+		return MAX_SECTORS;
+	return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+	if (bdev->queue)
+		return bdev->queue(dev);
+	else
+		return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+	struct list_head *head = &list->free;
+	struct request *rq;
+	int i = 0;
+
+	while (!list_empty(head)) {
+		rq = list_entry(head->next, struct request, queue);
+		list_del(&rq->queue);
+		kmem_cache_free(request_cachep, rq);
+		i++;
+	};
+
+	if (i != list->count)
+		printk("request list leak!\n");
+
+	list->count = 0;
+	return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q:    the request queue to be released
+ *
+ * Description:
+ *     blk_cleanup_queue is the pair to blk_init_queue().  It should
+ *     be called when a request queue is being released; typically
+ *     when a block device is being de-registered.  Currently, its
+ *     primary task it to free all the &struct request structures that
+ *     were allocated to the queue.
+ * Caveat: 
+ *     Hopefully the low level driver will have finished any
+ *     outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+	int count = q->nr_requests;
+
+	count -= __blk_cleanup_queue(&q->rq);
+
+	if (count)
+		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+	if (atomic_read(&q->nr_sectors))
+		printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
+
+	memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q:       The queue which this applies to.
+ * @active:  A flag indication where the head of the queue is active.
+ *
+ * Description:
+ *    The driver for a block device may choose to leave the currently active
+ *    request on the request queue, removing it only when it has completed.
+ *    The queue handling routines assume this by default for safety reasons
+ *    and will not involve the head of the request queue in any merging or
+ *    reordering of requests when the queue is unplugged (and thus may be
+ *    working on this particular request).
+ *
+ *    If a driver removes requests from the queue before processing them, then
+ *    it may indicate that it does so, there by allowing the head of the queue
+ *    to be involved in merging and reordering.  This is done be calling
+ *    blk_queue_headactive() with an @active flag of %0.
+ *
+ *    If a driver processes several requests at once, it must remove them (or
+ *    at least all but one of them) from the request queue.
+ *
+ *    When a queue is plugged the head will be assumed to be inactive.
+ **/
+ 
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+	q->head_active = active;
+}
+
+/**
+ * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
+ * @q:       The queue which this applies to.
+ * @active:  A flag indication if you want sector throttling on
+ *
+ * Description:
+ * The sector throttling code allows us to put a limit on the number of
+ * sectors pending io to the disk at a given time, sending @active nonzero
+ * indicates you will call blk_started_sectors and blk_finished_sectors in
+ * addition to calling blk_started_io and blk_finished_io in order to
+ * keep track of the number of sectors in flight.
+ **/
+ 
+void blk_queue_throttle_sectors(request_queue_t * q, int active)
+{
+	q->can_throttle = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q:  the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ *    The normal way for &struct buffer_heads to be passed to a device
+ *    driver is for them to be collected into requests on a request
+ *    queue, and then to allow the device driver to select requests
+ *    off that queue when it is ready.  This works well for many block
+ *    devices. However some block devices (typically virtual devices
+ *    such as md or lvm) do not benefit from the processing on the
+ *    request queue, and are served best by having the requests passed
+ *    directly to them.  This can be achieved by providing a function
+ *    to blk_queue_make_request().
+ *
+ * Caveat:
+ *    The driver that does this *must* be able to deal appropriately
+ *    with buffers in "highmemory", either by calling bh_kmap() to get
+ *    a kernel mapping, to by calling create_bounce() to create a
+ *    buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+	q->make_request_fn = mfn;
+}
+
+/**
+ * blk_queue_bounce_limit - set bounce buffer limit for queue
+ * @q:  the request queue for the device
+ * @dma_addr:   bus address limit
+ *
+ * Description:
+ *    Different hardware can have different requirements as to what pages
+ *    it can do I/O directly to. A low level driver can call
+ *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
+ *    buffers for doing I/O to pages residing above @page. By default
+ *    the block layer sets this to the highest numbered "low" memory page.
+ **/
+void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
+{
+	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
+	unsigned long mb = dma_addr >> 20;
+	static request_queue_t *old_q;
+
+	/*
+	 * keep this for debugging for now...
+	 */
+	if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
+		old_q = q;
+		printk("blk: queue %p, ", q);
+		if (dma_addr == BLK_BOUNCE_ANY)
+			printk("no I/O memory limit\n");
+		else
+			printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
+			       (long long) dma_addr);
+	}
+
+	q->bounce_pfn = bounce_pfn;
+}
+
+
+/*
+ * can we merge the two segments, or do we need to start a new one?
+ */
+inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
+{
+	/*
+	 * if bh and nxt are contigous and don't cross a 4g boundary, it's ok
+	 */
+	if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
+		return 1;
+
+	return 0;
+}
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+	if (req->nr_segments < max_segments) {
+		req->nr_segments++;
+		return 1;
+	}
+	return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
+			    struct buffer_head *bh, int max_segments)
+{
+	if (blk_seg_merge_ok(req->bhtail, bh))
+		return 1;
+
+	return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
+			     struct buffer_head *bh, int max_segments)
+{
+	if (blk_seg_merge_ok(bh, req->bh))
+		return 1;
+
+	return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+				struct request *next, int max_segments)
+{
+	int total_segments = req->nr_segments + next->nr_segments;
+
+	if (blk_seg_merge_ok(req->bhtail, next->bh))
+		total_segments--;
+
+	if (total_segments > max_segments)
+		return 0;
+
+	req->nr_segments = total_segments;
+	return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+	/*
+	 * no need to replug device
+	 */
+	if (!list_empty(&q->queue_head) || q->plugged)
+		return;
+
+	q->plugged = 1;
+	queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+	if (q->plugged) {
+		q->plugged = 0;
+		if (!list_empty(&q->queue_head))
+			q->request_fn(q);
+	}
+}
+
+void generic_unplug_device(void *data)
+{
+	request_queue_t *q = (request_queue_t *) data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&io_request_lock, flags);
+	__generic_unplug_device(q);
+	spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/** blk_grow_request_list
+ *  @q: The &request_queue_t
+ *  @nr_requests: how many requests are desired
+ *
+ * More free requests are added to the queue's free lists, bringing
+ * the total number of requests to @nr_requests.
+ *
+ * The requests are added equally to the request queue's read
+ * and write freelists.
+ *
+ * This function can sleep.
+ *
+ * Returns the (new) number of requests which the queue has available.
+ */
+int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
+{
+	unsigned long flags;
+	/* Several broken drivers assume that this function doesn't sleep,
+	 * this causes system hangs during boot.
+	 * As a temporary fix, make the function non-blocking.
+	 */
+	spin_lock_irqsave(&io_request_lock, flags);
+	while (q->nr_requests < nr_requests) {
+		struct request *rq;
+
+		rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
+		if (rq == NULL)
+			break;
+		memset(rq, 0, sizeof(*rq));
+		rq->rq_status = RQ_INACTIVE;
+ 		list_add(&rq->queue, &q->rq.free);
+ 		q->rq.count++;
+
+		q->nr_requests++;
+	}
+
+ 	/*
+ 	 * Wakeup waiters after both one quarter of the
+ 	 * max-in-fligh queue and one quarter of the requests
+ 	 * are available again.
+ 	 */
+
+	q->batch_requests = q->nr_requests / 4;
+	if (q->batch_requests > 32)
+		q->batch_requests = 32;
+ 	q->batch_sectors = max_queue_sectors / 4;
+ 
+ 	q->max_queue_sectors = max_queue_sectors;
+ 
+ 	BUG_ON(!q->batch_sectors);
+ 	atomic_set(&q->nr_sectors, 0);
+
+	spin_unlock_irqrestore(&io_request_lock, flags);
+	return q->nr_requests;
+}
+
+static void blk_init_free_list(request_queue_t *q)
+{
+	struct sysinfo si;
+	int megs;		/* Total memory, in megabytes */
+ 	int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
+  
+ 	INIT_LIST_HEAD(&q->rq.free);
+	q->rq.count = 0;
+	q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
+	q->nr_requests = 0;
+
+	si_meminfo(&si);
+	megs = si.totalram >> (20 - PAGE_SHIFT);
+ 	nr_requests = MAX_NR_REQUESTS;
+ 	if (megs < 30) {
+  		nr_requests /= 2;
+ 		max_queue_sectors /= 2;
+ 	}
+ 	/* notice early if anybody screwed the defaults */
+ 	BUG_ON(!nr_requests);
+ 	BUG_ON(!max_queue_sectors);
+ 
+ 	blk_grow_request_list(q, nr_requests, max_queue_sectors);
+
+ 	init_waitqueue_head(&q->wait_for_requests);
+
+	spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue  - prepare a request queue for use with a block device
+ * @q:    The &request_queue_t to be initialised
+ * @rfn:  The function to be called to process requests that have been
+ *        placed on the queue.
+ *
+ * Description:
+ *    If a block device wishes to use the standard request handling procedures,
+ *    which sorts requests and coalesces adjacent requests, then it must
+ *    call blk_init_queue().  The function @rfn will be called when there
+ *    are requests on the queue that need to be processed.  If the device
+ *    supports plugging, then @rfn may not be called immediately when requests
+ *    are available on the queue, but may be called at some time later instead.
+ *    Plugged queues are generally unplugged when a buffer belonging to one
+ *    of the requests on the queue is needed, or due to memory pressure.
+ *
+ *    @rfn is not required, or even expected, to remove all requests off the
+ *    queue, but only as many as it can handle at a time.  If it does leave
+ *    requests on the queue, it is responsible for arranging that the requests
+ *    get dealt with eventually.
+ *
+ *    A global spin lock $io_request_lock must be held while manipulating the
+ *    requests on the request queue.
+ *
+ *    The request on the head of the queue is by default assumed to be
+ *    potentially active, and it is not considered for re-ordering or merging
+ *    whenever the given queue is unplugged. This behaviour can be changed with
+ *    blk_queue_headactive().
+ *
+ * Note:
+ *    blk_init_queue() must be paired with a blk_cleanup_queue() call
+ *    when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+	INIT_LIST_HEAD(&q->queue_head);
+	elevator_init(&q->elevator, ELEVATOR_LINUS);
+	blk_init_free_list(q);
+	q->request_fn     	= rfn;
+	q->back_merge_fn       	= ll_back_merge_fn;
+	q->front_merge_fn      	= ll_front_merge_fn;
+	q->merge_requests_fn	= ll_merge_requests_fn;
+	q->make_request_fn	= __make_request;
+	q->plug_tq.sync		= 0;
+	q->plug_tq.routine	= &generic_unplug_device;
+	q->plug_tq.data		= q;
+	q->plugged        	= 0;
+	q->can_throttle		= 0;
+
+	/*
+	 * These booleans describe the queue properties.  We set the
+	 * default (and most common) values here.  Other drivers can
+	 * use the appropriate functions to alter the queue properties.
+	 * as appropriate.
+	 */
+	q->plug_device_fn 	= generic_plug_device;
+	q->head_active    	= 1;
+
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+}
+
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in.  Returns NULL if there are no free requests.
+ */
+static struct request *get_request(request_queue_t *q, int rw)
+{
+	struct request *rq = NULL;
+	struct request_list *rl = &q->rq;
+
+	if (blk_oversized_queue(q)) {
+		int rlim = q->nr_requests >> 5;
+
+		if (rlim < 4)
+			rlim = 4;
+
+		/*
+		 * if its a write, or we have more than a handful of reads
+		 * pending, bail out
+		 */
+		if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
+			return NULL;
+		if (blk_oversized_queue_reads(q))
+			return NULL;
+	}
+	
+	if (!list_empty(&rl->free)) {
+		rq = blkdev_free_rq(&rl->free);
+		list_del(&rq->queue);
+		rl->count--;
+		rl->pending[rw]++;
+		rq->rq_status = RQ_ACTIVE;
+		rq->cmd = rw;
+		rq->special = NULL;
+		rq->q = q;
+	}
+
+	return rq;
+}
+
+/*
+ * Here's the request allocation design, low latency version:
+ *
+ * 1: Blocking on request exhaustion is a key part of I/O throttling.
+ * 
+ * 2: We want to be `fair' to all requesters.  We must avoid starvation, and
+ *    attempt to ensure that all requesters sleep for a similar duration.  Hence
+ *    no stealing requests when there are other processes waiting.
+ *
+ * There used to be more here, attempting to allow a process to send in a
+ * number of requests once it has woken up.  But, there's no way to 
+ * tell if a process has just been woken up, or if it is a new process
+ * coming in to steal requests from the waiters.  So, we give up and force
+ * everyone to wait fairly.
+ * 
+ * So here's what we do:
+ * 
+ *    a) A READA requester fails if free_requests < batch_requests
+ * 
+ *       We don't want READA requests to prevent sleepers from ever
+ *       waking.  Note that READA is used extremely rarely - a few
+ *       filesystems use it for directory readahead.
+ * 
+ *  When a process wants a new request:
+ * 
+ *    b) If free_requests == 0, the requester sleeps in FIFO manner, and
+ *       the queue full condition is set.  The full condition is not
+ *       cleared until there are no longer any waiters.  Once the full
+ *       condition is set, all new io must wait, hopefully for a very
+ *       short period of time.
+ * 
+ *  When a request is released:
+ * 
+ *    c) If free_requests < batch_requests, do nothing.
+ * 
+ *    d) If free_requests >= batch_requests, wake up a single waiter.
+ *
+ *   As each waiter gets a request, he wakes another waiter.  We do this
+ *   to prevent a race where an unplug might get run before a request makes
+ *   it's way onto the queue.  The result is a cascade of wakeups, so delaying
+ *   the initial wakeup until we've got batch_requests available helps avoid
+ *   wakeups where there aren't any requests available yet.
+ */
+
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+	register struct request *rq;
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue_exclusive(&q->wait_for_requests, &wait);
+
+	do {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&io_request_lock);
+		if (blk_oversized_queue(q) || q->rq.count == 0) {
+			__generic_unplug_device(q);
+			spin_unlock_irq(&io_request_lock);
+			schedule();
+			spin_lock_irq(&io_request_lock);
+		}
+		rq = get_request(q, rw);
+		spin_unlock_irq(&io_request_lock);
+	} while (rq == NULL);
+	remove_wait_queue(&q->wait_for_requests, &wait);
+	current->state = TASK_RUNNING;
+
+	return rq;
+}
+
+static void get_request_wait_wakeup(request_queue_t *q, int rw)
+{
+	/*
+	 * avoid losing an unplug if a second __get_request_wait did the
+	 * generic_unplug_device while our __get_request_wait was running
+	 * w/o the queue_lock held and w/ our request out of the queue.
+	 */	
+	if (waitqueue_active(&q->wait_for_requests))
+		wake_up(&q->wait_for_requests);
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+	int minor,major;
+
+	major = MAJOR(dev);
+	minor = MINOR(dev);
+	if (major < 0 || major >= MAX_BLKDEV) return 0;
+	return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+	int minor,major;
+
+	major = MAJOR(dev);
+	minor = MINOR(dev);
+	if (major < 0 || major >= MAX_BLKDEV) return;
+	if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+	else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+				unsigned long nr_sectors, int new_io)
+{
+	unsigned int major = MAJOR(dev);
+	unsigned int index;
+
+	index = disk_index(dev);
+	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+		return;
+
+	kstat.dk_drive[major][index] += new_io;
+	if (rw == READ) {
+		kstat.dk_drive_rio[major][index] += new_io;
+		kstat.dk_drive_rblk[major][index] += nr_sectors;
+	} else if (rw == WRITE) {
+		kstat.dk_drive_wio[major][index] += new_io;
+		kstat.dk_drive_wblk[major][index] += nr_sectors;
+	} else
+		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+}
+
+#ifdef CONFIG_BLK_STATS
+/*
+ * Return up to two hd_structs on which to do IO accounting for a given
+ * request.
+ *
+ * On a partitioned device, we want to account both against the partition
+ * and against the whole disk.
+ */
+static void locate_hd_struct(struct request *req, 
+			     struct hd_struct **hd1,
+			     struct hd_struct **hd2)
+{
+	struct gendisk *gd;
+
+	*hd1 = NULL;
+	*hd2 = NULL;
+	
+	gd = get_gendisk(req->rq_dev);
+	if (gd && gd->part) {
+		/* Mask out the partition bits: account for the entire disk */
+		int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+		int whole_minor = devnr << gd->minor_shift;
+
+		*hd1 = &gd->part[whole_minor];
+		if (whole_minor != MINOR(req->rq_dev))
+			*hd2= &gd->part[MINOR(req->rq_dev)];
+	}
+}
+
+/*
+ * Round off the performance stats on an hd_struct.
+ *
+ * The average IO queue length and utilisation statistics are maintained
+ * by observing the current state of the queue length and the amount of
+ * time it has been in this state for.
+ * Normally, that accounting is done on IO completion, but that can result
+ * in more than a second's worth of IO being accounted for within any one
+ * second, leading to >100% utilisation.  To deal with that, we do a
+ * round-off before returning the results when reading /proc/partitions,
+ * accounting immediately for all queue usage up to the current jiffies and
+ * restarting the counters again.
+ */
+void disk_round_stats(struct hd_struct *hd)
+{
+	unsigned long now = jiffies;
+	
+	hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
+	hd->last_queue_change = now;
+
+	if (hd->ios_in_flight)
+		hd->io_ticks += (now - hd->last_idle_time);
+	hd->last_idle_time = now;	
+}
+
+static inline void down_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);	
+	--hd->ios_in_flight;
+}
+
+static inline void up_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);
+	++hd->ios_in_flight;
+}
+
+static void account_io_start(struct hd_struct *hd, struct request *req,
+			     int merge, int sectors)
+{
+	switch (req->cmd) {
+	case READ:
+		if (merge)
+			hd->rd_merges++;
+		hd->rd_sectors += sectors;
+		break;
+	case WRITE:
+		if (merge)
+			hd->wr_merges++;
+		hd->wr_sectors += sectors;
+		break;
+	}
+	if (!merge)
+		up_ios(hd);
+}
+
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+	unsigned long duration = jiffies - req->start_time;
+	switch (req->cmd) {
+	case READ:
+		hd->rd_ticks += duration;
+		hd->rd_ios++;
+		break;
+	case WRITE:
+		hd->wr_ticks += duration;
+		hd->wr_ios++;
+		break;
+	}
+	down_ios(hd);
+}
+
+void req_new_io(struct request *req, int merge, int sectors)
+{
+	struct hd_struct *hd1, *hd2;
+
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_start(hd1, req, merge, sectors);
+	if (hd2)
+		account_io_start(hd2, req, merge, sectors);
+}
+
+void req_merged_io(struct request *req)
+{
+	struct hd_struct *hd1, *hd2;
+
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		down_ios(hd1);
+	if (hd2)	
+		down_ios(hd2);
+}
+
+void req_finished_io(struct request *req)
+{
+	struct hd_struct *hd1, *hd2;
+
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_end(hd1, req);
+	if (hd2)	
+		account_io_end(hd2, req);
+}
+EXPORT_SYMBOL(req_finished_io);
+#endif /* CONFIG_BLK_STATS */
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+			       struct list_head *insert_here)
+{
+	drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+	if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+		spin_unlock_irq(&io_request_lock);
+		BUG();
+	}
+
+	/*
+	 * elevator indicated where it wants this request to be
+	 * inserted at elevator_merge time
+	 */
+	list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+void blkdev_release_request(struct request *req)
+{
+	request_queue_t *q = req->q;
+
+	req->rq_status = RQ_INACTIVE;
+	req->q = NULL;
+
+	/*
+	 * Request may not have originated from ll_rw_blk. if not,
+	 * assume it has free buffers and check waiters
+	 */
+	if (q) {
+		struct request_list *rl = &q->rq;
+		int oversized_batch = 0;
+
+		if (q->can_throttle)
+			oversized_batch = blk_oversized_queue_batch(q);
+		rl->count++;
+		/*
+		 * paranoia check
+		 */
+		if (req->cmd == READ || req->cmd == WRITE)
+			rl->pending[req->cmd]--;
+		if (rl->pending[READ] > q->nr_requests)
+			printk("blk: reads: %u\n", rl->pending[READ]);
+		if (rl->pending[WRITE] > q->nr_requests)
+			printk("blk: writes: %u\n", rl->pending[WRITE]);
+		if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
+			printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
+		list_add(&req->queue, &rl->free);
+		if (rl->count >= q->batch_requests && !oversized_batch) {
+			smp_mb();
+			if (waitqueue_active(&q->wait_for_requests))
+				wake_up(&q->wait_for_requests);
+		}
+	}
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+			  struct request *req,
+			  int max_sectors,
+			  int max_segments)
+{
+	struct request *next;
+  
+	next = blkdev_next_request(req);
+	if (req->sector + req->nr_sectors != next->sector)
+		return;
+	if (req->cmd != next->cmd
+	    || req->rq_dev != next->rq_dev
+	    || req->nr_sectors + next->nr_sectors > max_sectors
+	    || next->waiting)
+		return;
+	/*
+	 * If we are not allowed to merge these requests, then
+	 * return.  If we are allowed to merge, then the count
+	 * will have been updated to the appropriate number,
+	 * and we shouldn't do it here too.
+	 */
+	if (!q->merge_requests_fn(q, req, next, max_segments))
+		return;
+
+	q->elevator.elevator_merge_req_fn(req, next);
+	req->bhtail->b_reqnext = next->bh;
+	req->bhtail = next->bhtail;
+	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+	list_del(&next->queue);
+
+	/* One last thing: we have removed a request, so we now have one
+	   less expected IO to complete for accounting purposes. */
+	req_merged_io(req);
+
+	blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+				      struct request *req,
+				      int max_sectors,
+				      int max_segments)
+{
+	if (&req->queue == q->queue_head.prev)
+		return;
+	attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+				       struct list_head * head,
+				       struct request *req,
+				       int max_sectors,
+				       int max_segments)
+{
+	struct list_head * prev;
+
+	prev = req->queue.prev;
+	if (head == prev)
+		return;
+	attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+				  struct buffer_head * bh)
+{
+	unsigned int sector, count, sync;
+	int max_segments = MAX_SEGMENTS;
+	struct request * req, *freereq = NULL;
+	int rw_ahead, max_sectors, el_ret;
+	struct list_head *head, *insert_here;
+	int latency;
+	elevator_t *elevator = &q->elevator;
+	int should_wake = 0;
+
+	count = bh->b_size >> 9;
+	sector = bh->b_rsector;
+	sync = test_and_clear_bit(BH_Sync, &bh->b_state);
+
+	rw_ahead = 0;	/* normal case; gets changed below for READA */
+	switch (rw) {
+		case READA:
+#if 0	/* bread() misinterprets failed READA attempts as IO errors on SMP */
+			rw_ahead = 1;
+#endif
+			rw = READ;	/* drop into READ */
+		case READ:
+		case WRITE:
+			latency = elevator_request_latency(elevator, rw);
+			break;
+		default:
+			BUG();
+			goto end_io;
+	}
+
+	/* We'd better have a real physical mapping!
+	   Check this bit only if the buffer was dirty and just locked
+	   down by us so at this point flushpage will block and
+	   won't clear the mapped bit under us. */
+	if (!buffer_mapped(bh))
+		BUG();
+
+	/*
+	 * Temporary solution - in 2.5 this will be done by the lowlevel
+	 * driver. Create a bounce buffer if the buffer data points into
+	 * high memory - keep the original buffer otherwise.
+	 */
+	bh = blk_queue_bounce(q, rw, bh);
+
+/* look for a free request. */
+	/*
+	 * Try to coalesce the new request with old requests
+	 */
+	max_sectors = get_max_sectors(bh->b_rdev);
+
+	req = NULL;
+	head = &q->queue_head;
+	/*
+	 * Now we acquire the request spinlock, we have to be mega careful
+	 * not to schedule or do something nonatomic
+	 */
+	spin_lock_irq(&io_request_lock);
+
+again:
+	insert_here = head->prev;
+
+	if (list_empty(head)) {
+		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+		goto get_rq;
+	} else if (q->head_active && !q->plugged)
+		head = head->next;
+
+	el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+	switch (el_ret) {
+
+		case ELEVATOR_BACK_MERGE:
+			if (!q->back_merge_fn(q, req, bh, max_segments)) {
+				insert_here = &req->queue;
+				break;
+			}
+			req->bhtail->b_reqnext = bh;
+			req->bhtail = bh;
+			req->nr_sectors = req->hard_nr_sectors += count;
+			blk_started_io(count);
+			blk_started_sectors(req, count);
+			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+			req_new_io(req, 1, count);
+			attempt_back_merge(q, req, max_sectors, max_segments);
+			goto out;
+
+		case ELEVATOR_FRONT_MERGE:
+			if (!q->front_merge_fn(q, req, bh, max_segments)) {
+				insert_here = req->queue.prev;
+				break;
+			}
+			bh->b_reqnext = req->bh;
+			req->bh = bh;
+			/*
+			 * may not be valid, but queues not having bounce
+			 * enabled for highmem pages must not look at
+			 * ->buffer anyway
+			 */
+			req->buffer = bh->b_data;
+			req->current_nr_sectors = req->hard_cur_sectors = count;
+			req->sector = req->hard_sector = sector;
+			req->nr_sectors = req->hard_nr_sectors += count;
+			blk_started_io(count);
+			blk_started_sectors(req, count);
+			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+			req_new_io(req, 1, count);
+			attempt_front_merge(q, head, req, max_sectors, max_segments);
+			goto out;
+
+		/*
+		 * elevator says don't/can't merge. get new request
+		 */
+		case ELEVATOR_NO_MERGE:
+			/*
+			 * use elevator hints as to where to insert the
+			 * request. if no hints, just add it to the back
+			 * of the queue
+			 */
+			if (req)
+				insert_here = &req->queue;
+			break;
+
+		default:
+			printk("elevator returned crap (%d)\n", el_ret);
+			BUG();
+	}
+		
+get_rq:
+	if (freereq) {
+		req = freereq;
+		freereq = NULL;
+	} else {
+		/*
+		 * See description above __get_request_wait()
+		 */
+		if (rw_ahead) {
+			if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
+				spin_unlock_irq(&io_request_lock);
+				goto end_io;
+			}
+			req = get_request(q, rw);
+			if (req == NULL)
+				BUG();
+		} else {
+			req = get_request(q, rw);
+			if (req == NULL) {
+				spin_unlock_irq(&io_request_lock);
+				freereq = __get_request_wait(q, rw);
+				head = &q->queue_head;
+				spin_lock_irq(&io_request_lock);
+				should_wake = 1;
+				goto again;
+			}
+		}
+	}
+
+/* fill up the request-info, and add it to the queue */
+	req->elevator_sequence = latency;
+	req->cmd = rw;
+	req->errors = 0;
+	req->hard_sector = req->sector = sector;
+	req->hard_nr_sectors = req->nr_sectors = count;
+	req->current_nr_sectors = req->hard_cur_sectors = count;
+	req->nr_segments = 1; /* Always 1 for a new request. */
+	req->nr_hw_segments = 1; /* Always 1 for a new request. */
+	req->buffer = bh->b_data;
+	req->waiting = NULL;
+	req->bh = bh;
+	req->bhtail = bh;
+	req->rq_dev = bh->b_rdev;
+	req->start_time = jiffies;
+	req_new_io(req, 0, count);
+	blk_started_io(count);
+	blk_started_sectors(req, count);
+	add_request(q, req, insert_here);
+out:
+	if (freereq)
+		blkdev_release_request(freereq);
+	if (should_wake)
+		get_request_wait_wakeup(q, rw);
+	if (sync)
+		__generic_unplug_device(q);
+	spin_unlock_irq(&io_request_lock);
+	return 0;
+end_io:
+	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+	return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to it's device driver for I/O
+ * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh:  The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value.  The
+ * %READ and %WRITE options are (hopefully) obvious in meaning.  The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status.  The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) else where.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled.  BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector.  So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+	int major = MAJOR(bh->b_rdev);
+	int minorsize = 0;
+	request_queue_t *q;
+
+	if (!bh->b_end_io)
+		BUG();
+
+	/* Test device size, when known. */
+	if (blk_size[major])
+		minorsize = blk_size[major][MINOR(bh->b_rdev)];
+	if (minorsize) {
+		unsigned long maxsector = (minorsize << 1) + 1;
+		unsigned long sector = bh->b_rsector;
+		unsigned int count = bh->b_size >> 9;
+
+		if (maxsector < count || maxsector - count < sector) {
+			/* Yecch */
+			bh->b_state &= ~(1 << BH_Dirty);
+
+			/* This may well happen - the kernel calls bread()
+			   without checking the size of the device, e.g.,
+			   when mounting a device. */
+			printk(KERN_INFO
+			       "attempt to access beyond end of device\n");
+			printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+			       kdevname(bh->b_rdev), rw,
+			       (sector + count)>>1, minorsize);
+
+			bh->b_end_io(bh, 0);
+			return;
+		}
+	}
+
+	/*
+	 * Resolve the mapping until finished. (drivers are
+	 * still free to implement/resolve their own stacking
+	 * by explicitly returning 0)
+	 */
+	/* NOTE: we don't repeat the blk_size check for each new device.
+	 * Stacking drivers are expected to know what they are doing.
+	 */
+	do {
+		q = blk_get_queue(bh->b_rdev);
+		if (!q) {
+			printk(KERN_ERR
+			       "generic_make_request: Trying to access "
+			       "nonexistent block-device %s (%ld)\n",
+			       kdevname(bh->b_rdev), bh->b_rsector);
+			buffer_IO_error(bh);
+			break;
+		}
+	} while (q->make_request_fn(q, rw, bh));
+}
+
+
+/**
+ * submit_bh: submit a buffer_head to the block device later for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+	int count = bh->b_size >> 9;
+
+	if (!test_bit(BH_Lock, &bh->b_state))
+		BUG();
+
+	set_bit(BH_Req, &bh->b_state);
+	set_bit(BH_Launder, &bh->b_state);
+
+	/*
+	 * First step, 'identity mapping' - RAID or LVM might
+	 * further remap this.
+	 */
+	bh->b_rdev = bh->b_dev;
+	bh->b_rsector = bh->b_blocknr * count;
+
+	get_bh(bh);
+	generic_make_request(rw, bh);
+
+	/* fix race condition with wait_on_buffer() */
+	smp_mb(); /* spin_unlock may have inclusive semantics */
+	if (waitqueue_active(&bh->b_wait))
+		wake_up(&bh->b_wait);
+
+	if (block_dump)
+		printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev));
+
+	put_bh(bh);
+	switch (rw) {
+		case WRITE:
+			kstat.pgpgout += count;
+			break;
+		default:
+			kstat.pgpgin += count;
+			break;
+	}
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality that is not in
+ * generic_make_request() that is relevant to buffers in the buffer
+ * cache or page cache.  In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing read request.  Further it marks
+ * as clean buffers that are processed for writing (the buffer cache
+ * wont assume that they are actually clean until the buffer gets
+ * unlocked).
+ *
+ * ll_rw_block sets b_end_io to simple completion handler that marks
+ * the buffer up-to-date (if approriate), unlocks the buffer and wakes
+ * any waiters.  As client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ *  All of the buffers must be for the same device, and must also be
+ *  of the current approved size for the device.  */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+	unsigned int major;
+	int correct_size;
+	int i;
+
+	if (!nr)
+		return;
+
+	major = MAJOR(bhs[0]->b_dev);
+
+	/* Determine correct block size for this device. */
+	correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+	/* Verify requested block sizes. */
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+		if (bh->b_size % correct_size) {
+			printk(KERN_NOTICE "ll_rw_block: device %s: "
+			       "only %d-char blocks implemented (%u)\n",
+			       kdevname(bhs[0]->b_dev),
+			       correct_size, bh->b_size);
+			goto sorry;
+		}
+	}
+
+	if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+		printk(KERN_NOTICE "Can't write to read-only device %s\n",
+		       kdevname(bhs[0]->b_dev));
+		goto sorry;
+	}
+
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+
+		/* Only one thread can actually submit the I/O. */
+		if (test_and_set_bit(BH_Lock, &bh->b_state))
+			continue;
+
+		/* We have the buffer lock */
+		atomic_inc(&bh->b_count);
+		bh->b_end_io = end_buffer_io_sync;
+
+		switch(rw) {
+		case WRITE:
+			if (!atomic_set_buffer_clean(bh))
+				/* Hmmph! Nothing to write */
+				goto end_io;
+			__mark_buffer_clean(bh);
+			break;
+
+		case READA:
+		case READ:
+			if (buffer_uptodate(bh))
+				/* Hmmph! Already have it */
+				goto end_io;
+			break;
+		default:
+			BUG();
+	end_io:
+			bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+			continue;
+		}
+
+		submit_bh(rw, bh);
+	}
+	return;
+
+sorry:
+	/* Make sure we don't get infinite dirty retries.. */
+	for (i = 0; i < nr; i++)
+		mark_buffer_clean(bhs[i]);
+}
+
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+static void blk_writeback_timer(unsigned long data)
+{
+	wakeup_bdflush();
+	wakeup_kupdate();
+}
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req:      the request being processed
+ * @uptodate: 0 for I/O error
+ * @name:     the name printed for an I/O error
+ *
+ * Description:
+ *     Ends I/O on the first buffer attached to @req, and sets it up
+ *     for the next buffer_head (if any) in the cluster.
+ *     
+ * Return:
+ *     0 - we are done with this request, call end_that_request_last()
+ *     1 - still buffers pending for this request
+ *
+ * Caveat: 
+ *     Drivers implementing their own end_request handling must call
+ *     blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+	struct buffer_head * bh;
+	int nsect;
+
+	req->errors = 0;
+	if (!uptodate)
+		printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+			kdevname(req->rq_dev), name, req->sector);
+
+	if ((bh = req->bh) != NULL) {
+		nsect = bh->b_size >> 9;
+		blk_finished_io(nsect);
+		blk_finished_sectors(req, nsect);
+		req->bh = bh->b_reqnext;
+		bh->b_reqnext = NULL;
+		bh->b_end_io(bh, uptodate);
+		if ((bh = req->bh) != NULL) {
+			req->hard_sector += nsect;
+			req->hard_nr_sectors -= nsect;
+			req->sector = req->hard_sector;
+			req->nr_sectors = req->hard_nr_sectors;
+
+			req->current_nr_sectors = bh->b_size >> 9;
+			req->hard_cur_sectors = req->current_nr_sectors;
+			if (req->nr_sectors < req->current_nr_sectors) {
+				req->nr_sectors = req->current_nr_sectors;
+				printk("end_request: buffer-list destroyed\n");
+			}
+			req->buffer = bh->b_data;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+extern int laptop_mode;
+
+void end_that_request_last(struct request *req)
+{
+	struct completion *waiting = req->waiting;
+
+	/*
+	 * schedule the writeout of pending dirty data when the disk is idle
+	 */
+	if (laptop_mode && req->cmd == READ)
+		mod_timer(&writeback_timer, jiffies + 5 * HZ);
+
+	req_finished_io(req);
+	blkdev_release_request(req);
+	if (waiting)
+		complete(waiting);
+}
+
+int __init blk_dev_init(void)
+{
+	struct blk_dev_struct *dev;
+
+	request_cachep = kmem_cache_create("blkdev_requests",
+					   sizeof(struct request),
+					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	if (!request_cachep)
+		panic("Can't create request pool slab cache\n");
+
+	for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+		dev->queue = NULL;
+
+	memset(ro_bits,0,sizeof(ro_bits));
+	memset(max_readahead, 0, sizeof(max_readahead));
+	memset(max_sectors, 0, sizeof(max_sectors));
+
+	blk_max_low_pfn = max_low_pfn - 1;
+	blk_max_pfn = max_pfn - 1;
+
+	init_timer(&writeback_timer);
+	writeback_timer.function = blk_writeback_timer;
+
+#ifdef CONFIG_AMIGA_Z2RAM
+	z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+	stram_device_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+	isp16_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+	ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+	xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+	mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+	{ extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+	swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+	swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+	amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+	atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+	floppy_init();
+#else
+#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */
+	outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+	cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+	acsi_init();
+#endif
+#ifdef CONFIG_MCD
+	mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+	mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+	sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+	aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+	sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+	gscd_init();
+#endif
+#ifdef CONFIG_CM206
+	cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+	optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+	sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+	ap_init();
+#endif
+#ifdef CONFIG_DDV
+	ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+	mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+	dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+	tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+        xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+	jsfd_init();
+#endif
+
+#ifdef CONFIG_XENOLINUX_BLOCK
+    xlblk_init();
+#endif
+
+	return 0;
+};
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_grow_request_list);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_throttle_sectors);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(generic_unplug_device);
+EXPORT_SYMBOL(blk_queue_bounce_limit);
+EXPORT_SYMBOL(blk_max_low_pfn);
+EXPORT_SYMBOL(blk_max_pfn);
+EXPORT_SYMBOL(blk_seg_merge_ok);
+EXPORT_SYMBOL(blk_nohighio);
diff --git a/xenolinux-2.4.23-sparse/drivers/char/mem.c b/xenolinux-2.4.23-sparse/drivers/char/mem.c
new file mode 100644
index 0000000000..73e6d48713
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/drivers/char/mem.c
@@ -0,0 +1,815 @@
+/*
+ *  linux/drivers/char/mem.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Added devfs support. 
+ *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
+ *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
+ *
+ *  MODIFIED FOR XENOLINUX by Keir Fraser, 10th July 2003.
+ *  Xenolinux has strange semantics for /dev/mem and /dev/kmem!!
+ *   1. mmap will not work on /dev/kmem
+ *   2. mmap on /dev/mem interprets the 'file offset' as a machine address
+ *      rather than a physical address.
+ *  I don't believe anyone sane mmaps /dev/kmem, but /dev/mem is mmapped
+ *  to get at memory-mapped I/O spaces (eg. the VESA X server does this).
+ *  For this to work at all we need to expect machine addresses.
+ *  Reading/writing of /dev/kmem expects kernel virtual addresses, as usual.
+ *  Reading/writing of /dev/mem expects 'physical addresses' as usual -- this
+ *  is because /dev/mem can only read/write existing kernel mappings, which
+ *  will be normal RAM, and we should present pseudo-physical layout for all
+ *  except I/O (which is the sticky case that mmap is hacked to deal with).
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/tpqic02.h>
+#include <linux/ftape.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/raw.h>
+#include <linux/tty.h>
+#include <linux/capability.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+
+#ifdef CONFIG_I2C
+extern int i2c_init_all(void);
+#endif
+#ifdef CONFIG_FB
+extern void fbmem_init(void);
+#endif
+#ifdef CONFIG_PROM_CONSOLE
+extern void prom_con_init(void);
+#endif
+#ifdef CONFIG_MDA_CONSOLE
+extern void mda_console_init(void);
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
+extern void tapechar_init(void);
+#endif
+     
+static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp,
+			    const char * buf, size_t count, loff_t *ppos)
+{
+	ssize_t written;
+
+	written = 0;
+#if defined(__sparc__) || defined(__mc68000__)
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (realp < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE-realp;
+		if (sz > count) sz = count; 
+		/* Hmm. Do something? */
+		buf+=sz;
+		p+=sz;
+		count-=sz;
+		written+=sz;
+	}
+#endif
+	if (copy_from_user(p, buf, count))
+		return -EFAULT;
+	written += count;
+	*ppos += written;
+	return written;
+}
+
+
+/*
+ * This funcion reads the *physical* memory. The f_pos points directly to the 
+ * memory location. 
+ */
+static ssize_t read_mem(struct file * file, char * buf,
+			size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	unsigned long end_mem;
+	ssize_t read;
+	
+	end_mem = __pa(high_memory);
+	if (p >= end_mem)
+		return 0;
+	if (count > end_mem - p)
+		count = end_mem - p;
+	read = 0;
+#if defined(__sparc__) || defined(__mc68000__)
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (p < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE-p;
+		if (sz > count) 
+			sz = count; 
+		if (sz > 0) {
+			if (clear_user(buf, sz))
+				return -EFAULT;
+			buf += sz; 
+			p += sz; 
+			count -= sz; 
+			read += sz; 
+		}
+	}
+#endif
+	if (copy_to_user(buf, __va(p), count))
+		return -EFAULT;
+	read += count;
+	*ppos += read;
+	return read;
+}
+
+static ssize_t write_mem(struct file * file, const char * buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	unsigned long end_mem;
+
+	end_mem = __pa(high_memory);
+	if (p >= end_mem)
+		return 0;
+	if (count > end_mem - p)
+		count = end_mem - p;
+	return do_write_mem(file, __va(p), p, buf, count, ppos);
+}
+
+#ifndef pgprot_noncached
+
+/*
+ * This should probably be per-architecture in <asm/pgtable.h>
+ */
+static inline pgprot_t pgprot_noncached(pgprot_t _prot)
+{
+	unsigned long prot = pgprot_val(_prot);
+
+#if defined(__i386__) || defined(__x86_64__)
+	/* On PPro and successors, PCD alone doesn't always mean 
+	    uncached because of interactions with the MTRRs. PCD | PWT
+	    means definitely uncached. */ 
+	if (boot_cpu_data.x86 > 3)
+		prot |= _PAGE_PCD | _PAGE_PWT;
+#elif defined(__powerpc__)
+	prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+#elif defined(__mc68000__)
+#ifdef SUN3_PAGE_NOCACHE
+	if (MMU_IS_SUN3)
+		prot |= SUN3_PAGE_NOCACHE;
+	else
+#endif
+	if (MMU_IS_851 || MMU_IS_030)
+		prot |= _PAGE_NOCACHE030;
+	/* Use no-cache mode, serialized */
+	else if (MMU_IS_040 || MMU_IS_060)
+		prot = (prot & _CACHEMASK040) | _PAGE_NOCACHE_S;
+#endif
+
+	return __pgprot(prot);
+}
+
+#endif /* !pgprot_noncached */
+
+/*
+ * Architectures vary in how they handle caching for addresses 
+ * outside of main memory.
+ */
+static inline int noncached_address(unsigned long addr)
+{
+#if defined(__i386__)
+	/* 
+	 * On the PPro and successors, the MTRRs are used to set
+	 * memory types for physical addresses outside main memory, 
+	 * so blindly setting PCD or PWT on those pages is wrong.
+	 * For Pentiums and earlier, the surround logic should disable 
+	 * caching for the high addresses through the KEN pin, but
+	 * we maintain the tradition of paranoia in this code.
+	 */
+ 	return !( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) ||
+		  test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) ||
+		  test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) ||
+		  test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) )
+	  && addr >= __pa(high_memory);
+#else
+	return addr >= __pa(high_memory);
+#endif
+}
+
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+#if defined(CONFIG_XENO) && defined(CONFIG_XENO_PRIV)
+	if (!(start_info.flags & SIF_PRIVILEGED))
+		return -ENXIO;
+
+	/* DONTCOPY is essential for Xenolinux as copy_page_range is broken. */
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, 
+			     vma->vm_end-vma->vm_start, vma->vm_page_prot))
+		return -EAGAIN;
+	return 0;
+#elif defined(CONFIG_XENO)
+	return -ENXIO;
+#else
+	/*
+	 * Accessing memory above the top the kernel knows about or
+	 * through a file pointer that was marked O_SYNC will be
+	 * done non-cached.
+	 */
+	if (noncached_address(offset) || (file->f_flags & O_SYNC))
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	/* Don't try to swap out physical pages.. */
+	vma->vm_flags |= VM_RESERVED;
+
+	/*
+	 * Don't dump addresses that are not real memory to a core file.
+	 */
+	if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC))
+		vma->vm_flags |= VM_IO;
+
+	if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start,
+			     vma->vm_page_prot))
+		return -EAGAIN;
+	return 0;
+#endif
+}
+
+/*
+ * This function reads the *virtual* memory as seen by the kernel.
+ */
+static ssize_t read_kmem(struct file *file, char *buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	ssize_t read = 0;
+	ssize_t virtr = 0;
+	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
+		
+	if (p < (unsigned long) high_memory) {
+		read = count;
+		if (count > (unsigned long) high_memory - p)
+			read = (unsigned long) high_memory - p;
+
+#if defined(__sparc__) || defined(__mc68000__)
+		/* we don't have page 0 mapped on sparc and m68k.. */
+		if (p < PAGE_SIZE && read > 0) {
+			size_t tmp = PAGE_SIZE - p;
+			if (tmp > read) tmp = read;
+			if (clear_user(buf, tmp))
+				return -EFAULT;
+			buf += tmp;
+			p += tmp;
+			read -= tmp;
+			count -= tmp;
+		}
+#endif
+		if (copy_to_user(buf, (char *)p, read))
+			return -EFAULT;
+		p += read;
+		buf += read;
+		count -= read;
+	}
+
+	if (count > 0) {
+		kbuf = (char *)__get_free_page(GFP_KERNEL);
+		if (!kbuf)
+			return -ENOMEM;
+		while (count > 0) {
+			int len = count;
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+			len = vread(kbuf, (char *)p, len);
+			if (!len)
+				break;
+			if (copy_to_user(buf, kbuf, len)) {
+				free_page((unsigned long)kbuf);
+				return -EFAULT;
+			}
+			count -= len;
+			buf += len;
+			virtr += len;
+			p += len;
+		}
+		free_page((unsigned long)kbuf);
+	}
+ 	*ppos = p;
+ 	return virtr + read;
+}
+
+extern long vwrite(char *buf, char *addr, unsigned long count);
+
+/*
+ * This function writes to the *virtual* memory as seen by the kernel.
+ */
+static ssize_t write_kmem(struct file * file, const char * buf, 
+			  size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	ssize_t wrote = 0;
+	ssize_t virtr = 0;
+	char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
+
+	if (p < (unsigned long) high_memory) {
+		wrote = count;
+		if (count > (unsigned long) high_memory - p)
+			wrote = (unsigned long) high_memory - p;
+
+		wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos);
+
+		p += wrote;
+		buf += wrote;
+		count -= wrote;
+	}
+
+	if (count > 0) {
+		kbuf = (char *)__get_free_page(GFP_KERNEL);
+		if (!kbuf)
+			return -ENOMEM;
+		while (count > 0) {
+			int len = count;
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+			if (len && copy_from_user(kbuf, buf, len)) {
+				free_page((unsigned long)kbuf);
+				return -EFAULT;
+			}
+			len = vwrite(kbuf, (char *)p, len);
+			count -= len;
+			buf += len;
+			virtr += len;
+			p += len;
+		}
+		free_page((unsigned long)kbuf);
+	}
+
+ 	*ppos = p;
+ 	return virtr + wrote;
+}
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static ssize_t read_port(struct file * file, char * buf,
+			 size_t count, loff_t *ppos)
+{
+	unsigned long i = *ppos;
+	char *tmp = buf;
+
+	if (verify_area(VERIFY_WRITE,buf,count))
+		return -EFAULT; 
+	while (count-- > 0 && i < 65536) {
+		if (__put_user(inb(i),tmp) < 0) 
+			return -EFAULT;  
+		i++;
+		tmp++;
+	}
+	*ppos = i;
+	return tmp-buf;
+}
+
+static ssize_t write_port(struct file * file, const char * buf,
+			  size_t count, loff_t *ppos)
+{
+	unsigned long i = *ppos;
+	const char * tmp = buf;
+
+	if (verify_area(VERIFY_READ,buf,count))
+		return -EFAULT;
+	while (count-- > 0 && i < 65536) {
+		char c;
+		if (__get_user(c, tmp)) 
+			return -EFAULT; 
+		outb(c,i);
+		i++;
+		tmp++;
+	}
+	*ppos = i;
+	return tmp-buf;
+}
+#endif
+
+static ssize_t read_null(struct file * file, char * buf,
+			 size_t count, loff_t *ppos)
+{
+	return 0;
+}
+
+static ssize_t write_null(struct file * file, const char * buf,
+			  size_t count, loff_t *ppos)
+{
+	return count;
+}
+
+/*
+ * For fun, we are using the MMU for this.
+ */
+static inline size_t read_zero_pagealigned(char * buf, size_t size)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct * vma;
+	unsigned long addr=(unsigned long)buf;
+
+	mm = current->mm;
+	/* Oops, this was forgotten before. -ben */
+	down_read(&mm->mmap_sem);
+
+	/* For private mappings, just map in zero pages. */
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+		unsigned long count;
+
+		if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
+			goto out_up;
+		if (vma->vm_flags & VM_SHARED)
+			break;
+#if defined(CONFIG_XENO_PRIV)
+		if (vma->vm_flags & VM_IO)
+			break;
+#endif
+		count = vma->vm_end - addr;
+		if (count > size)
+			count = size;
+
+		zap_page_range(mm, addr, count);
+        	zeromap_page_range(addr, count, PAGE_COPY);
+
+		size -= count;
+		buf += count;
+		addr += count;
+		if (size == 0)
+			goto out_up;
+	}
+
+	up_read(&mm->mmap_sem);
+	
+	/* The shared case is hard. Let's do the conventional zeroing. */ 
+	do {
+		unsigned long unwritten = clear_user(buf, PAGE_SIZE);
+		if (unwritten)
+			return size + unwritten - PAGE_SIZE;
+		if (current->need_resched)
+			schedule();
+		buf += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	} while (size);
+
+	return size;
+out_up:
+	up_read(&mm->mmap_sem);
+	return size;
+}
+
+static ssize_t read_zero(struct file * file, char * buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long left, unwritten, written = 0;
+
+	if (!count)
+		return 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	left = count;
+
+	/* do we want to be clever? Arbitrary cut-off */
+	if (count >= PAGE_SIZE*4) {
+		unsigned long partial;
+
+		/* How much left of the page? */
+		partial = (PAGE_SIZE-1) & -(unsigned long) buf;
+		unwritten = clear_user(buf, partial);
+		written = partial - unwritten;
+		if (unwritten)
+			goto out;
+		left -= partial;
+		buf += partial;
+		unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
+		written += (left & PAGE_MASK) - unwritten;
+		if (unwritten)
+			goto out;
+		buf += left & PAGE_MASK;
+		left &= ~PAGE_MASK;
+	}
+	unwritten = clear_user(buf, left);
+	written += left - unwritten;
+out:
+	return written ? written : -EFAULT;
+}
+
+static int mmap_zero(struct file * file, struct vm_area_struct * vma)
+{
+	if (vma->vm_flags & VM_SHARED)
+		return shmem_zero_setup(vma);
+	if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
+		return -EAGAIN;
+	return 0;
+}
+
+static ssize_t write_full(struct file * file, const char * buf,
+			  size_t count, loff_t *ppos)
+{
+	return -ENOSPC;
+}
+
+/*
+ * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
+ * can fopen() both devices with "a" now.  This was previously impossible.
+ * -- SRB.
+ */
+
+static loff_t null_lseek(struct file * file, loff_t offset, int orig)
+{
+	return file->f_pos = 0;
+}
+
+/*
+ * The memory devices use the full 32/64 bits of the offset, and so we cannot
+ * check against negative addresses: they are ok. The return value is weird,
+ * though, in that case (0).
+ *
+ * also note that seeking relative to the "end of file" isn't supported:
+ * it has no meaning, so it returns -EINVAL.
+ */
+static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+{
+	loff_t ret;
+
+	switch (orig) {
+		case 0:
+			file->f_pos = offset;
+			ret = file->f_pos;
+			force_successful_syscall_return();
+			break;
+		case 1:
+			file->f_pos += offset;
+			ret = file->f_pos;
+			force_successful_syscall_return();
+			break;
+		default:
+			ret = -EINVAL;
+	}
+	return ret;
+}
+
+static int open_port(struct inode * inode, struct file * filp)
+{
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long kaddr;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	struct page *page = NULL;
+
+	/* address is user VA; convert to kernel VA of desired page */
+	kaddr = (address - vma->vm_start) + offset;
+	kaddr = VMALLOC_VMADDR(kaddr);
+
+	spin_lock(&init_mm.page_table_lock);
+
+	/* Lookup page structure for kernel VA */
+	pgd = pgd_offset(&init_mm, kaddr);
+	if (pgd_none(*pgd) || pgd_bad(*pgd))
+		goto out;
+	pmd = pmd_offset(pgd, kaddr);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		goto out;
+	ptep = pte_offset(pmd, kaddr);
+	if (!ptep)
+		goto out;
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto out;
+	if (write && !pte_write(pte))
+		goto out;
+	page = pte_page(pte);
+	if (!VALID_PAGE(page)) {
+		page = NULL;
+		goto out;
+	}
+
+	/* Increment reference count on page */
+	get_page(page);
+
+out:
+	spin_unlock(&init_mm.page_table_lock);
+
+	return page;
+}
+
+struct vm_operations_struct kmem_vm_ops = {
+	nopage:		kmem_vm_nopage,
+};
+
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+
+#if defined(CONFIG_XENO)
+	return -ENXIO;
+#endif
+
+	/*
+	 * If the user is not attempting to mmap a high memory address then
+	 * the standard mmap_mem mechanism will work.  High memory addresses
+	 * need special handling, as remap_page_range expects a physically-
+	 * contiguous range of kernel addresses (such as obtained in kmalloc).
+	 */
+	if ((offset + size) < (unsigned long) high_memory)
+		return mmap_mem(file, vma);
+
+	/*
+	 * Accessing memory above the top the kernel knows about or
+	 * through a file pointer that was marked O_SYNC will be
+	 * done non-cached.
+	 */
+	if (noncached_address(offset) || (file->f_flags & O_SYNC))
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	/* Don't do anything here; "nopage" will fill the holes */
+	vma->vm_ops = &kmem_vm_ops;
+
+	/* Don't try to swap out physical pages.. */
+	vma->vm_flags |= VM_RESERVED;
+
+	/*
+	 * Don't dump addresses that are not real memory to a core file.
+	 */
+	vma->vm_flags |= VM_IO;
+
+	return 0;
+}
+
+#define zero_lseek	null_lseek
+#define full_lseek      null_lseek
+#define write_zero	write_null
+#define read_full       read_zero
+#define open_mem	open_port
+#define open_kmem	open_mem
+
+static struct file_operations mem_fops = {
+	llseek:		memory_lseek,
+	read:		read_mem,
+	write:		write_mem,
+	mmap:		mmap_mem,
+	open:		open_mem,
+};
+
+static struct file_operations kmem_fops = {
+	llseek:		memory_lseek,
+	read:		read_kmem,
+	write:		write_kmem,
+	mmap:		mmap_kmem,
+	open:		open_kmem,
+};
+
+static struct file_operations null_fops = {
+	llseek:		null_lseek,
+	read:		read_null,
+	write:		write_null,
+};
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static struct file_operations port_fops = {
+	llseek:		memory_lseek,
+	read:		read_port,
+	write:		write_port,
+	open:		open_port,
+};
+#endif
+
+static struct file_operations zero_fops = {
+	llseek:		zero_lseek,
+	read:		read_zero,
+	write:		write_zero,
+	mmap:		mmap_zero,
+};
+
+static struct file_operations full_fops = {
+	llseek:		full_lseek,
+	read:		read_full,
+	write:		write_full,
+};
+
+static int memory_open(struct inode * inode, struct file * filp)
+{
+	switch (MINOR(inode->i_rdev)) {
+		case 1:
+			filp->f_op = &mem_fops;
+			break;
+		case 2:
+			filp->f_op = &kmem_fops;
+			break;
+		case 3:
+			filp->f_op = &null_fops;
+			break;
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+		case 4:
+#if defined(CONFIG_XENO)
+#if defined(CONFIG_XENO_PRIV)
+			if (!(start_info.flags & SIF_PRIVILEGED))
+#endif
+				return -ENXIO;
+#endif
+			filp->f_op = &port_fops;
+			break;
+#endif
+		case 5:
+			filp->f_op = &zero_fops;
+			break;
+		case 7:
+			filp->f_op = &full_fops;
+			break;
+		case 8:
+			filp->f_op = &random_fops;
+			break;
+		case 9:
+			filp->f_op = &urandom_fops;
+			break;
+		default:
+			return -ENXIO;
+	}
+	if (filp->f_op && filp->f_op->open)
+		return filp->f_op->open(inode,filp);
+	return 0;
+}
+
+void __init memory_devfs_register (void)
+{
+    /*  These are never unregistered  */
+    static const struct {
+	unsigned short minor;
+	char *name;
+	umode_t mode;
+	struct file_operations *fops;
+    } list[] = { /* list of minor devices */
+	{1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
+	{2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
+	{3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+	{4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
+#endif
+	{5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
+	{7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
+	{8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
+	{9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops}
+    };
+    int i;
+
+    for (i=0; i<(sizeof(list)/sizeof(*list)); i++)
+	devfs_register (NULL, list[i].name, DEVFS_FL_NONE,
+			MEM_MAJOR, list[i].minor,
+			list[i].mode | S_IFCHR,
+			list[i].fops, NULL);
+}
+
+static struct file_operations memory_fops = {
+	open:		memory_open,	/* just a selector for the real open */
+};
+
+int __init chr_dev_init(void)
+{
+	if (devfs_register_chrdev(MEM_MAJOR,"mem",&memory_fops))
+		printk("unable to get major %d for memory devs\n", MEM_MAJOR);
+	memory_devfs_register();
+	rand_initialize();
+#ifdef CONFIG_I2C
+	i2c_init_all();
+#endif
+#if defined (CONFIG_FB)
+	fbmem_init();
+#endif
+#if defined (CONFIG_PROM_CONSOLE)
+	prom_con_init();
+#endif
+#if defined (CONFIG_MDA_CONSOLE)
+	mda_console_init();
+#endif
+	tty_init();
+#ifdef CONFIG_M68K_PRINTER
+	lp_m68k_init();
+#endif
+	misc_init();
+#if CONFIG_QIC02_TAPE
+	qic02_tape_init();
+#endif
+#ifdef CONFIG_FTAPE
+	ftape_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
+	tapechar_init();
+#endif
+	return 0;
+}
+
+__initcall(chr_dev_init);
diff --git a/xenolinux-2.4.23-sparse/drivers/char/tty_io.c b/xenolinux-2.4.23-sparse/drivers/char/tty_io.c
new file mode 100644
index 0000000000..e5334deb1d
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/drivers/char/tty_io.c
@@ -0,0 +1,2468 @@
+/*
+ *  linux/drivers/char/tty_io.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+ * or rs-channels. It also implements echoing, cooked mode etc.
+ *
+ * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+ *
+ * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+ * tty_struct and tty_queue structures.  Previously there was an array
+ * of 256 tty_struct's which was statically allocated, and the
+ * tty_queue structures were allocated at boot time.  Both are now
+ * dynamically allocated only when the tty is open.
+ *
+ * Also restructured routines so that there is more of a separation
+ * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+ * the low-level tty routines (serial.c, pty.c, console.c).  This
+ * makes for cleaner and more compact code.  -TYT, 9/17/92 
+ *
+ * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+ * which can be dynamically activated and de-activated by the line
+ * discipline handling modules (like SLIP).
+ *
+ * NOTE: pay no attention to the line discipline code (yet); its
+ * interface is still subject to change in this version...
+ * -- TYT, 1/31/92
+ *
+ * Added functionality to the OPOST tty handling.  No delays, but all
+ * other bits should be there.
+ *	-- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993.
+ *
+ * Rewrote canonical mode and added more termios flags.
+ * 	-- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94
+ *
+ * Reorganized FASYNC support so mouse code can share it.
+ *	-- ctm@ardi.com, 9Sep95
+ *
+ * New TIOCLINUX variants added.
+ *	-- mj@k332.feld.cvut.cz, 19-Nov-95
+ * 
+ * Restrict vt switching via ioctl()
+ *      -- grif@cs.ucr.edu, 5-Dec-95
+ *
+ * Move console and virtual terminal code to more appropriate files,
+ * implement CONFIG_VT and generalize console device interface.
+ *	-- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97
+ *
+ * Rewrote init_dev and release_dev to eliminate races.
+ *	-- Bill Hawes <whawes@star.net>, June 97
+ *
+ * Added devfs support.
+ *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998
+ *
+ * Added support for a Unix98-style ptmx device.
+ *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
+ *
+ * Reduced memory usage for older ARM systems
+ *      -- Russell King <rmk@arm.linux.org.uk>
+ *
+ * Move do_SAK() into process context.  Less stack use in devfs functions.
+ * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <linux/devpts_fs.h>
+#include <linux/file.h>
+#include <linux/console.h>
+#include <linux/timer.h>
+#include <linux/ctype.h>
+#include <linux/kd.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+
+#include <linux/kbd_kern.h>
+#include <linux/vt_kern.h>
+#include <linux/selection.h>
+#include <linux/devfs_fs_kernel.h>
+
+#include <linux/kmod.h>
+
+#ifdef CONFIG_XEN_CONSOLE
+extern void xen_console_init(void);
+#endif
+
+#ifdef CONFIG_VT
+extern void con_init_devfs (void);
+#endif
+
+extern void disable_early_printk(void);
+
+#define CONSOLE_DEV MKDEV(TTY_MAJOR,0)
+#define TTY_DEV MKDEV(TTYAUX_MAJOR,0)
+#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1)
+#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2)
+
+#undef TTY_DEBUG_HANGUP
+
+#define TTY_PARANOIA_CHECK 1
+#define CHECK_TTY_COUNT 1
+
+struct termios tty_std_termios;		/* for the benefit of tty drivers  */
+struct tty_driver *tty_drivers;		/* linked list of tty drivers */
+struct tty_ldisc ldiscs[NR_LDISCS];	/* line disc dispatch table	*/
+
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver ptm_driver[];	/* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver pts_driver[];	/* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+static void initialize_tty_struct(struct tty_struct *tty);
+
+static ssize_t tty_read(struct file *, char *, size_t, loff_t *);
+static ssize_t tty_write(struct file *, const char *, size_t, loff_t *);
+static unsigned int tty_poll(struct file *, poll_table *);
+static int tty_open(struct inode *, struct file *);
+static int tty_release(struct inode *, struct file *);
+int tty_ioctl(struct inode * inode, struct file * file,
+	      unsigned int cmd, unsigned long arg);
+static int tty_fasync(int fd, struct file * filp, int on);
+extern int vme_scc_init (void);
+extern long vme_scc_console_init(void);
+extern int serial167_init(void);
+extern long serial167_console_init(void);
+extern void console_8xx_init(void);
+extern void au1x00_serial_console_init(void);
+extern int rs_8xx_init(void);
+extern void mac_scc_console_init(void);
+extern void hwc_console_init(void);
+extern void hwc_tty_init(void);
+extern void con3215_init(void);
+extern void tty3215_init(void);
+extern void tub3270_con_init(void);
+extern void tub3270_init(void);
+extern void rs285_console_init(void);
+extern void sa1100_rs_console_init(void);
+extern void sgi_serial_console_init(void);
+extern void sn_sal_serial_console_init(void);
+extern void sci_console_init(void);
+extern void dec_serial_console_init(void);
+extern void tx3912_console_init(void);
+extern void tx3912_rs_init(void);
+extern void txx927_console_init(void);
+extern void txx9_rs_init(void);
+extern void txx9_serial_console_init(void);
+extern void sb1250_serial_console_init(void);
+extern void arc_console_init(void);
+
+#ifndef MIN
+#define MIN(a,b)	((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a,b)	((a) < (b) ? (b) : (a))
+#endif
+
+static struct tty_struct *alloc_tty_struct(void)
+{
+	struct tty_struct *tty;
+
+	tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+	if (tty)
+		memset(tty, 0, sizeof(struct tty_struct));
+	return tty;
+}
+
+static inline void free_tty_struct(struct tty_struct *tty)
+{
+	kfree(tty);
+}
+
+/*
+ * This routine returns the name of tty.
+ */
+static char *
+_tty_make_name(struct tty_struct *tty, const char *name, char *buf)
+{
+	int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0;
+
+	if (!tty) /* Hmm.  NULL pointer.  That's fun. */
+		strcpy(buf, "NULL tty");
+	else
+		sprintf(buf, name,
+			idx + tty->driver.name_base);
+		
+	return buf;
+}
+
+#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \
+			 (tty)->driver.name_base)
+
+char *tty_name(struct tty_struct *tty, char *buf)
+{
+	return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf);
+}
+
+inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
+			      const char *routine)
+{
+#ifdef TTY_PARANOIA_CHECK
+	static const char badmagic[] = KERN_WARNING
+		"Warning: bad magic number for tty struct (%s) in %s\n";
+	static const char badtty[] = KERN_WARNING
+		"Warning: null TTY for (%s) in %s\n";
+
+	if (!tty) {
+		printk(badtty, kdevname(device), routine);
+		return 1;
+	}
+	if (tty->magic != TTY_MAGIC) {
+		printk(badmagic, kdevname(device), routine);
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+static int check_tty_count(struct tty_struct *tty, const char *routine)
+{
+#ifdef CHECK_TTY_COUNT
+	struct list_head *p;
+	int count = 0;
+	
+	file_list_lock();
+	for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+		if(list_entry(p, struct file, f_list)->private_data == tty)
+			count++;
+	}
+	file_list_unlock();
+	if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver.subtype == PTY_TYPE_SLAVE &&
+	    tty->link && tty->link->count)
+		count++;
+	if (tty->count != count) {
+		printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+				    "!= #fd's(%d) in %s\n",
+		       kdevname(tty->device), tty->count, count, routine);
+		return count;
+       }	
+#endif
+	return 0;
+}
+
+int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
+{
+	if (disc < N_TTY || disc >= NR_LDISCS)
+		return -EINVAL;
+	
+	if (new_ldisc) {
+		ldiscs[disc] = *new_ldisc;
+		ldiscs[disc].flags |= LDISC_FLAG_DEFINED;
+		ldiscs[disc].num = disc;
+	} else
+		memset(&ldiscs[disc], 0, sizeof(struct tty_ldisc));
+	
+	return 0;
+}
+
+EXPORT_SYMBOL(tty_register_ldisc);
+
+/* Set the discipline of a tty line. */
+static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
+{
+	int	retval = 0;
+	struct	tty_ldisc o_ldisc;
+	char buf[64];
+
+	if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS))
+		return -EINVAL;
+	/* Eduardo Blanco <ejbs@cs.cs.com.uy> */
+	/* Cyrus Durgin <cider@speakeasy.org> */
+	if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED)) {
+		char modname [20];
+		sprintf(modname, "tty-ldisc-%d", ldisc);
+		request_module (modname);
+	}
+	if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED))
+		return -EINVAL;
+
+	if (tty->ldisc.num == ldisc)
+		return 0;	/* We are already in the desired discipline */
+	o_ldisc = tty->ldisc;
+
+	tty_wait_until_sent(tty, 0);
+	
+	/* Shutdown the current discipline. */
+	if (tty->ldisc.close)
+		(tty->ldisc.close)(tty);
+
+	/* Now set up the new line discipline. */
+	tty->ldisc = ldiscs[ldisc];
+	tty->termios->c_line = ldisc;
+	if (tty->ldisc.open)
+		retval = (tty->ldisc.open)(tty);
+	if (retval < 0) {
+		tty->ldisc = o_ldisc;
+		tty->termios->c_line = tty->ldisc.num;
+		if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) {
+			tty->ldisc = ldiscs[N_TTY];
+			tty->termios->c_line = N_TTY;
+			if (tty->ldisc.open) {
+				int r = tty->ldisc.open(tty);
+
+				if (r < 0)
+					panic("Couldn't open N_TTY ldisc for "
+					      "%s --- error %d.",
+					      tty_name(tty, buf), r);
+			}
+		}
+	}
+	if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc)
+		tty->driver.set_ldisc(tty);
+	return retval;
+}
+
+/*
+ * This routine returns a tty driver structure, given a device number
+ */
+struct tty_driver *get_tty_driver(kdev_t device)
+{
+	int	major, minor;
+	struct tty_driver *p;
+	
+	minor = MINOR(device);
+	major = MAJOR(device);
+
+	for (p = tty_drivers; p; p = p->next) {
+		if (p->major != major)
+			continue;
+		if (minor < p->minor_start)
+			continue;
+		if (minor >= p->minor_start + p->num)
+			continue;
+		return p;
+	}
+	return NULL;
+}
+
+/*
+ * If we try to write to, or set the state of, a terminal and we're
+ * not in the foreground, send a SIGTTOU.  If the signal is blocked or
+ * ignored, go ahead and perform the operation.  (POSIX 7.2)
+ */
+int tty_check_change(struct tty_struct * tty)
+{
+	if (current->tty != tty)
+		return 0;
+	if (tty->pgrp <= 0) {
+		printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n");
+		return 0;
+	}
+	if (current->pgrp == tty->pgrp)
+		return 0;
+	if (is_ignored(SIGTTOU))
+		return 0;
+	if (is_orphaned_pgrp(current->pgrp))
+		return -EIO;
+	(void) kill_pg(current->pgrp,SIGTTOU,1);
+	return -ERESTARTSYS;
+}
+
+static ssize_t hung_up_tty_read(struct file * file, char * buf,
+				size_t count, loff_t *ppos)
+{
+	/* Can't seek (pread) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+	return 0;
+}
+
+static ssize_t hung_up_tty_write(struct file * file, const char * buf,
+				 size_t count, loff_t *ppos)
+{
+	/* Can't seek (pwrite) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+	return -EIO;
+}
+
+/* No kernel lock held - none needed ;) */
+static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait)
+{
+	return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+}
+
+static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
+			     unsigned int cmd, unsigned long arg)
+{
+	return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
+}
+
+static struct file_operations tty_fops = {
+	llseek:		no_llseek,
+	read:		tty_read,
+	write:		tty_write,
+	poll:		tty_poll,
+	ioctl:		tty_ioctl,
+	open:		tty_open,
+	release:	tty_release,
+	fasync:		tty_fasync,
+};
+
+static struct file_operations hung_up_tty_fops = {
+	llseek:		no_llseek,
+	read:		hung_up_tty_read,
+	write:		hung_up_tty_write,
+	poll:		hung_up_tty_poll,
+	ioctl:		hung_up_tty_ioctl,
+	release:	tty_release,
+};
+
+static spinlock_t redirect_lock = SPIN_LOCK_UNLOCKED;
+static struct file *redirect;
+/*
+ * This can be called by the "eventd" kernel thread.  That is process synchronous,
+ * but doesn't hold any locks, so we need to make sure we have the appropriate
+ * locks for what we're doing..
+ */
+void do_tty_hangup(void *data)
+{
+	struct tty_struct *tty = (struct tty_struct *) data;
+	struct file * cons_filp = NULL;
+	struct file *f = NULL;
+	struct task_struct *p;
+	struct list_head *l;
+	int    closecount = 0, n;
+
+	if (!tty)
+		return;
+
+	/* inuse_filps is protected by the single kernel lock */
+	lock_kernel();
+
+	spin_lock(&redirect_lock);
+	if (redirect && redirect->private_data == tty) {
+		f = redirect;
+		redirect = NULL;
+	}
+	spin_unlock(&redirect_lock);
+	
+	check_tty_count(tty, "do_tty_hangup");
+	file_list_lock();
+	for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+		struct file * filp = list_entry(l, struct file, f_list);
+		if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
+		    filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
+			cons_filp = filp;
+			continue;
+		}
+		if (filp->f_op != &tty_fops)
+			continue;
+		closecount++;
+		tty_fasync(-1, filp, 0);	/* can't block */
+		filp->f_op = &hung_up_tty_fops;
+	}
+	file_list_unlock();
+	
+	/* FIXME! What are the locking issues here? This may me overdoing things.. */
+	{
+		unsigned long flags;
+
+		save_flags(flags); cli();
+		if (tty->ldisc.flush_buffer)
+			tty->ldisc.flush_buffer(tty);
+		if (tty->driver.flush_buffer)
+			tty->driver.flush_buffer(tty);
+		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+		    tty->ldisc.write_wakeup)
+			(tty->ldisc.write_wakeup)(tty);
+		restore_flags(flags);
+	}
+
+	wake_up_interruptible(&tty->write_wait);
+	wake_up_interruptible(&tty->read_wait);
+
+	/*
+	 * Shutdown the current line discipline, and reset it to
+	 * N_TTY.
+	 */
+	if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS)
+		*tty->termios = tty->driver.init_termios;
+	if (tty->ldisc.num != ldiscs[N_TTY].num) {
+		if (tty->ldisc.close)
+			(tty->ldisc.close)(tty);
+		tty->ldisc = ldiscs[N_TTY];
+		tty->termios->c_line = N_TTY;
+		if (tty->ldisc.open) {
+			int i = (tty->ldisc.open)(tty);
+			if (i < 0)
+				printk(KERN_ERR "do_tty_hangup: N_TTY open: "
+						"error %d\n", -i);
+		}
+	}
+	
+	read_lock(&tasklist_lock);
+ 	for_each_task(p) {
+		if ((tty->session > 0) && (p->session == tty->session) &&
+		    p->leader) {
+			send_sig(SIGHUP,p,1);
+			send_sig(SIGCONT,p,1);
+			if (tty->pgrp > 0)
+				p->tty_old_pgrp = tty->pgrp;
+		}
+		if (p->tty == tty)
+			p->tty = NULL;
+	}
+	read_unlock(&tasklist_lock);
+
+	tty->flags = 0;
+	tty->session = 0;
+	tty->pgrp = -1;
+	tty->ctrl_status = 0;
+	/*
+	 *	If one of the devices matches a console pointer, we
+	 *	cannot just call hangup() because that will cause
+	 *	tty->count and state->count to go out of sync.
+	 *	So we just call close() the right number of times.
+	 */
+	if (cons_filp) {
+		if (tty->driver.close)
+			for (n = 0; n < closecount; n++)
+				tty->driver.close(tty, cons_filp);
+	} else if (tty->driver.hangup)
+		(tty->driver.hangup)(tty);
+	unlock_kernel();
+	if (f)
+		fput(f);
+}
+
+void tty_hangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+	char	buf[64];
+	
+	printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf));
+#endif
+	schedule_task(&tty->tq_hangup);
+}
+
+void tty_vhangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+	char	buf[64];
+
+	printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
+#endif
+	do_tty_hangup((void *) tty);
+}
+
+int tty_hung_up_p(struct file * filp)
+{
+	return (filp->f_op == &hung_up_tty_fops);
+}
+
+/*
+ * This function is typically called only by the session leader, when
+ * it wants to disassociate itself from its controlling tty.
+ *
+ * It performs the following functions:
+ * 	(1)  Sends a SIGHUP and SIGCONT to the foreground process group
+ * 	(2)  Clears the tty from being controlling the session
+ * 	(3)  Clears the controlling tty for all processes in the
+ * 		session group.
+ *
+ * The argument on_exit is set to 1 if called when a process is
+ * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ */
+void disassociate_ctty(int on_exit)
+{
+	struct tty_struct *tty = current->tty;
+	struct task_struct *p;
+	int tty_pgrp = -1;
+
+	if (tty) {
+		tty_pgrp = tty->pgrp;
+		if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)
+			tty_vhangup(tty);
+	} else {
+		if (current->tty_old_pgrp) {
+			kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);
+			kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);
+		}
+		return;
+	}
+	if (tty_pgrp > 0) {
+		kill_pg(tty_pgrp, SIGHUP, on_exit);
+		if (!on_exit)
+			kill_pg(tty_pgrp, SIGCONT, on_exit);
+	}
+
+	current->tty_old_pgrp = 0;
+	tty->session = 0;
+	tty->pgrp = -1;
+
+	read_lock(&tasklist_lock);
+	for_each_task(p)
+	  	if (p->session == current->session)
+			p->tty = NULL;
+	read_unlock(&tasklist_lock);
+}
+
+void stop_tty(struct tty_struct *tty)
+{
+	if (tty->stopped)
+		return;
+	tty->stopped = 1;
+	if (tty->link && tty->link->packet) {
+		tty->ctrl_status &= ~TIOCPKT_START;
+		tty->ctrl_status |= TIOCPKT_STOP;
+		wake_up_interruptible(&tty->link->read_wait);
+	}
+	if (tty->driver.stop)
+		(tty->driver.stop)(tty);
+}
+
+void start_tty(struct tty_struct *tty)
+{
+	if (!tty->stopped || tty->flow_stopped)
+		return;
+	tty->stopped = 0;
+	if (tty->link && tty->link->packet) {
+		tty->ctrl_status &= ~TIOCPKT_STOP;
+		tty->ctrl_status |= TIOCPKT_START;
+		wake_up_interruptible(&tty->link->read_wait);
+	}
+	if (tty->driver.start)
+		(tty->driver.start)(tty);
+	if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+	    tty->ldisc.write_wakeup)
+		(tty->ldisc.write_wakeup)(tty);
+	wake_up_interruptible(&tty->write_wait);
+}
+
+static ssize_t tty_read(struct file * file, char * buf, size_t count, 
+			loff_t *ppos)
+{
+	int i;
+	struct tty_struct * tty;
+	struct inode *inode;
+
+	/* Can't seek (pread) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+
+	tty = (struct tty_struct *)file->private_data;
+	inode = file->f_dentry->d_inode;
+	if (tty_paranoia_check(tty, inode->i_rdev, "tty_read"))
+		return -EIO;
+	if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
+		return -EIO;
+
+	/* This check not only needs to be done before reading, but also
+	   whenever read_chan() gets woken up after sleeping, so I've
+	   moved it to there.  This should only be done for the N_TTY
+	   line discipline, anyway.  Same goes for write_chan(). -- jlc. */
+#if 0
+	if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */
+	    (tty->pgrp > 0) &&
+	    (current->tty == tty) &&
+	    (tty->pgrp != current->pgrp))
+		if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp))
+			return -EIO;
+		else {
+			(void) kill_pg(current->pgrp, SIGTTIN, 1);
+			return -ERESTARTSYS;
+		}
+#endif
+	lock_kernel();
+	if (tty->ldisc.read)
+		i = (tty->ldisc.read)(tty,file,buf,count);
+	else
+		i = -EIO;
+	unlock_kernel();
+	if (i > 0)
+		inode->i_atime = CURRENT_TIME;
+	return i;
+}
+
+/*
+ * Split writes up in sane blocksizes to avoid
+ * denial-of-service type attacks
+ */
+static inline ssize_t do_tty_write(
+	ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
+	struct tty_struct *tty,
+	struct file *file,
+	const unsigned char *buf,
+	size_t count)
+{
+	ssize_t ret = 0, written = 0;
+	
+	if (file->f_flags & O_NONBLOCK) {
+		if (down_trylock(&tty->atomic_write))
+			return -EAGAIN;
+	}
+	else {
+		if (down_interruptible(&tty->atomic_write))
+			return -ERESTARTSYS;
+	}
+	if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) {
+		lock_kernel();
+		written = write(tty, file, buf, count);
+		unlock_kernel();
+	} else {
+		for (;;) {
+			unsigned long size = MAX(PAGE_SIZE*2,16384);
+			if (size > count)
+				size = count;
+			lock_kernel();
+			ret = write(tty, file, buf, size);
+			unlock_kernel();
+			if (ret <= 0)
+				break;
+			written += ret;
+			buf += ret;
+			count -= ret;
+			if (!count)
+				break;
+			ret = -ERESTARTSYS;
+			if (signal_pending(current))
+				break;
+			if (current->need_resched)
+				schedule();
+		}
+	}
+	if (written) {
+		file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
+		ret = written;
+	}
+	up(&tty->atomic_write);
+	return ret;
+}
+
+
+static ssize_t tty_write(struct file * file, const char * buf, size_t count,
+			 loff_t *ppos)
+{
+	int is_console;
+	struct tty_struct * tty;
+	struct inode *inode = file->f_dentry->d_inode;
+
+	/* Can't seek (pwrite) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+
+	/*
+	 *      For now, we redirect writes from /dev/console as
+	 *      well as /dev/tty0.
+	 */
+	inode = file->f_dentry->d_inode;
+	is_console = (inode->i_rdev == SYSCONS_DEV ||
+		      inode->i_rdev == CONSOLE_DEV);
+
+	if (is_console) {
+		struct file *p = NULL;
+
+		spin_lock(&redirect_lock);
+		if (redirect) {
+			get_file(redirect);
+			p = redirect;
+		}
+		spin_unlock(&redirect_lock);
+
+		if (p) {
+			ssize_t res = p->f_op->write(p, buf, count, &p->f_pos);
+			fput(p);
+			return res;
+		}
+	}
+
+	tty = (struct tty_struct *)file->private_data;
+	if (tty_paranoia_check(tty, inode->i_rdev, "tty_write"))
+		return -EIO;
+	if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags)))
+		return -EIO;
+#if 0
+	if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) &&
+	    (current->tty == tty) && (tty->pgrp != current->pgrp)) {
+		if (is_orphaned_pgrp(current->pgrp))
+			return -EIO;
+		if (!is_ignored(SIGTTOU)) {
+			(void) kill_pg(current->pgrp, SIGTTOU, 1);
+			return -ERESTARTSYS;
+		}
+	}
+#endif
+	if (!tty->ldisc.write)
+		return -EIO;
+	return do_tty_write(tty->ldisc.write, tty, file,
+			    (const unsigned char *)buf, count);
+}
+
+/* Semaphore to protect creating and releasing a tty */
+static DECLARE_MUTEX(tty_sem);
+
+static void down_tty_sem(int index)
+{
+	down(&tty_sem);
+}
+
+static void up_tty_sem(int index)
+{
+	up(&tty_sem);
+}
+
+static void release_mem(struct tty_struct *tty, int idx);
+
+/*
+ * WSH 06/09/97: Rewritten to remove races and properly clean up after a
+ * failed open.  The new code protects the open with a semaphore, so it's
+ * really quite straightforward.  The semaphore locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+static int init_dev(kdev_t device, struct tty_struct **ret_tty)
+{
+	struct tty_struct *tty, *o_tty;
+	struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
+	struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+	struct tty_driver *driver;	
+	int retval=0;
+	int idx;
+
+	driver = get_tty_driver(device);
+	if (!driver)
+		return -ENODEV;
+
+	idx = MINOR(device) - driver->minor_start;
+
+	/* 
+	 * Check whether we need to acquire the tty semaphore to avoid
+	 * race conditions.  For now, play it safe.
+	 */
+	down_tty_sem(idx);
+
+	/* check whether we're reopening an existing tty */
+	tty = driver->table[idx];
+	if (tty) goto fast_track;
+
+	/*
+	 * First time open is complex, especially for PTY devices.
+	 * This code guarantees that either everything succeeds and the
+	 * TTY is ready for operation, or else the table slots are vacated
+	 * and the allocated memory released.  (Except that the termios 
+	 * and locked termios may be retained.)
+	 */
+
+	o_tty = NULL;
+	tp = o_tp = NULL;
+	ltp = o_ltp = NULL;
+
+	tty = alloc_tty_struct();
+	if(!tty)
+		goto fail_no_mem;
+	initialize_tty_struct(tty);
+	tty->device = device;
+	tty->driver = *driver;
+
+	tp_loc = &driver->termios[idx];
+	if (!*tp_loc) {
+		tp = (struct termios *) kmalloc(sizeof(struct termios),
+						GFP_KERNEL);
+		if (!tp)
+			goto free_mem_out;
+		*tp = driver->init_termios;
+	}
+
+	ltp_loc = &driver->termios_locked[idx];
+	if (!*ltp_loc) {
+		ltp = (struct termios *) kmalloc(sizeof(struct termios),
+						 GFP_KERNEL);
+		if (!ltp)
+			goto free_mem_out;
+		memset(ltp, 0, sizeof(struct termios));
+	}
+
+	if (driver->type == TTY_DRIVER_TYPE_PTY) {
+		o_tty = alloc_tty_struct();
+		if (!o_tty)
+			goto free_mem_out;
+		initialize_tty_struct(o_tty);
+		o_tty->device = (kdev_t) MKDEV(driver->other->major,
+					driver->other->minor_start + idx);
+		o_tty->driver = *driver->other;
+
+		o_tp_loc  = &driver->other->termios[idx];
+		if (!*o_tp_loc) {
+			o_tp = (struct termios *)
+				kmalloc(sizeof(struct termios), GFP_KERNEL);
+			if (!o_tp)
+				goto free_mem_out;
+			*o_tp = driver->other->init_termios;
+		}
+
+		o_ltp_loc = &driver->other->termios_locked[idx];
+		if (!*o_ltp_loc) {
+			o_ltp = (struct termios *)
+				kmalloc(sizeof(struct termios), GFP_KERNEL);
+			if (!o_ltp)
+				goto free_mem_out;
+			memset(o_ltp, 0, sizeof(struct termios));
+		}
+
+		/*
+		 * Everything allocated ... set up the o_tty structure.
+		 */
+		driver->other->table[idx] = o_tty;
+		if (!*o_tp_loc)
+			*o_tp_loc = o_tp;
+		if (!*o_ltp_loc)
+			*o_ltp_loc = o_ltp;
+		o_tty->termios = *o_tp_loc;
+		o_tty->termios_locked = *o_ltp_loc;
+		(*driver->other->refcount)++;
+		if (driver->subtype == PTY_TYPE_MASTER)
+			o_tty->count++;
+
+		/* Establish the links in both directions */
+		tty->link   = o_tty;
+		o_tty->link = tty;
+	}
+
+	/* 
+	 * All structures have been allocated, so now we install them.
+	 * Failures after this point use release_mem to clean up, so 
+	 * there's no need to null out the local pointers.
+	 */
+	driver->table[idx] = tty;
+	
+	if (!*tp_loc)
+		*tp_loc = tp;
+	if (!*ltp_loc)
+		*ltp_loc = ltp;
+	tty->termios = *tp_loc;
+	tty->termios_locked = *ltp_loc;
+	(*driver->refcount)++;
+	tty->count++;
+
+	/* 
+	 * Structures all installed ... call the ldisc open routines.
+	 * If we fail here just call release_mem to clean up.  No need
+	 * to decrement the use counts, as release_mem doesn't care.
+	 */
+	if (tty->ldisc.open) {
+		retval = (tty->ldisc.open)(tty);
+		if (retval)
+			goto release_mem_out;
+	}
+	if (o_tty && o_tty->ldisc.open) {
+		retval = (o_tty->ldisc.open)(o_tty);
+		if (retval) {
+			if (tty->ldisc.close)
+				(tty->ldisc.close)(tty);
+			goto release_mem_out;
+		}
+	}
+	goto success;
+
+	/*
+	 * This fast open can be used if the tty is already open.
+	 * No memory is allocated, and the only failures are from
+	 * attempting to open a closing tty or attempting multiple
+	 * opens on a pty master.
+	 */
+fast_track:
+	if (test_bit(TTY_CLOSING, &tty->flags)) {
+		retval = -EIO;
+		goto end_init;
+	}
+	if (driver->type == TTY_DRIVER_TYPE_PTY &&
+	    driver->subtype == PTY_TYPE_MASTER) {
+		/*
+		 * special case for PTY masters: only one open permitted, 
+		 * and the slave side open count is incremented as well.
+		 */
+		if (tty->count) {
+			retval = -EIO;
+			goto end_init;
+		}
+		tty->link->count++;
+	}
+	tty->count++;
+	tty->driver = *driver; /* N.B. why do this every time?? */
+
+success:
+	*ret_tty = tty;
+	
+	/* All paths come through here to release the semaphore */
+end_init:
+	up_tty_sem(idx);
+	return retval;
+
+	/* Release locally allocated memory ... nothing placed in slots */
+free_mem_out:
+	if (o_tp)
+		kfree(o_tp);
+	if (o_tty)
+		free_tty_struct(o_tty);
+	if (ltp)
+		kfree(ltp);
+	if (tp)
+		kfree(tp);
+	free_tty_struct(tty);
+
+fail_no_mem:
+	retval = -ENOMEM;
+	goto end_init;
+
+	/* call the tty release_mem routine to clean out this slot */
+release_mem_out:
+	printk(KERN_INFO "init_dev: ldisc open failed, "
+			 "clearing slot %d\n", idx);
+	release_mem(tty, idx);
+	goto end_init;
+}
+
+/*
+ * Releases memory associated with a tty structure, and clears out the
+ * driver table slots.
+ */
+static void release_mem(struct tty_struct *tty, int idx)
+{
+	struct tty_struct *o_tty;
+	struct termios *tp;
+
+	if ((o_tty = tty->link) != NULL) {
+		o_tty->driver.table[idx] = NULL;
+		if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+			tp = o_tty->driver.termios[idx];
+			o_tty->driver.termios[idx] = NULL;
+			kfree(tp);
+		}
+		o_tty->magic = 0;
+		(*o_tty->driver.refcount)--;
+		list_del_init(&o_tty->tty_files);
+		free_tty_struct(o_tty);
+	}
+
+	tty->driver.table[idx] = NULL;
+	if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+		tp = tty->driver.termios[idx];
+		tty->driver.termios[idx] = NULL;
+		kfree(tp);
+	}
+	tty->magic = 0;
+	(*tty->driver.refcount)--;
+	list_del_init(&tty->tty_files);
+	free_tty_struct(tty);
+}
+
+/*
+ * Even releasing the tty structures is a tricky business.. We have
+ * to be very careful that the structures are all released at the
+ * same time, as interrupts might otherwise get the wrong pointers.
+ *
+ * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
+ * lead to double frees or releasing memory still in use.
+ */
+static void release_dev(struct file * filp)
+{
+	struct tty_struct *tty, *o_tty;
+	int	pty_master, tty_closing, o_tty_closing, do_sleep;
+	int	idx;
+	char	buf[64];
+	
+	tty = (struct tty_struct *)filp->private_data;
+	if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev"))
+		return;
+
+	check_tty_count(tty, "release_dev");
+
+	tty_fasync(-1, filp, 0);
+
+	idx = MINOR(tty->device) - tty->driver.minor_start;
+	pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+		      tty->driver.subtype == PTY_TYPE_MASTER);
+	o_tty = tty->link;
+
+#ifdef TTY_PARANOIA_CHECK
+	if (idx < 0 || idx >= tty->driver.num) {
+		printk(KERN_DEBUG "release_dev: bad idx when trying to "
+				  "free (%s)\n", kdevname(tty->device));
+		return;
+	}
+	if (tty != tty->driver.table[idx]) {
+		printk(KERN_DEBUG "release_dev: driver.table[%d] not tty "
+				  "for (%s)\n", idx, kdevname(tty->device));
+		return;
+	}
+	if (tty->termios != tty->driver.termios[idx]) {
+		printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios "
+		       "for (%s)\n",
+		       idx, kdevname(tty->device));
+		return;
+	}
+	if (tty->termios_locked != tty->driver.termios_locked[idx]) {
+		printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not "
+		       "termios_locked for (%s)\n",
+		       idx, kdevname(tty->device));
+		return;
+	}
+#endif
+
+#ifdef TTY_DEBUG_HANGUP
+	printk(KERN_DEBUG "release_dev of %s (tty count=%d)...",
+	       tty_name(tty, buf), tty->count);
+#endif
+
+#ifdef TTY_PARANOIA_CHECK
+	if (tty->driver.other) {
+		if (o_tty != tty->driver.other->table[idx]) {
+			printk(KERN_DEBUG "release_dev: other->table[%d] "
+					  "not o_tty for (%s)\n",
+			       idx, kdevname(tty->device));
+			return;
+		}
+		if (o_tty->termios != tty->driver.other->termios[idx]) {
+			printk(KERN_DEBUG "release_dev: other->termios[%d] "
+					  "not o_termios for (%s)\n",
+			       idx, kdevname(tty->device));
+			return;
+		}
+		if (o_tty->termios_locked != 
+		      tty->driver.other->termios_locked[idx]) {
+			printk(KERN_DEBUG "release_dev: other->termios_locked["
+					  "%d] not o_termios_locked for (%s)\n",
+			       idx, kdevname(tty->device));
+			return;
+		}
+		if (o_tty->link != tty) {
+			printk(KERN_DEBUG "release_dev: bad pty pointers\n");
+			return;
+		}
+	}
+#endif
+
+	if (tty->driver.close)
+		tty->driver.close(tty, filp);
+
+	/*
+	 * Sanity check: if tty->count is going to zero, there shouldn't be
+	 * any waiters on tty->read_wait or tty->write_wait.  We test the
+	 * wait queues and kick everyone out _before_ actually starting to
+	 * close.  This ensures that we won't block while releasing the tty
+	 * structure.
+	 *
+	 * The test for the o_tty closing is necessary, since the master and
+	 * slave sides may close in any order.  If the slave side closes out
+	 * first, its count will be one, since the master side holds an open.
+	 * Thus this test wouldn't be triggered at the time the slave closes,
+	 * so we do it now.
+	 *
+	 * Note that it's possible for the tty to be opened again while we're
+	 * flushing out waiters.  By recalculating the closing flags before
+	 * each iteration we avoid any problems.
+	 */
+	while (1) {
+		tty_closing = tty->count <= 1;
+		o_tty_closing = o_tty &&
+			(o_tty->count <= (pty_master ? 1 : 0));
+		do_sleep = 0;
+
+		if (tty_closing) {
+			if (waitqueue_active(&tty->read_wait)) {
+				wake_up(&tty->read_wait);
+				do_sleep++;
+			}
+			if (waitqueue_active(&tty->write_wait)) {
+				wake_up(&tty->write_wait);
+				do_sleep++;
+			}
+		}
+		if (o_tty_closing) {
+			if (waitqueue_active(&o_tty->read_wait)) {
+				wake_up(&o_tty->read_wait);
+				do_sleep++;
+			}
+			if (waitqueue_active(&o_tty->write_wait)) {
+				wake_up(&o_tty->write_wait);
+				do_sleep++;
+			}
+		}
+		if (!do_sleep)
+			break;
+
+		printk(KERN_WARNING "release_dev: %s: read/write wait queue "
+				    "active!\n", tty_name(tty, buf));
+		schedule();
+	}	
+
+	/*
+	 * The closing flags are now consistent with the open counts on 
+	 * both sides, and we've completed the last operation that could 
+	 * block, so it's safe to proceed with closing.
+	 */
+	if (pty_master) {
+		if (--o_tty->count < 0) {
+			printk(KERN_WARNING "release_dev: bad pty slave count "
+					    "(%d) for %s\n",
+			       o_tty->count, tty_name(o_tty, buf));
+			o_tty->count = 0;
+		}
+	}
+	if (--tty->count < 0) {
+		printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n",
+		       tty->count, tty_name(tty, buf));
+		tty->count = 0;
+	}
+
+	/*
+	 * We've decremented tty->count, so we should zero out
+	 * filp->private_data, to break the link between the tty and
+	 * the file descriptor.  Otherwise if filp_close() blocks before
+	 * the file descriptor is removed from the inuse_filp
+	 * list, check_tty_count() could observe a discrepancy and
+	 * printk a warning message to the user.
+	 */
+	filp->private_data = 0;
+
+	/*
+	 * Perform some housekeeping before deciding whether to return.
+	 *
+	 * Set the TTY_CLOSING flag if this was the last open.  In the
+	 * case of a pty we may have to wait around for the other side
+	 * to close, and TTY_CLOSING makes sure we can't be reopened.
+	 */
+	if(tty_closing)
+		set_bit(TTY_CLOSING, &tty->flags);
+	if(o_tty_closing)
+		set_bit(TTY_CLOSING, &o_tty->flags);
+
+	/*
+	 * If _either_ side is closing, make sure there aren't any
+	 * processes that still think tty or o_tty is their controlling
+	 * tty.
+	 */
+	if (tty_closing || o_tty_closing) {
+		struct task_struct *p;
+
+		read_lock(&tasklist_lock);
+		for_each_task(p) {
+			if (p->tty == tty || (o_tty && p->tty == o_tty))
+				p->tty = NULL;
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	/* check whether both sides are closing ... */
+	if (!tty_closing || (o_tty && !o_tty_closing))
+		return;
+	
+#ifdef TTY_DEBUG_HANGUP
+	printk(KERN_DEBUG "freeing tty structure...");
+#endif
+
+	/*
+	 * Shutdown the current line discipline, and reset it to N_TTY.
+	 * N.B. why reset ldisc when we're releasing the memory??
+	 */
+	if (tty->ldisc.close)
+		(tty->ldisc.close)(tty);
+	tty->ldisc = ldiscs[N_TTY];
+	tty->termios->c_line = N_TTY;
+	if (o_tty) {
+		if (o_tty->ldisc.close)
+			(o_tty->ldisc.close)(o_tty);
+		o_tty->ldisc = ldiscs[N_TTY];
+	}
+	
+	/*
+	 * Make sure that the tty's task queue isn't activated. 
+	 */
+	run_task_queue(&tq_timer);
+	flush_scheduled_tasks();
+
+	/* 
+	 * The release_mem function takes care of the details of clearing
+	 * the slots and preserving the termios structure.
+	 */
+	release_mem(tty, idx);
+}
+
+/*
+ * tty_open and tty_release keep up the tty count that contains the
+ * number of opens done on a tty. We cannot use the inode-count, as
+ * different inodes might point to the same tty.
+ *
+ * Open-counting is needed for pty masters, as well as for keeping
+ * track of serial lines: DTR is dropped when the last close happens.
+ * (This is not done solely through tty->count, now.  - Ted 1/27/92)
+ *
+ * The termios state of a pty is reset on first open so that
+ * settings don't persist across reuse.
+ */
+static int tty_open(struct inode * inode, struct file * filp)
+{
+	struct tty_struct *tty;
+	int noctty, retval;
+	kdev_t device;
+	unsigned short saved_flags;
+	char	buf[64];
+
+	saved_flags = filp->f_flags;
+retry_open:
+	noctty = filp->f_flags & O_NOCTTY;
+	device = inode->i_rdev;
+	if (device == TTY_DEV) {
+		if (!current->tty)
+			return -ENXIO;
+		device = current->tty->device;
+		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+		/* noctty = 1; */
+	}
+#ifdef CONFIG_VT
+	if (device == CONSOLE_DEV) {
+		extern int fg_console;
+		device = MKDEV(TTY_MAJOR, fg_console + 1);
+		noctty = 1;
+	}
+#endif
+	if (device == SYSCONS_DEV) {
+		struct console *c = console_drivers;
+		while(c && !c->device)
+			c = c->next;
+		if (!c)
+                        return -ENODEV;
+                device = c->device(c);
+		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */
+		noctty = 1;
+	}
+
+	if (device == PTMX_DEV) {
+#ifdef CONFIG_UNIX98_PTYS
+
+		/* find a free pty. */
+		int major, minor;
+		struct tty_driver *driver;
+
+		/* find a device that is not in use. */
+		retval = -1;
+		for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) {
+			driver = &ptm_driver[major];
+			for (minor = driver->minor_start ;
+			     minor < driver->minor_start + driver->num ;
+			     minor++) {
+				device = MKDEV(driver->major, minor);
+				if (!init_dev(device, &tty)) goto ptmx_found; /* ok! */
+			}
+		}
+		return -EIO; /* no free ptys */
+	ptmx_found:
+		set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+		minor -= driver->minor_start;
+		devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start));
+		tty_register_devfs(&pts_driver[major], DEVFS_FL_DEFAULT,
+				   pts_driver[major].minor_start + minor);
+		noctty = 1;
+		goto init_dev_done;
+
+#else   /* CONFIG_UNIX_98_PTYS */
+
+		return -ENODEV;
+
+#endif  /* CONFIG_UNIX_98_PTYS */
+	}
+
+	retval = init_dev(device, &tty);
+	if (retval)
+		return retval;
+
+#ifdef CONFIG_UNIX98_PTYS
+init_dev_done:
+#endif
+	filp->private_data = tty;
+	file_move(filp, &tty->tty_files);
+	check_tty_count(tty, "tty_open");
+	if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver.subtype == PTY_TYPE_MASTER)
+		noctty = 1;
+#ifdef TTY_DEBUG_HANGUP
+	printk(KERN_DEBUG "opening %s...", tty_name(tty, buf));
+#endif
+	if (tty->driver.open)
+		retval = tty->driver.open(tty, filp);
+	else
+		retval = -ENODEV;
+	filp->f_flags = saved_flags;
+
+	if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser())
+		retval = -EBUSY;
+
+	if (retval) {
+#ifdef TTY_DEBUG_HANGUP
+		printk(KERN_DEBUG "error %d in opening %s...", retval,
+		       tty_name(tty, buf));
+#endif
+
+		release_dev(filp);
+		if (retval != -ERESTARTSYS)
+			return retval;
+		if (signal_pending(current))
+			return retval;
+		schedule();
+		/*
+		 * Need to reset f_op in case a hangup happened.
+		 */
+		filp->f_op = &tty_fops;
+		goto retry_open;
+	}
+	if (!noctty &&
+	    current->leader &&
+	    !current->tty &&
+	    tty->session == 0) {
+	    	task_lock(current);
+		current->tty = tty;
+		task_unlock(current);
+		current->tty_old_pgrp = 0;
+		tty->session = current->session;
+		tty->pgrp = current->pgrp;
+	}
+	if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) &&
+	    (tty->driver.subtype == SERIAL_TYPE_CALLOUT) &&
+	    (tty->count == 1)) {
+		static int nr_warns;
+		if (nr_warns < 5) {
+			printk(KERN_WARNING "tty_io.c: "
+				"process %d (%s) used obsolete /dev/%s - "
+				"update software to use /dev/ttyS%d\n",
+				current->pid, current->comm,
+				tty_name(tty, buf), TTY_NUMBER(tty));
+			nr_warns++;
+		}
+	}
+	return 0;
+}
+
+static int tty_release(struct inode * inode, struct file * filp)
+{
+	lock_kernel();
+	release_dev(filp);
+	unlock_kernel();
+	return 0;
+}
+
+/* No kernel lock held - fine */
+static unsigned int tty_poll(struct file * filp, poll_table * wait)
+{
+	struct tty_struct * tty;
+
+	tty = (struct tty_struct *)filp->private_data;
+	if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll"))
+		return 0;
+
+	if (tty->ldisc.poll)
+		return (tty->ldisc.poll)(tty, filp, wait);
+	return 0;
+}
+
+static int tty_fasync(int fd, struct file * filp, int on)
+{
+	struct tty_struct * tty;
+	int retval;
+
+	tty = (struct tty_struct *)filp->private_data;
+	if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync"))
+		return 0;
+	
+	retval = fasync_helper(fd, filp, on, &tty->fasync);
+	if (retval <= 0)
+		return retval;
+
+	if (on) {
+		if (!waitqueue_active(&tty->read_wait))
+			tty->minimum_to_wake = 1;
+		if (filp->f_owner.pid == 0) {
+			filp->f_owner.pid = (-tty->pgrp) ? : current->pid;
+			filp->f_owner.uid = current->uid;
+			filp->f_owner.euid = current->euid;
+		}
+	} else {
+		if (!tty->fasync && !waitqueue_active(&tty->read_wait))
+			tty->minimum_to_wake = N_TTY_BUF_SIZE;
+	}
+	return 0;
+}
+
+static int tiocsti(struct tty_struct *tty, char * arg)
+{
+	char ch, mbz = 0;
+
+	if ((current->tty != tty) && !suser())
+		return -EPERM;
+	if (get_user(ch, arg))
+		return -EFAULT;
+	tty->ldisc.receive_buf(tty, &ch, &mbz, 1);
+	return 0;
+}
+
+static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg)
+{
+	if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
+		return -EFAULT;
+	return 0;
+}
+
+static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
+	struct winsize * arg)
+{
+	struct winsize tmp_ws;
+
+	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
+		return -EFAULT;
+	if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
+		return 0;
+	if (tty->pgrp > 0)
+		kill_pg(tty->pgrp, SIGWINCH, 1);
+	if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0))
+		kill_pg(real_tty->pgrp, SIGWINCH, 1);
+	tty->winsize = tmp_ws;
+	real_tty->winsize = tmp_ws;
+	return 0;
+}
+
+static int tioccons(struct inode *inode, struct file *file)
+{
+	if (inode->i_rdev == SYSCONS_DEV ||
+	    inode->i_rdev == CONSOLE_DEV) {
+		struct file *f;
+		if (!suser())
+			return -EPERM;
+		spin_lock(&redirect_lock);
+		f = redirect;
+		redirect = NULL;
+		spin_unlock(&redirect_lock);
+		if (f)
+			fput(f);
+		return 0;
+	}
+	spin_lock(&redirect_lock);
+	if (redirect) {
+		spin_unlock(&redirect_lock);
+		return -EBUSY;
+	}
+	get_file(file);
+	redirect = file;
+	spin_unlock(&redirect_lock);
+	return 0;
+}
+
+
+static int fionbio(struct file *file, int *arg)
+{
+	int nonblock;
+
+	if (get_user(nonblock, arg))
+		return -EFAULT;
+
+	if (nonblock)
+		file->f_flags |= O_NONBLOCK;
+	else
+		file->f_flags &= ~O_NONBLOCK;
+	return 0;
+}
+
+static int tiocsctty(struct tty_struct *tty, int arg)
+{
+	if (current->leader &&
+	    (current->session == tty->session))
+		return 0;
+	/*
+	 * The process must be a session leader and
+	 * not have a controlling tty already.
+	 */
+	if (!current->leader || current->tty)
+		return -EPERM;
+	if (tty->session > 0) {
+		/*
+		 * This tty is already the controlling
+		 * tty for another session group!
+		 */
+		if ((arg == 1) && suser()) {
+			/*
+			 * Steal it away
+			 */
+			struct task_struct *p;
+
+			read_lock(&tasklist_lock);
+			for_each_task(p)
+				if (p->tty == tty)
+					p->tty = NULL;
+			read_unlock(&tasklist_lock);
+		} else
+			return -EPERM;
+	}
+	task_lock(current);
+	current->tty = tty;
+	task_unlock(current);
+	current->tty_old_pgrp = 0;
+	tty->session = current->session;
+	tty->pgrp = current->pgrp;
+	return 0;
+}
+
+static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+	/*
+	 * (tty == real_tty) is a cheap way of
+	 * testing if the tty is NOT a master pty.
+	 */
+	if (tty == real_tty && current->tty != real_tty)
+		return -ENOTTY;
+	return put_user(real_tty->pgrp, arg);
+}
+
+static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+	pid_t pgrp;
+	int retval = tty_check_change(real_tty);
+
+	if (retval == -EIO)
+		return -ENOTTY;
+	if (retval)
+		return retval;
+	if (!current->tty ||
+	    (current->tty != real_tty) ||
+	    (real_tty->session != current->session))
+		return -ENOTTY;
+	if (get_user(pgrp, (pid_t *) arg))
+		return -EFAULT;
+	if (pgrp < 0)
+		return -EINVAL;
+	if (session_of_pgrp(pgrp) != current->session)
+		return -EPERM;
+	real_tty->pgrp = pgrp;
+	return 0;
+}
+
+static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+	/*
+	 * (tty == real_tty) is a cheap way of
+	 * testing if the tty is NOT a master pty.
+	*/
+	if (tty == real_tty && current->tty != real_tty)
+		return -ENOTTY;
+	if (real_tty->session <= 0)
+		return -ENOTTY;
+	return put_user(real_tty->session, arg);
+}
+
+static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg)
+{
+	if (copy_to_user(arg, tty, sizeof(*arg)))
+		return -EFAULT;
+	return 0;
+}
+
+static int tiocsetd(struct tty_struct *tty, int *arg)
+{
+	int ldisc;
+
+	if (get_user(ldisc, arg))
+		return -EFAULT;
+	return tty_set_ldisc(tty, ldisc);
+}
+
+static int send_break(struct tty_struct *tty, int duration)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	tty->driver.break_ctl(tty, -1);
+	if (!signal_pending(current))
+		schedule_timeout(duration);
+	tty->driver.break_ctl(tty, 0);
+	if (signal_pending(current))
+		return -EINTR;
+	return 0;
+}
+
+static int tty_generic_brk(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	if (cmd == TCSBRK && arg) 
+	{
+		/* tcdrain case */
+		int retval = tty_check_change(tty);
+		if (retval)
+			return retval;
+		tty_wait_until_sent(tty, 0);
+		if (signal_pending(current))
+			return -EINTR;
+	}
+	return 0;
+}
+
+/*
+ * Split this up, as gcc can choke on it otherwise..
+ */
+int tty_ioctl(struct inode * inode, struct file * file,
+	      unsigned int cmd, unsigned long arg)
+{
+	struct tty_struct *tty, *real_tty;
+	int retval;
+	
+	tty = (struct tty_struct *)file->private_data;
+	if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl"))
+		return -EINVAL;
+
+	real_tty = tty;
+	if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver.subtype == PTY_TYPE_MASTER)
+		real_tty = tty->link;
+
+	/*
+	 * Break handling by driver
+	 */
+	if (!tty->driver.break_ctl) {
+		switch(cmd) {
+		case TIOCSBRK:
+		case TIOCCBRK:
+			if (tty->driver.ioctl)
+				return tty->driver.ioctl(tty, file, cmd, arg);
+			return -EINVAL;
+			
+		/* These two ioctl's always return success; even if */
+		/* the driver doesn't support them. */
+		case TCSBRK:
+		case TCSBRKP:
+			retval = -ENOIOCTLCMD;
+			if (tty->driver.ioctl)
+				retval = tty->driver.ioctl(tty, file, cmd, arg);
+			/* Not driver handled */
+			if (retval == -ENOIOCTLCMD)
+				retval = tty_generic_brk(tty, file, cmd, arg);
+			return retval;
+		}
+	}
+
+	/*
+	 * Factor out some common prep work
+	 */
+	switch (cmd) {
+	case TIOCSETD:
+	case TIOCSBRK:
+	case TIOCCBRK:
+	case TCSBRK:
+	case TCSBRKP:			
+		retval = tty_check_change(tty);
+		if (retval)
+			return retval;
+		if (cmd != TIOCCBRK) {
+			tty_wait_until_sent(tty, 0);
+			if (signal_pending(current))
+				return -EINTR;
+		}
+		break;
+	}
+
+	switch (cmd) {
+		case TIOCSTI:
+			return tiocsti(tty, (char *)arg);
+		case TIOCGWINSZ:
+			return tiocgwinsz(tty, (struct winsize *) arg);
+		case TIOCSWINSZ:
+			return tiocswinsz(tty, real_tty, (struct winsize *) arg);
+		case TIOCCONS:
+			return real_tty!=tty ? -EINVAL : tioccons(inode, file);
+		case FIONBIO:
+			return fionbio(file, (int *) arg);
+		case TIOCEXCL:
+			set_bit(TTY_EXCLUSIVE, &tty->flags);
+			return 0;
+		case TIOCNXCL:
+			clear_bit(TTY_EXCLUSIVE, &tty->flags);
+			return 0;
+		case TIOCNOTTY:
+			if (current->tty != tty)
+				return -ENOTTY;
+			if (current->leader)
+				disassociate_ctty(0);
+			task_lock(current);
+			current->tty = NULL;
+			task_unlock(current);
+			return 0;
+		case TIOCSCTTY:
+			return tiocsctty(tty, arg);
+		case TIOCGPGRP:
+			return tiocgpgrp(tty, real_tty, (pid_t *) arg);
+		case TIOCSPGRP:
+			return tiocspgrp(tty, real_tty, (pid_t *) arg);
+		case TIOCGSID:
+			return tiocgsid(tty, real_tty, (pid_t *) arg);
+		case TIOCGETD:
+			return put_user(tty->ldisc.num, (int *) arg);
+		case TIOCSETD:
+			return tiocsetd(tty, (int *) arg);
+#ifdef CONFIG_VT
+		case TIOCLINUX:
+			return tioclinux(tty, arg);
+#endif
+		case TIOCTTYGSTRUCT:
+			return tiocttygstruct(tty, (struct tty_struct *) arg);
+
+		/*
+		 * Break handling
+		 */
+		case TIOCSBRK:	/* Turn break on, unconditionally */
+			tty->driver.break_ctl(tty, -1);
+			return 0;
+			
+		case TIOCCBRK:	/* Turn break off, unconditionally */
+			tty->driver.break_ctl(tty, 0);
+			return 0;
+		case TCSBRK:   /* SVID version: non-zero arg --> no break */
+			/*
+			 * XXX is the above comment correct, or the
+			 * code below correct?  Is this ioctl used at
+			 * all by anyone?
+			 */
+			if (!arg)
+				return send_break(tty, HZ/4);
+			return 0;
+		case TCSBRKP:	/* support for POSIX tcsendbreak() */	
+			return send_break(tty, arg ? arg*(HZ/10) : HZ/4);
+	}
+	if (tty->driver.ioctl) {
+		int retval = (tty->driver.ioctl)(tty, file, cmd, arg);
+		if (retval != -ENOIOCTLCMD)
+			return retval;
+	}
+	if (tty->ldisc.ioctl) {
+		int retval = (tty->ldisc.ioctl)(tty, file, cmd, arg);
+		if (retval != -ENOIOCTLCMD)
+			return retval;
+	}
+	return -EINVAL;
+}
+
+
+/*
+ * This implements the "Secure Attention Key" ---  the idea is to
+ * prevent trojan horses by killing all processes associated with this
+ * tty when the user hits the "Secure Attention Key".  Required for
+ * super-paranoid applications --- see the Orange Book for more details.
+ * 
+ * This code could be nicer; ideally it should send a HUP, wait a few
+ * seconds, then send a INT, and then a KILL signal.  But you then
+ * have to coordinate with the init process, since all processes associated
+ * with the current tty must be dead before the new getty is allowed
+ * to spawn.
+ *
+ * Now, if it would be correct ;-/ The current code has a nasty hole -
+ * it doesn't catch files in flight. We may send the descriptor to ourselves
+ * via AF_UNIX socket, close it and later fetch from socket. FIXME.
+ *
+ * Nasty bug: do_SAK is being called in interrupt context.  This can
+ * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
+ */
+static void __do_SAK(void *arg)
+{
+#ifdef TTY_SOFT_SAK
+	tty_hangup(tty);
+#else
+	struct tty_struct *tty = arg;
+	struct task_struct *p;
+	int session;
+	int		i;
+	struct file	*filp;
+	
+	if (!tty)
+		return;
+	session  = tty->session;
+	if (tty->ldisc.flush_buffer)
+		tty->ldisc.flush_buffer(tty);
+	if (tty->driver.flush_buffer)
+		tty->driver.flush_buffer(tty);
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		if ((p->tty == tty) ||
+		    ((session > 0) && (p->session == session))) {
+			send_sig(SIGKILL, p, 1);
+			continue;
+		}
+		task_lock(p);
+		if (p->files) {
+			read_lock(&p->files->file_lock);
+			for (i=0; i < p->files->max_fds; i++) {
+				filp = fcheck_files(p->files, i);
+				if (filp && (filp->f_op == &tty_fops) &&
+				    (filp->private_data == tty)) {
+					send_sig(SIGKILL, p, 1);
+					break;
+				}
+			}
+			read_unlock(&p->files->file_lock);
+		}
+		task_unlock(p);
+	}
+	read_unlock(&tasklist_lock);
+#endif
+}
+
+/*
+ * The tq handling here is a little racy - tty->SAK_tq may already be queued.
+ * But there's no mechanism to fix that without futzing with tqueue_lock.
+ * Fortunately we don't need to worry, because if ->SAK_tq is already queued,
+ * the values which we write to it will be identical to the values which it
+ * already has. --akpm
+ */
+void do_SAK(struct tty_struct *tty)
+{
+	if (!tty)
+		return;
+	PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty);
+	schedule_task(&tty->SAK_tq);
+}
+
+/*
+ * This routine is called out of the software interrupt to flush data
+ * from the flip buffer to the line discipline.
+ */
+static void flush_to_ldisc(void *private_)
+{
+	struct tty_struct *tty = (struct tty_struct *) private_;
+	unsigned char	*cp;
+	char		*fp;
+	int		count;
+	unsigned long flags;
+
+	if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
+		queue_task(&tty->flip.tqueue, &tq_timer);
+		return;
+	}
+	if (tty->flip.buf_num) {
+		cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+		fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+		tty->flip.buf_num = 0;
+
+		save_flags(flags); cli();
+		tty->flip.char_buf_ptr = tty->flip.char_buf;
+		tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+	} else {
+		cp = tty->flip.char_buf;
+		fp = tty->flip.flag_buf;
+		tty->flip.buf_num = 1;
+
+		save_flags(flags); cli();
+		tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+		tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+	}
+	count = tty->flip.count;
+	tty->flip.count = 0;
+	restore_flags(flags);
+	
+	tty->ldisc.receive_buf(tty, cp, fp, count);
+}
+
+/*
+ * Routine which returns the baud rate of the tty
+ *
+ * Note that the baud_table needs to be kept in sync with the
+ * include/asm/termbits.h file.
+ */
+static int baud_table[] = {
+	0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
+	9600, 19200, 38400, 57600, 115200, 230400, 460800,
+#ifdef __sparc__
+	76800, 153600, 307200, 614400, 921600
+#else
+	500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000,
+	2500000, 3000000, 3500000, 4000000
+#endif
+};
+
+static int n_baud_table = sizeof(baud_table)/sizeof(int);
+
+int tty_get_baud_rate(struct tty_struct *tty)
+{
+	unsigned int cflag, i;
+
+	cflag = tty->termios->c_cflag;
+
+	i = cflag & CBAUD;
+	if (i & CBAUDEX) {
+		i &= ~CBAUDEX;
+		if (i < 1 || i+15 >= n_baud_table) 
+			tty->termios->c_cflag &= ~CBAUDEX;
+		else
+			i += 15;
+	}
+	if (i==15 && tty->alt_speed) {
+		if (!tty->warned) {
+			printk(KERN_WARNING "Use of setserial/setrocket to "
+					    "set SPD_* flags is deprecated\n");
+			tty->warned = 1;
+		}
+		return(tty->alt_speed);
+	}
+	
+	return baud_table[i];
+}
+
+void tty_flip_buffer_push(struct tty_struct *tty)
+{
+	if (tty->low_latency)
+		flush_to_ldisc((void *) tty);
+	else
+		queue_task(&tty->flip.tqueue, &tq_timer);
+}
+
+/*
+ * This subroutine initializes a tty structure.
+ */
+static void initialize_tty_struct(struct tty_struct *tty)
+{
+	memset(tty, 0, sizeof(struct tty_struct));
+	tty->magic = TTY_MAGIC;
+	tty->ldisc = ldiscs[N_TTY];
+	tty->pgrp = -1;
+	tty->flip.char_buf_ptr = tty->flip.char_buf;
+	tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+	tty->flip.tqueue.routine = flush_to_ldisc;
+	tty->flip.tqueue.data = tty;
+	init_MUTEX(&tty->flip.pty_sem);
+	init_waitqueue_head(&tty->write_wait);
+	init_waitqueue_head(&tty->read_wait);
+	tty->tq_hangup.routine = do_tty_hangup;
+	tty->tq_hangup.data = tty;
+	sema_init(&tty->atomic_read, 1);
+	sema_init(&tty->atomic_write, 1);
+	spin_lock_init(&tty->read_lock);
+	INIT_LIST_HEAD(&tty->tty_files);
+	INIT_TQUEUE(&tty->SAK_tq, 0, 0);
+}
+
+/*
+ * The default put_char routine if the driver did not define one.
+ */
+void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
+{
+	tty->driver.write(tty, 0, &ch, 1);
+}
+
+/*
+ * Register a tty device described by <driver>, with minor number <minor>.
+ */
+void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+	umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR;
+	kdev_t device = MKDEV (driver->major, minor);
+	int idx = minor - driver->minor_start;
+	char buf[32];
+
+	switch (device) {
+		case TTY_DEV:
+		case PTMX_DEV:
+			mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+			break;
+		default:
+			if (driver->major == PTY_MASTER_MAJOR)
+				mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+			break;
+	}
+	if ( (minor <  driver->minor_start) || 
+	     (minor >= driver->minor_start + driver->num) ) {
+		printk(KERN_ERR "Attempt to register invalid minor number "
+		       "with devfs (%d:%d).\n", (int)driver->major,(int)minor);
+		return;
+	}
+#  ifdef CONFIG_UNIX98_PTYS
+	if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) &&
+	     (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) )
+		flags |= DEVFS_FL_CURRENT_OWNER;
+#  endif
+	sprintf(buf, driver->name, idx + driver->name_base);
+	devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT,
+			driver->major, minor, mode, &tty_fops, NULL);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+void tty_unregister_devfs (struct tty_driver *driver, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+	void * handle;
+	int idx = minor - driver->minor_start;
+	char buf[32];
+
+	sprintf(buf, driver->name, idx + driver->name_base);
+	handle = devfs_find_handle (NULL, buf, driver->major, minor,
+				    DEVFS_SPECIAL_CHR, 0);
+	devfs_unregister (handle);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+EXPORT_SYMBOL(tty_register_devfs);
+EXPORT_SYMBOL(tty_unregister_devfs);
+
+/*
+ * Called by a tty driver to register itself.
+ */
+int tty_register_driver(struct tty_driver *driver)
+{
+	int error;
+        int i;
+
+	if (driver->flags & TTY_DRIVER_INSTALLED)
+		return 0;
+
+	error = devfs_register_chrdev(driver->major, driver->name, &tty_fops);
+	if (error < 0)
+		return error;
+	else if(driver->major == 0)
+		driver->major = error;
+
+	if (!driver->put_char)
+		driver->put_char = tty_default_put_char;
+	
+	driver->prev = 0;
+	driver->next = tty_drivers;
+	if (tty_drivers) tty_drivers->prev = driver;
+	tty_drivers = driver;
+	
+	if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
+		for(i = 0; i < driver->num; i++)
+		    tty_register_devfs(driver, 0, driver->minor_start + i);
+	}
+	proc_tty_register_driver(driver);
+	return error;
+}
+
+/*
+ * Called by a tty driver to unregister itself.
+ */
+int tty_unregister_driver(struct tty_driver *driver)
+{
+	int	retval;
+	struct tty_driver *p;
+	int	i, found = 0;
+	struct termios *tp;
+	const char *othername = NULL;
+	
+	if (*driver->refcount)
+		return -EBUSY;
+
+	for (p = tty_drivers; p; p = p->next) {
+		if (p == driver)
+			found++;
+		else if (p->major == driver->major)
+			othername = p->name;
+	}
+	
+	if (!found)
+		return -ENOENT;
+
+	if (othername == NULL) {
+		retval = devfs_unregister_chrdev(driver->major, driver->name);
+		if (retval)
+			return retval;
+	} else
+		devfs_register_chrdev(driver->major, othername, &tty_fops);
+
+	if (driver->prev)
+		driver->prev->next = driver->next;
+	else
+		tty_drivers = driver->next;
+	
+	if (driver->next)
+		driver->next->prev = driver->prev;
+
+	/*
+	 * Free the termios and termios_locked structures because
+	 * we don't want to get memory leaks when modular tty
+	 * drivers are removed from the kernel.
+	 */
+	for (i = 0; i < driver->num; i++) {
+		tp = driver->termios[i];
+		if (tp) {
+			driver->termios[i] = NULL;
+			kfree(tp);
+		}
+		tp = driver->termios_locked[i];
+		if (tp) {
+			driver->termios_locked[i] = NULL;
+			kfree(tp);
+		}
+		tty_unregister_devfs(driver, driver->minor_start + i);
+	}
+	proc_tty_unregister_driver(driver);
+	return 0;
+}
+
+
+/*
+ * Initialize the console device. This is called *early*, so
+ * we can't necessarily depend on lots of kernel help here.
+ * Just do some early initializations, and do the complex setup
+ * later.
+ */
+void __init console_init(void)
+{
+	/* Setup the default TTY line discipline. */
+	memset(ldiscs, 0, sizeof(ldiscs));
+	(void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY);
+
+	/*
+	 * Set up the standard termios.  Individual tty drivers may 
+	 * deviate from this; this is used as a template.
+	 */
+	memset(&tty_std_termios, 0, sizeof(struct termios));
+	memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS);
+	tty_std_termios.c_iflag = ICRNL | IXON;
+	tty_std_termios.c_oflag = OPOST | ONLCR;
+	tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL;
+	tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+		ECHOCTL | ECHOKE | IEXTEN;
+
+	/*
+	 * set up the console device so that later boot sequences can 
+	 * inform about problems etc..
+	 */
+#ifdef CONFIG_EARLY_PRINTK
+	disable_early_printk(); 
+#endif
+
+#ifdef CONFIG_XEN_CONSOLE
+        xen_console_init();
+#endif
+
+#ifdef CONFIG_VT
+	con_init();
+#endif
+#ifdef CONFIG_AU1X00_SERIAL_CONSOLE
+	au1x00_serial_console_init();
+#endif
+#ifdef CONFIG_SERIAL_CONSOLE
+#if (defined(CONFIG_8xx) || defined(CONFIG_8260))
+	console_8xx_init();
+#elif defined(CONFIG_MAC_SERIAL) && defined(CONFIG_SERIAL)
+	if (_machine == _MACH_Pmac)
+ 		mac_scc_console_init();
+	else
+		serial_console_init();
+#elif defined(CONFIG_MAC_SERIAL)
+ 	mac_scc_console_init();
+#elif defined(CONFIG_PARISC)
+	pdc_console_init();
+#elif defined(CONFIG_SERIAL)
+	serial_console_init();
+#endif /* CONFIG_8xx */
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+	vme_scc_console_init();
+#endif
+#if defined(CONFIG_SERIAL167)
+	serial167_console_init();
+#endif
+#if defined(CONFIG_SH_SCI)
+	sci_console_init();
+#endif
+#endif
+#ifdef CONFIG_SERIAL_DEC_CONSOLE
+	dec_serial_console_init();
+#endif
+#ifdef CONFIG_TN3270_CONSOLE
+	tub3270_con_init();
+#endif
+#ifdef CONFIG_TN3215
+	con3215_init();
+#endif
+#ifdef CONFIG_HWC
+        hwc_console_init();
+#endif
+#ifdef CONFIG_STDIO_CONSOLE
+	stdio_console_init();
+#endif
+#ifdef CONFIG_SERIAL_21285_CONSOLE
+	rs285_console_init();
+#endif
+#ifdef CONFIG_SERIAL_SA1100_CONSOLE
+	sa1100_rs_console_init();
+#endif
+#ifdef CONFIG_ARC_CONSOLE
+	arc_console_init();
+#endif
+#ifdef CONFIG_SERIAL_AMBA_CONSOLE
+	ambauart_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912_CONSOLE
+	tx3912_console_init();
+#endif
+#ifdef CONFIG_TXX927_SERIAL_CONSOLE
+	txx927_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TXX9_CONSOLE
+	txx9_serial_console_init();
+#endif
+#ifdef CONFIG_SIBYTE_SB1250_DUART_CONSOLE
+	sb1250_serial_console_init();
+#endif
+#ifdef CONFIG_IP22_SERIAL
+	sgi_serial_console_init();
+#endif
+}
+
+static struct tty_driver dev_tty_driver, dev_syscons_driver;
+#ifdef CONFIG_UNIX98_PTYS
+static struct tty_driver dev_ptmx_driver;
+#endif
+#ifdef CONFIG_VT
+static struct tty_driver dev_console_driver;
+#endif
+
+/*
+ * Ok, now we can initialize the rest of the tty devices and can count
+ * on memory allocations, interrupts etc..
+ */
+void __init tty_init(void)
+{
+	/*
+	 * dev_tty_driver and dev_console_driver are actually magic
+	 * devices which get redirected at open time.  Nevertheless,
+	 * we register them so that register_chrdev is called
+	 * appropriately.
+	 */
+	memset(&dev_tty_driver, 0, sizeof(struct tty_driver));
+	dev_tty_driver.magic = TTY_DRIVER_MAGIC;
+	dev_tty_driver.driver_name = "/dev/tty";
+	dev_tty_driver.name = dev_tty_driver.driver_name + 5;
+	dev_tty_driver.name_base = 0;
+	dev_tty_driver.major = TTYAUX_MAJOR;
+	dev_tty_driver.minor_start = 0;
+	dev_tty_driver.num = 1;
+	dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_tty_driver.subtype = SYSTEM_TYPE_TTY;
+	
+	if (tty_register_driver(&dev_tty_driver))
+		panic("Couldn't register /dev/tty driver\n");
+
+	dev_syscons_driver = dev_tty_driver;
+	dev_syscons_driver.driver_name = "/dev/console";
+	dev_syscons_driver.name = dev_syscons_driver.driver_name + 5;
+	dev_syscons_driver.major = TTYAUX_MAJOR;
+	dev_syscons_driver.minor_start = 1;
+	dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS;
+
+	if (tty_register_driver(&dev_syscons_driver))
+		panic("Couldn't register /dev/console driver\n");
+
+	/* console calls tty_register_driver() before kmalloc() works.
+	 * Thus, we can't devfs_register() then.  Do so now, instead. 
+	 */
+#ifdef CONFIG_VT
+	con_init_devfs();
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+	dev_ptmx_driver = dev_tty_driver;
+	dev_ptmx_driver.driver_name = "/dev/ptmx";
+	dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5;
+	dev_ptmx_driver.major= MAJOR(PTMX_DEV);
+	dev_ptmx_driver.minor_start = MINOR(PTMX_DEV);
+	dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX;
+
+	if (tty_register_driver(&dev_ptmx_driver))
+		panic("Couldn't register /dev/ptmx driver\n");
+#endif
+	
+#ifdef CONFIG_VT
+	dev_console_driver = dev_tty_driver;
+	dev_console_driver.driver_name = "/dev/vc/0";
+	dev_console_driver.name = dev_console_driver.driver_name + 5;
+	dev_console_driver.major = TTY_MAJOR;
+	dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE;
+
+	if (tty_register_driver(&dev_console_driver))
+		panic("Couldn't register /dev/tty0 driver\n");
+
+	kbd_init();
+#endif
+
+#ifdef CONFIG_SGI_L1_SERIAL_CONSOLE
+	if (ia64_platform_is("sn2")) {
+		sn_sal_serial_console_init();
+		return; /* only one console right now for SN2 */
+	}
+#endif
+#ifdef CONFIG_ESPSERIAL  /* init ESP before rs, so rs doesn't see the port */
+	espserial_init();
+#endif
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+	vme_scc_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912
+	tx3912_rs_init();
+#endif
+#ifdef CONFIG_ROCKETPORT
+	rp_init();
+#endif
+#ifdef CONFIG_SERIAL167
+	serial167_init();
+#endif
+#ifdef CONFIG_CYCLADES
+	cy_init();
+#endif
+#ifdef CONFIG_STALLION
+	stl_init();
+#endif
+#ifdef CONFIG_ISTALLION
+	stli_init();
+#endif
+#ifdef CONFIG_DIGI
+	pcxe_init();
+#endif
+#ifdef CONFIG_DIGIEPCA
+	pc_init();
+#endif
+#ifdef CONFIG_SPECIALIX
+	specialix_init();
+#endif
+#if (defined(CONFIG_8xx) || defined(CONFIG_8260))
+	rs_8xx_init();
+#endif /* CONFIG_8xx */
+	pty_init();
+#ifdef CONFIG_MOXA_SMARTIO
+	mxser_init();
+#endif	
+#ifdef CONFIG_MOXA_INTELLIO
+	moxa_init();
+#endif	
+#ifdef CONFIG_VT
+	vcs_init();
+#endif
+#ifdef CONFIG_TN3270
+	tub3270_init();
+#endif
+#ifdef CONFIG_TN3215
+	tty3215_init();
+#endif
+#ifdef CONFIG_HWC
+	hwc_tty_init();
+#endif
+#ifdef CONFIG_A2232
+	a2232board_init();
+#endif
+}