aboutsummaryrefslogtreecommitdiffstats
path: root/xenolinux-2.4.23-sparse/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'xenolinux-2.4.23-sparse/drivers')
-rw-r--r--xenolinux-2.4.23-sparse/drivers/block/ll_rw_blk.c1646
-rw-r--r--xenolinux-2.4.23-sparse/drivers/char/mem.c815
-rw-r--r--xenolinux-2.4.23-sparse/drivers/char/tty_io.c2468
3 files changed, 4929 insertions, 0 deletions
diff --git a/xenolinux-2.4.23-sparse/drivers/block/ll_rw_blk.c b/xenolinux-2.4.23-sparse/drivers/block/ll_rw_blk.c
new file mode 100644
index 0000000000..bca30ae493
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/drivers/block/ll_rw_blk.c
@@ -0,0 +1,1646 @@
+/*
+ * linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/locks.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+#include <linux/bootmem.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <linux/blk.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have protect against block device array operations, and
+ * the do_request() side is casually still unsafe. The kernel lock protects
+ * this part currently.).
+ *
+ * there is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ * *request_fn
+ * *current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024 byte
+ * sectors:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ * then 512 bytes is assumed.
+ * else
+ * sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+unsigned long blk_max_low_pfn, blk_max_pfn;
+int blk_nohighio = 0;
+
+int block_dump = 0;
+
+static struct timer_list writeback_timer;
+
+static inline int get_max_sectors(kdev_t dev)
+{
+ if (!max_sectors[MAJOR(dev)])
+ return MAX_SECTORS;
+ return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+ struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+ if (bdev->queue)
+ return bdev->queue(dev);
+ else
+ return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+ struct list_head *head = &list->free;
+ struct request *rq;
+ int i = 0;
+
+ while (!list_empty(head)) {
+ rq = list_entry(head->next, struct request, queue);
+ list_del(&rq->queue);
+ kmem_cache_free(request_cachep, rq);
+ i++;
+ };
+
+ if (i != list->count)
+ printk("request list leak!\n");
+
+ list->count = 0;
+ return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q: the request queue to be released
+ *
+ * Description:
+ * blk_cleanup_queue is the pair to blk_init_queue(). It should
+ * be called when a request queue is being released; typically
+ * when a block device is being de-registered. Currently, its
+ * primary task it to free all the &struct request structures that
+ * were allocated to the queue.
+ * Caveat:
+ * Hopefully the low level driver will have finished any
+ * outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+ int count = q->nr_requests;
+
+ count -= __blk_cleanup_queue(&q->rq);
+
+ if (count)
+ printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+ if (atomic_read(&q->nr_sectors))
+ printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
+
+ memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q: The queue which this applies to.
+ * @active: A flag indication where the head of the queue is active.
+ *
+ * Description:
+ * The driver for a block device may choose to leave the currently active
+ * request on the request queue, removing it only when it has completed.
+ * The queue handling routines assume this by default for safety reasons
+ * and will not involve the head of the request queue in any merging or
+ * reordering of requests when the queue is unplugged (and thus may be
+ * working on this particular request).
+ *
+ * If a driver removes requests from the queue before processing them, then
+ * it may indicate that it does so, there by allowing the head of the queue
+ * to be involved in merging and reordering. This is done be calling
+ * blk_queue_headactive() with an @active flag of %0.
+ *
+ * If a driver processes several requests at once, it must remove them (or
+ * at least all but one of them) from the request queue.
+ *
+ * When a queue is plugged the head will be assumed to be inactive.
+ **/
+
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+ q->head_active = active;
+}
+
+/**
+ * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
+ * @q: The queue which this applies to.
+ * @active: A flag indication if you want sector throttling on
+ *
+ * Description:
+ * The sector throttling code allows us to put a limit on the number of
+ * sectors pending io to the disk at a given time, sending @active nonzero
+ * indicates you will call blk_started_sectors and blk_finished_sectors in
+ * addition to calling blk_started_io and blk_finished_io in order to
+ * keep track of the number of sectors in flight.
+ **/
+
+void blk_queue_throttle_sectors(request_queue_t * q, int active)
+{
+ q->can_throttle = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q: the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ * The normal way for &struct buffer_heads to be passed to a device
+ * driver is for them to be collected into requests on a request
+ * queue, and then to allow the device driver to select requests
+ * off that queue when it is ready. This works well for many block
+ * devices. However some block devices (typically virtual devices
+ * such as md or lvm) do not benefit from the processing on the
+ * request queue, and are served best by having the requests passed
+ * directly to them. This can be achieved by providing a function
+ * to blk_queue_make_request().
+ *
+ * Caveat:
+ * The driver that does this *must* be able to deal appropriately
+ * with buffers in "highmemory", either by calling bh_kmap() to get
+ * a kernel mapping, to by calling create_bounce() to create a
+ * buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+ q->make_request_fn = mfn;
+}
+
+/**
+ * blk_queue_bounce_limit - set bounce buffer limit for queue
+ * @q: the request queue for the device
+ * @dma_addr: bus address limit
+ *
+ * Description:
+ * Different hardware can have different requirements as to what pages
+ * it can do I/O directly to. A low level driver can call
+ * blk_queue_bounce_limit to have lower memory pages allocated as bounce
+ * buffers for doing I/O to pages residing above @page. By default
+ * the block layer sets this to the highest numbered "low" memory page.
+ **/
+void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
+{
+ unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
+ unsigned long mb = dma_addr >> 20;
+ static request_queue_t *old_q;
+
+ /*
+ * keep this for debugging for now...
+ */
+ if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
+ old_q = q;
+ printk("blk: queue %p, ", q);
+ if (dma_addr == BLK_BOUNCE_ANY)
+ printk("no I/O memory limit\n");
+ else
+ printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
+ (long long) dma_addr);
+ }
+
+ q->bounce_pfn = bounce_pfn;
+}
+
+
+/*
+ * can we merge the two segments, or do we need to start a new one?
+ */
+inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
+{
+ /*
+ * if bh and nxt are contigous and don't cross a 4g boundary, it's ok
+ */
+ if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
+ return 1;
+
+ return 0;
+}
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+ if (req->nr_segments < max_segments) {
+ req->nr_segments++;
+ return 1;
+ }
+ return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (blk_seg_merge_ok(req->bhtail, bh))
+ return 1;
+
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (blk_seg_merge_ok(bh, req->bh))
+ return 1;
+
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+ struct request *next, int max_segments)
+{
+ int total_segments = req->nr_segments + next->nr_segments;
+
+ if (blk_seg_merge_ok(req->bhtail, next->bh))
+ total_segments--;
+
+ if (total_segments > max_segments)
+ return 0;
+
+ req->nr_segments = total_segments;
+ return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+ /*
+ * no need to replug device
+ */
+ if (!list_empty(&q->queue_head) || q->plugged)
+ return;
+
+ q->plugged = 1;
+ queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+ if (q->plugged) {
+ q->plugged = 0;
+ if (!list_empty(&q->queue_head))
+ q->request_fn(q);
+ }
+}
+
+void generic_unplug_device(void *data)
+{
+ request_queue_t *q = (request_queue_t *) data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ __generic_unplug_device(q);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/** blk_grow_request_list
+ * @q: The &request_queue_t
+ * @nr_requests: how many requests are desired
+ *
+ * More free requests are added to the queue's free lists, bringing
+ * the total number of requests to @nr_requests.
+ *
+ * The requests are added equally to the request queue's read
+ * and write freelists.
+ *
+ * This function can sleep.
+ *
+ * Returns the (new) number of requests which the queue has available.
+ */
+int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
+{
+ unsigned long flags;
+ /* Several broken drivers assume that this function doesn't sleep,
+ * this causes system hangs during boot.
+ * As a temporary fix, make the function non-blocking.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ while (q->nr_requests < nr_requests) {
+ struct request *rq;
+
+ rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
+ if (rq == NULL)
+ break;
+ memset(rq, 0, sizeof(*rq));
+ rq->rq_status = RQ_INACTIVE;
+ list_add(&rq->queue, &q->rq.free);
+ q->rq.count++;
+
+ q->nr_requests++;
+ }
+
+ /*
+ * Wakeup waiters after both one quarter of the
+ * max-in-fligh queue and one quarter of the requests
+ * are available again.
+ */
+
+ q->batch_requests = q->nr_requests / 4;
+ if (q->batch_requests > 32)
+ q->batch_requests = 32;
+ q->batch_sectors = max_queue_sectors / 4;
+
+ q->max_queue_sectors = max_queue_sectors;
+
+ BUG_ON(!q->batch_sectors);
+ atomic_set(&q->nr_sectors, 0);
+
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return q->nr_requests;
+}
+
+static void blk_init_free_list(request_queue_t *q)
+{
+ struct sysinfo si;
+ int megs; /* Total memory, in megabytes */
+ int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
+
+ INIT_LIST_HEAD(&q->rq.free);
+ q->rq.count = 0;
+ q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
+ q->nr_requests = 0;
+
+ si_meminfo(&si);
+ megs = si.totalram >> (20 - PAGE_SHIFT);
+ nr_requests = MAX_NR_REQUESTS;
+ if (megs < 30) {
+ nr_requests /= 2;
+ max_queue_sectors /= 2;
+ }
+ /* notice early if anybody screwed the defaults */
+ BUG_ON(!nr_requests);
+ BUG_ON(!max_queue_sectors);
+
+ blk_grow_request_list(q, nr_requests, max_queue_sectors);
+
+ init_waitqueue_head(&q->wait_for_requests);
+
+ spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue - prepare a request queue for use with a block device
+ * @q: The &request_queue_t to be initialised
+ * @rfn: The function to be called to process requests that have been
+ * placed on the queue.
+ *
+ * Description:
+ * If a block device wishes to use the standard request handling procedures,
+ * which sorts requests and coalesces adjacent requests, then it must
+ * call blk_init_queue(). The function @rfn will be called when there
+ * are requests on the queue that need to be processed. If the device
+ * supports plugging, then @rfn may not be called immediately when requests
+ * are available on the queue, but may be called at some time later instead.
+ * Plugged queues are generally unplugged when a buffer belonging to one
+ * of the requests on the queue is needed, or due to memory pressure.
+ *
+ * @rfn is not required, or even expected, to remove all requests off the
+ * queue, but only as many as it can handle at a time. If it does leave
+ * requests on the queue, it is responsible for arranging that the requests
+ * get dealt with eventually.
+ *
+ * A global spin lock $io_request_lock must be held while manipulating the
+ * requests on the request queue.
+ *
+ * The request on the head of the queue is by default assumed to be
+ * potentially active, and it is not considered for re-ordering or merging
+ * whenever the given queue is unplugged. This behaviour can be changed with
+ * blk_queue_headactive().
+ *
+ * Note:
+ * blk_init_queue() must be paired with a blk_cleanup_queue() call
+ * when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+ INIT_LIST_HEAD(&q->queue_head);
+ elevator_init(&q->elevator, ELEVATOR_LINUS);
+ blk_init_free_list(q);
+ q->request_fn = rfn;
+ q->back_merge_fn = ll_back_merge_fn;
+ q->front_merge_fn = ll_front_merge_fn;
+ q->merge_requests_fn = ll_merge_requests_fn;
+ q->make_request_fn = __make_request;
+ q->plug_tq.sync = 0;
+ q->plug_tq.routine = &generic_unplug_device;
+ q->plug_tq.data = q;
+ q->plugged = 0;
+ q->can_throttle = 0;
+
+ /*
+ * These booleans describe the queue properties. We set the
+ * default (and most common) values here. Other drivers can
+ * use the appropriate functions to alter the queue properties.
+ * as appropriate.
+ */
+ q->plug_device_fn = generic_plug_device;
+ q->head_active = 1;
+
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+}
+
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in. Returns NULL if there are no free requests.
+ */
+static struct request *get_request(request_queue_t *q, int rw)
+{
+ struct request *rq = NULL;
+ struct request_list *rl = &q->rq;
+
+ if (blk_oversized_queue(q)) {
+ int rlim = q->nr_requests >> 5;
+
+ if (rlim < 4)
+ rlim = 4;
+
+ /*
+ * if its a write, or we have more than a handful of reads
+ * pending, bail out
+ */
+ if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
+ return NULL;
+ if (blk_oversized_queue_reads(q))
+ return NULL;
+ }
+
+ if (!list_empty(&rl->free)) {
+ rq = blkdev_free_rq(&rl->free);
+ list_del(&rq->queue);
+ rl->count--;
+ rl->pending[rw]++;
+ rq->rq_status = RQ_ACTIVE;
+ rq->cmd = rw;
+ rq->special = NULL;
+ rq->q = q;
+ }
+
+ return rq;
+}
+
+/*
+ * Here's the request allocation design, low latency version:
+ *
+ * 1: Blocking on request exhaustion is a key part of I/O throttling.
+ *
+ * 2: We want to be `fair' to all requesters. We must avoid starvation, and
+ * attempt to ensure that all requesters sleep for a similar duration. Hence
+ * no stealing requests when there are other processes waiting.
+ *
+ * There used to be more here, attempting to allow a process to send in a
+ * number of requests once it has woken up. But, there's no way to
+ * tell if a process has just been woken up, or if it is a new process
+ * coming in to steal requests from the waiters. So, we give up and force
+ * everyone to wait fairly.
+ *
+ * So here's what we do:
+ *
+ * a) A READA requester fails if free_requests < batch_requests
+ *
+ * We don't want READA requests to prevent sleepers from ever
+ * waking. Note that READA is used extremely rarely - a few
+ * filesystems use it for directory readahead.
+ *
+ * When a process wants a new request:
+ *
+ * b) If free_requests == 0, the requester sleeps in FIFO manner, and
+ * the queue full condition is set. The full condition is not
+ * cleared until there are no longer any waiters. Once the full
+ * condition is set, all new io must wait, hopefully for a very
+ * short period of time.
+ *
+ * When a request is released:
+ *
+ * c) If free_requests < batch_requests, do nothing.
+ *
+ * d) If free_requests >= batch_requests, wake up a single waiter.
+ *
+ * As each waiter gets a request, he wakes another waiter. We do this
+ * to prevent a race where an unplug might get run before a request makes
+ * it's way onto the queue. The result is a cascade of wakeups, so delaying
+ * the initial wakeup until we've got batch_requests available helps avoid
+ * wakeups where there aren't any requests available yet.
+ */
+
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+ register struct request *rq;
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&q->wait_for_requests, &wait);
+
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_lock_irq(&io_request_lock);
+ if (blk_oversized_queue(q) || q->rq.count == 0) {
+ __generic_unplug_device(q);
+ spin_unlock_irq(&io_request_lock);
+ schedule();
+ spin_lock_irq(&io_request_lock);
+ }
+ rq = get_request(q, rw);
+ spin_unlock_irq(&io_request_lock);
+ } while (rq == NULL);
+ remove_wait_queue(&q->wait_for_requests, &wait);
+ current->state = TASK_RUNNING;
+
+ return rq;
+}
+
+static void get_request_wait_wakeup(request_queue_t *q, int rw)
+{
+ /*
+ * avoid losing an unplug if a second __get_request_wait did the
+ * generic_unplug_device while our __get_request_wait was running
+ * w/o the queue_lock held and w/ our request out of the queue.
+ */
+ if (waitqueue_active(&q->wait_for_requests))
+ wake_up(&q->wait_for_requests);
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return 0;
+ return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return;
+ if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+ else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+ unsigned long nr_sectors, int new_io)
+{
+ unsigned int major = MAJOR(dev);
+ unsigned int index;
+
+ index = disk_index(dev);
+ if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+ return;
+
+ kstat.dk_drive[major][index] += new_io;
+ if (rw == READ) {
+ kstat.dk_drive_rio[major][index] += new_io;
+ kstat.dk_drive_rblk[major][index] += nr_sectors;
+ } else if (rw == WRITE) {
+ kstat.dk_drive_wio[major][index] += new_io;
+ kstat.dk_drive_wblk[major][index] += nr_sectors;
+ } else
+ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+}
+
+#ifdef CONFIG_BLK_STATS
+/*
+ * Return up to two hd_structs on which to do IO accounting for a given
+ * request.
+ *
+ * On a partitioned device, we want to account both against the partition
+ * and against the whole disk.
+ */
+static void locate_hd_struct(struct request *req,
+ struct hd_struct **hd1,
+ struct hd_struct **hd2)
+{
+ struct gendisk *gd;
+
+ *hd1 = NULL;
+ *hd2 = NULL;
+
+ gd = get_gendisk(req->rq_dev);
+ if (gd && gd->part) {
+ /* Mask out the partition bits: account for the entire disk */
+ int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+ int whole_minor = devnr << gd->minor_shift;
+
+ *hd1 = &gd->part[whole_minor];
+ if (whole_minor != MINOR(req->rq_dev))
+ *hd2= &gd->part[MINOR(req->rq_dev)];
+ }
+}
+
+/*
+ * Round off the performance stats on an hd_struct.
+ *
+ * The average IO queue length and utilisation statistics are maintained
+ * by observing the current state of the queue length and the amount of
+ * time it has been in this state for.
+ * Normally, that accounting is done on IO completion, but that can result
+ * in more than a second's worth of IO being accounted for within any one
+ * second, leading to >100% utilisation. To deal with that, we do a
+ * round-off before returning the results when reading /proc/partitions,
+ * accounting immediately for all queue usage up to the current jiffies and
+ * restarting the counters again.
+ */
+void disk_round_stats(struct hd_struct *hd)
+{
+ unsigned long now = jiffies;
+
+ hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
+ hd->last_queue_change = now;
+
+ if (hd->ios_in_flight)
+ hd->io_ticks += (now - hd->last_idle_time);
+ hd->last_idle_time = now;
+}
+
+static inline void down_ios(struct hd_struct *hd)
+{
+ disk_round_stats(hd);
+ --hd->ios_in_flight;
+}
+
+static inline void up_ios(struct hd_struct *hd)
+{
+ disk_round_stats(hd);
+ ++hd->ios_in_flight;
+}
+
+static void account_io_start(struct hd_struct *hd, struct request *req,
+ int merge, int sectors)
+{
+ switch (req->cmd) {
+ case READ:
+ if (merge)
+ hd->rd_merges++;
+ hd->rd_sectors += sectors;
+ break;
+ case WRITE:
+ if (merge)
+ hd->wr_merges++;
+ hd->wr_sectors += sectors;
+ break;
+ }
+ if (!merge)
+ up_ios(hd);
+}
+
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+ unsigned long duration = jiffies - req->start_time;
+ switch (req->cmd) {
+ case READ:
+ hd->rd_ticks += duration;
+ hd->rd_ios++;
+ break;
+ case WRITE:
+ hd->wr_ticks += duration;
+ hd->wr_ios++;
+ break;
+ }
+ down_ios(hd);
+}
+
+void req_new_io(struct request *req, int merge, int sectors)
+{
+ struct hd_struct *hd1, *hd2;
+
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ account_io_start(hd1, req, merge, sectors);
+ if (hd2)
+ account_io_start(hd2, req, merge, sectors);
+}
+
+void req_merged_io(struct request *req)
+{
+ struct hd_struct *hd1, *hd2;
+
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ down_ios(hd1);
+ if (hd2)
+ down_ios(hd2);
+}
+
+void req_finished_io(struct request *req)
+{
+ struct hd_struct *hd1, *hd2;
+
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ account_io_end(hd1, req);
+ if (hd2)
+ account_io_end(hd2, req);
+}
+EXPORT_SYMBOL(req_finished_io);
+#endif /* CONFIG_BLK_STATS */
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+ struct list_head *insert_here)
+{
+ drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+ if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+ spin_unlock_irq(&io_request_lock);
+ BUG();
+ }
+
+ /*
+ * elevator indicated where it wants this request to be
+ * inserted at elevator_merge time
+ */
+ list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+void blkdev_release_request(struct request *req)
+{
+ request_queue_t *q = req->q;
+
+ req->rq_status = RQ_INACTIVE;
+ req->q = NULL;
+
+ /*
+ * Request may not have originated from ll_rw_blk. if not,
+ * assume it has free buffers and check waiters
+ */
+ if (q) {
+ struct request_list *rl = &q->rq;
+ int oversized_batch = 0;
+
+ if (q->can_throttle)
+ oversized_batch = blk_oversized_queue_batch(q);
+ rl->count++;
+ /*
+ * paranoia check
+ */
+ if (req->cmd == READ || req->cmd == WRITE)
+ rl->pending[req->cmd]--;
+ if (rl->pending[READ] > q->nr_requests)
+ printk("blk: reads: %u\n", rl->pending[READ]);
+ if (rl->pending[WRITE] > q->nr_requests)
+ printk("blk: writes: %u\n", rl->pending[WRITE]);
+ if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
+ printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
+ list_add(&req->queue, &rl->free);
+ if (rl->count >= q->batch_requests && !oversized_batch) {
+ smp_mb();
+ if (waitqueue_active(&q->wait_for_requests))
+ wake_up(&q->wait_for_requests);
+ }
+ }
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct request *next;
+
+ next = blkdev_next_request(req);
+ if (req->sector + req->nr_sectors != next->sector)
+ return;
+ if (req->cmd != next->cmd
+ || req->rq_dev != next->rq_dev
+ || req->nr_sectors + next->nr_sectors > max_sectors
+ || next->waiting)
+ return;
+ /*
+ * If we are not allowed to merge these requests, then
+ * return. If we are allowed to merge, then the count
+ * will have been updated to the appropriate number,
+ * and we shouldn't do it here too.
+ */
+ if (!q->merge_requests_fn(q, req, next, max_segments))
+ return;
+
+ q->elevator.elevator_merge_req_fn(req, next);
+ req->bhtail->b_reqnext = next->bh;
+ req->bhtail = next->bhtail;
+ req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+ list_del(&next->queue);
+
+ /* One last thing: we have removed a request, so we now have one
+ less expected IO to complete for accounting purposes. */
+ req_merged_io(req);
+
+ blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ if (&req->queue == q->queue_head.prev)
+ return;
+ attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+ struct list_head * head,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct list_head * prev;
+
+ prev = req->queue.prev;
+ if (head == prev)
+ return;
+ attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+ struct buffer_head * bh)
+{
+ unsigned int sector, count, sync;
+ int max_segments = MAX_SEGMENTS;
+ struct request * req, *freereq = NULL;
+ int rw_ahead, max_sectors, el_ret;
+ struct list_head *head, *insert_here;
+ int latency;
+ elevator_t *elevator = &q->elevator;
+ int should_wake = 0;
+
+ count = bh->b_size >> 9;
+ sector = bh->b_rsector;
+ sync = test_and_clear_bit(BH_Sync, &bh->b_state);
+
+ rw_ahead = 0; /* normal case; gets changed below for READA */
+ switch (rw) {
+ case READA:
+#if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */
+ rw_ahead = 1;
+#endif
+ rw = READ; /* drop into READ */
+ case READ:
+ case WRITE:
+ latency = elevator_request_latency(elevator, rw);
+ break;
+ default:
+ BUG();
+ goto end_io;
+ }
+
+ /* We'd better have a real physical mapping!
+ Check this bit only if the buffer was dirty and just locked
+ down by us so at this point flushpage will block and
+ won't clear the mapped bit under us. */
+ if (!buffer_mapped(bh))
+ BUG();
+
+ /*
+ * Temporary solution - in 2.5 this will be done by the lowlevel
+ * driver. Create a bounce buffer if the buffer data points into
+ * high memory - keep the original buffer otherwise.
+ */
+ bh = blk_queue_bounce(q, rw, bh);
+
+/* look for a free request. */
+ /*
+ * Try to coalesce the new request with old requests
+ */
+ max_sectors = get_max_sectors(bh->b_rdev);
+
+ req = NULL;
+ head = &q->queue_head;
+ /*
+ * Now we acquire the request spinlock, we have to be mega careful
+ * not to schedule or do something nonatomic
+ */
+ spin_lock_irq(&io_request_lock);
+
+again:
+ insert_here = head->prev;
+
+ if (list_empty(head)) {
+ q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+ goto get_rq;
+ } else if (q->head_active && !q->plugged)
+ head = head->next;
+
+ el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+ switch (el_ret) {
+
+ case ELEVATOR_BACK_MERGE:
+ if (!q->back_merge_fn(q, req, bh, max_segments)) {
+ insert_here = &req->queue;
+ break;
+ }
+ req->bhtail->b_reqnext = bh;
+ req->bhtail = bh;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ blk_started_sectors(req, count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ req_new_io(req, 1, count);
+ attempt_back_merge(q, req, max_sectors, max_segments);
+ goto out;
+
+ case ELEVATOR_FRONT_MERGE:
+ if (!q->front_merge_fn(q, req, bh, max_segments)) {
+ insert_here = req->queue.prev;
+ break;
+ }
+ bh->b_reqnext = req->bh;
+ req->bh = bh;
+ /*
+ * may not be valid, but queues not having bounce
+ * enabled for highmem pages must not look at
+ * ->buffer anyway
+ */
+ req->buffer = bh->b_data;
+ req->current_nr_sectors = req->hard_cur_sectors = count;
+ req->sector = req->hard_sector = sector;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ blk_started_sectors(req, count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ req_new_io(req, 1, count);
+ attempt_front_merge(q, head, req, max_sectors, max_segments);
+ goto out;
+
+ /*
+ * elevator says don't/can't merge. get new request
+ */
+ case ELEVATOR_NO_MERGE:
+ /*
+ * use elevator hints as to where to insert the
+ * request. if no hints, just add it to the back
+ * of the queue
+ */
+ if (req)
+ insert_here = &req->queue;
+ break;
+
+ default:
+ printk("elevator returned crap (%d)\n", el_ret);
+ BUG();
+ }
+
+get_rq:
+ if (freereq) {
+ req = freereq;
+ freereq = NULL;
+ } else {
+ /*
+ * See description above __get_request_wait()
+ */
+ if (rw_ahead) {
+ if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
+ spin_unlock_irq(&io_request_lock);
+ goto end_io;
+ }
+ req = get_request(q, rw);
+ if (req == NULL)
+ BUG();
+ } else {
+ req = get_request(q, rw);
+ if (req == NULL) {
+ spin_unlock_irq(&io_request_lock);
+ freereq = __get_request_wait(q, rw);
+ head = &q->queue_head;
+ spin_lock_irq(&io_request_lock);
+ should_wake = 1;
+ goto again;
+ }
+ }
+ }
+
+/* fill up the request-info, and add it to the queue */
+ req->elevator_sequence = latency;
+ req->cmd = rw;
+ req->errors = 0;
+ req->hard_sector = req->sector = sector;
+ req->hard_nr_sectors = req->nr_sectors = count;
+ req->current_nr_sectors = req->hard_cur_sectors = count;
+ req->nr_segments = 1; /* Always 1 for a new request. */
+ req->nr_hw_segments = 1; /* Always 1 for a new request. */
+ req->buffer = bh->b_data;
+ req->waiting = NULL;
+ req->bh = bh;
+ req->bhtail = bh;
+ req->rq_dev = bh->b_rdev;
+ req->start_time = jiffies;
+ req_new_io(req, 0, count);
+ blk_started_io(count);
+ blk_started_sectors(req, count);
+ add_request(q, req, insert_here);
+out:
+ if (freereq)
+ blkdev_release_request(freereq);
+ if (should_wake)
+ get_request_wait_wakeup(q, rw);
+ if (sync)
+ __generic_unplug_device(q);
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to it's device driver for I/O
+ * @rw: READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh: The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value. The
+ * %READ and %WRITE options are (hopefully) obvious in meaning. The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status. The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) else where.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled. BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector. So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+ int major = MAJOR(bh->b_rdev);
+ int minorsize = 0;
+ request_queue_t *q;
+
+ if (!bh->b_end_io)
+ BUG();
+
+ /* Test device size, when known. */
+ if (blk_size[major])
+ minorsize = blk_size[major][MINOR(bh->b_rdev)];
+ if (minorsize) {
+ unsigned long maxsector = (minorsize << 1) + 1;
+ unsigned long sector = bh->b_rsector;
+ unsigned int count = bh->b_size >> 9;
+
+ if (maxsector < count || maxsector - count < sector) {
+ /* Yecch */
+ bh->b_state &= ~(1 << BH_Dirty);
+
+ /* This may well happen - the kernel calls bread()
+ without checking the size of the device, e.g.,
+ when mounting a device. */
+ printk(KERN_INFO
+ "attempt to access beyond end of device\n");
+ printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+ kdevname(bh->b_rdev), rw,
+ (sector + count)>>1, minorsize);
+
+ bh->b_end_io(bh, 0);
+ return;
+ }
+ }
+
+ /*
+ * Resolve the mapping until finished. (drivers are
+ * still free to implement/resolve their own stacking
+ * by explicitly returning 0)
+ */
+ /* NOTE: we don't repeat the blk_size check for each new device.
+ * Stacking drivers are expected to know what they are doing.
+ */
+ do {
+ q = blk_get_queue(bh->b_rdev);
+ if (!q) {
+ printk(KERN_ERR
+ "generic_make_request: Trying to access "
+ "nonexistent block-device %s (%ld)\n",
+ kdevname(bh->b_rdev), bh->b_rsector);
+ buffer_IO_error(bh);
+ break;
+ }
+ } while (q->make_request_fn(q, rw, bh));
+}
+
+
+/**
+ * submit_bh: submit a buffer_head to the block device later for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+ int count = bh->b_size >> 9;
+
+ if (!test_bit(BH_Lock, &bh->b_state))
+ BUG();
+
+ set_bit(BH_Req, &bh->b_state);
+ set_bit(BH_Launder, &bh->b_state);
+
+ /*
+ * First step, 'identity mapping' - RAID or LVM might
+ * further remap this.
+ */
+ bh->b_rdev = bh->b_dev;
+ bh->b_rsector = bh->b_blocknr * count;
+
+ get_bh(bh);
+ generic_make_request(rw, bh);
+
+ /* fix race condition with wait_on_buffer() */
+ smp_mb(); /* spin_unlock may have inclusive semantics */
+ if (waitqueue_active(&bh->b_wait))
+ wake_up(&bh->b_wait);
+
+ if (block_dump)
+ printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev));
+
+ put_bh(bh);
+ switch (rw) {
+ case WRITE:
+ kstat.pgpgout += count;
+ break;
+ default:
+ kstat.pgpgin += count;
+ break;
+ }
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality that is not in
+ * generic_make_request() that is relevant to buffers in the buffer
+ * cache or page cache. In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing read request. Further it marks
+ * as clean buffers that are processed for writing (the buffer cache
+ * wont assume that they are actually clean until the buffer gets
+ * unlocked).
+ *
+ * ll_rw_block sets b_end_io to simple completion handler that marks
+ * the buffer up-to-date (if approriate), unlocks the buffer and wakes
+ * any waiters. As client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ * All of the buffers must be for the same device, and must also be
+ * of the current approved size for the device. */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+ unsigned int major;
+ int correct_size;
+ int i;
+
+ if (!nr)
+ return;
+
+ major = MAJOR(bhs[0]->b_dev);
+
+ /* Determine correct block size for this device. */
+ correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+ /* Verify requested block sizes. */
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+ if (bh->b_size % correct_size) {
+ printk(KERN_NOTICE "ll_rw_block: device %s: "
+ "only %d-char blocks implemented (%u)\n",
+ kdevname(bhs[0]->b_dev),
+ correct_size, bh->b_size);
+ goto sorry;
+ }
+ }
+
+ if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+ printk(KERN_NOTICE "Can't write to read-only device %s\n",
+ kdevname(bhs[0]->b_dev));
+ goto sorry;
+ }
+
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+ /* Only one thread can actually submit the I/O. */
+ if (test_and_set_bit(BH_Lock, &bh->b_state))
+ continue;
+
+ /* We have the buffer lock */
+ atomic_inc(&bh->b_count);
+ bh->b_end_io = end_buffer_io_sync;
+
+ switch(rw) {
+ case WRITE:
+ if (!atomic_set_buffer_clean(bh))
+ /* Hmmph! Nothing to write */
+ goto end_io;
+ __mark_buffer_clean(bh);
+ break;
+
+ case READA:
+ case READ:
+ if (buffer_uptodate(bh))
+ /* Hmmph! Already have it */
+ goto end_io;
+ break;
+ default:
+ BUG();
+ end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ continue;
+ }
+
+ submit_bh(rw, bh);
+ }
+ return;
+
+sorry:
+ /* Make sure we don't get infinite dirty retries.. */
+ for (i = 0; i < nr; i++)
+ mark_buffer_clean(bhs[i]);
+}
+
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+static void blk_writeback_timer(unsigned long data)
+{
+ wakeup_bdflush();
+ wakeup_kupdate();
+}
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req: the request being processed
+ * @uptodate: 0 for I/O error
+ * @name: the name printed for an I/O error
+ *
+ * Description:
+ * Ends I/O on the first buffer attached to @req, and sets it up
+ * for the next buffer_head (if any) in the cluster.
+ *
+ * Return:
+ * 0 - we are done with this request, call end_that_request_last()
+ * 1 - still buffers pending for this request
+ *
+ * Caveat:
+ * Drivers implementing their own end_request handling must call
+ * blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+ struct buffer_head * bh;
+ int nsect;
+
+ req->errors = 0;
+ if (!uptodate)
+ printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+ kdevname(req->rq_dev), name, req->sector);
+
+ if ((bh = req->bh) != NULL) {
+ nsect = bh->b_size >> 9;
+ blk_finished_io(nsect);
+ blk_finished_sectors(req, nsect);
+ req->bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector = req->hard_sector;
+ req->nr_sectors = req->hard_nr_sectors;
+
+ req->current_nr_sectors = bh->b_size >> 9;
+ req->hard_cur_sectors = req->current_nr_sectors;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("end_request: buffer-list destroyed\n");
+ }
+ req->buffer = bh->b_data;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+extern int laptop_mode;
+
+void end_that_request_last(struct request *req)
+{
+ struct completion *waiting = req->waiting;
+
+ /*
+ * schedule the writeout of pending dirty data when the disk is idle
+ */
+ if (laptop_mode && req->cmd == READ)
+ mod_timer(&writeback_timer, jiffies + 5 * HZ);
+
+ req_finished_io(req);
+ blkdev_release_request(req);
+ if (waiting)
+ complete(waiting);
+}
+
+int __init blk_dev_init(void)
+{
+ struct blk_dev_struct *dev;
+
+ request_cachep = kmem_cache_create("blkdev_requests",
+ sizeof(struct request),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ if (!request_cachep)
+ panic("Can't create request pool slab cache\n");
+
+ for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+ dev->queue = NULL;
+
+ memset(ro_bits,0,sizeof(ro_bits));
+ memset(max_readahead, 0, sizeof(max_readahead));
+ memset(max_sectors, 0, sizeof(max_sectors));
+
+ blk_max_low_pfn = max_low_pfn - 1;
+ blk_max_pfn = max_pfn - 1;
+
+ init_timer(&writeback_timer);
+ writeback_timer.function = blk_writeback_timer;
+
+#ifdef CONFIG_AMIGA_Z2RAM
+ z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+ stram_device_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+ isp16_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+ ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+ xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+ mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+ { extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+ swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+ swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+ amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+ atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+ floppy_init();
+#else
+#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */
+ outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+ cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+ acsi_init();
+#endif
+#ifdef CONFIG_MCD
+ mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+ mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+ sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+ aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+ sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+ gscd_init();
+#endif
+#ifdef CONFIG_CM206
+ cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+ optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+ sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+ ap_init();
+#endif
+#ifdef CONFIG_DDV
+ ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+ mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+ dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+ tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+ xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+ jsfd_init();
+#endif
+
+#ifdef CONFIG_XENOLINUX_BLOCK
+ xlblk_init();
+#endif
+
+ return 0;
+};
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_grow_request_list);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_throttle_sectors);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(generic_unplug_device);
+EXPORT_SYMBOL(blk_queue_bounce_limit);
+EXPORT_SYMBOL(blk_max_low_pfn);
+EXPORT_SYMBOL(blk_max_pfn);
+EXPORT_SYMBOL(blk_seg_merge_ok);
+EXPORT_SYMBOL(blk_nohighio);
diff --git a/xenolinux-2.4.23-sparse/drivers/char/mem.c b/xenolinux-2.4.23-sparse/drivers/char/mem.c
new file mode 100644
index 0000000000..73e6d48713
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/drivers/char/mem.c
@@ -0,0 +1,815 @@
+/*
+ * linux/drivers/char/mem.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Added devfs support.
+ * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
+ * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
+ *
+ * MODIFIED FOR XENOLINUX by Keir Fraser, 10th July 2003.
+ * Xenolinux has strange semantics for /dev/mem and /dev/kmem!!
+ * 1. mmap will not work on /dev/kmem
+ * 2. mmap on /dev/mem interprets the 'file offset' as a machine address
+ * rather than a physical address.
+ * I don't believe anyone sane mmaps /dev/kmem, but /dev/mem is mmapped
+ * to get at memory-mapped I/O spaces (eg. the VESA X server does this).
+ * For this to work at all we need to expect machine addresses.
+ * Reading/writing of /dev/kmem expects kernel virtual addresses, as usual.
+ * Reading/writing of /dev/mem expects 'physical addresses' as usual -- this
+ * is because /dev/mem can only read/write existing kernel mappings, which
+ * will be normal RAM, and we should present pseudo-physical layout for all
+ * except I/O (which is the sticky case that mmap is hacked to deal with).
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/tpqic02.h>
+#include <linux/ftape.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/raw.h>
+#include <linux/tty.h>
+#include <linux/capability.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+
+#ifdef CONFIG_I2C
+extern int i2c_init_all(void);
+#endif
+#ifdef CONFIG_FB
+extern void fbmem_init(void);
+#endif
+#ifdef CONFIG_PROM_CONSOLE
+extern void prom_con_init(void);
+#endif
+#ifdef CONFIG_MDA_CONSOLE
+extern void mda_console_init(void);
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
+extern void tapechar_init(void);
+#endif
+
+static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp,
+ const char * buf, size_t count, loff_t *ppos)
+{
+ ssize_t written;
+
+ written = 0;
+#if defined(__sparc__) || defined(__mc68000__)
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (realp < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE-realp;
+ if (sz > count) sz = count;
+ /* Hmm. Do something? */
+ buf+=sz;
+ p+=sz;
+ count-=sz;
+ written+=sz;
+ }
+#endif
+ if (copy_from_user(p, buf, count))
+ return -EFAULT;
+ written += count;
+ *ppos += written;
+ return written;
+}
+
+
+/*
+ * This funcion reads the *physical* memory. The f_pos points directly to the
+ * memory location.
+ */
+static ssize_t read_mem(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ unsigned long end_mem;
+ ssize_t read;
+
+ end_mem = __pa(high_memory);
+ if (p >= end_mem)
+ return 0;
+ if (count > end_mem - p)
+ count = end_mem - p;
+ read = 0;
+#if defined(__sparc__) || defined(__mc68000__)
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (p < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE-p;
+ if (sz > count)
+ sz = count;
+ if (sz > 0) {
+ if (clear_user(buf, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+ }
+#endif
+ if (copy_to_user(buf, __va(p), count))
+ return -EFAULT;
+ read += count;
+ *ppos += read;
+ return read;
+}
+
+static ssize_t write_mem(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ unsigned long end_mem;
+
+ end_mem = __pa(high_memory);
+ if (p >= end_mem)
+ return 0;
+ if (count > end_mem - p)
+ count = end_mem - p;
+ return do_write_mem(file, __va(p), p, buf, count, ppos);
+}
+
+#ifndef pgprot_noncached
+
+/*
+ * This should probably be per-architecture in <asm/pgtable.h>
+ */
+static inline pgprot_t pgprot_noncached(pgprot_t _prot)
+{
+ unsigned long prot = pgprot_val(_prot);
+
+#if defined(__i386__) || defined(__x86_64__)
+ /* On PPro and successors, PCD alone doesn't always mean
+ uncached because of interactions with the MTRRs. PCD | PWT
+ means definitely uncached. */
+ if (boot_cpu_data.x86 > 3)
+ prot |= _PAGE_PCD | _PAGE_PWT;
+#elif defined(__powerpc__)
+ prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+#elif defined(__mc68000__)
+#ifdef SUN3_PAGE_NOCACHE
+ if (MMU_IS_SUN3)
+ prot |= SUN3_PAGE_NOCACHE;
+ else
+#endif
+ if (MMU_IS_851 || MMU_IS_030)
+ prot |= _PAGE_NOCACHE030;
+ /* Use no-cache mode, serialized */
+ else if (MMU_IS_040 || MMU_IS_060)
+ prot = (prot & _CACHEMASK040) | _PAGE_NOCACHE_S;
+#endif
+
+ return __pgprot(prot);
+}
+
+#endif /* !pgprot_noncached */
+
+/*
+ * Architectures vary in how they handle caching for addresses
+ * outside of main memory.
+ */
+static inline int noncached_address(unsigned long addr)
+{
+#if defined(__i386__)
+ /*
+ * On the PPro and successors, the MTRRs are used to set
+ * memory types for physical addresses outside main memory,
+ * so blindly setting PCD or PWT on those pages is wrong.
+ * For Pentiums and earlier, the surround logic should disable
+ * caching for the high addresses through the KEN pin, but
+ * we maintain the tradition of paranoia in this code.
+ */
+ return !( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) ||
+ test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) ||
+ test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) ||
+ test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) )
+ && addr >= __pa(high_memory);
+#else
+ return addr >= __pa(high_memory);
+#endif
+}
+
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+#if defined(CONFIG_XENO) && defined(CONFIG_XENO_PRIV)
+ if (!(start_info.flags & SIF_PRIVILEGED))
+ return -ENXIO;
+
+ /* DONTCOPY is essential for Xenolinux as copy_page_range is broken. */
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset,
+ vma->vm_end-vma->vm_start, vma->vm_page_prot))
+ return -EAGAIN;
+ return 0;
+#elif defined(CONFIG_XENO)
+ return -ENXIO;
+#else
+ /*
+ * Accessing memory above the top the kernel knows about or
+ * through a file pointer that was marked O_SYNC will be
+ * done non-cached.
+ */
+ if (noncached_address(offset) || (file->f_flags & O_SYNC))
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ /* Don't try to swap out physical pages.. */
+ vma->vm_flags |= VM_RESERVED;
+
+ /*
+ * Don't dump addresses that are not real memory to a core file.
+ */
+ if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC))
+ vma->vm_flags |= VM_IO;
+
+ if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start,
+ vma->vm_page_prot))
+ return -EAGAIN;
+ return 0;
+#endif
+}
+
+/*
+ * This function reads the *virtual* memory as seen by the kernel.
+ */
+static ssize_t read_kmem(struct file *file, char *buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ ssize_t read = 0;
+ ssize_t virtr = 0;
+ char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
+
+ if (p < (unsigned long) high_memory) {
+ read = count;
+ if (count > (unsigned long) high_memory - p)
+ read = (unsigned long) high_memory - p;
+
+#if defined(__sparc__) || defined(__mc68000__)
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (p < PAGE_SIZE && read > 0) {
+ size_t tmp = PAGE_SIZE - p;
+ if (tmp > read) tmp = read;
+ if (clear_user(buf, tmp))
+ return -EFAULT;
+ buf += tmp;
+ p += tmp;
+ read -= tmp;
+ count -= tmp;
+ }
+#endif
+ if (copy_to_user(buf, (char *)p, read))
+ return -EFAULT;
+ p += read;
+ buf += read;
+ count -= read;
+ }
+
+ if (count > 0) {
+ kbuf = (char *)__get_free_page(GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+ while (count > 0) {
+ int len = count;
+
+ if (len > PAGE_SIZE)
+ len = PAGE_SIZE;
+ len = vread(kbuf, (char *)p, len);
+ if (!len)
+ break;
+ if (copy_to_user(buf, kbuf, len)) {
+ free_page((unsigned long)kbuf);
+ return -EFAULT;
+ }
+ count -= len;
+ buf += len;
+ virtr += len;
+ p += len;
+ }
+ free_page((unsigned long)kbuf);
+ }
+ *ppos = p;
+ return virtr + read;
+}
+
+extern long vwrite(char *buf, char *addr, unsigned long count);
+
+/*
+ * This function writes to the *virtual* memory as seen by the kernel.
+ */
+static ssize_t write_kmem(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ ssize_t wrote = 0;
+ ssize_t virtr = 0;
+ char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
+
+ if (p < (unsigned long) high_memory) {
+ wrote = count;
+ if (count > (unsigned long) high_memory - p)
+ wrote = (unsigned long) high_memory - p;
+
+ wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos);
+
+ p += wrote;
+ buf += wrote;
+ count -= wrote;
+ }
+
+ if (count > 0) {
+ kbuf = (char *)__get_free_page(GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+ while (count > 0) {
+ int len = count;
+
+ if (len > PAGE_SIZE)
+ len = PAGE_SIZE;
+ if (len && copy_from_user(kbuf, buf, len)) {
+ free_page((unsigned long)kbuf);
+ return -EFAULT;
+ }
+ len = vwrite(kbuf, (char *)p, len);
+ count -= len;
+ buf += len;
+ virtr += len;
+ p += len;
+ }
+ free_page((unsigned long)kbuf);
+ }
+
+ *ppos = p;
+ return virtr + wrote;
+}
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static ssize_t read_port(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long i = *ppos;
+ char *tmp = buf;
+
+ if (verify_area(VERIFY_WRITE,buf,count))
+ return -EFAULT;
+ while (count-- > 0 && i < 65536) {
+ if (__put_user(inb(i),tmp) < 0)
+ return -EFAULT;
+ i++;
+ tmp++;
+ }
+ *ppos = i;
+ return tmp-buf;
+}
+
+static ssize_t write_port(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long i = *ppos;
+ const char * tmp = buf;
+
+ if (verify_area(VERIFY_READ,buf,count))
+ return -EFAULT;
+ while (count-- > 0 && i < 65536) {
+ char c;
+ if (__get_user(c, tmp))
+ return -EFAULT;
+ outb(c,i);
+ i++;
+ tmp++;
+ }
+ *ppos = i;
+ return tmp-buf;
+}
+#endif
+
+static ssize_t read_null(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ return 0;
+}
+
+static ssize_t write_null(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
+/*
+ * For fun, we are using the MMU for this.
+ */
+static inline size_t read_zero_pagealigned(char * buf, size_t size)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct * vma;
+ unsigned long addr=(unsigned long)buf;
+
+ mm = current->mm;
+ /* Oops, this was forgotten before. -ben */
+ down_read(&mm->mmap_sem);
+
+ /* For private mappings, just map in zero pages. */
+ for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+ unsigned long count;
+
+ if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
+ goto out_up;
+ if (vma->vm_flags & VM_SHARED)
+ break;
+#if defined(CONFIG_XENO_PRIV)
+ if (vma->vm_flags & VM_IO)
+ break;
+#endif
+ count = vma->vm_end - addr;
+ if (count > size)
+ count = size;
+
+ zap_page_range(mm, addr, count);
+ zeromap_page_range(addr, count, PAGE_COPY);
+
+ size -= count;
+ buf += count;
+ addr += count;
+ if (size == 0)
+ goto out_up;
+ }
+
+ up_read(&mm->mmap_sem);
+
+ /* The shared case is hard. Let's do the conventional zeroing. */
+ do {
+ unsigned long unwritten = clear_user(buf, PAGE_SIZE);
+ if (unwritten)
+ return size + unwritten - PAGE_SIZE;
+ if (current->need_resched)
+ schedule();
+ buf += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ } while (size);
+
+ return size;
+out_up:
+ up_read(&mm->mmap_sem);
+ return size;
+}
+
+static ssize_t read_zero(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long left, unwritten, written = 0;
+
+ if (!count)
+ return 0;
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ left = count;
+
+ /* do we want to be clever? Arbitrary cut-off */
+ if (count >= PAGE_SIZE*4) {
+ unsigned long partial;
+
+ /* How much left of the page? */
+ partial = (PAGE_SIZE-1) & -(unsigned long) buf;
+ unwritten = clear_user(buf, partial);
+ written = partial - unwritten;
+ if (unwritten)
+ goto out;
+ left -= partial;
+ buf += partial;
+ unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
+ written += (left & PAGE_MASK) - unwritten;
+ if (unwritten)
+ goto out;
+ buf += left & PAGE_MASK;
+ left &= ~PAGE_MASK;
+ }
+ unwritten = clear_user(buf, left);
+ written += left - unwritten;
+out:
+ return written ? written : -EFAULT;
+}
+
+static int mmap_zero(struct file * file, struct vm_area_struct * vma)
+{
+ if (vma->vm_flags & VM_SHARED)
+ return shmem_zero_setup(vma);
+ if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
+ return -EAGAIN;
+ return 0;
+}
+
+static ssize_t write_full(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ return -ENOSPC;
+}
+
+/*
+ * Special lseek() function for /dev/null and /dev/zero. Most notably, you
+ * can fopen() both devices with "a" now. This was previously impossible.
+ * -- SRB.
+ */
+
+static loff_t null_lseek(struct file * file, loff_t offset, int orig)
+{
+ return file->f_pos = 0;
+}
+
+/*
+ * The memory devices use the full 32/64 bits of the offset, and so we cannot
+ * check against negative addresses: they are ok. The return value is weird,
+ * though, in that case (0).
+ *
+ * also note that seeking relative to the "end of file" isn't supported:
+ * it has no meaning, so it returns -EINVAL.
+ */
+static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+{
+ loff_t ret;
+
+ switch (orig) {
+ case 0:
+ file->f_pos = offset;
+ ret = file->f_pos;
+ force_successful_syscall_return();
+ break;
+ case 1:
+ file->f_pos += offset;
+ ret = file->f_pos;
+ force_successful_syscall_return();
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
+static int open_port(struct inode * inode, struct file * filp)
+{
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write)
+{
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long kaddr;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *ptep, pte;
+ struct page *page = NULL;
+
+ /* address is user VA; convert to kernel VA of desired page */
+ kaddr = (address - vma->vm_start) + offset;
+ kaddr = VMALLOC_VMADDR(kaddr);
+
+ spin_lock(&init_mm.page_table_lock);
+
+ /* Lookup page structure for kernel VA */
+ pgd = pgd_offset(&init_mm, kaddr);
+ if (pgd_none(*pgd) || pgd_bad(*pgd))
+ goto out;
+ pmd = pmd_offset(pgd, kaddr);
+ if (pmd_none(*pmd) || pmd_bad(*pmd))
+ goto out;
+ ptep = pte_offset(pmd, kaddr);
+ if (!ptep)
+ goto out;
+ pte = *ptep;
+ if (!pte_present(pte))
+ goto out;
+ if (write && !pte_write(pte))
+ goto out;
+ page = pte_page(pte);
+ if (!VALID_PAGE(page)) {
+ page = NULL;
+ goto out;
+ }
+
+ /* Increment reference count on page */
+ get_page(page);
+
+out:
+ spin_unlock(&init_mm.page_table_lock);
+
+ return page;
+}
+
+struct vm_operations_struct kmem_vm_ops = {
+ nopage: kmem_vm_nopage,
+};
+
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+#if defined(CONFIG_XENO)
+ return -ENXIO;
+#endif
+
+ /*
+ * If the user is not attempting to mmap a high memory address then
+ * the standard mmap_mem mechanism will work. High memory addresses
+ * need special handling, as remap_page_range expects a physically-
+ * contiguous range of kernel addresses (such as obtained in kmalloc).
+ */
+ if ((offset + size) < (unsigned long) high_memory)
+ return mmap_mem(file, vma);
+
+ /*
+ * Accessing memory above the top the kernel knows about or
+ * through a file pointer that was marked O_SYNC will be
+ * done non-cached.
+ */
+ if (noncached_address(offset) || (file->f_flags & O_SYNC))
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ /* Don't do anything here; "nopage" will fill the holes */
+ vma->vm_ops = &kmem_vm_ops;
+
+ /* Don't try to swap out physical pages.. */
+ vma->vm_flags |= VM_RESERVED;
+
+ /*
+ * Don't dump addresses that are not real memory to a core file.
+ */
+ vma->vm_flags |= VM_IO;
+
+ return 0;
+}
+
+#define zero_lseek null_lseek
+#define full_lseek null_lseek
+#define write_zero write_null
+#define read_full read_zero
+#define open_mem open_port
+#define open_kmem open_mem
+
+static struct file_operations mem_fops = {
+ llseek: memory_lseek,
+ read: read_mem,
+ write: write_mem,
+ mmap: mmap_mem,
+ open: open_mem,
+};
+
+static struct file_operations kmem_fops = {
+ llseek: memory_lseek,
+ read: read_kmem,
+ write: write_kmem,
+ mmap: mmap_kmem,
+ open: open_kmem,
+};
+
+static struct file_operations null_fops = {
+ llseek: null_lseek,
+ read: read_null,
+ write: write_null,
+};
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static struct file_operations port_fops = {
+ llseek: memory_lseek,
+ read: read_port,
+ write: write_port,
+ open: open_port,
+};
+#endif
+
+static struct file_operations zero_fops = {
+ llseek: zero_lseek,
+ read: read_zero,
+ write: write_zero,
+ mmap: mmap_zero,
+};
+
+static struct file_operations full_fops = {
+ llseek: full_lseek,
+ read: read_full,
+ write: write_full,
+};
+
+static int memory_open(struct inode * inode, struct file * filp)
+{
+ switch (MINOR(inode->i_rdev)) {
+ case 1:
+ filp->f_op = &mem_fops;
+ break;
+ case 2:
+ filp->f_op = &kmem_fops;
+ break;
+ case 3:
+ filp->f_op = &null_fops;
+ break;
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+ case 4:
+#if defined(CONFIG_XENO)
+#if defined(CONFIG_XENO_PRIV)
+ if (!(start_info.flags & SIF_PRIVILEGED))
+#endif
+ return -ENXIO;
+#endif
+ filp->f_op = &port_fops;
+ break;
+#endif
+ case 5:
+ filp->f_op = &zero_fops;
+ break;
+ case 7:
+ filp->f_op = &full_fops;
+ break;
+ case 8:
+ filp->f_op = &random_fops;
+ break;
+ case 9:
+ filp->f_op = &urandom_fops;
+ break;
+ default:
+ return -ENXIO;
+ }
+ if (filp->f_op && filp->f_op->open)
+ return filp->f_op->open(inode,filp);
+ return 0;
+}
+
+void __init memory_devfs_register (void)
+{
+ /* These are never unregistered */
+ static const struct {
+ unsigned short minor;
+ char *name;
+ umode_t mode;
+ struct file_operations *fops;
+ } list[] = { /* list of minor devices */
+ {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
+ {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
+ {3, "null", S_IRUGO | S_IWUGO, &null_fops},
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+ {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
+#endif
+ {5, "zero", S_IRUGO | S_IWUGO, &zero_fops},
+ {7, "full", S_IRUGO | S_IWUGO, &full_fops},
+ {8, "random", S_IRUGO | S_IWUSR, &random_fops},
+ {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops}
+ };
+ int i;
+
+ for (i=0; i<(sizeof(list)/sizeof(*list)); i++)
+ devfs_register (NULL, list[i].name, DEVFS_FL_NONE,
+ MEM_MAJOR, list[i].minor,
+ list[i].mode | S_IFCHR,
+ list[i].fops, NULL);
+}
+
+static struct file_operations memory_fops = {
+ open: memory_open, /* just a selector for the real open */
+};
+
+int __init chr_dev_init(void)
+{
+ if (devfs_register_chrdev(MEM_MAJOR,"mem",&memory_fops))
+ printk("unable to get major %d for memory devs\n", MEM_MAJOR);
+ memory_devfs_register();
+ rand_initialize();
+#ifdef CONFIG_I2C
+ i2c_init_all();
+#endif
+#if defined (CONFIG_FB)
+ fbmem_init();
+#endif
+#if defined (CONFIG_PROM_CONSOLE)
+ prom_con_init();
+#endif
+#if defined (CONFIG_MDA_CONSOLE)
+ mda_console_init();
+#endif
+ tty_init();
+#ifdef CONFIG_M68K_PRINTER
+ lp_m68k_init();
+#endif
+ misc_init();
+#if CONFIG_QIC02_TAPE
+ qic02_tape_init();
+#endif
+#ifdef CONFIG_FTAPE
+ ftape_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
+ tapechar_init();
+#endif
+ return 0;
+}
+
+__initcall(chr_dev_init);
diff --git a/xenolinux-2.4.23-sparse/drivers/char/tty_io.c b/xenolinux-2.4.23-sparse/drivers/char/tty_io.c
new file mode 100644
index 0000000000..e5334deb1d
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/drivers/char/tty_io.c
@@ -0,0 +1,2468 @@
+/*
+ * linux/drivers/char/tty_io.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+ * or rs-channels. It also implements echoing, cooked mode etc.
+ *
+ * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+ *
+ * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+ * tty_struct and tty_queue structures. Previously there was an array
+ * of 256 tty_struct's which was statically allocated, and the
+ * tty_queue structures were allocated at boot time. Both are now
+ * dynamically allocated only when the tty is open.
+ *
+ * Also restructured routines so that there is more of a separation
+ * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+ * the low-level tty routines (serial.c, pty.c, console.c). This
+ * makes for cleaner and more compact code. -TYT, 9/17/92
+ *
+ * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+ * which can be dynamically activated and de-activated by the line
+ * discipline handling modules (like SLIP).
+ *
+ * NOTE: pay no attention to the line discipline code (yet); its
+ * interface is still subject to change in this version...
+ * -- TYT, 1/31/92
+ *
+ * Added functionality to the OPOST tty handling. No delays, but all
+ * other bits should be there.
+ * -- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993.
+ *
+ * Rewrote canonical mode and added more termios flags.
+ * -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94
+ *
+ * Reorganized FASYNC support so mouse code can share it.
+ * -- ctm@ardi.com, 9Sep95
+ *
+ * New TIOCLINUX variants added.
+ * -- mj@k332.feld.cvut.cz, 19-Nov-95
+ *
+ * Restrict vt switching via ioctl()
+ * -- grif@cs.ucr.edu, 5-Dec-95
+ *
+ * Move console and virtual terminal code to more appropriate files,
+ * implement CONFIG_VT and generalize console device interface.
+ * -- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97
+ *
+ * Rewrote init_dev and release_dev to eliminate races.
+ * -- Bill Hawes <whawes@star.net>, June 97
+ *
+ * Added devfs support.
+ * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998
+ *
+ * Added support for a Unix98-style ptmx device.
+ * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
+ *
+ * Reduced memory usage for older ARM systems
+ * -- Russell King <rmk@arm.linux.org.uk>
+ *
+ * Move do_SAK() into process context. Less stack use in devfs functions.
+ * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <linux/devpts_fs.h>
+#include <linux/file.h>
+#include <linux/console.h>
+#include <linux/timer.h>
+#include <linux/ctype.h>
+#include <linux/kd.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+
+#include <linux/kbd_kern.h>
+#include <linux/vt_kern.h>
+#include <linux/selection.h>
+#include <linux/devfs_fs_kernel.h>
+
+#include <linux/kmod.h>
+
+#ifdef CONFIG_XEN_CONSOLE
+extern void xen_console_init(void);
+#endif
+
+#ifdef CONFIG_VT
+extern void con_init_devfs (void);
+#endif
+
+extern void disable_early_printk(void);
+
+#define CONSOLE_DEV MKDEV(TTY_MAJOR,0)
+#define TTY_DEV MKDEV(TTYAUX_MAJOR,0)
+#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1)
+#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2)
+
+#undef TTY_DEBUG_HANGUP
+
+#define TTY_PARANOIA_CHECK 1
+#define CHECK_TTY_COUNT 1
+
+struct termios tty_std_termios; /* for the benefit of tty drivers */
+struct tty_driver *tty_drivers; /* linked list of tty drivers */
+struct tty_ldisc ldiscs[NR_LDISCS]; /* line disc dispatch table */
+
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver ptm_driver[]; /* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver pts_driver[]; /* Unix98 pty slaves; for /dev/ptmx */
+#endif
+
+static void initialize_tty_struct(struct tty_struct *tty);
+
+static ssize_t tty_read(struct file *, char *, size_t, loff_t *);
+static ssize_t tty_write(struct file *, const char *, size_t, loff_t *);
+static unsigned int tty_poll(struct file *, poll_table *);
+static int tty_open(struct inode *, struct file *);
+static int tty_release(struct inode *, struct file *);
+int tty_ioctl(struct inode * inode, struct file * file,
+ unsigned int cmd, unsigned long arg);
+static int tty_fasync(int fd, struct file * filp, int on);
+extern int vme_scc_init (void);
+extern long vme_scc_console_init(void);
+extern int serial167_init(void);
+extern long serial167_console_init(void);
+extern void console_8xx_init(void);
+extern void au1x00_serial_console_init(void);
+extern int rs_8xx_init(void);
+extern void mac_scc_console_init(void);
+extern void hwc_console_init(void);
+extern void hwc_tty_init(void);
+extern void con3215_init(void);
+extern void tty3215_init(void);
+extern void tub3270_con_init(void);
+extern void tub3270_init(void);
+extern void rs285_console_init(void);
+extern void sa1100_rs_console_init(void);
+extern void sgi_serial_console_init(void);
+extern void sn_sal_serial_console_init(void);
+extern void sci_console_init(void);
+extern void dec_serial_console_init(void);
+extern void tx3912_console_init(void);
+extern void tx3912_rs_init(void);
+extern void txx927_console_init(void);
+extern void txx9_rs_init(void);
+extern void txx9_serial_console_init(void);
+extern void sb1250_serial_console_init(void);
+extern void arc_console_init(void);
+
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a,b) ((a) < (b) ? (b) : (a))
+#endif
+
+static struct tty_struct *alloc_tty_struct(void)
+{
+ struct tty_struct *tty;
+
+ tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+ if (tty)
+ memset(tty, 0, sizeof(struct tty_struct));
+ return tty;
+}
+
+static inline void free_tty_struct(struct tty_struct *tty)
+{
+ kfree(tty);
+}
+
+/*
+ * This routine returns the name of tty.
+ */
+static char *
+_tty_make_name(struct tty_struct *tty, const char *name, char *buf)
+{
+ int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0;
+
+ if (!tty) /* Hmm. NULL pointer. That's fun. */
+ strcpy(buf, "NULL tty");
+ else
+ sprintf(buf, name,
+ idx + tty->driver.name_base);
+
+ return buf;
+}
+
+#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \
+ (tty)->driver.name_base)
+
+char *tty_name(struct tty_struct *tty, char *buf)
+{
+ return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf);
+}
+
+inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
+ const char *routine)
+{
+#ifdef TTY_PARANOIA_CHECK
+ static const char badmagic[] = KERN_WARNING
+ "Warning: bad magic number for tty struct (%s) in %s\n";
+ static const char badtty[] = KERN_WARNING
+ "Warning: null TTY for (%s) in %s\n";
+
+ if (!tty) {
+ printk(badtty, kdevname(device), routine);
+ return 1;
+ }
+ if (tty->magic != TTY_MAGIC) {
+ printk(badmagic, kdevname(device), routine);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+static int check_tty_count(struct tty_struct *tty, const char *routine)
+{
+#ifdef CHECK_TTY_COUNT
+ struct list_head *p;
+ int count = 0;
+
+ file_list_lock();
+ for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+ if(list_entry(p, struct file, f_list)->private_data == tty)
+ count++;
+ }
+ file_list_unlock();
+ if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+ tty->driver.subtype == PTY_TYPE_SLAVE &&
+ tty->link && tty->link->count)
+ count++;
+ if (tty->count != count) {
+ printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+ "!= #fd's(%d) in %s\n",
+ kdevname(tty->device), tty->count, count, routine);
+ return count;
+ }
+#endif
+ return 0;
+}
+
+int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
+{
+ if (disc < N_TTY || disc >= NR_LDISCS)
+ return -EINVAL;
+
+ if (new_ldisc) {
+ ldiscs[disc] = *new_ldisc;
+ ldiscs[disc].flags |= LDISC_FLAG_DEFINED;
+ ldiscs[disc].num = disc;
+ } else
+ memset(&ldiscs[disc], 0, sizeof(struct tty_ldisc));
+
+ return 0;
+}
+
+EXPORT_SYMBOL(tty_register_ldisc);
+
+/* Set the discipline of a tty line. */
+static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
+{
+ int retval = 0;
+ struct tty_ldisc o_ldisc;
+ char buf[64];
+
+ if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS))
+ return -EINVAL;
+ /* Eduardo Blanco <ejbs@cs.cs.com.uy> */
+ /* Cyrus Durgin <cider@speakeasy.org> */
+ if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED)) {
+ char modname [20];
+ sprintf(modname, "tty-ldisc-%d", ldisc);
+ request_module (modname);
+ }
+ if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED))
+ return -EINVAL;
+
+ if (tty->ldisc.num == ldisc)
+ return 0; /* We are already in the desired discipline */
+ o_ldisc = tty->ldisc;
+
+ tty_wait_until_sent(tty, 0);
+
+ /* Shutdown the current discipline. */
+ if (tty->ldisc.close)
+ (tty->ldisc.close)(tty);
+
+ /* Now set up the new line discipline. */
+ tty->ldisc = ldiscs[ldisc];
+ tty->termios->c_line = ldisc;
+ if (tty->ldisc.open)
+ retval = (tty->ldisc.open)(tty);
+ if (retval < 0) {
+ tty->ldisc = o_ldisc;
+ tty->termios->c_line = tty->ldisc.num;
+ if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) {
+ tty->ldisc = ldiscs[N_TTY];
+ tty->termios->c_line = N_TTY;
+ if (tty->ldisc.open) {
+ int r = tty->ldisc.open(tty);
+
+ if (r < 0)
+ panic("Couldn't open N_TTY ldisc for "
+ "%s --- error %d.",
+ tty_name(tty, buf), r);
+ }
+ }
+ }
+ if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc)
+ tty->driver.set_ldisc(tty);
+ return retval;
+}
+
+/*
+ * This routine returns a tty driver structure, given a device number
+ */
+struct tty_driver *get_tty_driver(kdev_t device)
+{
+ int major, minor;
+ struct tty_driver *p;
+
+ minor = MINOR(device);
+ major = MAJOR(device);
+
+ for (p = tty_drivers; p; p = p->next) {
+ if (p->major != major)
+ continue;
+ if (minor < p->minor_start)
+ continue;
+ if (minor >= p->minor_start + p->num)
+ continue;
+ return p;
+ }
+ return NULL;
+}
+
+/*
+ * If we try to write to, or set the state of, a terminal and we're
+ * not in the foreground, send a SIGTTOU. If the signal is blocked or
+ * ignored, go ahead and perform the operation. (POSIX 7.2)
+ */
+int tty_check_change(struct tty_struct * tty)
+{
+ if (current->tty != tty)
+ return 0;
+ if (tty->pgrp <= 0) {
+ printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n");
+ return 0;
+ }
+ if (current->pgrp == tty->pgrp)
+ return 0;
+ if (is_ignored(SIGTTOU))
+ return 0;
+ if (is_orphaned_pgrp(current->pgrp))
+ return -EIO;
+ (void) kill_pg(current->pgrp,SIGTTOU,1);
+ return -ERESTARTSYS;
+}
+
+static ssize_t hung_up_tty_read(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ /* Can't seek (pread) on ttys. */
+ if (ppos != &file->f_pos)
+ return -ESPIPE;
+ return 0;
+}
+
+static ssize_t hung_up_tty_write(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ /* Can't seek (pwrite) on ttys. */
+ if (ppos != &file->f_pos)
+ return -ESPIPE;
+ return -EIO;
+}
+
+/* No kernel lock held - none needed ;) */
+static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait)
+{
+ return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+}
+
+static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
+ unsigned int cmd, unsigned long arg)
+{
+ return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
+}
+
+static struct file_operations tty_fops = {
+ llseek: no_llseek,
+ read: tty_read,
+ write: tty_write,
+ poll: tty_poll,
+ ioctl: tty_ioctl,
+ open: tty_open,
+ release: tty_release,
+ fasync: tty_fasync,
+};
+
+static struct file_operations hung_up_tty_fops = {
+ llseek: no_llseek,
+ read: hung_up_tty_read,
+ write: hung_up_tty_write,
+ poll: hung_up_tty_poll,
+ ioctl: hung_up_tty_ioctl,
+ release: tty_release,
+};
+
+static spinlock_t redirect_lock = SPIN_LOCK_UNLOCKED;
+static struct file *redirect;
+/*
+ * This can be called by the "eventd" kernel thread. That is process synchronous,
+ * but doesn't hold any locks, so we need to make sure we have the appropriate
+ * locks for what we're doing..
+ */
+void do_tty_hangup(void *data)
+{
+ struct tty_struct *tty = (struct tty_struct *) data;
+ struct file * cons_filp = NULL;
+ struct file *f = NULL;
+ struct task_struct *p;
+ struct list_head *l;
+ int closecount = 0, n;
+
+ if (!tty)
+ return;
+
+ /* inuse_filps is protected by the single kernel lock */
+ lock_kernel();
+
+ spin_lock(&redirect_lock);
+ if (redirect && redirect->private_data == tty) {
+ f = redirect;
+ redirect = NULL;
+ }
+ spin_unlock(&redirect_lock);
+
+ check_tty_count(tty, "do_tty_hangup");
+ file_list_lock();
+ for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+ struct file * filp = list_entry(l, struct file, f_list);
+ if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
+ filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
+ cons_filp = filp;
+ continue;
+ }
+ if (filp->f_op != &tty_fops)
+ continue;
+ closecount++;
+ tty_fasync(-1, filp, 0); /* can't block */
+ filp->f_op = &hung_up_tty_fops;
+ }
+ file_list_unlock();
+
+ /* FIXME! What are the locking issues here? This may me overdoing things.. */
+ {
+ unsigned long flags;
+
+ save_flags(flags); cli();
+ if (tty->ldisc.flush_buffer)
+ tty->ldisc.flush_buffer(tty);
+ if (tty->driver.flush_buffer)
+ tty->driver.flush_buffer(tty);
+ if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+ tty->ldisc.write_wakeup)
+ (tty->ldisc.write_wakeup)(tty);
+ restore_flags(flags);
+ }
+
+ wake_up_interruptible(&tty->write_wait);
+ wake_up_interruptible(&tty->read_wait);
+
+ /*
+ * Shutdown the current line discipline, and reset it to
+ * N_TTY.
+ */
+ if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS)
+ *tty->termios = tty->driver.init_termios;
+ if (tty->ldisc.num != ldiscs[N_TTY].num) {
+ if (tty->ldisc.close)
+ (tty->ldisc.close)(tty);
+ tty->ldisc = ldiscs[N_TTY];
+ tty->termios->c_line = N_TTY;
+ if (tty->ldisc.open) {
+ int i = (tty->ldisc.open)(tty);
+ if (i < 0)
+ printk(KERN_ERR "do_tty_hangup: N_TTY open: "
+ "error %d\n", -i);
+ }
+ }
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if ((tty->session > 0) && (p->session == tty->session) &&
+ p->leader) {
+ send_sig(SIGHUP,p,1);
+ send_sig(SIGCONT,p,1);
+ if (tty->pgrp > 0)
+ p->tty_old_pgrp = tty->pgrp;
+ }
+ if (p->tty == tty)
+ p->tty = NULL;
+ }
+ read_unlock(&tasklist_lock);
+
+ tty->flags = 0;
+ tty->session = 0;
+ tty->pgrp = -1;
+ tty->ctrl_status = 0;
+ /*
+ * If one of the devices matches a console pointer, we
+ * cannot just call hangup() because that will cause
+ * tty->count and state->count to go out of sync.
+ * So we just call close() the right number of times.
+ */
+ if (cons_filp) {
+ if (tty->driver.close)
+ for (n = 0; n < closecount; n++)
+ tty->driver.close(tty, cons_filp);
+ } else if (tty->driver.hangup)
+ (tty->driver.hangup)(tty);
+ unlock_kernel();
+ if (f)
+ fput(f);
+}
+
+void tty_hangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+ char buf[64];
+
+ printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf));
+#endif
+ schedule_task(&tty->tq_hangup);
+}
+
+void tty_vhangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+ char buf[64];
+
+ printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
+#endif
+ do_tty_hangup((void *) tty);
+}
+
+int tty_hung_up_p(struct file * filp)
+{
+ return (filp->f_op == &hung_up_tty_fops);
+}
+
+/*
+ * This function is typically called only by the session leader, when
+ * it wants to disassociate itself from its controlling tty.
+ *
+ * It performs the following functions:
+ * (1) Sends a SIGHUP and SIGCONT to the foreground process group
+ * (2) Clears the tty from being controlling the session
+ * (3) Clears the controlling tty for all processes in the
+ * session group.
+ *
+ * The argument on_exit is set to 1 if called when a process is
+ * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ */
+void disassociate_ctty(int on_exit)
+{
+ struct tty_struct *tty = current->tty;
+ struct task_struct *p;
+ int tty_pgrp = -1;
+
+ if (tty) {
+ tty_pgrp = tty->pgrp;
+ if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)
+ tty_vhangup(tty);
+ } else {
+ if (current->tty_old_pgrp) {
+ kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);
+ kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);
+ }
+ return;
+ }
+ if (tty_pgrp > 0) {
+ kill_pg(tty_pgrp, SIGHUP, on_exit);
+ if (!on_exit)
+ kill_pg(tty_pgrp, SIGCONT, on_exit);
+ }
+
+ current->tty_old_pgrp = 0;
+ tty->session = 0;
+ tty->pgrp = -1;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ if (p->session == current->session)
+ p->tty = NULL;
+ read_unlock(&tasklist_lock);
+}
+
+void stop_tty(struct tty_struct *tty)
+{
+ if (tty->stopped)
+ return;
+ tty->stopped = 1;
+ if (tty->link && tty->link->packet) {
+ tty->ctrl_status &= ~TIOCPKT_START;
+ tty->ctrl_status |= TIOCPKT_STOP;
+ wake_up_interruptible(&tty->link->read_wait);
+ }
+ if (tty->driver.stop)
+ (tty->driver.stop)(tty);
+}
+
+void start_tty(struct tty_struct *tty)
+{
+ if (!tty->stopped || tty->flow_stopped)
+ return;
+ tty->stopped = 0;
+ if (tty->link && tty->link->packet) {
+ tty->ctrl_status &= ~TIOCPKT_STOP;
+ tty->ctrl_status |= TIOCPKT_START;
+ wake_up_interruptible(&tty->link->read_wait);
+ }
+ if (tty->driver.start)
+ (tty->driver.start)(tty);
+ if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+ tty->ldisc.write_wakeup)
+ (tty->ldisc.write_wakeup)(tty);
+ wake_up_interruptible(&tty->write_wait);
+}
+
+static ssize_t tty_read(struct file * file, char * buf, size_t count,
+ loff_t *ppos)
+{
+ int i;
+ struct tty_struct * tty;
+ struct inode *inode;
+
+ /* Can't seek (pread) on ttys. */
+ if (ppos != &file->f_pos)
+ return -ESPIPE;
+
+ tty = (struct tty_struct *)file->private_data;
+ inode = file->f_dentry->d_inode;
+ if (tty_paranoia_check(tty, inode->i_rdev, "tty_read"))
+ return -EIO;
+ if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
+ return -EIO;
+
+ /* This check not only needs to be done before reading, but also
+ whenever read_chan() gets woken up after sleeping, so I've
+ moved it to there. This should only be done for the N_TTY
+ line discipline, anyway. Same goes for write_chan(). -- jlc. */
+#if 0
+ if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */
+ (tty->pgrp > 0) &&
+ (current->tty == tty) &&
+ (tty->pgrp != current->pgrp))
+ if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp))
+ return -EIO;
+ else {
+ (void) kill_pg(current->pgrp, SIGTTIN, 1);
+ return -ERESTARTSYS;
+ }
+#endif
+ lock_kernel();
+ if (tty->ldisc.read)
+ i = (tty->ldisc.read)(tty,file,buf,count);
+ else
+ i = -EIO;
+ unlock_kernel();
+ if (i > 0)
+ inode->i_atime = CURRENT_TIME;
+ return i;
+}
+
+/*
+ * Split writes up in sane blocksizes to avoid
+ * denial-of-service type attacks
+ */
+static inline ssize_t do_tty_write(
+ ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
+ struct tty_struct *tty,
+ struct file *file,
+ const unsigned char *buf,
+ size_t count)
+{
+ ssize_t ret = 0, written = 0;
+
+ if (file->f_flags & O_NONBLOCK) {
+ if (down_trylock(&tty->atomic_write))
+ return -EAGAIN;
+ }
+ else {
+ if (down_interruptible(&tty->atomic_write))
+ return -ERESTARTSYS;
+ }
+ if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) {
+ lock_kernel();
+ written = write(tty, file, buf, count);
+ unlock_kernel();
+ } else {
+ for (;;) {
+ unsigned long size = MAX(PAGE_SIZE*2,16384);
+ if (size > count)
+ size = count;
+ lock_kernel();
+ ret = write(tty, file, buf, size);
+ unlock_kernel();
+ if (ret <= 0)
+ break;
+ written += ret;
+ buf += ret;
+ count -= ret;
+ if (!count)
+ break;
+ ret = -ERESTARTSYS;
+ if (signal_pending(current))
+ break;
+ if (current->need_resched)
+ schedule();
+ }
+ }
+ if (written) {
+ file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
+ ret = written;
+ }
+ up(&tty->atomic_write);
+ return ret;
+}
+
+
+static ssize_t tty_write(struct file * file, const char * buf, size_t count,
+ loff_t *ppos)
+{
+ int is_console;
+ struct tty_struct * tty;
+ struct inode *inode = file->f_dentry->d_inode;
+
+ /* Can't seek (pwrite) on ttys. */
+ if (ppos != &file->f_pos)
+ return -ESPIPE;
+
+ /*
+ * For now, we redirect writes from /dev/console as
+ * well as /dev/tty0.
+ */
+ inode = file->f_dentry->d_inode;
+ is_console = (inode->i_rdev == SYSCONS_DEV ||
+ inode->i_rdev == CONSOLE_DEV);
+
+ if (is_console) {
+ struct file *p = NULL;
+
+ spin_lock(&redirect_lock);
+ if (redirect) {
+ get_file(redirect);
+ p = redirect;
+ }
+ spin_unlock(&redirect_lock);
+
+ if (p) {
+ ssize_t res = p->f_op->write(p, buf, count, &p->f_pos);
+ fput(p);
+ return res;
+ }
+ }
+
+ tty = (struct tty_struct *)file->private_data;
+ if (tty_paranoia_check(tty, inode->i_rdev, "tty_write"))
+ return -EIO;
+ if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags)))
+ return -EIO;
+#if 0
+ if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) &&
+ (current->tty == tty) && (tty->pgrp != current->pgrp)) {
+ if (is_orphaned_pgrp(current->pgrp))
+ return -EIO;
+ if (!is_ignored(SIGTTOU)) {
+ (void) kill_pg(current->pgrp, SIGTTOU, 1);
+ return -ERESTARTSYS;
+ }
+ }
+#endif
+ if (!tty->ldisc.write)
+ return -EIO;
+ return do_tty_write(tty->ldisc.write, tty, file,
+ (const unsigned char *)buf, count);
+}
+
+/* Semaphore to protect creating and releasing a tty */
+static DECLARE_MUTEX(tty_sem);
+
+static void down_tty_sem(int index)
+{
+ down(&tty_sem);
+}
+
+static void up_tty_sem(int index)
+{
+ up(&tty_sem);
+}
+
+static void release_mem(struct tty_struct *tty, int idx);
+
+/*
+ * WSH 06/09/97: Rewritten to remove races and properly clean up after a
+ * failed open. The new code protects the open with a semaphore, so it's
+ * really quite straightforward. The semaphore locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+static int init_dev(kdev_t device, struct tty_struct **ret_tty)
+{
+ struct tty_struct *tty, *o_tty;
+ struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
+ struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+ struct tty_driver *driver;
+ int retval=0;
+ int idx;
+
+ driver = get_tty_driver(device);
+ if (!driver)
+ return -ENODEV;
+
+ idx = MINOR(device) - driver->minor_start;
+
+ /*
+ * Check whether we need to acquire the tty semaphore to avoid
+ * race conditions. For now, play it safe.
+ */
+ down_tty_sem(idx);
+
+ /* check whether we're reopening an existing tty */
+ tty = driver->table[idx];
+ if (tty) goto fast_track;
+
+ /*
+ * First time open is complex, especially for PTY devices.
+ * This code guarantees that either everything succeeds and the
+ * TTY is ready for operation, or else the table slots are vacated
+ * and the allocated memory released. (Except that the termios
+ * and locked termios may be retained.)
+ */
+
+ o_tty = NULL;
+ tp = o_tp = NULL;
+ ltp = o_ltp = NULL;
+
+ tty = alloc_tty_struct();
+ if(!tty)
+ goto fail_no_mem;
+ initialize_tty_struct(tty);
+ tty->device = device;
+ tty->driver = *driver;
+
+ tp_loc = &driver->termios[idx];
+ if (!*tp_loc) {
+ tp = (struct termios *) kmalloc(sizeof(struct termios),
+ GFP_KERNEL);
+ if (!tp)
+ goto free_mem_out;
+ *tp = driver->init_termios;
+ }
+
+ ltp_loc = &driver->termios_locked[idx];
+ if (!*ltp_loc) {
+ ltp = (struct termios *) kmalloc(sizeof(struct termios),
+ GFP_KERNEL);
+ if (!ltp)
+ goto free_mem_out;
+ memset(ltp, 0, sizeof(struct termios));
+ }
+
+ if (driver->type == TTY_DRIVER_TYPE_PTY) {
+ o_tty = alloc_tty_struct();
+ if (!o_tty)
+ goto free_mem_out;
+ initialize_tty_struct(o_tty);
+ o_tty->device = (kdev_t) MKDEV(driver->other->major,
+ driver->other->minor_start + idx);
+ o_tty->driver = *driver->other;
+
+ o_tp_loc = &driver->other->termios[idx];
+ if (!*o_tp_loc) {
+ o_tp = (struct termios *)
+ kmalloc(sizeof(struct termios), GFP_KERNEL);
+ if (!o_tp)
+ goto free_mem_out;
+ *o_tp = driver->other->init_termios;
+ }
+
+ o_ltp_loc = &driver->other->termios_locked[idx];
+ if (!*o_ltp_loc) {
+ o_ltp = (struct termios *)
+ kmalloc(sizeof(struct termios), GFP_KERNEL);
+ if (!o_ltp)
+ goto free_mem_out;
+ memset(o_ltp, 0, sizeof(struct termios));
+ }
+
+ /*
+ * Everything allocated ... set up the o_tty structure.
+ */
+ driver->other->table[idx] = o_tty;
+ if (!*o_tp_loc)
+ *o_tp_loc = o_tp;
+ if (!*o_ltp_loc)
+ *o_ltp_loc = o_ltp;
+ o_tty->termios = *o_tp_loc;
+ o_tty->termios_locked = *o_ltp_loc;
+ (*driver->other->refcount)++;
+ if (driver->subtype == PTY_TYPE_MASTER)
+ o_tty->count++;
+
+ /* Establish the links in both directions */
+ tty->link = o_tty;
+ o_tty->link = tty;
+ }
+
+ /*
+ * All structures have been allocated, so now we install them.
+ * Failures after this point use release_mem to clean up, so
+ * there's no need to null out the local pointers.
+ */
+ driver->table[idx] = tty;
+
+ if (!*tp_loc)
+ *tp_loc = tp;
+ if (!*ltp_loc)
+ *ltp_loc = ltp;
+ tty->termios = *tp_loc;
+ tty->termios_locked = *ltp_loc;
+ (*driver->refcount)++;
+ tty->count++;
+
+ /*
+ * Structures all installed ... call the ldisc open routines.
+ * If we fail here just call release_mem to clean up. No need
+ * to decrement the use counts, as release_mem doesn't care.
+ */
+ if (tty->ldisc.open) {
+ retval = (tty->ldisc.open)(tty);
+ if (retval)
+ goto release_mem_out;
+ }
+ if (o_tty && o_tty->ldisc.open) {
+ retval = (o_tty->ldisc.open)(o_tty);
+ if (retval) {
+ if (tty->ldisc.close)
+ (tty->ldisc.close)(tty);
+ goto release_mem_out;
+ }
+ }
+ goto success;
+
+ /*
+ * This fast open can be used if the tty is already open.
+ * No memory is allocated, and the only failures are from
+ * attempting to open a closing tty or attempting multiple
+ * opens on a pty master.
+ */
+fast_track:
+ if (test_bit(TTY_CLOSING, &tty->flags)) {
+ retval = -EIO;
+ goto end_init;
+ }
+ if (driver->type == TTY_DRIVER_TYPE_PTY &&
+ driver->subtype == PTY_TYPE_MASTER) {
+ /*
+ * special case for PTY masters: only one open permitted,
+ * and the slave side open count is incremented as well.
+ */
+ if (tty->count) {
+ retval = -EIO;
+ goto end_init;
+ }
+ tty->link->count++;
+ }
+ tty->count++;
+ tty->driver = *driver; /* N.B. why do this every time?? */
+
+success:
+ *ret_tty = tty;
+
+ /* All paths come through here to release the semaphore */
+end_init:
+ up_tty_sem(idx);
+ return retval;
+
+ /* Release locally allocated memory ... nothing placed in slots */
+free_mem_out:
+ if (o_tp)
+ kfree(o_tp);
+ if (o_tty)
+ free_tty_struct(o_tty);
+ if (ltp)
+ kfree(ltp);
+ if (tp)
+ kfree(tp);
+ free_tty_struct(tty);
+
+fail_no_mem:
+ retval = -ENOMEM;
+ goto end_init;
+
+ /* call the tty release_mem routine to clean out this slot */
+release_mem_out:
+ printk(KERN_INFO "init_dev: ldisc open failed, "
+ "clearing slot %d\n", idx);
+ release_mem(tty, idx);
+ goto end_init;
+}
+
+/*
+ * Releases memory associated with a tty structure, and clears out the
+ * driver table slots.
+ */
+static void release_mem(struct tty_struct *tty, int idx)
+{
+ struct tty_struct *o_tty;
+ struct termios *tp;
+
+ if ((o_tty = tty->link) != NULL) {
+ o_tty->driver.table[idx] = NULL;
+ if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+ tp = o_tty->driver.termios[idx];
+ o_tty->driver.termios[idx] = NULL;
+ kfree(tp);
+ }
+ o_tty->magic = 0;
+ (*o_tty->driver.refcount)--;
+ list_del_init(&o_tty->tty_files);
+ free_tty_struct(o_tty);
+ }
+
+ tty->driver.table[idx] = NULL;
+ if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+ tp = tty->driver.termios[idx];
+ tty->driver.termios[idx] = NULL;
+ kfree(tp);
+ }
+ tty->magic = 0;
+ (*tty->driver.refcount)--;
+ list_del_init(&tty->tty_files);
+ free_tty_struct(tty);
+}
+
+/*
+ * Even releasing the tty structures is a tricky business.. We have
+ * to be very careful that the structures are all released at the
+ * same time, as interrupts might otherwise get the wrong pointers.
+ *
+ * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
+ * lead to double frees or releasing memory still in use.
+ */
+static void release_dev(struct file * filp)
+{
+ struct tty_struct *tty, *o_tty;
+ int pty_master, tty_closing, o_tty_closing, do_sleep;
+ int idx;
+ char buf[64];
+
+ tty = (struct tty_struct *)filp->private_data;
+ if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev"))
+ return;
+
+ check_tty_count(tty, "release_dev");
+
+ tty_fasync(-1, filp, 0);
+
+ idx = MINOR(tty->device) - tty->driver.minor_start;
+ pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+ tty->driver.subtype == PTY_TYPE_MASTER);
+ o_tty = tty->link;
+
+#ifdef TTY_PARANOIA_CHECK
+ if (idx < 0 || idx >= tty->driver.num) {
+ printk(KERN_DEBUG "release_dev: bad idx when trying to "
+ "free (%s)\n", kdevname(tty->device));
+ return;
+ }
+ if (tty != tty->driver.table[idx]) {
+ printk(KERN_DEBUG "release_dev: driver.table[%d] not tty "
+ "for (%s)\n", idx, kdevname(tty->device));
+ return;
+ }
+ if (tty->termios != tty->driver.termios[idx]) {
+ printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios "
+ "for (%s)\n",
+ idx, kdevname(tty->device));
+ return;
+ }
+ if (tty->termios_locked != tty->driver.termios_locked[idx]) {
+ printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not "
+ "termios_locked for (%s)\n",
+ idx, kdevname(tty->device));
+ return;
+ }
+#endif
+
+#ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "release_dev of %s (tty count=%d)...",
+ tty_name(tty, buf), tty->count);
+#endif
+
+#ifdef TTY_PARANOIA_CHECK
+ if (tty->driver.other) {
+ if (o_tty != tty->driver.other->table[idx]) {
+ printk(KERN_DEBUG "release_dev: other->table[%d] "
+ "not o_tty for (%s)\n",
+ idx, kdevname(tty->device));
+ return;
+ }
+ if (o_tty->termios != tty->driver.other->termios[idx]) {
+ printk(KERN_DEBUG "release_dev: other->termios[%d] "
+ "not o_termios for (%s)\n",
+ idx, kdevname(tty->device));
+ return;
+ }
+ if (o_tty->termios_locked !=
+ tty->driver.other->termios_locked[idx]) {
+ printk(KERN_DEBUG "release_dev: other->termios_locked["
+ "%d] not o_termios_locked for (%s)\n",
+ idx, kdevname(tty->device));
+ return;
+ }
+ if (o_tty->link != tty) {
+ printk(KERN_DEBUG "release_dev: bad pty pointers\n");
+ return;
+ }
+ }
+#endif
+
+ if (tty->driver.close)
+ tty->driver.close(tty, filp);
+
+ /*
+ * Sanity check: if tty->count is going to zero, there shouldn't be
+ * any waiters on tty->read_wait or tty->write_wait. We test the
+ * wait queues and kick everyone out _before_ actually starting to
+ * close. This ensures that we won't block while releasing the tty
+ * structure.
+ *
+ * The test for the o_tty closing is necessary, since the master and
+ * slave sides may close in any order. If the slave side closes out
+ * first, its count will be one, since the master side holds an open.
+ * Thus this test wouldn't be triggered at the time the slave closes,
+ * so we do it now.
+ *
+ * Note that it's possible for the tty to be opened again while we're
+ * flushing out waiters. By recalculating the closing flags before
+ * each iteration we avoid any problems.
+ */
+ while (1) {
+ tty_closing = tty->count <= 1;
+ o_tty_closing = o_tty &&
+ (o_tty->count <= (pty_master ? 1 : 0));
+ do_sleep = 0;
+
+ if (tty_closing) {
+ if (waitqueue_active(&tty->read_wait)) {
+ wake_up(&tty->read_wait);
+ do_sleep++;
+ }
+ if (waitqueue_active(&tty->write_wait)) {
+ wake_up(&tty->write_wait);
+ do_sleep++;
+ }
+ }
+ if (o_tty_closing) {
+ if (waitqueue_active(&o_tty->read_wait)) {
+ wake_up(&o_tty->read_wait);
+ do_sleep++;
+ }
+ if (waitqueue_active(&o_tty->write_wait)) {
+ wake_up(&o_tty->write_wait);
+ do_sleep++;
+ }
+ }
+ if (!do_sleep)
+ break;
+
+ printk(KERN_WARNING "release_dev: %s: read/write wait queue "
+ "active!\n", tty_name(tty, buf));
+ schedule();
+ }
+
+ /*
+ * The closing flags are now consistent with the open counts on
+ * both sides, and we've completed the last operation that could
+ * block, so it's safe to proceed with closing.
+ */
+ if (pty_master) {
+ if (--o_tty->count < 0) {
+ printk(KERN_WARNING "release_dev: bad pty slave count "
+ "(%d) for %s\n",
+ o_tty->count, tty_name(o_tty, buf));
+ o_tty->count = 0;
+ }
+ }
+ if (--tty->count < 0) {
+ printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n",
+ tty->count, tty_name(tty, buf));
+ tty->count = 0;
+ }
+
+ /*
+ * We've decremented tty->count, so we should zero out
+ * filp->private_data, to break the link between the tty and
+ * the file descriptor. Otherwise if filp_close() blocks before
+ * the file descriptor is removed from the inuse_filp
+ * list, check_tty_count() could observe a discrepancy and
+ * printk a warning message to the user.
+ */
+ filp->private_data = 0;
+
+ /*
+ * Perform some housekeeping before deciding whether to return.
+ *
+ * Set the TTY_CLOSING flag if this was the last open. In the
+ * case of a pty we may have to wait around for the other side
+ * to close, and TTY_CLOSING makes sure we can't be reopened.
+ */
+ if(tty_closing)
+ set_bit(TTY_CLOSING, &tty->flags);
+ if(o_tty_closing)
+ set_bit(TTY_CLOSING, &o_tty->flags);
+
+ /*
+ * If _either_ side is closing, make sure there aren't any
+ * processes that still think tty or o_tty is their controlling
+ * tty.
+ */
+ if (tty_closing || o_tty_closing) {
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->tty == tty || (o_tty && p->tty == o_tty))
+ p->tty = NULL;
+ }
+ read_unlock(&tasklist_lock);
+ }
+
+ /* check whether both sides are closing ... */
+ if (!tty_closing || (o_tty && !o_tty_closing))
+ return;
+
+#ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "freeing tty structure...");
+#endif
+
+ /*
+ * Shutdown the current line discipline, and reset it to N_TTY.
+ * N.B. why reset ldisc when we're releasing the memory??
+ */
+ if (tty->ldisc.close)
+ (tty->ldisc.close)(tty);
+ tty->ldisc = ldiscs[N_TTY];
+ tty->termios->c_line = N_TTY;
+ if (o_tty) {
+ if (o_tty->ldisc.close)
+ (o_tty->ldisc.close)(o_tty);
+ o_tty->ldisc = ldiscs[N_TTY];
+ }
+
+ /*
+ * Make sure that the tty's task queue isn't activated.
+ */
+ run_task_queue(&tq_timer);
+ flush_scheduled_tasks();
+
+ /*
+ * The release_mem function takes care of the details of clearing
+ * the slots and preserving the termios structure.
+ */
+ release_mem(tty, idx);
+}
+
+/*
+ * tty_open and tty_release keep up the tty count that contains the
+ * number of opens done on a tty. We cannot use the inode-count, as
+ * different inodes might point to the same tty.
+ *
+ * Open-counting is needed for pty masters, as well as for keeping
+ * track of serial lines: DTR is dropped when the last close happens.
+ * (This is not done solely through tty->count, now. - Ted 1/27/92)
+ *
+ * The termios state of a pty is reset on first open so that
+ * settings don't persist across reuse.
+ */
+static int tty_open(struct inode * inode, struct file * filp)
+{
+ struct tty_struct *tty;
+ int noctty, retval;
+ kdev_t device;
+ unsigned short saved_flags;
+ char buf[64];
+
+ saved_flags = filp->f_flags;
+retry_open:
+ noctty = filp->f_flags & O_NOCTTY;
+ device = inode->i_rdev;
+ if (device == TTY_DEV) {
+ if (!current->tty)
+ return -ENXIO;
+ device = current->tty->device;
+ filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+ /* noctty = 1; */
+ }
+#ifdef CONFIG_VT
+ if (device == CONSOLE_DEV) {
+ extern int fg_console;
+ device = MKDEV(TTY_MAJOR, fg_console + 1);
+ noctty = 1;
+ }
+#endif
+ if (device == SYSCONS_DEV) {
+ struct console *c = console_drivers;
+ while(c && !c->device)
+ c = c->next;
+ if (!c)
+ return -ENODEV;
+ device = c->device(c);
+ filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */
+ noctty = 1;
+ }
+
+ if (device == PTMX_DEV) {
+#ifdef CONFIG_UNIX98_PTYS
+
+ /* find a free pty. */
+ int major, minor;
+ struct tty_driver *driver;
+
+ /* find a device that is not in use. */
+ retval = -1;
+ for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) {
+ driver = &ptm_driver[major];
+ for (minor = driver->minor_start ;
+ minor < driver->minor_start + driver->num ;
+ minor++) {
+ device = MKDEV(driver->major, minor);
+ if (!init_dev(device, &tty)) goto ptmx_found; /* ok! */
+ }
+ }
+ return -EIO; /* no free ptys */
+ ptmx_found:
+ set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+ minor -= driver->minor_start;
+ devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start));
+ tty_register_devfs(&pts_driver[major], DEVFS_FL_DEFAULT,
+ pts_driver[major].minor_start + minor);
+ noctty = 1;
+ goto init_dev_done;
+
+#else /* CONFIG_UNIX_98_PTYS */
+
+ return -ENODEV;
+
+#endif /* CONFIG_UNIX_98_PTYS */
+ }
+
+ retval = init_dev(device, &tty);
+ if (retval)
+ return retval;
+
+#ifdef CONFIG_UNIX98_PTYS
+init_dev_done:
+#endif
+ filp->private_data = tty;
+ file_move(filp, &tty->tty_files);
+ check_tty_count(tty, "tty_open");
+ if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+ tty->driver.subtype == PTY_TYPE_MASTER)
+ noctty = 1;
+#ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "opening %s...", tty_name(tty, buf));
+#endif
+ if (tty->driver.open)
+ retval = tty->driver.open(tty, filp);
+ else
+ retval = -ENODEV;
+ filp->f_flags = saved_flags;
+
+ if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser())
+ retval = -EBUSY;
+
+ if (retval) {
+#ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "error %d in opening %s...", retval,
+ tty_name(tty, buf));
+#endif
+
+ release_dev(filp);
+ if (retval != -ERESTARTSYS)
+ return retval;
+ if (signal_pending(current))
+ return retval;
+ schedule();
+ /*
+ * Need to reset f_op in case a hangup happened.
+ */
+ filp->f_op = &tty_fops;
+ goto retry_open;
+ }
+ if (!noctty &&
+ current->leader &&
+ !current->tty &&
+ tty->session == 0) {
+ task_lock(current);
+ current->tty = tty;
+ task_unlock(current);
+ current->tty_old_pgrp = 0;
+ tty->session = current->session;
+ tty->pgrp = current->pgrp;
+ }
+ if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) &&
+ (tty->driver.subtype == SERIAL_TYPE_CALLOUT) &&
+ (tty->count == 1)) {
+ static int nr_warns;
+ if (nr_warns < 5) {
+ printk(KERN_WARNING "tty_io.c: "
+ "process %d (%s) used obsolete /dev/%s - "
+ "update software to use /dev/ttyS%d\n",
+ current->pid, current->comm,
+ tty_name(tty, buf), TTY_NUMBER(tty));
+ nr_warns++;
+ }
+ }
+ return 0;
+}
+
+static int tty_release(struct inode * inode, struct file * filp)
+{
+ lock_kernel();
+ release_dev(filp);
+ unlock_kernel();
+ return 0;
+}
+
+/* No kernel lock held - fine */
+static unsigned int tty_poll(struct file * filp, poll_table * wait)
+{
+ struct tty_struct * tty;
+
+ tty = (struct tty_struct *)filp->private_data;
+ if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll"))
+ return 0;
+
+ if (tty->ldisc.poll)
+ return (tty->ldisc.poll)(tty, filp, wait);
+ return 0;
+}
+
+static int tty_fasync(int fd, struct file * filp, int on)
+{
+ struct tty_struct * tty;
+ int retval;
+
+ tty = (struct tty_struct *)filp->private_data;
+ if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync"))
+ return 0;
+
+ retval = fasync_helper(fd, filp, on, &tty->fasync);
+ if (retval <= 0)
+ return retval;
+
+ if (on) {
+ if (!waitqueue_active(&tty->read_wait))
+ tty->minimum_to_wake = 1;
+ if (filp->f_owner.pid == 0) {
+ filp->f_owner.pid = (-tty->pgrp) ? : current->pid;
+ filp->f_owner.uid = current->uid;
+ filp->f_owner.euid = current->euid;
+ }
+ } else {
+ if (!tty->fasync && !waitqueue_active(&tty->read_wait))
+ tty->minimum_to_wake = N_TTY_BUF_SIZE;
+ }
+ return 0;
+}
+
+static int tiocsti(struct tty_struct *tty, char * arg)
+{
+ char ch, mbz = 0;
+
+ if ((current->tty != tty) && !suser())
+ return -EPERM;
+ if (get_user(ch, arg))
+ return -EFAULT;
+ tty->ldisc.receive_buf(tty, &ch, &mbz, 1);
+ return 0;
+}
+
+static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg)
+{
+ if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
+ return -EFAULT;
+ return 0;
+}
+
+static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
+ struct winsize * arg)
+{
+ struct winsize tmp_ws;
+
+ if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
+ return -EFAULT;
+ if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
+ return 0;
+ if (tty->pgrp > 0)
+ kill_pg(tty->pgrp, SIGWINCH, 1);
+ if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0))
+ kill_pg(real_tty->pgrp, SIGWINCH, 1);
+ tty->winsize = tmp_ws;
+ real_tty->winsize = tmp_ws;
+ return 0;
+}
+
+static int tioccons(struct inode *inode, struct file *file)
+{
+ if (inode->i_rdev == SYSCONS_DEV ||
+ inode->i_rdev == CONSOLE_DEV) {
+ struct file *f;
+ if (!suser())
+ return -EPERM;
+ spin_lock(&redirect_lock);
+ f = redirect;
+ redirect = NULL;
+ spin_unlock(&redirect_lock);
+ if (f)
+ fput(f);
+ return 0;
+ }
+ spin_lock(&redirect_lock);
+ if (redirect) {
+ spin_unlock(&redirect_lock);
+ return -EBUSY;
+ }
+ get_file(file);
+ redirect = file;
+ spin_unlock(&redirect_lock);
+ return 0;
+}
+
+
+static int fionbio(struct file *file, int *arg)
+{
+ int nonblock;
+
+ if (get_user(nonblock, arg))
+ return -EFAULT;
+
+ if (nonblock)
+ file->f_flags |= O_NONBLOCK;
+ else
+ file->f_flags &= ~O_NONBLOCK;
+ return 0;
+}
+
+static int tiocsctty(struct tty_struct *tty, int arg)
+{
+ if (current->leader &&
+ (current->session == tty->session))
+ return 0;
+ /*
+ * The process must be a session leader and
+ * not have a controlling tty already.
+ */
+ if (!current->leader || current->tty)
+ return -EPERM;
+ if (tty->session > 0) {
+ /*
+ * This tty is already the controlling
+ * tty for another session group!
+ */
+ if ((arg == 1) && suser()) {
+ /*
+ * Steal it away
+ */
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ if (p->tty == tty)
+ p->tty = NULL;
+ read_unlock(&tasklist_lock);
+ } else
+ return -EPERM;
+ }
+ task_lock(current);
+ current->tty = tty;
+ task_unlock(current);
+ current->tty_old_pgrp = 0;
+ tty->session = current->session;
+ tty->pgrp = current->pgrp;
+ return 0;
+}
+
+static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+ /*
+ * (tty == real_tty) is a cheap way of
+ * testing if the tty is NOT a master pty.
+ */
+ if (tty == real_tty && current->tty != real_tty)
+ return -ENOTTY;
+ return put_user(real_tty->pgrp, arg);
+}
+
+static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+ pid_t pgrp;
+ int retval = tty_check_change(real_tty);
+
+ if (retval == -EIO)
+ return -ENOTTY;
+ if (retval)
+ return retval;
+ if (!current->tty ||
+ (current->tty != real_tty) ||
+ (real_tty->session != current->session))
+ return -ENOTTY;
+ if (get_user(pgrp, (pid_t *) arg))
+ return -EFAULT;
+ if (pgrp < 0)
+ return -EINVAL;
+ if (session_of_pgrp(pgrp) != current->session)
+ return -EPERM;
+ real_tty->pgrp = pgrp;
+ return 0;
+}
+
+static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+ /*
+ * (tty == real_tty) is a cheap way of
+ * testing if the tty is NOT a master pty.
+ */
+ if (tty == real_tty && current->tty != real_tty)
+ return -ENOTTY;
+ if (real_tty->session <= 0)
+ return -ENOTTY;
+ return put_user(real_tty->session, arg);
+}
+
+static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg)
+{
+ if (copy_to_user(arg, tty, sizeof(*arg)))
+ return -EFAULT;
+ return 0;
+}
+
+static int tiocsetd(struct tty_struct *tty, int *arg)
+{
+ int ldisc;
+
+ if (get_user(ldisc, arg))
+ return -EFAULT;
+ return tty_set_ldisc(tty, ldisc);
+}
+
+static int send_break(struct tty_struct *tty, int duration)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ tty->driver.break_ctl(tty, -1);
+ if (!signal_pending(current))
+ schedule_timeout(duration);
+ tty->driver.break_ctl(tty, 0);
+ if (signal_pending(current))
+ return -EINTR;
+ return 0;
+}
+
+static int tty_generic_brk(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ if (cmd == TCSBRK && arg)
+ {
+ /* tcdrain case */
+ int retval = tty_check_change(tty);
+ if (retval)
+ return retval;
+ tty_wait_until_sent(tty, 0);
+ if (signal_pending(current))
+ return -EINTR;
+ }
+ return 0;
+}
+
+/*
+ * Split this up, as gcc can choke on it otherwise..
+ */
+int tty_ioctl(struct inode * inode, struct file * file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct tty_struct *tty, *real_tty;
+ int retval;
+
+ tty = (struct tty_struct *)file->private_data;
+ if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl"))
+ return -EINVAL;
+
+ real_tty = tty;
+ if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+ tty->driver.subtype == PTY_TYPE_MASTER)
+ real_tty = tty->link;
+
+ /*
+ * Break handling by driver
+ */
+ if (!tty->driver.break_ctl) {
+ switch(cmd) {
+ case TIOCSBRK:
+ case TIOCCBRK:
+ if (tty->driver.ioctl)
+ return tty->driver.ioctl(tty, file, cmd, arg);
+ return -EINVAL;
+
+ /* These two ioctl's always return success; even if */
+ /* the driver doesn't support them. */
+ case TCSBRK:
+ case TCSBRKP:
+ retval = -ENOIOCTLCMD;
+ if (tty->driver.ioctl)
+ retval = tty->driver.ioctl(tty, file, cmd, arg);
+ /* Not driver handled */
+ if (retval == -ENOIOCTLCMD)
+ retval = tty_generic_brk(tty, file, cmd, arg);
+ return retval;
+ }
+ }
+
+ /*
+ * Factor out some common prep work
+ */
+ switch (cmd) {
+ case TIOCSETD:
+ case TIOCSBRK:
+ case TIOCCBRK:
+ case TCSBRK:
+ case TCSBRKP:
+ retval = tty_check_change(tty);
+ if (retval)
+ return retval;
+ if (cmd != TIOCCBRK) {
+ tty_wait_until_sent(tty, 0);
+ if (signal_pending(current))
+ return -EINTR;
+ }
+ break;
+ }
+
+ switch (cmd) {
+ case TIOCSTI:
+ return tiocsti(tty, (char *)arg);
+ case TIOCGWINSZ:
+ return tiocgwinsz(tty, (struct winsize *) arg);
+ case TIOCSWINSZ:
+ return tiocswinsz(tty, real_tty, (struct winsize *) arg);
+ case TIOCCONS:
+ return real_tty!=tty ? -EINVAL : tioccons(inode, file);
+ case FIONBIO:
+ return fionbio(file, (int *) arg);
+ case TIOCEXCL:
+ set_bit(TTY_EXCLUSIVE, &tty->flags);
+ return 0;
+ case TIOCNXCL:
+ clear_bit(TTY_EXCLUSIVE, &tty->flags);
+ return 0;
+ case TIOCNOTTY:
+ if (current->tty != tty)
+ return -ENOTTY;
+ if (current->leader)
+ disassociate_ctty(0);
+ task_lock(current);
+ current->tty = NULL;
+ task_unlock(current);
+ return 0;
+ case TIOCSCTTY:
+ return tiocsctty(tty, arg);
+ case TIOCGPGRP:
+ return tiocgpgrp(tty, real_tty, (pid_t *) arg);
+ case TIOCSPGRP:
+ return tiocspgrp(tty, real_tty, (pid_t *) arg);
+ case TIOCGSID:
+ return tiocgsid(tty, real_tty, (pid_t *) arg);
+ case TIOCGETD:
+ return put_user(tty->ldisc.num, (int *) arg);
+ case TIOCSETD:
+ return tiocsetd(tty, (int *) arg);
+#ifdef CONFIG_VT
+ case TIOCLINUX:
+ return tioclinux(tty, arg);
+#endif
+ case TIOCTTYGSTRUCT:
+ return tiocttygstruct(tty, (struct tty_struct *) arg);
+
+ /*
+ * Break handling
+ */
+ case TIOCSBRK: /* Turn break on, unconditionally */
+ tty->driver.break_ctl(tty, -1);
+ return 0;
+
+ case TIOCCBRK: /* Turn break off, unconditionally */
+ tty->driver.break_ctl(tty, 0);
+ return 0;
+ case TCSBRK: /* SVID version: non-zero arg --> no break */
+ /*
+ * XXX is the above comment correct, or the
+ * code below correct? Is this ioctl used at
+ * all by anyone?
+ */
+ if (!arg)
+ return send_break(tty, HZ/4);
+ return 0;
+ case TCSBRKP: /* support for POSIX tcsendbreak() */
+ return send_break(tty, arg ? arg*(HZ/10) : HZ/4);
+ }
+ if (tty->driver.ioctl) {
+ int retval = (tty->driver.ioctl)(tty, file, cmd, arg);
+ if (retval != -ENOIOCTLCMD)
+ return retval;
+ }
+ if (tty->ldisc.ioctl) {
+ int retval = (tty->ldisc.ioctl)(tty, file, cmd, arg);
+ if (retval != -ENOIOCTLCMD)
+ return retval;
+ }
+ return -EINVAL;
+}
+
+
+/*
+ * This implements the "Secure Attention Key" --- the idea is to
+ * prevent trojan horses by killing all processes associated with this
+ * tty when the user hits the "Secure Attention Key". Required for
+ * super-paranoid applications --- see the Orange Book for more details.
+ *
+ * This code could be nicer; ideally it should send a HUP, wait a few
+ * seconds, then send a INT, and then a KILL signal. But you then
+ * have to coordinate with the init process, since all processes associated
+ * with the current tty must be dead before the new getty is allowed
+ * to spawn.
+ *
+ * Now, if it would be correct ;-/ The current code has a nasty hole -
+ * it doesn't catch files in flight. We may send the descriptor to ourselves
+ * via AF_UNIX socket, close it and later fetch from socket. FIXME.
+ *
+ * Nasty bug: do_SAK is being called in interrupt context. This can
+ * deadlock. We punt it up to process context. AKPM - 16Mar2001
+ */
+static void __do_SAK(void *arg)
+{
+#ifdef TTY_SOFT_SAK
+ tty_hangup(tty);
+#else
+ struct tty_struct *tty = arg;
+ struct task_struct *p;
+ int session;
+ int i;
+ struct file *filp;
+
+ if (!tty)
+ return;
+ session = tty->session;
+ if (tty->ldisc.flush_buffer)
+ tty->ldisc.flush_buffer(tty);
+ if (tty->driver.flush_buffer)
+ tty->driver.flush_buffer(tty);
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if ((p->tty == tty) ||
+ ((session > 0) && (p->session == session))) {
+ send_sig(SIGKILL, p, 1);
+ continue;
+ }
+ task_lock(p);
+ if (p->files) {
+ read_lock(&p->files->file_lock);
+ for (i=0; i < p->files->max_fds; i++) {
+ filp = fcheck_files(p->files, i);
+ if (filp && (filp->f_op == &tty_fops) &&
+ (filp->private_data == tty)) {
+ send_sig(SIGKILL, p, 1);
+ break;
+ }
+ }
+ read_unlock(&p->files->file_lock);
+ }
+ task_unlock(p);
+ }
+ read_unlock(&tasklist_lock);
+#endif
+}
+
+/*
+ * The tq handling here is a little racy - tty->SAK_tq may already be queued.
+ * But there's no mechanism to fix that without futzing with tqueue_lock.
+ * Fortunately we don't need to worry, because if ->SAK_tq is already queued,
+ * the values which we write to it will be identical to the values which it
+ * already has. --akpm
+ */
+void do_SAK(struct tty_struct *tty)
+{
+ if (!tty)
+ return;
+ PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty);
+ schedule_task(&tty->SAK_tq);
+}
+
+/*
+ * This routine is called out of the software interrupt to flush data
+ * from the flip buffer to the line discipline.
+ */
+static void flush_to_ldisc(void *private_)
+{
+ struct tty_struct *tty = (struct tty_struct *) private_;
+ unsigned char *cp;
+ char *fp;
+ int count;
+ unsigned long flags;
+
+ if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
+ queue_task(&tty->flip.tqueue, &tq_timer);
+ return;
+ }
+ if (tty->flip.buf_num) {
+ cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+ fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+ tty->flip.buf_num = 0;
+
+ save_flags(flags); cli();
+ tty->flip.char_buf_ptr = tty->flip.char_buf;
+ tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+ } else {
+ cp = tty->flip.char_buf;
+ fp = tty->flip.flag_buf;
+ tty->flip.buf_num = 1;
+
+ save_flags(flags); cli();
+ tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+ tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+ }
+ count = tty->flip.count;
+ tty->flip.count = 0;
+ restore_flags(flags);
+
+ tty->ldisc.receive_buf(tty, cp, fp, count);
+}
+
+/*
+ * Routine which returns the baud rate of the tty
+ *
+ * Note that the baud_table needs to be kept in sync with the
+ * include/asm/termbits.h file.
+ */
+static int baud_table[] = {
+ 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
+ 9600, 19200, 38400, 57600, 115200, 230400, 460800,
+#ifdef __sparc__
+ 76800, 153600, 307200, 614400, 921600
+#else
+ 500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000,
+ 2500000, 3000000, 3500000, 4000000
+#endif
+};
+
+static int n_baud_table = sizeof(baud_table)/sizeof(int);
+
+int tty_get_baud_rate(struct tty_struct *tty)
+{
+ unsigned int cflag, i;
+
+ cflag = tty->termios->c_cflag;
+
+ i = cflag & CBAUD;
+ if (i & CBAUDEX) {
+ i &= ~CBAUDEX;
+ if (i < 1 || i+15 >= n_baud_table)
+ tty->termios->c_cflag &= ~CBAUDEX;
+ else
+ i += 15;
+ }
+ if (i==15 && tty->alt_speed) {
+ if (!tty->warned) {
+ printk(KERN_WARNING "Use of setserial/setrocket to "
+ "set SPD_* flags is deprecated\n");
+ tty->warned = 1;
+ }
+ return(tty->alt_speed);
+ }
+
+ return baud_table[i];
+}
+
+void tty_flip_buffer_push(struct tty_struct *tty)
+{
+ if (tty->low_latency)
+ flush_to_ldisc((void *) tty);
+ else
+ queue_task(&tty->flip.tqueue, &tq_timer);
+}
+
+/*
+ * This subroutine initializes a tty structure.
+ */
+static void initialize_tty_struct(struct tty_struct *tty)
+{
+ memset(tty, 0, sizeof(struct tty_struct));
+ tty->magic = TTY_MAGIC;
+ tty->ldisc = ldiscs[N_TTY];
+ tty->pgrp = -1;
+ tty->flip.char_buf_ptr = tty->flip.char_buf;
+ tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+ tty->flip.tqueue.routine = flush_to_ldisc;
+ tty->flip.tqueue.data = tty;
+ init_MUTEX(&tty->flip.pty_sem);
+ init_waitqueue_head(&tty->write_wait);
+ init_waitqueue_head(&tty->read_wait);
+ tty->tq_hangup.routine = do_tty_hangup;
+ tty->tq_hangup.data = tty;
+ sema_init(&tty->atomic_read, 1);
+ sema_init(&tty->atomic_write, 1);
+ spin_lock_init(&tty->read_lock);
+ INIT_LIST_HEAD(&tty->tty_files);
+ INIT_TQUEUE(&tty->SAK_tq, 0, 0);
+}
+
+/*
+ * The default put_char routine if the driver did not define one.
+ */
+void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
+{
+ tty->driver.write(tty, 0, &ch, 1);
+}
+
+/*
+ * Register a tty device described by <driver>, with minor number <minor>.
+ */
+void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+ umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR;
+ kdev_t device = MKDEV (driver->major, minor);
+ int idx = minor - driver->minor_start;
+ char buf[32];
+
+ switch (device) {
+ case TTY_DEV:
+ case PTMX_DEV:
+ mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+ break;
+ default:
+ if (driver->major == PTY_MASTER_MAJOR)
+ mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+ break;
+ }
+ if ( (minor < driver->minor_start) ||
+ (minor >= driver->minor_start + driver->num) ) {
+ printk(KERN_ERR "Attempt to register invalid minor number "
+ "with devfs (%d:%d).\n", (int)driver->major,(int)minor);
+ return;
+ }
+# ifdef CONFIG_UNIX98_PTYS
+ if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) &&
+ (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) )
+ flags |= DEVFS_FL_CURRENT_OWNER;
+# endif
+ sprintf(buf, driver->name, idx + driver->name_base);
+ devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT,
+ driver->major, minor, mode, &tty_fops, NULL);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+void tty_unregister_devfs (struct tty_driver *driver, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+ void * handle;
+ int idx = minor - driver->minor_start;
+ char buf[32];
+
+ sprintf(buf, driver->name, idx + driver->name_base);
+ handle = devfs_find_handle (NULL, buf, driver->major, minor,
+ DEVFS_SPECIAL_CHR, 0);
+ devfs_unregister (handle);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+EXPORT_SYMBOL(tty_register_devfs);
+EXPORT_SYMBOL(tty_unregister_devfs);
+
+/*
+ * Called by a tty driver to register itself.
+ */
+int tty_register_driver(struct tty_driver *driver)
+{
+ int error;
+ int i;
+
+ if (driver->flags & TTY_DRIVER_INSTALLED)
+ return 0;
+
+ error = devfs_register_chrdev(driver->major, driver->name, &tty_fops);
+ if (error < 0)
+ return error;
+ else if(driver->major == 0)
+ driver->major = error;
+
+ if (!driver->put_char)
+ driver->put_char = tty_default_put_char;
+
+ driver->prev = 0;
+ driver->next = tty_drivers;
+ if (tty_drivers) tty_drivers->prev = driver;
+ tty_drivers = driver;
+
+ if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
+ for(i = 0; i < driver->num; i++)
+ tty_register_devfs(driver, 0, driver->minor_start + i);
+ }
+ proc_tty_register_driver(driver);
+ return error;
+}
+
+/*
+ * Called by a tty driver to unregister itself.
+ */
+int tty_unregister_driver(struct tty_driver *driver)
+{
+ int retval;
+ struct tty_driver *p;
+ int i, found = 0;
+ struct termios *tp;
+ const char *othername = NULL;
+
+ if (*driver->refcount)
+ return -EBUSY;
+
+ for (p = tty_drivers; p; p = p->next) {
+ if (p == driver)
+ found++;
+ else if (p->major == driver->major)
+ othername = p->name;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ if (othername == NULL) {
+ retval = devfs_unregister_chrdev(driver->major, driver->name);
+ if (retval)
+ return retval;
+ } else
+ devfs_register_chrdev(driver->major, othername, &tty_fops);
+
+ if (driver->prev)
+ driver->prev->next = driver->next;
+ else
+ tty_drivers = driver->next;
+
+ if (driver->next)
+ driver->next->prev = driver->prev;
+
+ /*
+ * Free the termios and termios_locked structures because
+ * we don't want to get memory leaks when modular tty
+ * drivers are removed from the kernel.
+ */
+ for (i = 0; i < driver->num; i++) {
+ tp = driver->termios[i];
+ if (tp) {
+ driver->termios[i] = NULL;
+ kfree(tp);
+ }
+ tp = driver->termios_locked[i];
+ if (tp) {
+ driver->termios_locked[i] = NULL;
+ kfree(tp);
+ }
+ tty_unregister_devfs(driver, driver->minor_start + i);
+ }
+ proc_tty_unregister_driver(driver);
+ return 0;
+}
+
+
+/*
+ * Initialize the console device. This is called *early*, so
+ * we can't necessarily depend on lots of kernel help here.
+ * Just do some early initializations, and do the complex setup
+ * later.
+ */
+void __init console_init(void)
+{
+ /* Setup the default TTY line discipline. */
+ memset(ldiscs, 0, sizeof(ldiscs));
+ (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY);
+
+ /*
+ * Set up the standard termios. Individual tty drivers may
+ * deviate from this; this is used as a template.
+ */
+ memset(&tty_std_termios, 0, sizeof(struct termios));
+ memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS);
+ tty_std_termios.c_iflag = ICRNL | IXON;
+ tty_std_termios.c_oflag = OPOST | ONLCR;
+ tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL;
+ tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+ ECHOCTL | ECHOKE | IEXTEN;
+
+ /*
+ * set up the console device so that later boot sequences can
+ * inform about problems etc..
+ */
+#ifdef CONFIG_EARLY_PRINTK
+ disable_early_printk();
+#endif
+
+#ifdef CONFIG_XEN_CONSOLE
+ xen_console_init();
+#endif
+
+#ifdef CONFIG_VT
+ con_init();
+#endif
+#ifdef CONFIG_AU1X00_SERIAL_CONSOLE
+ au1x00_serial_console_init();
+#endif
+#ifdef CONFIG_SERIAL_CONSOLE
+#if (defined(CONFIG_8xx) || defined(CONFIG_8260))
+ console_8xx_init();
+#elif defined(CONFIG_MAC_SERIAL) && defined(CONFIG_SERIAL)
+ if (_machine == _MACH_Pmac)
+ mac_scc_console_init();
+ else
+ serial_console_init();
+#elif defined(CONFIG_MAC_SERIAL)
+ mac_scc_console_init();
+#elif defined(CONFIG_PARISC)
+ pdc_console_init();
+#elif defined(CONFIG_SERIAL)
+ serial_console_init();
+#endif /* CONFIG_8xx */
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+ vme_scc_console_init();
+#endif
+#if defined(CONFIG_SERIAL167)
+ serial167_console_init();
+#endif
+#if defined(CONFIG_SH_SCI)
+ sci_console_init();
+#endif
+#endif
+#ifdef CONFIG_SERIAL_DEC_CONSOLE
+ dec_serial_console_init();
+#endif
+#ifdef CONFIG_TN3270_CONSOLE
+ tub3270_con_init();
+#endif
+#ifdef CONFIG_TN3215
+ con3215_init();
+#endif
+#ifdef CONFIG_HWC
+ hwc_console_init();
+#endif
+#ifdef CONFIG_STDIO_CONSOLE
+ stdio_console_init();
+#endif
+#ifdef CONFIG_SERIAL_21285_CONSOLE
+ rs285_console_init();
+#endif
+#ifdef CONFIG_SERIAL_SA1100_CONSOLE
+ sa1100_rs_console_init();
+#endif
+#ifdef CONFIG_ARC_CONSOLE
+ arc_console_init();
+#endif
+#ifdef CONFIG_SERIAL_AMBA_CONSOLE
+ ambauart_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912_CONSOLE
+ tx3912_console_init();
+#endif
+#ifdef CONFIG_TXX927_SERIAL_CONSOLE
+ txx927_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TXX9_CONSOLE
+ txx9_serial_console_init();
+#endif
+#ifdef CONFIG_SIBYTE_SB1250_DUART_CONSOLE
+ sb1250_serial_console_init();
+#endif
+#ifdef CONFIG_IP22_SERIAL
+ sgi_serial_console_init();
+#endif
+}
+
+static struct tty_driver dev_tty_driver, dev_syscons_driver;
+#ifdef CONFIG_UNIX98_PTYS
+static struct tty_driver dev_ptmx_driver;
+#endif
+#ifdef CONFIG_VT
+static struct tty_driver dev_console_driver;
+#endif
+
+/*
+ * Ok, now we can initialize the rest of the tty devices and can count
+ * on memory allocations, interrupts etc..
+ */
+void __init tty_init(void)
+{
+ /*
+ * dev_tty_driver and dev_console_driver are actually magic
+ * devices which get redirected at open time. Nevertheless,
+ * we register them so that register_chrdev is called
+ * appropriately.
+ */
+ memset(&dev_tty_driver, 0, sizeof(struct tty_driver));
+ dev_tty_driver.magic = TTY_DRIVER_MAGIC;
+ dev_tty_driver.driver_name = "/dev/tty";
+ dev_tty_driver.name = dev_tty_driver.driver_name + 5;
+ dev_tty_driver.name_base = 0;
+ dev_tty_driver.major = TTYAUX_MAJOR;
+ dev_tty_driver.minor_start = 0;
+ dev_tty_driver.num = 1;
+ dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+ dev_tty_driver.subtype = SYSTEM_TYPE_TTY;
+
+ if (tty_register_driver(&dev_tty_driver))
+ panic("Couldn't register /dev/tty driver\n");
+
+ dev_syscons_driver = dev_tty_driver;
+ dev_syscons_driver.driver_name = "/dev/console";
+ dev_syscons_driver.name = dev_syscons_driver.driver_name + 5;
+ dev_syscons_driver.major = TTYAUX_MAJOR;
+ dev_syscons_driver.minor_start = 1;
+ dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+ dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS;
+
+ if (tty_register_driver(&dev_syscons_driver))
+ panic("Couldn't register /dev/console driver\n");
+
+ /* console calls tty_register_driver() before kmalloc() works.
+ * Thus, we can't devfs_register() then. Do so now, instead.
+ */
+#ifdef CONFIG_VT
+ con_init_devfs();
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+ dev_ptmx_driver = dev_tty_driver;
+ dev_ptmx_driver.driver_name = "/dev/ptmx";
+ dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5;
+ dev_ptmx_driver.major= MAJOR(PTMX_DEV);
+ dev_ptmx_driver.minor_start = MINOR(PTMX_DEV);
+ dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+ dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX;
+
+ if (tty_register_driver(&dev_ptmx_driver))
+ panic("Couldn't register /dev/ptmx driver\n");
+#endif
+
+#ifdef CONFIG_VT
+ dev_console_driver = dev_tty_driver;
+ dev_console_driver.driver_name = "/dev/vc/0";
+ dev_console_driver.name = dev_console_driver.driver_name + 5;
+ dev_console_driver.major = TTY_MAJOR;
+ dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+ dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE;
+
+ if (tty_register_driver(&dev_console_driver))
+ panic("Couldn't register /dev/tty0 driver\n");
+
+ kbd_init();
+#endif
+
+#ifdef CONFIG_SGI_L1_SERIAL_CONSOLE
+ if (ia64_platform_is("sn2")) {
+ sn_sal_serial_console_init();
+ return; /* only one console right now for SN2 */
+ }
+#endif
+#ifdef CONFIG_ESPSERIAL /* init ESP before rs, so rs doesn't see the port */
+ espserial_init();
+#endif
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+ vme_scc_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912
+ tx3912_rs_init();
+#endif
+#ifdef CONFIG_ROCKETPORT
+ rp_init();
+#endif
+#ifdef CONFIG_SERIAL167
+ serial167_init();
+#endif
+#ifdef CONFIG_CYCLADES
+ cy_init();
+#endif
+#ifdef CONFIG_STALLION
+ stl_init();
+#endif
+#ifdef CONFIG_ISTALLION
+ stli_init();
+#endif
+#ifdef CONFIG_DIGI
+ pcxe_init();
+#endif
+#ifdef CONFIG_DIGIEPCA
+ pc_init();
+#endif
+#ifdef CONFIG_SPECIALIX
+ specialix_init();
+#endif
+#if (defined(CONFIG_8xx) || defined(CONFIG_8260))
+ rs_8xx_init();
+#endif /* CONFIG_8xx */
+ pty_init();
+#ifdef CONFIG_MOXA_SMARTIO
+ mxser_init();
+#endif
+#ifdef CONFIG_MOXA_INTELLIO
+ moxa_init();
+#endif
+#ifdef CONFIG_VT
+ vcs_init();
+#endif
+#ifdef CONFIG_TN3270
+ tub3270_init();
+#endif
+#ifdef CONFIG_TN3215
+ tty3215_init();
+#endif
+#ifdef CONFIG_HWC
+ hwc_tty_init();
+#endif
+#ifdef CONFIG_A2232
+ a2232board_init();
+#endif
+}