diff options
author | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2003-02-24 16:55:07 +0000 |
---|---|---|
committer | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2003-02-24 16:55:07 +0000 |
commit | a48212cb65e09669ed243581556529681cebba0a (patch) | |
tree | a58f47e4764f343db87eba48d17ce9b2ddbf8047 /xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers | |
parent | 96ce9e11d148a721557d48ed5a8ca7857a7bc937 (diff) | |
download | xen-a48212cb65e09669ed243581556529681cebba0a.tar.gz xen-a48212cb65e09669ed243581556529681cebba0a.tar.bz2 xen-a48212cb65e09669ed243581556529681cebba0a.zip |
bitkeeper revision 1.93 (3e5a4e6bkPheUp3x1uufN2MS3LAB7A)
Latest and Greatest version of XenoLinux based on the Linux-2.4.21-pre4
kernel.
Diffstat (limited to 'xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers')
13 files changed, 2489 insertions, 0 deletions
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/Makefile new file mode 100644 index 0000000000..74a0c6c565 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/Makefile @@ -0,0 +1,3 @@ +O_TARGET := blk.o +obj-y := xl_block.o xl_block_test.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block.c new file mode 100644 index 0000000000..0b77e5536e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block.c @@ -0,0 +1,490 @@ +/****************************************************************************** + * xl_block.c + * + * Xenolinux virtual block-device driver. + * + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/fs.h> +#include <linux/hdreg.h> +#include <linux/blkdev.h> +#include <linux/major.h> + +#include <asm/hypervisor-ifs/block.h> +#include <asm/hypervisor-ifs/hypervisor-if.h> +#include <asm/io.h> +#include <asm/uaccess.h> + +#define MAJOR_NR XLBLK_MAJOR /* force defns in blk.h, must precede include */ +static int xlblk_major = XLBLK_MAJOR; +#include <linux/blk.h> + +/* Copied from linux/ide.h */ +typedef unsigned char byte; + +void xlblk_ide_register_disk(int, unsigned long); + +#define XLBLK_MAX 32 /* Maximum minor devices we support */ +#define XLBLK_MAJOR_NAME "xhd" +#define IDE_PARTN_BITS 6 /* from ide.h::PARTN_BITS */ +#define IDE_PARTN_MASK ((1<<IDE_PARTN_BITS)-1) /* from ide.h::PARTN_MASK */ +static int xlblk_blk_size[XLBLK_MAX]; +static int xlblk_blksize_size[XLBLK_MAX]; +static int xlblk_read_ahead; +static int xlblk_hardsect_size[XLBLK_MAX]; +static int xlblk_max_sectors[XLBLK_MAX]; + +#define XLBLK_RESPONSE_IRQ _EVENT_BLK_RESP + +#define DEBUG_IRQ 
_EVENT_DEBUG + +#if 0 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK(_f, _a...) ((void)0) +#define DPRINTK_IOCTL(_f, _a...) ((void)0) +#endif + +static blk_ring_t *blk_ring; +static unsigned int resp_cons; /* Response consumer for comms ring. */ +static xen_disk_info_t xen_disk_info; + +int hypervisor_request(void * id, + int operation, + char * buffer, + unsigned long block_number, + unsigned short block_size, + kdev_t device); + + +/* ------------------------------------------------------------------------ + */ + +static int xenolinux_block_open(struct inode *inode, struct file *filep) +{ + DPRINTK("xenolinux_block_open\n"); + return 0; +} + +static int xenolinux_block_release(struct inode *inode, struct file *filep) +{ + DPRINTK("xenolinux_block_release\n"); + return 0; +} + +/* + * ioctl entry point for the virtual block device. The caller needs + * CAP_SYS_ADMIN and the minor must be below XLBLK_MAX. Results are passed + * back through the user pointer in 'argument'; commands that are not + * recognised fall out of the switch and return 0. + */ +static int xenolinux_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) +{ + int minor_dev; + struct hd_geometry *geo = (struct hd_geometry *)argument; + + DPRINTK("xenolinux_block_ioctl\n"); + + /* check permissions */ + if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!inode) return -EINVAL; + minor_dev = MINOR(inode->i_rdev); + if (minor_dev >= XLBLK_MAX) return -ENODEV; + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, minor: 0x%x\n", + command, (long) argument, minor_dev); + + switch (command) + { + case BLKGETSIZE: + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, + (long) xen_disk_info.disks[0].capacity); + return put_user(xen_disk_info.disks[0].capacity, + (unsigned long *) argument); + + case BLKRRPART: + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); + break; + + case BLKSSZGET: + /* Store the sector size through the user pointer, as BLKGETSIZE does; + * returning it as the ioctl result would leave the caller's int unset. */ + DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET, + xlblk_hardsect_size[minor_dev]); + return put_user(xlblk_hardsect_size[minor_dev], (int *) argument); + + case HDIO_GETGEO: + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); + if (!argument) return -EINVAL; + if 
(put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; + return 0; + + case HDIO_GETGEO_BIG: + DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; + + return 0; + + default: + DPRINTK_IOCTL(" eh? unknown ioctl\n"); + break; + } + + return 0; +} + +static int xenolinux_block_check(kdev_t dev) +{ + DPRINTK("xenolinux_block_check\n"); + return 0; +} + +static int xenolinux_block_revalidate(kdev_t dev) +{ + DPRINTK("xenolinux_block_revalidate\n"); + return 0; +} + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_READ, XEN_BLOCK_WRITE or XEN_BLOCK_PROBE + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + * block_number: block to read + * block_size: size of each block + * device: ide/hda is 768 or 0x300 + */ +int hypervisor_request(void * id, + int operation, + char * buffer, + unsigned long block_number, + unsigned short block_size, + kdev_t device) +{ + int position; + void *buffer_ma; + kdev_t phys_device = (kdev_t) 0; + unsigned long sector_number = 0; + struct gendisk *gd; + + /* + * Bail if there's no room in the request communication ring. This may be + * because we have a whole bunch of outstanding responses to process. No + * matter, as the response handler will kick the request queue. 
+ */ + if ( BLK_RING_INC(blk_ring->req_prod) == resp_cons ) + return 1; + + buffer_ma = (void *)phys_to_machine(virt_to_phys(buffer)); + + switch ( operation ) + { + case XEN_BLOCK_PROBE: + phys_device = (kdev_t) 0; + sector_number = 0; + break; + + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + if ( MAJOR(device) != XLBLK_MAJOR ) + panic("error: xl_block::hypervisor_request: " + "unknown device [0x%x]\n", device); + phys_device = MKDEV(IDE0_MAJOR, 0); + /* Compute real buffer location on disk */ + sector_number = block_number; + if ( (gd = (struct gendisk *)xen_disk_info.disks[0].gendisk) != NULL ) + sector_number += gd->part[MINOR(device)&IDE_PARTN_MASK].start_sect; + break; + + default: + panic("unknown op %d\n", operation); + } + + /* Fill out a communications ring structure & trap to the hypervisor */ + position = blk_ring->req_prod; + blk_ring->ring[position].req.id = id; + blk_ring->ring[position].req.operation = operation; + blk_ring->ring[position].req.buffer = buffer_ma; + blk_ring->ring[position].req.block_number = block_number; + blk_ring->ring[position].req.block_size = block_size; + blk_ring->ring[position].req.device = phys_device; + blk_ring->ring[position].req.sector_number = sector_number; + + blk_ring->req_prod = BLK_RING_INC(position); + + return 0; +} + + +/* + * do_xlblk_request + * read a block; request is in a request queue + */ +static void do_xlblk_request (request_queue_t *rq) +{ + struct request *req; + struct buffer_head *bh; + int rw, nsect, full, queued = 0; + + DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME); + + while ( !rq->plugged && !QUEUE_EMPTY ) + { + if ( (req = CURRENT) == NULL ) goto out; + + DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->nr_sectors, req->bh); + + rw = req->cmd; + if ( rw == READA ) rw = READ; + if ((rw != READ) && (rw != WRITE)) + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); + + req->errors = 0; + + 
bh = req->bh; + while ( bh != NULL ) + { + full = hypervisor_request( + bh, (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size, bh->b_dev); + + if ( full ) goto out; + + queued++; + + /* Dequeue the buffer head from the request. */ + nsect = bh->b_size >> 9; + req->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh = req->bh; + + if ( bh != NULL ) + { + /* There's another buffer head to do. Update the request. */ + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + req->current_nr_sectors = bh->b_size >> 9; + req->buffer = bh->b_data; + } + else + { + /* That was the last buffer head. Finalise the request. */ + if ( end_that_request_first(req, 1, "XenBlk") ) BUG(); + blkdev_dequeue_request(req); + end_that_request_last(req); + } + } + } + + out: + if ( queued != 0 ) HYPERVISOR_block_io_op(); +} + + +static struct block_device_operations xenolinux_block_fops = +{ + open: xenolinux_block_open, + release: xenolinux_block_release, + ioctl: xenolinux_block_ioctl, + check_media_change: xenolinux_block_check, + revalidate: xenolinux_block_revalidate, +}; + +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + int i; + unsigned long flags; + struct buffer_head *bh; + + spin_lock_irqsave(&io_request_lock, flags); + + for ( i = resp_cons; + i != blk_ring->resp_prod; + i = BLK_RING_INC(i) ) + { + blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp; + if ( (bh = bret->id) != NULL ) bh->b_end_io(bh, 1); + } + + resp_cons = i; + + /* KAF: We can push work down at this point. We have the lock. */ + do_xlblk_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); + + spin_unlock_irqrestore(&io_request_lock, flags); +} + + +int __init xlblk_init(void) +{ + int i, error, result; + + /* This mapping was created early at boot time. 
*/ + blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = 0; + + error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, 0, + "xlblk-response", NULL); + if (error) { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + memset (&xen_disk_info, 0, sizeof(xen_disk_info)); + xen_disk_info.count = 0; + + if ( hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info, + 0, 0, (kdev_t) 0) ) + BUG(); + HYPERVISOR_block_io_op(); + while ( blk_ring->resp_prod != 1 ) barrier(); + for ( i = 0; i < xen_disk_info.count; i++ ) + { + printk (KERN_ALERT " %2d: type: %d, capacity: %ld\n", + i, xen_disk_info.disks[i].type, + xen_disk_info.disks[i].capacity); + } + + SET_MODULE_OWNER(&xenolinux_block_fops); + result = register_blkdev(xlblk_major, "block", &xenolinux_block_fops); + if (result < 0) { + printk (KERN_ALERT "xenolinux block: can't get major %d\n", + xlblk_major); + return result; + } + + /* initialize global arrays in drivers/block/ll_rw_block.c */ + for (i = 0; i < XLBLK_MAX; i++) { + xlblk_blk_size[i] = xen_disk_info.disks[0].capacity; + xlblk_blksize_size[i] = 512; + xlblk_hardsect_size[i] = 512; + xlblk_max_sectors[i] = 128; + } + xlblk_read_ahead = 8; + + blk_size[xlblk_major] = xlblk_blk_size; + blksize_size[xlblk_major] = xlblk_blksize_size; + hardsect_size[xlblk_major] = xlblk_hardsect_size; + read_ahead[xlblk_major] = xlblk_read_ahead; + max_sectors[xlblk_major] = xlblk_max_sectors; + + blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request); + + /* + * Turn off barking 'headactive' mode. We dequeue buffer heads as + * soon as we pass them down to Xen. 
+ */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0); + + xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity); + + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver installed [device: %d]\n", + xlblk_major); + return 0; + + fail: + return error; +} + +void xlblk_ide_register_disk(int idx, unsigned long capacity) +{ + int units; + int minors; + struct gendisk *gd; + + /* plagarized from ide-probe.c::init_gendisk */ + + units = 2; /* from ide.h::MAX_DRIVES */ + + minors = units * (1<<IDE_PARTN_BITS); + gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL); + gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + + gd->major = xlblk_major; + gd->major_name = XLBLK_MAJOR_NAME; + gd->minor_shift = IDE_PARTN_BITS; + gd->max_p = 1<<IDE_PARTN_BITS; + gd->nr_real = units; + gd->real_devices = NULL; + gd->next = NULL; + gd->fops = &xenolinux_block_fops; + gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL); + gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL); + + if (gd->de_arr) + memset (gd->de_arr, 0, sizeof *gd->de_arr * units); + + if (gd->flags) + memset (gd->flags, 0, sizeof *gd->flags * units); + + add_gendisk(gd); + + xen_disk_info.disks[idx].gendisk = gd; + + /* default disk size is just a big number. 
in the future, we + need a message to probe the devices to determine the actual size */ + register_disk(gd, MKDEV(xlblk_major, 0), 1<<IDE_PARTN_BITS, + &xenolinux_block_fops, capacity); + + return; +} + + + +static void __exit xlblk_cleanup(void) +{ + /* CHANGE FOR MULTIQUEUE */ + blk_cleanup_queue(BLK_DEFAULT_QUEUE(xlblk_major)); + + /* clean up global arrays */ + read_ahead[xlblk_major] = 0; + + if (blk_size[xlblk_major]) + kfree(blk_size[xlblk_major]); + blk_size[xlblk_major] = NULL; + + if (blksize_size[xlblk_major]) + kfree(blksize_size[xlblk_major]); + blksize_size[xlblk_major] = NULL; + + if (hardsect_size[xlblk_major]) + kfree(hardsect_size[xlblk_major]); + hardsect_size[xlblk_major] = NULL; + + /* XXX: free each gendisk */ + if (unregister_blkdev(xlblk_major, "block")) + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver uninstalled w/ errs\n"); + else + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver uninstalled\n"); + + return; +} + + +#ifdef MODULE +module_init(xlblk_init); +module_exit(xlblk_cleanup); +#endif diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block_test.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block_test.c new file mode 100644 index 0000000000..2ddef271e5 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_block_test.c @@ -0,0 +1,225 @@ +/****************************************************************************** + * xenolinux_block_test.c + * + */ +#define EXPORT_SYMTAB + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/delay.h> + +#include <asm/hypervisor-ifs/block.h> +#include <asm/hypervisor-ifs/hypervisor-if.h> + +/******************************************************************/ + +static struct proc_dir_entry *bdt; 
+static blk_ring_req_entry_t meta; +static char * data; + +static int proc_read_bdt(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + switch (meta.operation) + { + case XEN_BLOCK_READ : + case XEN_BLOCK_WRITE : + { + return proc_dump_block(page, start, off, count, eof, data); + } + case XEN_BLOCK_DEBUG : + { + return proc_dump_debug(page, start, off, count, eof, data); + } + default : + { + printk(KERN_ALERT + "block device test error: unknown operation [%c]\n", + meta.operation); + return -EINVAL; + } + } +} + +int proc_dump_debug(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + + sprintf (header, "Block Device Test: Debug Dump\n\n"); + + sprintf (dump, "%s\n", meta.buffer); + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_dump_block(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + char temp[100]; + int loop; + + sprintf (header, "Block Device Test\n\n%s blk num: %ld 0x%lx; size: %d 0x%x; device: 0x%x\n", + meta.operation == XEN_BLOCK_WRITE ? 
"write" : "read", + meta.block_number, meta.block_number, + meta.block_size, meta.block_size, + meta.device); + + sprintf (dump, "%s", header); + + if (meta.buffer) + { + for (loop = 0; loop < 100; loop++) + { + int i = meta.buffer[loop]; + + if (loop % 8 == 0) + { + sprintf (temp, "[%2d] ", loop); + strcat(dump, temp); + } + else if (loop % 2 == 0) + { + strcat(dump, " "); + } + + sprintf (temp, " 0x%02x", i & 255); + strcat(dump, temp); + if ((loop + 1) % 8 == 0) + { + strcat(dump, "\n"); + } + } + strcat(dump, "\n\n"); + } + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_write_bdt(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char *local = kmalloc((count + 1) * sizeof(char), GFP_KERNEL); + char opcode; + int block_number = 0; + int block_size = 0; + int device = 0; + + if (copy_from_user(local, buffer, count)) + { + return -EFAULT; + } + local[count] = '\0'; + + sscanf(local, "%c %i %i %i", + &opcode, &block_number, &block_size, &device); + + if (opcode == 'r' || opcode == 'R') + { + meta.operation = XEN_BLOCK_READ; + } + else if (opcode == 'w' || opcode == 'W') + { + meta.operation = XEN_BLOCK_WRITE; + } + else if (opcode == 'd' || opcode == 'D') + { + meta.operation = XEN_BLOCK_DEBUG; + block_size = 10000; + } + else + { + printk(KERN_ALERT + "block device test error: unknown opcode [%c]\n", opcode); + return -EINVAL; + } + + if (data) + { + kfree(data); + } + data = kmalloc(block_size * sizeof(char), GFP_KERNEL); + if (data == NULL) + { + kfree(local); + return -ENOMEM; + } + + meta.block_number = block_number; + meta.block_size = block_size; + meta.device = device; + meta.buffer = data; + + /* submit request */ + hypervisor_request(0, meta.operation, meta.buffer, + meta.block_number, meta.block_size, + meta.device); + HYPERVISOR_block_io_op(); + mdelay(1000); /* should wait for a proper acknowledgement/response. 
*/ + + kfree(local); + return count; +} + + +static int __init init_module(void) +{ + int return_value = 0; + + /* create proc entry */ + bdt = create_proc_entry("bdt", 0644, NULL); + if (bdt == NULL) + { + return_value = -ENOMEM; + goto error; + } + bdt->data = NULL; + bdt->read_proc = proc_read_bdt; + bdt->write_proc = proc_write_bdt; + bdt->owner = THIS_MODULE; + + memset(&meta, 0, sizeof(meta)); + + /* success */ + printk(KERN_ALERT "XenoLinux Block Device Test installed\n"); + return 0; + + error: + return return_value; +} + +static void __exit cleanup_module(void) +{ + if (data) + { + kfree(data); + } + printk(KERN_ALERT "XenoLinux Block Device Test uninstalled\n"); +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/Makefile new file mode 100644 index 0000000000..5a0e7b36b1 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/Makefile @@ -0,0 +1,3 @@ +O_TARGET := con.o +obj-y := console.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/console.c new file mode 100644 index 0000000000..11548f877e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/console/console.c @@ -0,0 +1,204 @@ +/****************************************************************************** + * console.c + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/serial.h> +#include <linux/major.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/console.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/uaccess.h> 
+#include <asm/hypervisor.h> + +/******************** Kernel console driver ********************************/ + +static void kconsole_write(struct console *co, const char *s, unsigned count) +{ +#define STRLEN 256 + static char str[STRLEN]; + static int pos = 0; + int len; + + /* We buffer output until we see a newline, or until the buffer is full. */ + while ( count != 0 ) + { + len = ((STRLEN - pos) > count) ? count : STRLEN - pos; + memcpy(str + pos, s, len); + pos += len; + s += len; + count -= len; + if ( (pos == STRLEN) || (str[pos-1] == '\n') ) + { + (void)HYPERVISOR_console_write(str, pos); + pos = 0; + } + } +} + +static kdev_t kconsole_device(struct console *c) +{ + /* + * This is the magic that binds our "struct console" to our + * "tty_struct", defined below. + */ + return MKDEV(TTY_MAJOR, 0); +} + +static struct console kconsole_info = { + name: "xenocon", + write: kconsole_write, + device: kconsole_device, + flags: CON_PRINTBUFFER, + index: -1, +}; + +void xeno_console_init(void) +{ + register_console(&kconsole_info); +} + + +/******************** Initial /dev/console *********************************/ + + +static struct tty_driver console_driver; +static int console_refcount; +static struct tty_struct *console_table[1]; +static struct termios *console_termios[1]; +static struct termios *console_termios_locked[1]; + +static int console_write_room(struct tty_struct *tty) +{ + return INT_MAX; +} + +static int console_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static inline int console_xmit(int ch) +{ + char _ch = ch; + kconsole_write(NULL, &_ch, 1); + return 1; +} + +static int console_write(struct tty_struct *tty, int from_user, + const u_char * buf, int count) +{ + int i; + + if ( from_user && verify_area(VERIFY_READ, buf, count) ) + { + return -EINVAL; + } + + for ( i = 0; i < count; i++ ) + { + char ch; + if ( from_user ) + { + __get_user(ch, buf + i); + } + else + { + ch = buf[i]; + } + console_xmit(ch); + } + return i; +} + 
+static void console_put_char(struct tty_struct *tty, u_char ch) +{ + console_xmit(ch); +} + +static int console_open(struct tty_struct *tty, struct file *filp) +{ + int line; + + MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ( line ) + { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + tty->driver_data = NULL; + + return 0; +} + +static void console_close(struct tty_struct *tty, struct file *filp) +{ + MOD_DEC_USE_COUNT; +} + +static int __init console_ini(void) +{ + memset(&console_driver, 0, sizeof(struct tty_driver)); + console_driver.magic = TTY_DRIVER_MAGIC; + console_driver.driver_name = "xeno_console"; + console_driver.name = "console"; + console_driver.major = TTY_MAJOR; + console_driver.minor_start = 0; + console_driver.num = 1; + console_driver.type = TTY_DRIVER_TYPE_SERIAL; + console_driver.subtype = SERIAL_TYPE_NORMAL; + console_driver.init_termios = tty_std_termios; + console_driver.flags = TTY_DRIVER_REAL_RAW; + console_driver.refcount = &console_refcount; + console_driver.table = console_table; + console_driver.termios = console_termios; + console_driver.termios_locked = console_termios_locked; + /* Functions */ + console_driver.open = console_open; + console_driver.close = console_close; + console_driver.write = console_write; + console_driver.write_room = console_write_room; + console_driver.put_char = console_put_char; + console_driver.chars_in_buffer = console_chars_in_buffer; + + if ( tty_register_driver(&console_driver) ) + { + printk(KERN_ERR "Couldn't register Xeno console driver\n"); + } + else + { + printk("Xeno console successfully installed\n"); + } + + return 0; +} + +static void __exit console_fin(void) +{ + int ret; + + ret = tty_unregister_driver(&console_driver); + if ( ret != 0 ) + { + printk(KERN_ERR "Unable to unregister Xeno console driver: %d\n", ret); + } +} + +module_init(console_ini); +module_exit(console_fin); + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile 
b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile new file mode 100644 index 0000000000..4738fc0ba4 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile @@ -0,0 +1,3 @@ +O_TARGET := dom0.o +obj-y := dom0_memory.o dom0_core.o vfr.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_block.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_block.c new file mode 100644 index 0000000000..97d4a65b78 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_block.c @@ -0,0 +1,27 @@ +/* + * domain 0 block driver interface + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +static int __init init_module(void) +{ + request_module("xl_block"); + printk("Successfully installed domain 0 block interface\n"); + + /* NOTE(review): request_module()'s result is ignored, so the message above prints even if the load failed. */ + return 0; +} + +static void __exit cleanup_module(void) +{ + /* Nothing to release here; a void function must not return a value. */ + printk("Successfully de-installed domain-0 block interface\n"); +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c new file mode 100644 index 0000000000..f8af85358b --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c @@ -0,0 +1,334 @@ +/****************************************************************************** + * dom0_core.c + * + * Interface to privileged domain-0 commands. 
+ * + * Copyright (c) 2002, K A Fraser, B Dragovic + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> + +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> +#include <linux/swapctl.h> +#include <linux/iobuf.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> + +#include "dom0_ops.h" + +/* Private proc-file data structures. */ +typedef struct proc_data { + unsigned int domain; + unsigned long map_size; +} dom_procdata_t; + +typedef struct proc_mem_data { + unsigned long pfn; + int tot_pages; +} proc_memdata_t; + +#define XENO_BASE "xeno" +#define DOM0_CMD_INTF "dom0_cmd" +#define DOM0_NEWDOM "new_dom_data" + +#define MAX_LEN 16 +#define DOM_DIR "dom" +#define DOM_MEM "mem" +#define DOM_VIF "vif" + +#define MAP_DISCONT 1 + +static struct proc_dir_entry *xeno_base; +static struct proc_dir_entry *dom0_cmd_intf; +static struct proc_dir_entry *proc_ft; + +unsigned long direct_mmap(unsigned long, unsigned long, pgprot_t, int, int); +int direct_unmap(unsigned long, unsigned long); +int direct_disc_unmap(unsigned long, unsigned long, int); + +static unsigned char readbuf[1204]; + +static int cmd_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + +static ssize_t dom_vif_read(struct file * file, char * buff, size_t size, loff_t * off) +{ + char hyp_buf[128]; // Hypervisor is going to write its reply here. + network_op_t op; + static int finished = 0; + + // This seems to be the only way to make the OS stop making read requests + // to the file. 
When we use the fileoperations version of read, offset + // seems to be ignored altogether. + + if (finished) + { + finished = 0; + return 0; + } + + op.cmd = NETWORK_OP_VIFQUERY; + op.u.vif_query.domain = (unsigned int) ((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + op.u.vif_query.buf = hyp_buf; + + strcpy(hyp_buf, "Error getting domain's vif list from hypervisor.\n"); // This will be replaced if everything works. + + (void)HYPERVISOR_network_op(&op); + + if (*off >= (strlen(hyp_buf)+1)) return 0; + + copy_to_user(buff, hyp_buf, strlen(hyp_buf)); + + finished = 1; + + return strlen(hyp_buf)+1; +} + +struct file_operations dom_vif_ops = { + read: dom_vif_read +}; + + +static void create_proc_dom_entries(int dom) +{ + struct proc_dir_entry * dir; + dom_procdata_t * dom_data; + char dir_name[MAX_LEN]; + struct proc_dir_entry * file; + + snprintf(dir_name, MAX_LEN, "%s%d", DOM_DIR, dom); + + dom_data = (dom_procdata_t *)kmalloc(sizeof(dom_procdata_t), GFP_KERNEL); + dom_data->domain = dom; + + dir = proc_mkdir(dir_name, xeno_base); + dir->data = dom_data; + + file = create_proc_entry(DOM_VIF, 0600, dir); + if (file != NULL) + { + file->owner = THIS_MODULE; + file->nlink = 1; + file->proc_fops = &dom_vif_ops; + file->data = (void *) dom; + } +} + +static ssize_t dom_mem_write(struct file * file, const char * buff, + size_t size , loff_t * off) +{ + dom_mem_t mem_data; + + copy_from_user(&mem_data, (dom_mem_t *)buff, sizeof(dom_mem_t)); + + if(direct_disc_unmap(mem_data.vaddr, mem_data.start_pfn, + mem_data.tot_pages) == 0){ + return sizeof(sizeof(dom_mem_t)); + } else { + return -1; + } +} + +static ssize_t dom_mem_read(struct file * file, char * buff, size_t size, loff_t * off) +{ + unsigned long addr; + pgprot_t prot; + + proc_memdata_t * mem_data = (proc_memdata_t *)((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + + prot = PAGE_SHARED; + + /* remap the range using xen specific routines */ + + addr = 
direct_mmap(mem_data->pfn << PAGE_SHIFT, mem_data->tot_pages << PAGE_SHIFT, prot, MAP_DISCONT, mem_data->tot_pages); + + copy_to_user((unsigned long *)buff, &addr, sizeof(addr)); + + return sizeof(addr); +} + +struct file_operations dom_mem_ops = { + read: dom_mem_read, + write: dom_mem_write, +}; + +static int dom_map_mem(unsigned int dom, unsigned long pfn, int tot_pages) +{ + int ret = -ENOENT; + struct proc_dir_entry * pd = xeno_base->subdir; + struct proc_dir_entry * file; + proc_memdata_t * memdata; + + while(pd != NULL){ + + if((pd->mode & S_IFDIR) && ((dom_procdata_t *)pd->data)->domain == dom){ + + /* check if there is already an entry for mem and if so + * remove it. + */ + remove_proc_entry(DOM_MEM, pd); + + /* create new entry with parameters describing what to do + * when it is mmaped. + */ + file = create_proc_entry(DOM_MEM, 0600, pd); + if(file != NULL) + { + file->owner = THIS_MODULE; + file->nlink = 1; + file->proc_fops = &dom_mem_ops; + + memdata = (proc_memdata_t *)kmalloc(sizeof(proc_memdata_t), GFP_KERNEL); + memdata->pfn = pfn; + memdata->tot_pages = tot_pages; + file->data = memdata; + + ret = 0; + break; + } + + ret = -EAGAIN; + break; + } + pd = pd->next; + } + + return ret; +} + +/* function used to retrieve data associated with new domain */ +static ssize_t dom_data_read(struct file * file, char * buff, size_t size, loff_t * off) +{ + dom0_newdomain_t * dom_data = (dom0_newdomain_t *) + ((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + + copy_to_user((dom0_newdomain_t *)buff, dom_data, sizeof(dom0_newdomain_t)); + + remove_proc_entry(DOM0_NEWDOM, xeno_base); + + kfree(dom_data); + + return sizeof(dom0_newdomain_t); +} + +struct file_operations newdom_data_fops = { + read: dom_data_read, +}; + +static int cmd_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + dom0_op_t op; + int ret = 0; + struct proc_dir_entry * new_dom_id; + dom0_newdomain_t * params; + int i; + unsigned long p; + + 
copy_from_user(&op, buffer, sizeof(dom0_op_t)); + + /* do some sanity checks */ + if(op.cmd > MAX_CMD){ + ret = -ENOSYS; + goto out; + } + + if ( op.cmd == MAP_DOM_MEM ) + { + ret = dom_map_mem(op.u.dommem.domain, op.u.dommem.start_pfn, + op.u.dommem.tot_pages); + } + else if ( op.cmd == DO_PGUPDATES ) + { + ret = HYPERVISOR_pt_update(op.u.pgupdate.pgt_update_arr, + op.u.pgupdate.num_pgt_updates); + } + else + { + ret = HYPERVISOR_dom0_op(&op); + + /* if new domain created, create proc entries */ + if(op.cmd == DOM0_NEWDOMAIN){ + create_proc_dom_entries(ret); + + params = (dom0_newdomain_t *)kmalloc(sizeof(dom0_newdomain_t), + GFP_KERNEL); + params->memory_kb = op.u.newdomain.memory_kb; + params->pg_head = op.u.newdomain.pg_head; + params->num_vifs = op.u.newdomain.num_vifs; + params->domain = op.u.newdomain.domain; + + /* now notify user space of the new domain's id */ + new_dom_id = create_proc_entry(DOM0_NEWDOM, 0600, xeno_base); + if ( new_dom_id != NULL ) + { + new_dom_id->owner = THIS_MODULE; + new_dom_id->nlink = 1; + new_dom_id->proc_fops = &newdom_data_fops; + new_dom_id->data = (void *)params; + } + + } + + } + +out: + return ret; + +} + +static int __init init_module(void) +{ + /* xeno proc root setup */ + xeno_base = proc_mkdir(XENO_BASE, &proc_root); + + /* xeno control interface */ + *readbuf = '\0'; + dom0_cmd_intf = create_proc_entry (DOM0_CMD_INTF, 0600, xeno_base); + if ( dom0_cmd_intf != NULL ) + { + dom0_cmd_intf->owner = THIS_MODULE; + dom0_cmd_intf->nlink = 1; + dom0_cmd_intf->read_proc = cmd_read_proc; + dom0_cmd_intf->write_proc = cmd_write_proc; + } + + /* set up /proc entries for dom 0 */ + create_proc_dom_entries(0); + + return 0; +} + + +static void __exit cleanup_module(void) +{ + if ( dom0_cmd_intf == NULL ) return; + remove_proc_entry("dom0", &proc_root); + dom0_cmd_intf = NULL; +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c 
b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c new file mode 100644 index 0000000000..9d14070a1e --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c @@ -0,0 +1,368 @@ +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> +#include <linux/swapctl.h> +#include <linux/iobuf.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/list.h> + +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> +#include <asm/mmu.h> + +#include "dom0_ops.h" + +#define MAP_CONT 0 +#define MAP_DISCONT 1 + +extern struct list_head * find_direct(struct list_head *, unsigned long); + +/* + * bd240: functions below perform direct mapping to the real physical pages + * needed for mapping various hypervisor specific structures needed in dom0 + * userspace by various management applications such as domain builder etc. + */ + +#define direct_set_pte(pteptr, pteval) queue_l1_entry_update(__pa(pteptr)|PGREQ_UNCHECKED_UPDATE, (pteval).pte_low) + +#define direct_pte_clear(pteptr) queue_l1_entry_update(__pa(pteptr)|PGREQ_UNCHECKED_UPDATE, 0) + +#define __direct_pte(x) ((pte_t) { (x) } ) +#define __direct_mk_pte(page_nr,pgprot) __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) +#define direct_mk_pte_phys(physpage, pgprot) __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) + +static inline void forget_pte(pte_t page) +{ + if (!pte_none(page)) { + printk("forget_pte: old mapping existed!\n"); + BUG(); + } +} + +static inline void direct_remappte_range(pte_t * pte, unsigned long address, unsigned long size, + unsigned long phys_addr, pgprot_t prot) +{ + unsigned long end; + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + do { + pte_t oldpage; + oldpage = ptep_get_and_clear(pte); + + direct_set_pte(pte, direct_mk_pte_phys(phys_addr, prot)); + + forget_pte(oldpage); + address += PAGE_SIZE; + 
phys_addr += PAGE_SIZE; + pte++; + } while (address && (address < end)); + +} + +static inline int direct_remappmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, + unsigned long phys_addr, pgprot_t prot) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + phys_addr -= address; + do { + pte_t * pte = pte_alloc(mm, pmd, address); + if (!pte) + return -ENOMEM; + direct_remappte_range(pte, address, end - address, address + phys_addr, prot); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +/* Note: this is only safe if the mm semaphore is held when called. */ +int direct_remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot) +{ + int error = 0; + pgd_t * dir; + unsigned long beg = from; + unsigned long end = from + size; + struct mm_struct *mm = current->mm; + + phys_addr -= from; + dir = pgd_offset(mm, from); + flush_cache_range(mm, beg, end); + if (from >= end) + BUG(); + + spin_lock(&mm->page_table_lock); + do { + pmd_t *pmd = pmd_alloc(mm, dir, from); + error = -ENOMEM; + if (!pmd) + break; + error = direct_remappmd_range(mm, pmd, from, end - from, phys_addr + from, prot); + if (error) + break; + from = (from + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (from && (from < end)); + spin_unlock(&mm->page_table_lock); + flush_tlb_range(mm, beg, end); + return error; +} + +/* + * used for remapping discontiguous bits of domain's memory, pages to map are + * found from frame table beginning at the given first_pg index + */ +int direct_remap_disc_page_range(unsigned long from, + unsigned long first_pg, int tot_pages, pgprot_t prot) +{ + dom0_op_t dom0_op; + unsigned long *pfns = get_free_page(GFP_KERNEL); + unsigned long start = from; + int pages, i; + + while ( tot_pages != 0 ) + { + dom0_op.cmd = DOM0_GETMEMLIST; + dom0_op.u.getmemlist.start_pfn = first_pg; + 
pages = 1023; + dom0_op.u.getmemlist.num_pfns = 1024; + if ( tot_pages < 1024 ) + dom0_op.u.getmemlist.num_pfns = pages = tot_pages; + dom0_op.u.getmemlist.buffer = pfns; + (void)HYPERVISOR_dom0_op(&dom0_op); + first_pg = pfns[1023]; + + for ( i = 0; i < pages; i++ ) + { + if(direct_remap_page_range(start, pfns[i] << PAGE_SHIFT, + PAGE_SIZE, prot)) + goto out; + start += PAGE_SIZE; + tot_pages--; + } + } + +out: + free_page(pfns); + return tot_pages; +} + +/* below functions replace standard sys_mmap and sys_munmap which are absolutely useless + * for direct memory mapping. direct_zap* functions are minor ammendments to the + * original versions in mm/memory.c. the changes are to enable unmapping of real physical + * addresses. + */ + +unsigned long direct_mmap(unsigned long phys_addr, unsigned long size, + pgprot_t prot, int flag, int tot_pages) +{ + direct_mmap_node_t * dmmap; + struct list_head * entry; + unsigned long addr; + int ret = 0; + + if(!capable(CAP_SYS_ADMIN)){ + ret = -EPERM; + goto out; + } + + /* get unmapped area invokes xen specific arch_get_unmapped_area */ + addr = get_unmapped_area(NULL, 0, size, 0, 0); + if(addr & ~PAGE_MASK){ + ret = -ENOMEM; + goto out; + } + + /* add node on the list of directly mapped areas, make sure the + * list remains sorted. 
+ */ + dmmap = (direct_mmap_node_t *)kmalloc(sizeof(direct_mmap_node_t), GFP_KERNEL); + dmmap->vm_start = addr; + dmmap->vm_end = addr + size; + entry = find_direct(¤t->mm->context.direct_list, addr); + if(entry != ¤t->mm->context.direct_list){ + list_add_tail(&dmmap->list, entry); + } else { + list_add_tail(&dmmap->list, ¤t->mm->context.direct_list); + } + + /* and perform the mapping */ + if(flag == MAP_DISCONT){ + ret = direct_remap_disc_page_range(addr, phys_addr >> PAGE_SHIFT, + tot_pages, prot); + } else { + ret = direct_remap_page_range(addr, phys_addr, size, prot); + } + + if(ret == 0) + ret = addr; + +out: + return ret; +} + +/* most of the checks, refcnt updates, cache stuff have been thrown out as they are not + * needed + */ +static inline int direct_zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, + unsigned long size) +{ + unsigned long offset; + pte_t * ptep; + int freed = 0; + + if (pmd_none(*pmd)) + return 0; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + return 0; + } + ptep = pte_offset(pmd, address); + offset = address & ~PMD_MASK; + if (offset + size > PMD_SIZE) + size = PMD_SIZE - offset; + size &= PAGE_MASK; + for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { + pte_t pte = *ptep; + if (pte_none(pte)) + continue; + freed ++; + direct_pte_clear(ptep); + } + + return freed; +} + +static inline int direct_zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, + unsigned long address, unsigned long size) +{ + pmd_t * pmd; + unsigned long end; + int freed; + + if (pgd_none(*dir)) + return 0; + if (pgd_bad(*dir)) { + pgd_ERROR(*dir); + pgd_clear(dir); + return 0; + } + pmd = pmd_offset(dir, address); + end = address + size; + if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) + end = ((address + PGDIR_SIZE) & PGDIR_MASK); + freed = 0; + do { + freed += direct_zap_pte_range(tlb, pmd, address, end - address); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address < end); + return freed; +} + +/* + * 
remove user pages in a given range. + */ +void direct_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) +{ + mmu_gather_t *tlb; + pgd_t * dir; + unsigned long start = address, end = address + size; + int freed = 0; + + dir = pgd_offset(mm, address); + + /* + * This is a long-lived spinlock. That's fine. + * There's no contention, because the page table + * lock only protects against kswapd anyway, and + * even if kswapd happened to be looking at this + * process we _want_ it to get stuck. + */ + if (address >= end) + BUG(); + spin_lock(&mm->page_table_lock); + flush_cache_range(mm, address, end); + tlb = tlb_gather_mmu(mm); + + do { + freed += direct_zap_pmd_range(tlb, dir, address, end - address); + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + + /* this will flush any remaining tlb entries */ + tlb_finish_mmu(tlb, start, end); + + /* decrementing rss removed */ + + spin_unlock(&mm->page_table_lock); +} + +int direct_unmap(unsigned long addr, unsigned long size) +{ + direct_mmap_node_t * node; + struct list_head * curr; + struct list_head * direct_list = ¤t->mm->context.direct_list; + + curr = direct_list->next; + while(curr != direct_list){ + node = list_entry(curr, direct_mmap_node_t, list); + if(node->vm_start == addr) + break; + curr = curr->next; + } + + if(curr == direct_list) + return -1; + + list_del(&node->list); + kfree(node); + + direct_zap_page_range(current->mm, addr, size); + + return 0; +} + +int direct_disc_unmap(unsigned long from, unsigned long first_pg, int tot_pages) +{ + int count = 0; + direct_mmap_node_t * node; + struct list_head * curr; + struct list_head * direct_list = ¤t->mm->context.direct_list; + + curr = direct_list->next; + while(curr != direct_list){ + node = list_entry(curr, direct_mmap_node_t, list); + + if(node->vm_start == from) + break; + curr = curr->next; + } + + if(curr == direct_list) + return -1; + + list_del(&node->list); + kfree(node); + + 
while(count < tot_pages){ + direct_zap_page_range(current->mm, from, PAGE_SIZE); + from += PAGE_SIZE; + count++; + } + + return 0; +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h new file mode 100644 index 0000000000..d98ce1b1eb --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h @@ -0,0 +1,80 @@ +/****************************************************************************** + * dom0_ops.h + * + * Process command requests from domain-0 guest OS. + * + * Copyright (c) 2002, K A Fraser, B Dragovic + */ + +#define DOM0_NEWDOMAIN 0 +#define DOM0_KILLDOMAIN 1 +#define DOM0_GETMEMLIST 2 +#define DOM0_STARTDOM 4 +#define MAP_DOM_MEM 6 /* Not passed down to Xen */ +#define DO_PGUPDATES 7 /* Not passed down to Xen */ +#define MAX_CMD 8 + +#define MAX_CMD_LEN 256 + +typedef struct dom0_newdomain_st +{ + unsigned int domain; + unsigned int memory_kb; + unsigned int num_vifs; // temporary + unsigned long pg_head; // return parameter +} dom0_newdomain_t; + +typedef struct dom0_killdomain_st +{ + unsigned int domain; +} dom0_killdomain_t; + +typedef struct dom0_getmemlist_st +{ + unsigned long start_pfn; + unsigned long num_pfns; + void *buffer; +} dom0_getmemlist_t; + +/* This is entirely processed by XenoLinux */ +typedef struct dom_mem +{ + unsigned int domain; + unsigned long vaddr; + unsigned long start_pfn; + int tot_pages; +} dom_mem_t; + +/* This is entirely processed by XenoLinux */ +typedef struct dom_pgupdate +{ + unsigned long pgt_update_arr; + unsigned long num_pgt_updates; +} dom_pgupdate_t; + +typedef struct domain_launch +{ + unsigned int domain; + unsigned long l2_pgt_addr; + unsigned long virt_load_addr; + unsigned long virt_shinfo_addr; + unsigned long virt_startinfo_addr; + unsigned int num_vifs; + char cmd_line[MAX_CMD_LEN]; +} dom_meminfo_t; + +typedef struct dom0_op_st +{ + unsigned long cmd; + union + { + dom0_newdomain_t 
newdomain; + dom0_killdomain_t killdomain; + dom0_getmemlist_t getmemlist; + dom_mem_t dommem; + dom_pgupdate_t pgupdate; + dom_meminfo_t meminfo; + } + u; +} dom0_op_t; + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c new file mode 100644 index 0000000000..13fe25ec9c --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c @@ -0,0 +1,306 @@ +/****************************************************************************** + * vfr.c + * + * Interface to the virtual firewall/router. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> + +#include <asm/hypervisor-ifs/network.h> + +static struct proc_dir_entry *proc_vfr; + +static unsigned char readbuf[1024]; + +/* Helpers, implemented at the bottom. */ +u32 getipaddr(const char *buff, unsigned int len); +u16 antous(const char *buff, int len); +int anton(const char *buff, int len); + +static int vfr_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + +/* The format for the vfr interface is as follows: + * + * COMMAND <field>=<val> [<field>=<val> [...]] + * + * where: + * + * COMMAND = { ACCEPT | COUNT } + * + * field=val pairs are as follows: + * + * field = { srcaddr | dstaddr } + * val is a dot seperated, numeric IP address. 
+ * + * field = { srcport | dstport } + * val is a (16-bit) unsigned int + * + * field = { proto } + * val = { IP | TCP | UDP | ARP } + * + */ + +#define isspace(_x) ( ((_x)==' ') || ((_x)=='\t') || ((_x)=='\v') || \ + ((_x)=='\f') || ((_x)=='\r') || ((_x)=='\n') ) + +static int vfr_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + network_op_t op; + int ret, len; + int ts, te, tl; // token start, end, and length + int fs, fe, fl; // field. + + len = count; + ts = te = 0; + + memset(&op, 0, sizeof(network_op_t)); + + // get the command: + while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) + { + op.cmd = NETWORK_OP_ADDRULE; + } + else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) + { + op.cmd = NETWORK_OP_DELETERULE; + } + else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) + { + op.cmd = NETWORK_OP_GETRULELIST; + goto doneparsing; + } + + ts = te; + + // get the action + while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + goto keyval; + } + if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_COUNT; + goto keyval; + } + + // default case; + return (len); + + + // get the key=val pairs. + keyval: + while (count) + { + //get field + ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } + te = ts; + while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) + { te++; count--; } + if ( te <= ts ) + goto doneparsing; + tl = te - ts; + fs = ts; fe = te; fl = tl; // save the field markers. + // skip " = " (ignores extra equals.) 
+ while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) + { te++; count--; } + ts = te; + while ( count && !isspace(buffer[te]) ) { te++; count--; } + tl = te - ts; + + if ( (fl <= 0) || (tl <= 0) ) goto bad; + + if (strncmp(&buffer[fs], "srcaddr", fl) == 0) + { + op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) + { + op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) + { + op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) + { + op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcport", fl) == 0) + { + op.u.net_rule.src_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstport", fl) == 0) + { + op.u.net_rule.dst_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) + { + op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) + { + op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcint", fl) == 0) + { + op.u.net_rule.src_interface = anton(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstint", fl) == 0) + { + op.u.net_rule.dst_interface = anton(&buffer[ts], tl); + } + else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) + { + if (strncmp(&buffer[ts], "any", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ANY; + if (strncmp(&buffer[ts], "ip", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_IP; + if (strncmp(&buffer[ts], "tcp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_TCP; + if (strncmp(&buffer[ts], "udp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_UDP; + if (strncmp(&buffer[ts], "arp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ARP; + + } + } + + doneparsing: + ret = HYPERVISOR_network_op(&op); + return(len); + + bad: + return(len); + + +} 
+ +static int __init init_module(void) +{ + *readbuf = '\0'; + proc_vfr = create_proc_entry ("vfr", 0600, &proc_root); + if ( proc_vfr != NULL ) + { + proc_vfr->owner = THIS_MODULE; + proc_vfr->nlink = 1; + proc_vfr->read_proc = vfr_read_proc; + proc_vfr->write_proc = vfr_write_proc; + printk("Successfully installed virtual firewall/router interface\n"); + } + return 0; +} + +static void __exit cleanup_module(void) +{ + if ( proc_vfr == NULL ) return; + remove_proc_entry("vfr", &proc_root); + proc_vfr = NULL; +} + +module_init(init_module); +module_exit(cleanup_module); + +/* Helper functions start here: */ + +int anton(const char *buff, int len) +{ + int ret; + char c; + int sign = 1; + + ret = 0; + + if (len == 0) return 0; + if (*buff == '-') { sign = -1; buff++; len--; } + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + ret *= sign; + return ret; +} + +u16 antous(const char *buff, int len) +{ + u16 ret; + char c; + + ret = 0; + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + return ret; +} + +u32 getipaddr(const char *buff, unsigned int len) +{ + int i; + char c; + u32 ret, val; + + ret = 0; val = 0; + + while ( len ) + { + if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) + { + return(0); // malformed. + } + + if ( c == '.' ) { + if (val > 255) return (0); //malformed. 
+ ret = ret << 8; + ret += val; + val = 0; + len--; buff++; + continue; + } + val *= 10; + val += c - '0'; + buff++; len--; + } + ret = ret << 8; + ret += val; + + return (ret); +} + diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile new file mode 100644 index 0000000000..b44a288a5b --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile @@ -0,0 +1,3 @@ +O_TARGET := net.o +obj-y := network.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c new file mode 100644 index 0000000000..4c4ace1006 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c @@ -0,0 +1,443 @@ +/****************************************************************************** + * network.c + * + * Virtual network driver for XenoLinux. + * + * Copyright (c) 2002, K A Fraser + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <net/sock.h> + +#define NET_TX_IRQ _EVENT_NET_TX +#define NET_RX_IRQ _EVENT_NET_RX + +#define TX_MAX_ENTRIES (TX_RING_SIZE - 2) +#define RX_MAX_ENTRIES (RX_RING_SIZE - 2) + +#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) +#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) + +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ + +static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_int(int irq, void 
*dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void network_alloc_rx_buffers(struct net_device *dev); +static void network_free_rx_buffers(struct net_device *dev); +static void cleanup_module(void); + +static struct list_head dev_list; + +/* + * RX RING: RX_IDX <= rx_cons <= rx_prod + * TX RING: TX_IDX <= tx_cons <= tx_prod + * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor) + */ +struct net_private +{ + struct list_head list; + struct net_device *dev; + + struct net_device_stats stats; + struct sk_buff **tx_skb_ring; + struct sk_buff **rx_skb_ring; + atomic_t tx_entries; + unsigned int rx_idx, tx_idx, tx_full; + net_ring_t *net_ring; + spinlock_t tx_lock; +}; + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + int error = 0; + + np->rx_idx = np->tx_idx = np->tx_full = 0; + + memset(&np->stats, 0, sizeof(np->stats)); + + spin_lock_init(&np->tx_lock); + + atomic_set(&np->tx_entries, 0); + + np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0; + np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0; + np->net_ring->tx_ring = NULL; + np->net_ring->rx_ring = NULL; + + np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *), + GFP_KERNEL); + np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *), + GFP_KERNEL); + np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), + GFP_KERNEL); + np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), + GFP_KERNEL); + if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) || + (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) ) + { + printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name); + error = -ENOBUFS; + goto fail; + } + + network_alloc_rx_buffers(dev); + + error = request_irq(NET_RX_IRQ, network_rx_int, 0, + "net-rx", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Could not 
allocate receive interrupt\n", + dev->name); + network_free_rx_buffers(dev); + goto fail; + } + + error = request_irq(NET_TX_IRQ, network_tx_int, 0, + "net-tx", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n", + dev->name); + free_irq(NET_RX_IRQ, dev); + network_free_rx_buffers(dev); + goto fail; + } + + printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; + + fail: + if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring); + if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring); + if ( np->rx_skb_ring ) kfree(np->rx_skb_ring); + if ( np->tx_skb_ring ) kfree(np->tx_skb_ring); + kfree(np); + return error; +} + + +static void network_tx_buf_gc(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned long flags; + + spin_lock_irqsave(&np->tx_lock, flags); + + for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) ) + { + skb = np->tx_skb_ring[i]; + dev_kfree_skb_any(skb); + atomic_dec(&np->tx_entries); + } + + np->tx_idx = i; + + if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) ) + { + np->tx_full = 0; + netif_wake_queue(dev); + } + + spin_unlock_irqrestore(&np->tx_lock, flags); +} + +inline unsigned long get_ppte(unsigned long addr) +{ + unsigned long ppte; + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; + pgd = pgd_offset_k(addr); + + if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); + + pmd = pmd_offset(pgd, addr); + if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); + + ptep = pte_offset(pmd, addr); + ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep)); + + return ppte; +} + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES); + + for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) ) + { + 
skb = dev_alloc_skb(RX_BUF_SIZE); + if ( skb == NULL ) break; + skb->dev = dev; + skb_reserve(skb, 2); /* word align the IP header */ + np->rx_skb_ring[i] = skb; + np->net_ring->rx_ring[i].addr = get_ppte((unsigned long)skb->head); + np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */ + } + + np->net_ring->rx_prod = i; + + np->net_ring->rx_event = RX_RING_INC(np->rx_idx); + + HYPERVISOR_net_update(); +} + + +static void network_free_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) ) + { + skb = np->rx_skb_ring[i]; + dev_kfree_skb(skb); + } +} + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int i; + struct net_private *np = (struct net_private *)dev->priv; + + if ( np->tx_full ) + { + printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + i = np->net_ring->tx_prod; + + if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE ) + { + struct sk_buff *new_skb = alloc_skb(RX_BUF_SIZE, GFP_KERNEL); + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + kfree_skb(skb); + skb = new_skb; + } + + np->tx_skb_ring[i] = skb; + np->net_ring->tx_ring[i].addr = + (unsigned long)phys_to_machine(virt_to_phys(skb->data)); + np->net_ring->tx_ring[i].size = skb->len; + np->net_ring->tx_prod = TX_RING_INC(i); + atomic_inc(&np->tx_entries); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + spin_lock_irq(&np->tx_lock); + if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES ) + { + np->tx_full = 1; + netif_stop_queue(dev); + np->net_ring->tx_event = + TX_RING_ADD(np->tx_idx, atomic_read(&np->tx_entries) >> 1); + } + else + { + /* Avoid unnecessary tx interrupts. 
*/ + np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod); + } + spin_unlock_irq(&np->tx_lock); + + /* Must do this after setting tx_event: race with updates of tx_cons. */ + network_tx_buf_gc(dev); + + HYPERVISOR_net_update(); + + return 0; +} + + +static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + unsigned int i; + struct net_device *dev = (struct net_device *)dev_id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + again: + for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) ) + { + if (np->net_ring->rx_ring[i].status != RING_STATUS_OK) + { + printk("bad buffer on RX ring!(%d)\n", + np->net_ring->rx_ring[i].status); + continue; + } + skb = np->rx_skb_ring[i]; + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)phys_to_virt( + machine_to_phys(np->net_ring->rx_ring[i].addr)) + ) >> PAGE_SHIFT; + + skb_put(skb, np->net_ring->rx_ring[i].size); + skb->protocol = eth_type_trans(skb, dev); + + /* + * Set up shinfo -- from alloc_skb This was particularily nasty: the + * shared info is hidden at the back of the data area (presumably so it + * can be shared), but on page flip it gets very spunked. + */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + np->stats.rx_packets++; + + np->stats.rx_bytes += np->net_ring->rx_ring[i].size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_idx = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. 
*/ + smp_mb(); + if ( np->net_ring->rx_cons != i ) goto again; +} + + +static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + struct net_device *dev = (struct net_device *)dev_id; + network_tx_buf_gc(dev); +} + + +int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + netif_stop_queue(dev); + + free_irq(NET_RX_IRQ, dev); + free_irq(NET_TX_IRQ, dev); + + /* + * XXXX This cannot be done safely until be have a proper interface + * for setting up and tearing down virtual interfaces on the fly. + * Currently the receive buffers are locked down by Xen and we have + * no sensible way of retrieving them. + */ +#if 0 + network_free_rx_buffers(dev); + kfree(np->net_ring->rx_ring); + kfree(np->net_ring->tx_ring); +#endif + + kfree(np->rx_skb_ring); + kfree(np->tx_skb_ring); + + MOD_DEC_USE_COUNT; + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +int __init init_module(void) +{ + int i, err; + struct net_device *dev; + struct net_private *np; + + INIT_LIST_HEAD(&dev_list); + + for ( i = 0; i < start_info.num_net_rings; i++ ) + { + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->net_ring = start_info.net_rings + i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + memset(dev->dev_addr, 0, ETH_ALEN); + *(unsigned int *)(dev->dev_addr + 1) = i; + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = 
list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } +} + + +module_init(init_module); +module_exit(cleanup_module); |