diff options
Diffstat (limited to 'xenolinux-2.4.24-sparse/arch/xeno/drivers')
13 files changed, 2583 insertions, 0 deletions
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/balloon/Makefile b/xenolinux-2.4.24-sparse/arch/xeno/drivers/balloon/Makefile new file mode 100644 index 0000000000..f780a515e0 --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/balloon/Makefile @@ -0,0 +1,3 @@ +O_TARGET := balloon_driver.o +obj-y := balloon.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/balloon/balloon.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/balloon/balloon.c new file mode 100644 index 0000000000..b7e6802077 --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/balloon/balloon.c @@ -0,0 +1,282 @@ +/****************************************************************************** + * balloon.c + * + * Xeno balloon driver - enables returning/claiming memory to/from xen + * + * Copyright (c) 2003, B Dragovic + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <asm/xeno_proc.h> + +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/smp_lock.h> +#include <linux/pagemap.h> + +#include <asm/hypervisor.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> + +#include <asm/hypervisor-ifs/dom_mem_ops.h> + +/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */ +#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */ +#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */ +typedef struct user_balloon_op { + unsigned int op; + unsigned long size; +} user_balloon_op_t; +/* END OF USER DEFINE */ + +/* Dead entry written into ballon-owned entries in the PMT. 
*/
+#define DEAD 0xdeadbeef
+
+static struct proc_dir_entry *balloon_pde;
+unsigned long credit;
+
+/*
+ * Return a pointer to the kernel PTE that maps virtual address 'addr'.
+ * The pgd and pmd entries covering 'addr' must be present and sane;
+ * anything else trips BUG().
+ */
+static inline pte_t *get_ptep(unsigned long addr)
+{
+    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
+    pgd = pgd_offset_k(addr);
+
+    if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
+
+    pmd = pmd_offset(pgd, addr);
+    if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
+
+    ptep = pte_offset(pmd, addr);
+
+    return ptep;
+}
+
+/*
+ * Main function for relinquishing a bit of memory.
+ *
+ * Grabs 'num_pages' free pages, clears their kernel mappings, marks their
+ * phys_to_machine_mapping[] slots DEAD and passes the machine frame numbers
+ * to the hypervisor with MEMOP_RESERVATION_DECREASE.
+ *
+ * Returns 'num_pages' on success (and adds that amount to 'credit').
+ * Returns 0 when nothing could be released, or the smaller count reported
+ * by the hypervisor, so that callers can test "result < num_pages".
+ */
+static unsigned long inflate_balloon(unsigned long num_pages)
+{
+    dom_mem_op_t dom_mem_op;
+    unsigned long *parray;
+    unsigned long *currp;
+    unsigned long curraddr;
+    unsigned long ret = 0;
+    unsigned long vaddr;
+    unsigned long i, j;
+
+    /* Nothing to do, or the frame array size would overflow. */
+    if ( (num_pages == 0) || (num_pages > (~0UL / sizeof(unsigned long))) )
+        return 0;
+
+    parray = kmalloc(num_pages * sizeof(unsigned long), GFP_KERNEL);
+    if ( parray == NULL )
+    {
+        printk("Unable to inflate balloon: no memory for page array.\n");
+        return 0;
+    }
+    currp = parray;
+
+    for ( i = 0; i < num_pages; i++ )
+    {
+        /* try to obtain a free page, has to be done with GFP_ATOMIC
+         * as we do not want to sleep indefinitely.
+         */
+        vaddr = __get_free_page(GFP_ATOMIC);
+
+        /* if allocation fails, free all reserved pages */
+        if ( !vaddr )
+        {
+            printk("Unable to inflate balloon by %ld, only %ld pages free.\n",
+                   num_pages, i);
+            currp = parray;
+            for ( j = 0; j < i; j++ )
+                free_page(*currp++);
+            goto cleanup;
+        }
+
+        *currp++ = vaddr;
+    }
+
+    /*
+     * Replace each virtual address in the array by its machine frame
+     * number, unmap the page and mark its pseudo-physical slot DEAD.
+     */
+    currp = parray;
+    for ( i = 0; i < num_pages; i++ )
+    {
+        curraddr = *currp;
+        *currp = virt_to_machine(*currp) >> PAGE_SHIFT;
+        queue_l1_entry_update(get_ptep(curraddr), 0);
+        phys_to_machine_mapping[__pa(curraddr) >> PAGE_SHIFT] = DEAD;
+        currp++;
+    }
+
+    XENO_flush_page_update_queue();
+
+    dom_mem_op.op = MEMOP_RESERVATION_DECREASE;
+    dom_mem_op.u.decrease.size  = num_pages;
+    dom_mem_op.u.decrease.pages = parray;
+    ret = HYPERVISOR_dom_mem_op(&dom_mem_op);
+    if ( ret != num_pages )
+    {
+        printk("Unable to inflate balloon, error %lx\n", ret);
+        /*
+         * A negative error value looks like a huge page count once it is
+         * stored in an unsigned long; report it as "nothing released" so
+         * that callers comparing against num_pages notice the failure.
+         * NOTE(review): the pages were already unmapped and marked DEAD
+         * above and are not recovered on this path -- confirm what the
+         * hypervisor guarantees on a failed/partial decrease.
+         */
+        if ( ret > num_pages )
+            ret = 0;
+        goto cleanup;
+    }
+
+    credit += num_pages;
+    ret = num_pages;
+
+ cleanup:
+    kfree(parray);
+
+    return ret;
+}
+
+/* install new mem pages obtained by deflate_balloon.
function walks
+ * phys->machine mapping table looking for DEAD entries and populates
+ * them.
+ *
+ * 'parray' holds 'num' machine frame numbers on entry; the entries that get
+ * installed are overwritten with the kernel virtual address they are now
+ * mapped at. Returns the number of frames actually installed.
+ */
+static unsigned long process_new_pages(unsigned long * parray,
+                                       unsigned long num)
+{
+    /* currently, this function is rather simplistic as
+     * it is assumed that domain reclaims only number of
+     * pages previously released. this is to change soon
+     * and the code to extend page tables etc. will be
+     * incorporated here.
+     */
+
+    unsigned long tot_pages = start_info.nr_pages;
+    unsigned long * curr = parray;
+    unsigned long num_installed;
+    unsigned long i;
+
+    num_installed = 0;
+    for ( i = 0; (i < tot_pages) && (num_installed < num); i++ )
+    {
+        if ( phys_to_machine_mapping[i] == DEAD )
+        {
+            phys_to_machine_mapping[i] = *curr;
+            /*
+             * NOTE(review): the MACHPHYS update is addressed with the
+             * pseudo-physical frame 'i' rather than the machine frame
+             * '*curr' -- confirm against the MMU_MACHPHYS_UPDATE
+             * definition in the hypervisor interface.
+             */
+            queue_l1_entry_update(
+                (pte_t *)((i << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE), i);
+            queue_l1_entry_update(
+                get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
+                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
+
+            *curr = (unsigned long)__va(i << PAGE_SHIFT);
+            curr++;
+            num_installed++;
+        }
+    }
+
+    /* now, this is tricky (and will also change for machine addrs that
+     * are mapped to not previously released addresses). we free pages
+     * that were allocated by get_free_page (the mappings are different
+     * now, of course).
+     */
+    curr = parray;
+    for ( i = 0; i < num_installed; i++ )
+    {
+        free_page(*curr);
+        curr++;
+    }
+
+    return num_installed;
+}
+
+/*
+ * Claim 'num_pages' pages back from the hypervisor and install them in the
+ * slots previously vacated by inflate_balloon().
+ *
+ * Returns 'num_pages' on success. On failure the result differs from
+ * 'num_pages': a negative errno converted to unsigned long (-EAGAIN when
+ * the request exceeds 'credit', -ENOMEM when the frame array cannot be
+ * allocated), the hypervisor's return value, or the smaller number of
+ * pages that could be installed.
+ */
+unsigned long deflate_balloon(unsigned long num_pages)
+{
+    dom_mem_op_t dom_mem_op;
+    unsigned long ret;
+    unsigned long * parray;
+
+    printk(KERN_ALERT "bd240 debug: deflate balloon called for %lx pages\n", num_pages);
+
+    if ( num_pages > credit )
+    {
+        printk("Can not allocate more pages than previously released.\n");
+        return -EAGAIN;
+    }
+
+    if ( num_pages == 0 )
+        return 0;
+
+    parray = kmalloc(num_pages * sizeof(unsigned long), GFP_KERNEL);
+    if ( parray == NULL )
+    {
+        printk("Unable to deflate balloon: no memory for page array.\n");
+        return -ENOMEM;
+    }
+
+    dom_mem_op.op = MEMOP_RESERVATION_INCREASE;
+    dom_mem_op.u.increase.size  = num_pages;
+    dom_mem_op.u.increase.pages = parray;
+    if ( (ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != num_pages )
+    {
+        printk("Unable to deflate balloon, error %lx\n", ret);
+        goto cleanup;
+    }
+
+    if ( (ret = process_new_pages(parray, num_pages)) < num_pages )
+    {
+        printk("Unable to deflate balloon by specified %lx pages, only %lx.\n",
+               num_pages, ret);
+        /*
+         * The pages that were installed did consume DEAD slots, so account
+         * for them. NOTE(review): the frames that could not be installed
+         * are not handed back to the hypervisor here.
+         */
+        credit -= ret;
+        goto cleanup;
+    }
+
+    ret = num_pages;
+    credit -= num_pages;
+
+ cleanup:
+    kfree(parray);
+
+    return ret;
+}
+
+/*
+ * write_proc handler for the balloon proc entry. Expects a
+ * user_balloon_op_t at 'buffer' and performs the requested operation.
+ *
+ * Returns sizeof(user_balloon_op_t) on success, -EPERM without
+ * CAP_SYS_ADMIN, -EINVAL for a short write, -EFAULT for an unreadable
+ * buffer or unknown command, and -EAGAIN if the balloon could not be
+ * resized by the requested amount.
+ */
+static int balloon_write(struct file *file, const char *buffer,
+                         u_long count, void *data)
+{
+    user_balloon_op_t bop;
+
+    /* Only admin can play with the balloon :) */
+    if ( !capable(CAP_SYS_ADMIN) )
+        return -EPERM;
+
+    /* Never read past what the caller actually supplied. */
+    if ( count < sizeof(bop) )
+        return -EINVAL;
+
+    if ( copy_from_user(&bop, buffer, sizeof(bop)) )
+        return -EFAULT;
+
+    switch ( bop.op )
+    {
+    case USER_INFLATE_BALLOON:
+        if ( inflate_balloon(bop.size) < bop.size )
+            return -EAGAIN;
+        break;
+
+    case USER_DEFLATE_BALLOON:
+        /* Every failure mode of deflate_balloon() yields != bop.size. */
+        if ( deflate_balloon(bop.size) != bop.size )
+            return -EAGAIN;
+        break;
+
+    default:
+        printk("Unknown command to balloon driver.\n");
+        return -EFAULT;
+    }
+
+    return sizeof(bop);
+}
+
+/*
+ * main balloon driver initialization function.
+ *
+ * Resets the page credit and creates the "balloon" proc entry whose write
+ * handler is balloon_write(). Returns 0 on success or -ENOMEM if the proc
+ * entry could not be created.
+ */
+static int __init init_module(void)
+{
+    printk(KERN_ALERT "Starting Xeno Balloon driver\n");
+
+    credit = 0;
+
+    balloon_pde = create_xeno_proc_entry("balloon", 0600);
+    if ( balloon_pde == NULL )
+    {
+        printk(KERN_ALERT "Unable to create balloon driver proc entry!\n");
+        /* A bare -1 would be reported to the caller as -EPERM. */
+        return -ENOMEM;
+    }
+
+    balloon_pde->write_proc = balloon_write;
+
+    return 0;
+}
+
+/* Remove the "balloon" proc entry if init_module() created one. */
+static void __exit cleanup_module(void)
+{
+    if ( balloon_pde != NULL )
+    {
+        remove_xeno_proc_entry("balloon");
+        balloon_pde = NULL;
+    }
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
+
+
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/Makefile
new file mode 100644
index 0000000000..7c87e099c7
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := blk.o
+obj-y := xl_block.o xl_vbd.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c
new file mode 100644
index 0000000000..8271654f1d
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c
@@ -0,0 +1,586 @@
+/******************************************************************************
+ * xl_block.c
+ *
+ * Xenolinux virtual block-device driver.
+ *
+ */
+
+#include "xl_block.h"
+#include <linux/blk.h>
+#include <linux/cdrom.h>
+
+typedef unsigned char byte; /* from linux/ide.h */
+
+#define XLBLK_RESPONSE_IRQ _EVENT_BLKDEV
+#define DEBUG_IRQ          _EVENT_DEBUG
+
+#define STATE_ACTIVE    0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED    2
+static unsigned int state = STATE_SUSPENDED;
+
+static blk_ring_t *blk_ring;
+static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
+static BLK_RING_IDX req_prod;  /* Private request producer.
*/
+
+#define XDI_MAX 64
+static xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */
+
+/* We plug the I/O ring if the driver is suspended or if the ring is full. */
+#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
+                      (state != STATE_ACTIVE))
+
+/*
+ * Request queues with outstanding work, but ring is currently full.
+ * We need no special lock here, as we always access this with the
+ * io_request_lock held. We only need a small maximum list.
+ */
+#define MAX_PENDING 8
+static request_queue_t *pending_queues[MAX_PENDING];
+static int nr_pending;
+
+/* State of the request currently open for scatter-gather merging. */
+static kdev_t        sg_dev;
+static int           sg_operation = -1;
+static unsigned long sg_next_sect;
+#define DISABLE_SCATTERGATHER() (sg_operation = -1)
+
+/*
+ * Publish the private request producer index in the shared ring and notify
+ * the hypervisor. Merging into the last request is disabled first, since
+ * that request is now visible to Xen.
+ */
+static inline void signal_requests_to_xen(void)
+{
+    block_io_op_t op;
+
+    DISABLE_SCATTERGATHER();
+    blk_ring->req_prod = req_prod;
+
+    op.cmd = BLOCK_IO_OP_SIGNAL;
+    HYPERVISOR_block_io_op(&op);
+    return;
+}
+
+/* Map a device number to its per-unit xl_disk_t in gd->real_devices. */
+static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
+{
+    struct gendisk *gd = get_gendisk(xldev);
+    return (xl_disk_t *)gd->real_devices +
+        (MINOR(xldev) >> gd->minor_shift);
+}
+
+
+/*
+ * Open a virtual block device. Fails with -ENXIO, -ENOMEDIUM or -ENODEV
+ * when the minor has zero sectors; otherwise bumps the unit's usage count.
+ */
+int xenolinux_block_open(struct inode *inode, struct file *filep)
+{
+    /* Keep the device number in its own type rather than a signed short. */
+    kdev_t          xldev = inode->i_rdev;
+    struct gendisk *gd = get_gendisk(xldev);
+    xl_disk_t      *disk = xldev_to_xldisk(inode->i_rdev);
+    short           minor = MINOR(xldev);
+
+    if ( gd->part[minor].nr_sects == 0 )
+    {
+        /*
+         * Device either doesn't exist, or has zero capacity; we use a few
+         * cheesy heuristics to return the relevant error code
+         */
+        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
+             ((minor & (gd->max_p - 1)) != 0) )
+        {
+            /*
+             * We have a real device, but no such partition, or we just have a
+             * partition number so guess this is the problem.
+             */
+            return -ENXIO;     /* no such device or address */
+        }
+        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
+        {
+            /* This is a removable device => assume that media is missing. */
+            return -ENOMEDIUM; /* media not present (this is a guess) */
+        }
+        else
+        {
+            /* Just go for the general 'no such device' error. */
+            return -ENODEV;    /* no such device */
+        }
+    }
+
+    disk->usage++;
+    DPRINTK("xenolinux_block_open\n");
+    return 0;
+}
+
+
+/* Release a virtual block device: drops the unit's usage count. */
+int xenolinux_block_release(struct inode *inode, struct file *filep)
+{
+    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+    disk->usage--;
+    DPRINTK("xenolinux_block_release\n");
+    return 0;
+}
+
+
+/*
+ * ioctl handler. All results are delivered through the user pointer in
+ * 'argument'; the return value is 0 or a negative errno (-ENOSYS for
+ * commands that are not handled here).
+ */
+int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
+                          unsigned command, unsigned long argument)
+{
+    kdev_t dev = inode->i_rdev;
+    struct hd_geometry *geo = (struct hd_geometry *)argument;
+    struct hd_big_geometry *big_geo = (struct hd_big_geometry *)argument;
+    struct gendisk *gd;
+    struct hd_struct *part;
+
+    /* NB. No need to check permissions. That is done for us. */
+
+    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+                  command, (long) argument, dev);
+
+    gd = get_gendisk(dev);
+    part = &gd->part[MINOR(dev)];
+
+    switch ( command )
+    {
+    case BLKGETSIZE:
+        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
+        return put_user(part->nr_sects, (unsigned long *) argument);
+
+    case BLKRRPART:                               /* re-read partition table */
+        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
+        return xenolinux_block_revalidate(dev);
+
+    case BLKSSZGET:
+        /* The sector size goes to user space, not into the return code. */
+        return put_user(hardsect_size[MAJOR(dev)][MINOR(dev)],
+                        (int *) argument);
+
+    case BLKBSZGET:                                        /* get block size */
+        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
+        break;
+
+    case BLKBSZSET:                                        /* set block size */
+        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
+        break;
+
+    case BLKRASET:                                         /* set read-ahead */
+        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
+        break;
+
+    case BLKRAGET:                                         /* get read-ahead */
+        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
+        break;
+
+    case HDIO_GETGEO:
+        /* note: these values are complete garbage */
+        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
+        if (!argument) return -EINVAL;
+        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
+        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
+        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
+        return 0;
+
+    case HDIO_GETGEO_BIG:
+        /*
+         * note: these values are complete garbage. The caller passes a
+         * struct hd_big_geometry, whose field offsets differ from struct
+         * hd_geometry, so it must be filled in through its own type.
+         */
+        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
+        if (!argument) return -EINVAL;
+        if (put_user(0x00,  (unsigned long *) &big_geo->start))
+            return -EFAULT;
+        if (put_user(0xff,  (byte *)&big_geo->heads)) return -EFAULT;
+        if (put_user(0x3f,  (byte *)&big_geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned int *) &big_geo->cylinders))
+            return -EFAULT;
+        return 0;
+
+    case CDROMMULTISESSION:
+        DPRINTK("FIXME: support multisession CDs later\n");
+        /* 'argument' is a user-space pointer: never memset() it directly. */
+        if ( clear_user((void *)argument,
+                        sizeof(struct cdrom_multisession)) )
+            return -EFAULT;
+        return 0;
+
+    default:
+        printk("ioctl %08x not supported by xl_block\n", command);
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
+/* check media change: should probably do something here in some cases :-) */
+int xenolinux_block_check(kdev_t dev)
+{
+    DPRINTK("xenolinux_block_check\n");
+    return 0;
+}
+
+/*
+ * Re-read the partition table of the unit containing 'dev'. Returns
+ * -EINVAL for zero-capacity minors and -EBUSY if the unit has more than
+ * one user; otherwise wipes the unit's partition state and re-runs
+ * grok_partitions().
+ */
+int xenolinux_block_revalidate(kdev_t dev)
+{
+    struct gendisk *gd = get_gendisk(dev);
+    xl_disk_t *disk = xldev_to_xldisk(dev);
+    unsigned long flags, capacity = gd->part[MINOR(dev)].nr_sects;
+    int i, disk_nr = MINOR(dev) >> gd->minor_shift;
+
+    DPRINTK("xenolinux_block_revalidate: %d\n", dev);
+
+    /*
+     * We didn't construct this VBD by reading a partition table. This
+     * function can only do bad things to us.
+     */
+    if ( capacity == 0 )
+        return -EINVAL;
+
+    spin_lock_irqsave(&io_request_lock, flags);
+    if ( disk->usage > 1 )
+    {
+        spin_unlock_irqrestore(&io_request_lock, flags);
+        return -EBUSY;
+    }
+    spin_unlock_irqrestore(&io_request_lock, flags);
+
+    for ( i = gd->max_p - 1; i >= 0; i-- )
+    {
+        invalidate_device(dev+i, 1);
+        gd->part[MINOR(dev+i)].start_sect = 0;
+        gd->part[MINOR(dev+i)].nr_sects   = 0;
+        gd->sizes[MINOR(dev+i)]           = 0;
+    }
+
+    /* XXX Should perhaps revalidate VBDs here */
+
+    grok_partitions(gd, disk_nr, gd->max_p, capacity);
+
+    return 0;
+}
+
+
+/*
+ * hypervisor_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
+ * buffer: buffer to read/write into. this should be a
+ *   virtual address in the guest os.
+ *
+ * Returns 0 if the request was queued (or merged into the previous ring
+ * entry), 1 if the ring is plugged or the driver is closed.
+ */
+static int hypervisor_request(unsigned long   id,
+                              int             operation,
+                              char *          buffer,
+                              unsigned long   sector_number,
+                              unsigned short  nr_sectors,
+                              kdev_t          device)
+{
+    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
+    struct gendisk *gd;
+    blk_ring_req_entry_t *req;
+    struct buffer_head *bh;
+
+    /* The sector count is OR-ed into the low 9 bits of the address. */
+    if ( unlikely(nr_sectors >= (1<<9)) )
+        BUG();
+    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
+        BUG();
+
+    if ( unlikely(state == STATE_CLOSED) )
+        return 1;
+
+    switch ( operation )
+    {
+
+    case XEN_BLOCK_READ:
+    case XEN_BLOCK_WRITE:
+        gd = get_gendisk(device);
+
+        /*
+         * Update the sector_number we'll pass down as appropriate; note that
+         * we could sanity check that resulting sector will be in this
+         * partition, but this will happen in xen anyhow.
+         */
+        sector_number += gd->part[MINOR(device)].start_sect;
+
+        /*
+         * If this unit doesn't consist of virtual (i.e., Xen-specified)
+         * partitions then we clear the partn bits from the device number.
+         */
+        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
+               GENHD_FL_VIRT_PARTNS) )
+            device &= ~(gd->max_p - 1);
+
+        if ( (sg_operation == operation) &&
+             (sg_dev == device) &&
+             (sg_next_sect == sector_number) )
+        {
+            /* Contiguous with the last request: add a segment to it. */
+            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
+            bh = (struct buffer_head *)id;
+            bh->b_reqnext = (struct buffer_head *)req->id;
+            req->id = id;
+            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
+            if ( ++req->nr_segments < MAX_BLK_SEGS )
+                sg_next_sect += nr_sectors;
+            else
+                DISABLE_SCATTERGATHER();
+            return 0;
+        }
+        else if ( RING_PLUGGED )
+        {
+            return 1;
+        }
+        else
+        {
+            sg_operation = operation;
+            sg_dev       = device;
+            sg_next_sect = sector_number + nr_sectors;
+        }
+        break;
+
+    default:
+        panic("unknown op %d\n", operation);
+    }
+
+    /* Fill out a communications ring structure. */
+    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
+    req->id            = id;
+    req->operation     = operation;
+    req->sector_number = sector_number;
+    req->device        = device;
+    req->nr_segments   = 1;
+    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
+    req_prod++;
+
+    return 0;
+}
+
+
+/*
+ * do_xlblk_request
+ *  read a block; request is in a request queue
+ *
+ * Each buffer head is passed to hypervisor_request() and dequeued as soon
+ * as it has been accepted. If the ring fills up, the queue is remembered in
+ * pending_queues[] for kick_pending_request_queues().
+ */
+void do_xlblk_request(request_queue_t *rq)
+{
+    struct request *req;
+    struct buffer_head *bh, *next_bh;
+    int rw, nsect, full, queued = 0;
+
+    DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME);
+
+    while ( !rq->plugged && !list_empty(&rq->queue_head))
+    {
+        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
+            goto out;
+
+        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
+                req, req->cmd, req->sector,
+                req->current_nr_sectors, req->nr_sectors, req->bh);
+
+        rw = req->cmd;
+        if ( rw == READA )
+            rw = READ;
+        if ( unlikely((rw != READ) && (rw != WRITE)) )
+            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
+
+        req->errors = 0;
+
+        bh = req->bh;
+        while ( bh != NULL )
+        {
+            next_bh = bh->b_reqnext;
+            bh->b_reqnext = NULL;
+
+            full = hypervisor_request(
+                (unsigned long)bh,
+                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
+                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
+
+            if ( full )
+            {
+                /* Not accepted: re-link the bh and park the queue. */
+                bh->b_reqnext = next_bh;
+                pending_queues[nr_pending++] = rq;
+                if ( unlikely(nr_pending >= MAX_PENDING) )
+                    BUG();
+                goto out;
+            }
+
+            queued++;
+
+            /* Dequeue the buffer head from the request. */
+            nsect = bh->b_size >> 9;
+            bh = req->bh = next_bh;
+
+            if ( bh != NULL )
+            {
+                /* There's another buffer head to do. Update the request. */
+                req->hard_sector += nsect;
+                req->hard_nr_sectors -= nsect;
+                req->sector = req->hard_sector;
+                req->nr_sectors = req->hard_nr_sectors;
+                req->current_nr_sectors = bh->b_size >> 9;
+                req->buffer = bh->b_data;
+            }
+            else
+            {
+                /* That was the last buffer head. Finalise the request. */
+                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
+                    BUG();
+                blkdev_dequeue_request(req);
+                end_that_request_last(req);
+            }
+        }
+    }
+
+ out:
+    if ( queued != 0 ) signal_requests_to_xen();
+}
+
+
+/* Re-run parked request queues once the ring has drained to half full. */
+static void kick_pending_request_queues(void)
+{
+    /* We kick pending request queues if the ring is reasonably empty. */
+    if ( (nr_pending != 0) &&
+         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
+    {
+        /* Attempt to drain the queue, but bail if the ring becomes full. */
+        while ( (nr_pending != 0) && !RING_PLUGGED )
+            do_xlblk_request(pending_queues[--nr_pending]);
+    }
+}
+
+
+/*
+ * Response interrupt handler: completes every buffer head chained to each
+ * response on the ring, advances resp_cons and kicks parked queues. Runs
+ * with io_request_lock held.
+ */
+static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    BLK_RING_IDX i;
+    unsigned long flags;
+    struct buffer_head *bh, *next_bh;
+
+    if ( unlikely(state == STATE_CLOSED) )
+        return;
+
+    spin_lock_irqsave(&io_request_lock, flags);
+
+    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
+    {
+        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
+        switch ( bret->operation )
+        {
+        case XEN_BLOCK_READ:
+        case XEN_BLOCK_WRITE:
+            if ( unlikely(bret->status != 0) )
+                DPRINTK("Bad return from blkdev data request: %lx\n",
+                        bret->status);
+            for ( bh = (struct buffer_head *)bret->id;
+                  bh != NULL;
+                  bh = next_bh )
+            {
+                next_bh = bh->b_reqnext;
+                bh->b_reqnext = NULL;
+                bh->b_end_io(bh, !bret->status);
+            }
+            break;
+
+        default:
+            BUG();
+        }
+    }
+
+    resp_cons = i;
+
+    kick_pending_request_queues();
+
+    spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+
+/*
+ * Reset the ring in the hypervisor, map it at FIX_BLKRING_BASE, zero all
+ * producer/consumer indexes and mark the driver active.
+ */
+static void reset_xlblk_interface(void)
+{
+    block_io_op_t op;
+
+    nr_pending = 0;
+
+    op.cmd = BLOCK_IO_OP_RESET;
+    if ( HYPERVISOR_block_io_op(&op) != 0 )
+        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
+
+    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
+    (void)HYPERVISOR_block_io_op(&op);
+
+    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
+    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
+    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+
+    wmb();
+    state = STATE_ACTIVE;
+}
+
+
+/*
+ * Driver initialisation: set up the ring, claim the response interrupt,
+ * probe the hypervisor for VBDs and hand the result to xlvbd_init().
+ * Returns 0 on success or a non-zero error.
+ */
+int __init xlblk_init(void)
+{
+    int error;
+    block_io_op_t op;
+
+    reset_xlblk_interface();
+
+    error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int,
+                        SA_SAMPLE_RANDOM, "blkdev", NULL);
+    if ( error )
+    {
+        printk(KERN_ALERT "Could not allocate receive interrupt\n");
+        goto fail;
+    }
+
+    /* Setup our [empty] disk information structure */
+    xlblk_disk_info.max   = XDI_MAX;
+    xlblk_disk_info.disks = kmalloc(XDI_MAX * sizeof(xen_disk_t), GFP_KERNEL);
+    xlblk_disk_info.count = 0;
+    if ( xlblk_disk_info.disks == NULL )
+    {
+        printk(KERN_ALERT "Could not allocate disk information array\n");
+        error = -ENOMEM;
+        goto fail_irq;
+    }
+
+    /* Probe for disk information. */
+    memset(&op, 0, sizeof(op));
+    op.cmd = BLOCK_IO_OP_VBD_PROBE;
+    op.u.probe_params.domain = 0;
+    memcpy(&op.u.probe_params.xdi, &xlblk_disk_info, sizeof(xlblk_disk_info));
+
+    error = HYPERVISOR_block_io_op(&op);
+
+    if ( error )
+    {
+        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
+        goto fail_free;
+    }
+
+    /* copy back the [updated] count parameter */
+    xlblk_disk_info.count = op.u.probe_params.xdi.count;
+
+    /* Pass the information to our virtual block device susbystem. */
+    xlvbd_init(&xlblk_disk_info);
+
+    return 0;
+
+ fail_free:
+    /* Don't leak the probe array when the probe itself failed. */
+    kfree(xlblk_disk_info.disks);
+    xlblk_disk_info.disks = NULL;
+ fail_irq:
+    free_irq(XLBLK_RESPONSE_IRQ, NULL);
+ fail:
+    return error;
+}
+
+/* Tear down the VBD layer and release the response interrupt. */
+static void __exit xlblk_cleanup(void)
+{
+    xlvbd_cleanup();
+    free_irq(XLBLK_RESPONSE_IRQ, NULL);
+}
+
+
+#ifdef MODULE
+module_init(xlblk_init);
+module_exit(xlblk_cleanup);
+#endif
+
+
+/*
+ * Stop accepting new requests, sleep until every request already published
+ * in the ring has been answered, then mark the driver closed and unmap the
+ * ring.
+ */
+void blkdev_suspend(void)
+{
+    state = STATE_SUSPENDED;
+    wmb();
+
+    while ( resp_cons != blk_ring->req_prod )
+    {
+        barrier();
+        current->state = TASK_INTERRUPTIBLE;
+        schedule_timeout(1);
+    }
+
+    wmb();
+    state = STATE_CLOSED;
+    wmb();
+
+    clear_fixmap(FIX_BLKRING_BASE);
+}
+
+
+/* Re-establish the ring after a suspend and restart any parked queues. */
+void blkdev_resume(void)
+{
+    reset_xlblk_interface();
+    spin_lock_irq(&io_request_lock);
+    kick_pending_request_queues();
+    spin_unlock_irq(&io_request_lock);
+}
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h
new file mode 100644
index 0000000000..9c6dcea522
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h
@@ -0,0 +1,70 @@
+/******************************************************************************
+ * xl_block.h
+ *
+ * Shared definitions between all levels of XenoLinux Virtual block devices.
+ */
+
+#ifndef __XL_BLOCK_H__
+#define __XL_BLOCK_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+#include <asm/hypervisor-ifs/vbd.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+/* General debug tracing: change '#if 0' to '#if 1' to route it to printk. */
+#if 0
+#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+/* Separate switch for the (noisier) ioctl tracing. */
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+/* Private gendisk->flags[] values. */
+#define GENHD_FL_XENO        2 /* Is unit a Xen block device?  */
+#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'.
+ * They hang in an array off the gendisk structure. We may end up putting
+ * all kinds of interesting stuff here :-)
+ */
+typedef struct xl_disk {
+    int usage; /* open count: ++ in block_open, -- in block_release */
+} xl_disk_t;
+
+/* Generic layer. */
+extern int xenolinux_control_msg(int operration, char *buffer, int size);
+extern int xenolinux_block_open(struct inode *inode, struct file *filep);
+extern int xenolinux_block_release(struct inode *inode, struct file *filep);
+extern int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
+                                 unsigned command, unsigned long argument);
+extern int xenolinux_block_check(kdev_t dev);
+extern int xenolinux_block_revalidate(kdev_t dev);
+extern void do_xlblk_request (request_queue_t *rq);
+
+
+/* Virtual block-device subsystem.
*/ +extern int xlvbd_init(xen_disk_info_t *xdi); +extern void xlvbd_cleanup(void); + +#endif /* __XL_BLOCK_H__ */ diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c new file mode 100644 index 0000000000..d4e01f73d8 --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c @@ -0,0 +1,343 @@ +/****************************************************************************** + * xl_vbd.c + * + * Xenolinux virtual block-device driver (xvd). + * + */ + +#include "xl_block.h" +#include <linux/blk.h> + +/* + * For convenience we distinguish between ide, scsi and 'other' (i.e. + * potentially combinations of the two) in the naming scheme and in a few + * other places (like default readahead, etc). + */ +#define XLIDE_MAJOR_NAME "hd" +#define XLSCSI_MAJOR_NAME "sd" +#define XLVBD_MAJOR_NAME "xvd" + +#define XLIDE_DEVS_PER_MAJOR 2 +#define XLSCSI_DEVS_PER_MAJOR 16 +#define XLVBD_DEVS_PER_MAJOR 16 + +#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ +#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */ + +#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ +#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ + +#define XLVBD_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ +#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ + +/* The below are for the generic drivers/block/ll_rw_block.c code. 
*/
+static int xlide_blksize_size[256];
+static int xlide_hardsect_size[256];
+static int xlide_max_sectors[256];
+static int xlscsi_blksize_size[256];
+static int xlscsi_hardsect_size[256];
+static int xlscsi_max_sectors[256];
+static int xlvbd_blksize_size[256];
+static int xlvbd_hardsect_size[256];
+static int xlvbd_max_sectors[256];
+
+static struct block_device_operations xlvbd_block_fops =
+{
+    open:               xenolinux_block_open,
+    release:            xenolinux_block_release,
+    ioctl:              xenolinux_block_ioctl,
+    check_media_change: xenolinux_block_check,
+    revalidate:         xenolinux_block_revalidate,
+};
+
+/* Free a gendisk built by xlvbd_alloc_gendisk(), including its arrays. */
+static void xlvbd_free_gendisk(struct gendisk *gd)
+{
+    if ( gd == NULL )
+        return;
+    kfree(gd->flags);
+    kfree(gd->de_arr);
+    kfree(gd->real_devices);
+    kfree(gd->part);
+    kfree(gd->sizes);
+    kfree(gd);
+}
+
+/*
+ * Allocate and zero a gendisk plus its per-minor and per-unit arrays.
+ * Returns NULL, with nothing left allocated, if any allocation fails.
+ */
+static struct gendisk *xlvbd_alloc_gendisk(int major, char *major_name,
+                                           int max_part, int minor_shift,
+                                           int nr_real)
+{
+    struct gendisk *gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
+
+    if ( gd == NULL )
+        return NULL;
+    memset(gd, 0, sizeof(struct gendisk));
+
+    gd->major       = major;
+    gd->major_name  = major_name;
+    gd->max_p       = max_part;
+    gd->minor_shift = minor_shift;
+    gd->nr_real     = nr_real;
+
+    /*
+    ** The sizes[] and part[] arrays hold the sizes and other
+    ** information about every partition with this 'major' (i.e.
+    ** every disk sharing the 8 bit prefix * max partns per disk)
+    */
+    gd->sizes = kmalloc(max_part*nr_real*sizeof(int), GFP_KERNEL);
+    gd->part  = kmalloc(max_part*nr_real*sizeof(struct hd_struct),
+                        GFP_KERNEL);
+    gd->real_devices = kmalloc(nr_real * sizeof(xl_disk_t), GFP_KERNEL);
+    gd->de_arr = kmalloc(nr_real * sizeof(*gd->de_arr), GFP_KERNEL);
+    gd->flags  = kmalloc(nr_real * sizeof(*gd->flags), GFP_KERNEL);
+
+    if ( (gd->sizes == NULL) || (gd->part == NULL) ||
+         (gd->real_devices == NULL) || (gd->de_arr == NULL) ||
+         (gd->flags == NULL) )
+    {
+        xlvbd_free_gendisk(gd);
+        return NULL;
+    }
+
+    memset(gd->sizes, 0, max_part * nr_real * sizeof(int));
+    memset(gd->part,  0, max_part * nr_real * sizeof(struct hd_struct));
+    memset(gd->real_devices, 0, nr_real * sizeof(xl_disk_t));
+    memset(gd->de_arr, 0, nr_real * sizeof(*gd->de_arr));
+    memset(gd->flags,  0, nr_real * sizeof(*gd->flags));
+
+    gd->next = NULL;
+    gd->fops = &xlvbd_block_fops;
+
+    return gd;
+}
+
+/*
+ * Set up all the linux device goop for the virtual block devices (vbd's) that
+ * xen tells us about. Note that although from xen's pov VBDs are addressed
+ * simply an opaque 16-bit device number, the domain creation tools
+ * conventionally allocate these numbers to correspond to those used by 'real'
+ * linux -- this is just for convenience as it means e.g. that the same
+ * /etc/fstab can be used when booting with or without xen.
+ *
+ * Devices whose major cannot be set up (no memory, or register_blkdev()
+ * fails) are skipped. Always returns 0.
+ */
+int __init xlvbd_init(xen_disk_info_t *xdi)
+{
+    int i, j, result, max_part, minor_shift, nr_real;
+    struct gendisk *gd = NULL;
+    kdev_t device;
+    unsigned short major, minor, partno;
+    int is_ide, is_scsi;
+    char *major_name;
+    char buf[64]; /* scratch space for disk_name() */
+
+    SET_MODULE_OWNER(&xlvbd_block_fops);
+
+    /* Initialize the global arrays. */
+    for ( i = 0; i < 256; i++ )
+    {
+        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
+        xlide_blksize_size[i]  = 1024;
+        xlide_hardsect_size[i] = 512;
+        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
+
+        /* from the generic scsi disk code (drivers/scsi/sd.c) */
+        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
+        xlscsi_hardsect_size[i] = 512;
+        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
+
+        /* we don't really know what to set these too since it depends */
+        xlvbd_blksize_size[i]  = 512;
+        xlvbd_hardsect_size[i] = 512;
+        xlvbd_max_sectors[i]   = 128;
+    }
+
+    /*
+     * We need to loop through each major device we've been told about and:
+     * a) register the appropriate blkdev
+     * b) setup the indexed-by-major global arrays (blk_size[],
+     *    blksize_size[], hardsect_size[], max_sectors[], read_ahead[])
+     * c) setup the block queue + make it sensible
+     * d) create an appropriate gendisk structure, and
+     * e) register the gendisk
+     */
+    for ( i = 0; i < xdi->count; i++ )
+    {
+        device = xdi->disks[i].device;
+        major  = MAJOR(device);
+        minor  = MINOR(device);
+        is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
+        is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
+
+        if ( is_ide )
+        {
+            major_name  = XLIDE_MAJOR_NAME;
+            max_part    = XLIDE_MAX_PART;
+            minor_shift = XLIDE_PARTN_SHIFT;
+            nr_real     = XLIDE_DEVS_PER_MAJOR;
+        }
+        else if ( is_scsi )
+        {
+            major_name  = XLSCSI_MAJOR_NAME;
+            max_part    = XLSCSI_MAX_PART;
+            minor_shift = XLSCSI_PARTN_SHIFT;
+            nr_real     = XLSCSI_DEVS_PER_MAJOR;
+        }
+        else
+        {
+            major_name  = XLVBD_MAJOR_NAME;
+            max_part    = XLVBD_MAX_PART;
+            minor_shift = XLVBD_PARTN_SHIFT;
+            nr_real     = XLVBD_DEVS_PER_MAJOR;
+        }
+
+        partno = minor & (max_part - 1);
+
+        if ( (gd = get_gendisk(device)) == NULL )
+        {
+            /*
+             * Construct an appropriate gendisk structure. This is done
+             * before registering the major so that an allocation failure
+             * leaves nothing registered.
+             */
+            gd = xlvbd_alloc_gendisk(major, major_name, max_part,
+                                     minor_shift, nr_real);
+            if ( gd == NULL )
+            {
+                printk(KERN_ALERT "XL VBD: no memory for major %d\n", major);
+                continue;
+            }
+
+            result = register_blkdev(major, major_name, &xlvbd_block_fops);
+            if ( result < 0 )
+            {
+                printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
+                xlvbd_free_gendisk(gd);
+                gd = NULL;
+                continue;
+            }
+
+            if ( is_ide )
+            {
+                blksize_size[major]  = xlide_blksize_size;
+                hardsect_size[major] = xlide_hardsect_size;
+                max_sectors[major]   = xlide_max_sectors;
+                read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
+            }
+            else if ( is_scsi )
+            {
+                blksize_size[major]  = xlscsi_blksize_size;
+                hardsect_size[major] = xlscsi_hardsect_size;
+                max_sectors[major]   = xlscsi_max_sectors;
+                read_ahead[major]    = 0; /* XXX 8; -- guessing */
+            }
+            else
+            {
+                blksize_size[major]  = xlvbd_blksize_size;
+                hardsect_size[major] = xlvbd_hardsect_size;
+                max_sectors[major]   = xlvbd_max_sectors;
+                read_ahead[major]    = 8;
+            }
+
+            blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
+
+            /*
+             * Turn off barking 'headactive' mode. We dequeue buffer heads as
+             * soon as we pass them down to Xen.
+             */
+            blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
+
+            add_gendisk(gd);
+
+            blk_size[major] = gd->sizes;
+        }
+
+        if ( XD_READONLY(xdi->disks[i].info) )
+            set_device_ro(device, 1);
+
+        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO;
+
+        if ( partno != 0 )
+        {
+            /*
+             * If this was previously set up as a real disc we will have set
+             * up partition-table information. Virtual partitions override
+             * 'real' partitions, and the two cannot coexist on a device.
+             */
+            if ( gd->sizes[minor & ~(max_part-1)] != 0 )
+            {
+                kdev_t dev = device & ~(max_part-1);
+                for ( j = max_part - 1; j >= 0; j-- )
+                {
+                    invalidate_device(dev+j, 1);
+                    gd->part[MINOR(dev+j)].start_sect = 0;
+                    gd->part[MINOR(dev+j)].nr_sects   = 0;
+                    gd->sizes[MINOR(dev+j)]           = 0;
+                }
+                printk(KERN_ALERT
+                       "Virtual partitions found for /dev/%s - ignoring any "
+                       "real partition information we may have found.\n",
+                       disk_name(gd, MINOR(device), buf));
+            }
+
+            /* Need to skankily setup 'partition' information */
+            gd->part[minor].start_sect = 0;
+            gd->part[minor].nr_sects   = xdi->disks[i].capacity;
+            gd->sizes[minor]           = xdi->disks[i].capacity;
+
+            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+        }
+        else
+        {
+            /* Some final fix-ups depending on the device type */
+            switch ( XD_TYPE(xdi->disks[i].info) )
+            {
+            case XD_TYPE_CDROM:
+            case XD_TYPE_FLOPPY:
+            case XD_TYPE_TAPE:
+                gd->part[minor].nr_sects = xdi->disks[i].capacity;
+                gd->sizes[minor] = xdi->disks[i].capacity>>(BLOCK_SIZE_BITS-9);
+                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE;
+                printk(KERN_ALERT
+                       "Skipping partition check on %s /dev/%s\n",
+                       XD_TYPE(xdi->disks[i].info)==XD_TYPE_CDROM ? "cdrom" :
+                       (XD_TYPE(xdi->disks[i].info)==XD_TYPE_TAPE ? "tape" :
+                        "floppy"), disk_name(gd, MINOR(device), buf));
+                break;
+
+            case XD_TYPE_DISK:
+                /* Only check partitions on real discs (not virtual!). */
+                if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+                {
+                    printk(KERN_ALERT
+                           "Skipping partition check on virtual /dev/%s\n",
+                           disk_name(gd, MINOR(device), buf));
+                    break;
+                }
+                register_disk(gd, device, gd->max_p, &xlvbd_block_fops,
+                              xdi->disks[i].capacity);
+                break;
+
+            default:
+                printk(KERN_ALERT "XenoLinux: unknown device type %d\n",
+                       XD_TYPE(xdi->disks[i].info));
+                break;
+            }
+        }
+
+        printk(KERN_ALERT "XenoLinux Virtual Block Device Driver "
+               "installed [device: %04x]\n", device);
+    }
+
+    return 0;
+}
+
+/*
+ * Undo xlvbd_init(): for every major that has at least one unit flagged
+ * GENHD_FL_XENO, tear down the request queue, unregister the major and
+ * release the gendisk.
+ */
+void xlvbd_cleanup(void)
+{
+    int is_ide, is_scsi, i;
+    struct gendisk *gd;
+    char *major_name;
+    int major;
+
+    for ( major = 0; major < MAX_BLKDEV; major++ )
+    {
+        if ( (gd = get_gendisk(MKDEV(major, 0))) == NULL )
+            continue;
+
+        /*
+         * If this is a 'Xeno' blkdev then at least one unit will have the Xeno
+         * flag set.
+         */
+        for ( i = 0; i < gd->nr_real; i++ )
+            if ( gd->flags[i] & GENHD_FL_XENO )
+                break;
+        if ( i == gd->nr_real )
+            continue;
+
+        is_ide  = IDE_DISK_MAJOR(major);  /* is this an ide device? */
+        is_scsi = SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
+
+        blk_cleanup_queue(BLK_DEFAULT_QUEUE(major));
+
+        if ( is_ide )
+            major_name = XLIDE_MAJOR_NAME;
+        else if ( is_scsi )
+            major_name = XLSCSI_MAJOR_NAME;
+        else
+            major_name = XLVBD_MAJOR_NAME;
+
+        if ( unregister_blkdev(major, major_name) != 0 )
+            printk(KERN_ALERT "XenoLinux Virtual Block Device Driver:"
+                   "major device %04x uninstalled w/ errors\n", major);
+
+        /*
+         * Unlink and free the gendisk; blk_size[] pointed at gd->sizes, so
+         * it must not be left dangling.
+         */
+        blk_size[major] = NULL;
+        del_gendisk(gd);
+        xlvbd_free_gendisk(gd);
+    }
+}
+
+/*
+ * NOTE(review): xlvbd_init() takes an argument, and xl_block.c has its own
+ * module_init()/module_exit() pair -- confirm this MODULE build is used.
+ */
+#ifdef MODULE
+module_init(xlvbd_init);
+module_exit(xlvbd_cleanup);
+#endif
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/console/Makefile b/xenolinux-2.4.24-sparse/arch/xeno/drivers/console/Makefile
new file mode 100644
index 0000000000..546180a3c2
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/console/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := con.o
+obj-$(CONFIG_XEN_CONSOLE) := console.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/console/console.c
new file mode 100644
index 0000000000..b93fe47656
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/console/console.c
@@ -0,0 +1,229 @@
+/******************************************************************************
+ * console.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/major.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/hypervisor.h>
+
+#define XENO_TTY_MINOR 123
+
+/*** Useful function for console debugging -- goes straight to Xen ****/
+asmlinkage int xprintk(const char *fmt, ...)
+{ + va_list args; + int printk_len; + static char printk_buf[1024]; + + /* Emit the output into the temporary buffer */ + va_start(args, fmt); + printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); + va_end(args); + + /* Send the processed output directly to Xen. */ + (void)HYPERVISOR_console_write(printk_buf, printk_len); + + return 0; +} + + + +/******************** Kernel console driver ********************************/ + +static void xen_console_write(struct console *co, const char *s, unsigned count) +{ +#define STRLEN 256 + static char str[STRLEN]; + static int pos = 0; + int len; + + /* We buffer output until we see a newline, or until the buffer is full. */ + while ( count != 0 ) + { + len = ((STRLEN - pos) > count) ? count : STRLEN - pos; + memcpy(str + pos, s, len); + pos += len; + s += len; + count -= len; + if ( (pos == STRLEN) || (str[pos-1] == '\n') ) + { + (void)HYPERVISOR_console_write(str, pos); + pos = 0; + } + } +} + +static kdev_t xen_console_device(struct console *c) +{ + /* + * This is the magic that binds our "struct console" to our + * "tty_struct", defined below. + */ + return MKDEV(TTY_MAJOR, XENO_TTY_MINOR); +} + +static struct console xen_console_info = { + name: "xencons", /* Used to be xen_console, but we're only + actually allowed 8 charcters including + the terminator... 
*/ + write: xen_console_write, + device: xen_console_device, + flags: CON_PRINTBUFFER, + index: -1, +}; + +void xen_console_init(void) +{ + xprintk("xen_console_init\n"); + register_console(&xen_console_info); +} + + +/******************** Initial /dev/console *********************************/ + + +static struct tty_driver xeno_console_driver; +static int xeno_console_refcount; +static struct tty_struct *xeno_console_table[1]; +static struct termios *xeno_console_termios[1]; +static struct termios *xeno_console_termios_locked[1]; + +static int xeno_console_write_room(struct tty_struct *tty) +{ + return INT_MAX; +} + +static int xeno_console_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static inline int xeno_console_xmit(int ch) +{ + char _ch = ch; + xen_console_write(NULL, &_ch, 1); + return 1; +} + +static int xeno_console_write(struct tty_struct *tty, int from_user, + const u_char * buf, int count) +{ + int i; + + if ( from_user && verify_area(VERIFY_READ, buf, count) ) + { + return -EINVAL; + } + + for ( i = 0; i < count; i++ ) + { + char ch; + if ( from_user ) + { + __get_user(ch, buf + i); + } + else + { + ch = buf[i]; + } + xeno_console_xmit(ch); + } + return i; +} + +static void xeno_console_put_char(struct tty_struct *tty, u_char ch) +{ + xeno_console_xmit(ch); +} + +static int xeno_console_open(struct tty_struct *tty, struct file *filp) +{ + int line; + + MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ( line ) + { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + tty->driver_data = NULL; + + return 0; +} + +static void xeno_console_close(struct tty_struct *tty, struct file *filp) +{ + MOD_DEC_USE_COUNT; +} + +int __init xeno_con_init(void) +{ + memset(&xeno_console_driver, 0, sizeof(struct tty_driver)); + xeno_console_driver.magic = TTY_DRIVER_MAGIC; + xeno_console_driver.driver_name = "xeno_console"; + xeno_console_driver.name = "xencon"; + xeno_console_driver.major = TTY_MAJOR; + 
xeno_console_driver.minor_start = XENO_TTY_MINOR; + xeno_console_driver.num = 1; + xeno_console_driver.type = TTY_DRIVER_TYPE_SERIAL; + xeno_console_driver.subtype = SERIAL_TYPE_NORMAL; + xeno_console_driver.init_termios = tty_std_termios; + xeno_console_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS; + xeno_console_driver.refcount = &xeno_console_refcount; + xeno_console_driver.table = xeno_console_table; + xeno_console_driver.termios = xeno_console_termios; + xeno_console_driver.termios_locked = xeno_console_termios_locked; + /* Functions */ + xeno_console_driver.open = xeno_console_open; + xeno_console_driver.close = xeno_console_close; + xeno_console_driver.write = xeno_console_write; + xeno_console_driver.write_room = xeno_console_write_room; + xeno_console_driver.put_char = xeno_console_put_char; + xeno_console_driver.chars_in_buffer = xeno_console_chars_in_buffer; + + if ( tty_register_driver(&xeno_console_driver) ) + { + printk(KERN_ERR "Couldn't register Xeno console driver\n"); + } + else + { + printk("Xeno console successfully installed\n"); + } + + return 0; +} + +void __exit xeno_con_fini(void) +{ + int ret; + + ret = tty_unregister_driver(&xeno_console_driver); + if ( ret != 0 ) + { + printk(KERN_ERR "Unable to unregister Xeno console driver: %d\n", ret); + } +} + +module_init(xeno_con_init); +module_exit(xeno_con_fini); + diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/Makefile b/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/Makefile new file mode 100644 index 0000000000..9030801f14 --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/Makefile @@ -0,0 +1,3 @@ +O_TARGET := dom0.o +obj-y := dom0_core.o vfr.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/dom0_core.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/dom0_core.c new file mode 100644 index 0000000000..08144d9678 --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/dom0_core.c @@ -0,0 +1,108 @@ 
+/****************************************************************************** + * dom0_core.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002-2003, K A Fraser, B Dragovic + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> +#include <linux/swapctl.h> +#include <linux/iobuf.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/seq_file.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> +#include <asm/proc_cmd.h> +#include <asm/hypervisor-ifs/dom0_ops.h> +#include <asm/xeno_proc.h> + +#include "../block/xl_block.h" + +static struct proc_dir_entry *privcmd_intf; + + +static int privcmd_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = 0; + + switch ( cmd ) + { + case IOCTL_PRIVCMD_HYPERCALL: + { + privcmd_hypercall_t hypercall; + + if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) ) + return -EFAULT; + + __asm__ __volatile__ ( + "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; " + "movl 4(%%eax),%%ebx ;" + "movl 8(%%eax),%%ecx ;" + "movl 12(%%eax),%%edx ;" + "movl 16(%%eax),%%esi ;" + "movl 20(%%eax),%%edi ;" + "movl (%%eax),%%eax ;" + TRAP_INSTR "; " + "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" + : "=a" (ret) : "0" (&hypercall) : "memory" ); + + } + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + + +static struct file_operations privcmd_file_ops = { + ioctl : privcmd_ioctl +}; + + +static int __init init_module(void) +{ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return 0; + + /* xeno control interface */ + privcmd_intf = create_xeno_proc_entry("privcmd", 0400); + if ( privcmd_intf != NULL ) + { + 
privcmd_intf->owner = THIS_MODULE; + privcmd_intf->nlink = 1; + privcmd_intf->proc_fops = &privcmd_file_ops; + } + + return 0; +} + + +static void __exit cleanup_module(void) +{ + if ( privcmd_intf == NULL ) return; + remove_xeno_proc_entry("privcmd"); + privcmd_intf = NULL; +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/vfr.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/vfr.c new file mode 100644 index 0000000000..f3725db6c1 --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/dom0/vfr.c @@ -0,0 +1,323 @@ +/****************************************************************************** + * vfr.c + * + * Interface to the virtual firewall/router. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <asm/xeno_proc.h> +#include <asm/hypervisor-ifs/network.h> + +static struct proc_dir_entry *proc_vfr; + +static unsigned char readbuf[1024]; + +/* Helpers, implemented at the bottom. */ +u32 getipaddr(const char *buff, unsigned int len); +u16 antous(const char *buff, int len); +int anton(const char *buff, int len); + +static int vfr_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + +/* The format for the vfr interface is as follows: + * + * COMMAND <field>=<val> [<field>=<val> [...]] + * + * where: + * + * COMMAND = { ACCEPT | COUNT } + * + * field=val pairs are as follows: + * + * field = { srcaddr | dstaddr } + * val is a dot seperated, numeric IP address. 
+ * + * field = { srcport | dstport } + * val is a (16-bit) unsigned int + * + * field = { proto } + * val = { IP | TCP | UDP | ARP } + * + */ + +#define isspace(_x) ( ((_x)==' ') || ((_x)=='\t') || ((_x)=='\v') || \ + ((_x)=='\f') || ((_x)=='\r') || ((_x)=='\n') ) + +static int vfr_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + network_op_t op; + int ret, len; + int ts, te, tl; // token start, end, and length + int fs, fe, fl; // field. + + len = count; + ts = te = 0; + + memset(&op, 0, sizeof(network_op_t)); + + // get the command: + while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) + { + op.cmd = NETWORK_OP_ADDRULE; + } + else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) + { + op.cmd = NETWORK_OP_DELETERULE; + } + else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) + { + op.cmd = NETWORK_OP_GETRULELIST; + goto doneparsing; + } + + ts = te; + + // get the action + while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + goto keyval; + } + if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_COUNT; + goto keyval; + } + + // default case; + return (len); + + + // get the key=val pairs. + keyval: + while (count) + { + //get field + ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } + te = ts; + while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) + { te++; count--; } + if ( te <= ts ) + goto doneparsing; + tl = te - ts; + fs = ts; fe = te; fl = tl; // save the field markers. + // skip " = " (ignores extra equals.) 
+ while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) + { te++; count--; } + ts = te; + while ( count && !isspace(buffer[te]) ) { te++; count--; } + tl = te - ts; + + if ( (fl <= 0) || (tl <= 0) ) goto bad; + + /* NB. Prefix matches must go first! */ + if (strncmp(&buffer[fs], "src", fl) == 0) + { + op.u.net_rule.src_vif = VIF_ANY_INTERFACE; + } + else if (strncmp(&buffer[fs], "dst", fl) == 0) + { + op.u.net_rule.dst_vif = VIF_PHYSICAL_INTERFACE; + } + else if (strncmp(&buffer[fs], "srcaddr", fl) == 0) + { + op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) + { + op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) + { + op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) + { + op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcport", fl) == 0) + { + op.u.net_rule.src_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstport", fl) == 0) + { + op.u.net_rule.dst_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) + { + op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) + { + op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcdom", fl) == 0) + { + op.u.net_rule.src_vif |= anton(&buffer[ts], tl)<<VIF_DOMAIN_SHIFT; + } + else if (strncmp(&buffer[fs], "srcidx", fl) == 0) + { + op.u.net_rule.src_vif |= anton(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstdom", fl) == 0) + { + op.u.net_rule.dst_vif |= anton(&buffer[ts], tl)<<VIF_DOMAIN_SHIFT; + } + else if (strncmp(&buffer[fs], "dstidx", fl) == 0) + { + op.u.net_rule.dst_vif |= anton(&buffer[ts], tl); + } + else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) + { + if (strncmp(&buffer[ts], "any", tl) == 
0) + op.u.net_rule.proto = NETWORK_PROTO_ANY; + if (strncmp(&buffer[ts], "ip", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_IP; + if (strncmp(&buffer[ts], "tcp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_TCP; + if (strncmp(&buffer[ts], "udp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_UDP; + if (strncmp(&buffer[ts], "arp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ARP; + } + } + + doneparsing: + ret = HYPERVISOR_network_op(&op); + return(len); + + bad: + return(len); + + +} + +static int __init init_module(void) +{ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return 0; + + *readbuf = '\0'; + proc_vfr = create_xeno_proc_entry("vfr", 0600); + if ( proc_vfr != NULL ) + { + proc_vfr->owner = THIS_MODULE; + proc_vfr->nlink = 1; + proc_vfr->read_proc = vfr_read_proc; + proc_vfr->write_proc = vfr_write_proc; + printk("Successfully installed virtual firewall/router interface\n"); + } + return 0; +} + +static void __exit cleanup_module(void) +{ + if ( proc_vfr == NULL ) return; + remove_xeno_proc_entry("vfr"); + proc_vfr = NULL; +} + +module_init(init_module); +module_exit(cleanup_module); + +/* Helper functions start here: */ + +int anton(const char *buff, int len) +{ + int ret; + char c; + int sign = 1; + + ret = 0; + + if (len == 0) return 0; + if (*buff == '-') { sign = -1; buff++; len--; } + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + ret *= sign; + return ret; +} + +u16 antous(const char *buff, int len) +{ + u16 ret; + char c; + + ret = 0; + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + return ret; +} + +u32 getipaddr(const char *buff, unsigned int len) +{ + char c; + u32 ret, val; + + ret = 0; val = 0; + + while ( len ) + { + if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) + { + return(0); // malformed. + } + + if ( c == '.' ) { + if (val > 255) return (0); //malformed. 
+ ret = ret << 8; + ret += val; + val = 0; + len--; buff++; + continue; + } + val *= 10; + val += c - '0'; + buff++; len--; + } + ret = ret << 8; + ret += val; + + return (ret); +} + diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/Makefile new file mode 100644 index 0000000000..b44a288a5b --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/Makefile @@ -0,0 +1,3 @@ +O_TARGET := net.o +obj-y := network.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c new file mode 100644 index 0000000000..ac557a3c11 --- /dev/null +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c @@ -0,0 +1,627 @@ +/****************************************************************************** + * network.c + * + * Virtual network driver for XenoLinux. + * + * Copyright (c) 2002-2003, K A Fraser + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <net/sock.h> +#include <net/pkt_sched.h> + +#define NET_IRQ _EVENT_NET + +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ + +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void network_alloc_rx_buffers(struct net_device *dev); +static void cleanup_module(void); + +static struct list_head dev_list; + +struct net_private +{ + struct list_head list; + struct net_device *dev; + + struct net_device_stats stats; + NET_RING_IDX rx_resp_cons, tx_resp_cons; + unsigned int net_ring_fixmap_idx, tx_full; + 
net_ring_t *net_ring; + net_idx_t *net_idx; + spinlock_t tx_lock; + unsigned int idx; /* Domain-specific index of this VIF. */ + + unsigned int rx_bufs_to_notify; + +#define STATE_ACTIVE 0 +#define STATE_SUSPENDED 1 +#define STATE_CLOSED 2 + unsigned int state; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. + */ + struct sk_buff *tx_skbs[TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[RX_RING_SIZE+1]; +}; + +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (void *)(unsigned long)(_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned long _id = (unsigned long)(_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + (unsigned short)_id; }) + + +static void _dbg_network_int(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + if ( np->state == STATE_CLOSED ) + return; + + printk(KERN_ALERT "tx_full = %d, tx_resp_cons = 0x%08x," + " tx_req_prod = 0x%08x, tx_resp_prod = 0x%08x," + " tx_event = 0x%08x, state=%d\n", + np->tx_full, np->tx_resp_cons, + np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, + np->net_idx->tx_event, + test_bit(__LINK_STATE_XOFF, &dev->state)); + printk(KERN_ALERT "rx_resp_cons = 0x%08x," + " rx_req_prod = 0x%08x, rx_resp_prod = 0x%08x, rx_event = 0x%08x\n", + np->rx_resp_cons, np->net_idx->rx_req_prod, + np->net_idx->rx_resp_prod, np->net_idx->rx_event); +} + + +static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _dbg_network_int(np->dev); + } +} + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + int i, ret; + + netop.cmd = NETOP_RESET_RINGS; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + 
printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n"); + return ret; + } + + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx); + return ret; + } + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, + netop.u.get_vif_info.ring_mfn << PAGE_SHIFT); + np->net_ring = (net_ring_t *)fix_to_virt( + FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx]; + + np->rx_bufs_to_notify = 0; + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; + memset(&np->stats, 0, sizeof(np->stats)); + spin_lock_init(&np->tx_lock); + memset(np->net_ring, 0, sizeof(*np->net_ring)); + memset(np->net_idx, 0, sizeof(*np->net_idx)); + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for ( i = 0; i <= TX_RING_SIZE; i++ ) + np->tx_skbs[i] = (void *)(i+1); + for ( i = 0; i <= RX_RING_SIZE; i++ ) + np->rx_skbs[i] = (void *)(i+1); + + wmb(); + np->state = STATE_ACTIVE; + + network_alloc_rx_buffers(dev); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; +} + + +static void network_tx_buf_gc(struct net_device *dev) +{ + NET_RING_IDX i, prod; + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + tx_entry_t *tx_ring = np->net_ring->tx_ring; + + do { + prod = np->net_idx->tx_resp_prod; + + for ( i = np->tx_resp_cons; i != prod; i++ ) + { + id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; + skb = np->tx_skbs[id]; + ADD_ID_TO_FREELIST(np->tx_skbs, id); + dev_kfree_skb_any(skb); + } + + np->tx_resp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. Note + * that it is essential to schedule a callback, no matter how few + * buffers are pending. 
Even if there is space in the transmit ring, + * higher layers may be blocked because too much data is outstanding: + * in such cases notification from Xen is likely to be the only kick + * that we'll get. + */ + np->net_idx->tx_event = + prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; + mb(); + } + while ( prod != np->net_idx->tx_resp_prod ); + + if ( np->tx_full && ((np->net_idx->tx_req_prod - prod) < TX_RING_SIZE) ) + { + np->tx_full = 0; + if ( np->state == STATE_ACTIVE ) + netif_wake_queue(dev); + } +} + + +static inline pte_t *get_ppte(void *addr) +{ + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + pgd = pgd_offset_k( (unsigned long)addr); + pmd = pmd_offset(pgd, (unsigned long)addr); + pte = pte_offset(pmd, (unsigned long)addr); + return pte; +} + + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + netop_t netop; + NET_RING_IDX i = np->net_idx->rx_req_prod; + + if ( unlikely((i - np->rx_resp_cons) == RX_RING_SIZE) || + unlikely(np->state != STATE_ACTIVE) ) + return; + + do { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(skb == NULL) ) + break; + + skb->dev = dev; + + if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) + panic("alloc_skb needs to provide us page-aligned buffers."); + + id = GET_ID_FROM_FREELIST(np->rx_skbs); + np->rx_skbs[id] = skb; + + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = + virt_to_machine(get_ppte(skb->head)); + + np->rx_bufs_to_notify++; + } + while ( (++i - np->rx_resp_cons) != RX_RING_SIZE ); + + /* + * We may have allocated buffers which have entries outstanding in the page + * update queue -- make sure we flush those first! + */ + flush_page_update_queue(); + + np->net_idx->rx_req_prod = i; + np->net_idx->rx_event = np->rx_resp_cons + 1; + + /* Batch Xen notifications. 
*/ + if ( np->rx_bufs_to_notify > (RX_RING_SIZE/4) ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + np->rx_bufs_to_notify = 0; + } +} + + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned short id; + struct net_private *np = (struct net_private *)dev->priv; + tx_req_entry_t *tx; + netop_t netop; + NET_RING_IDX i; + + if ( unlikely(np->tx_full) ) + { + printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= + PAGE_SIZE) ) + { + struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(new_skb == NULL) ) + return 1; + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + dev_kfree_skb(skb); + skb = new_skb; + } + + spin_lock_irq(&np->tx_lock); + + i = np->net_idx->tx_req_prod; + + id = GET_ID_FROM_FREELIST(np->tx_skbs); + np->tx_skbs[id] = skb; + + tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; + + tx->id = id; + tx->addr = phys_to_machine(virt_to_phys(skb->data)); + tx->size = skb->len; + + wmb(); + np->net_idx->tx_req_prod = i + 1; + + network_tx_buf_gc(dev); + + if ( (i - np->tx_resp_cons) == (TX_RING_SIZE - 1) ) + { + np->tx_full = 1; + netif_stop_queue(dev); + } + + spin_unlock_irq(&np->tx_lock); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + /* Only notify Xen if there are no outstanding responses. 
*/ + mb(); + if ( np->net_idx->tx_resp_prod == i ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + } + + return 0; +} + + +static inline void _network_interrupt(struct net_device *dev) +{ + struct net_private *np = dev->priv; + unsigned long flags; + struct sk_buff *skb; + rx_resp_entry_t *rx; + NET_RING_IDX i; + + if ( unlikely(np->state == STATE_CLOSED) ) + return; + + spin_lock_irqsave(&np->tx_lock, flags); + network_tx_buf_gc(dev); + spin_unlock_irqrestore(&np->tx_lock, flags); + + again: + for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) + { + rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; + + skb = np->rx_skbs[rx->id]; + ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); + + if ( unlikely(rx->status != RING_STATUS_OK) ) + { + /* Gate this error. We get a (valid) slew of them on suspend. */ + if ( np->state == STATE_ACTIVE ) + printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); + dev_kfree_skb_any(skb); + continue; + } + + /* + * Set up shinfo -- from alloc_skb This was particularily nasty: the + * shared info is hidden at the back of the data area (presumably so it + * can be shared), but on page flip it gets very spunked. + */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; + + skb->data = skb->tail = skb->head + rx->offset; + skb_put(skb, rx->size); + skb->protocol = eth_type_trans(skb, dev); + + np->stats.rx_packets++; + + np->stats.rx_bytes += rx->size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_resp_cons = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. 
*/ + mb(); + if ( np->net_idx->rx_resp_prod != i ) + goto again; +} + + +static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _network_interrupt(np->dev); + } +} + + +int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + + np->state = STATE_SUSPENDED; + wmb(); + + netif_stop_queue(np->dev); + + netop.cmd = NETOP_FLUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + + while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) || + (np->tx_resp_cons != np->net_idx->tx_req_prod) ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + wmb(); + np->state = STATE_CLOSED; + wmb(); + + /* Now no longer safe to take interrupts for this device. */ + clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + + MOD_DEC_USE_COUNT; + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +/* + * This notifier is installed for domain 0 only. + * All other domains have VFR rules installed on their behalf by domain 0 + * when they are created. For bootstrap, Xen creates wildcard rules for + * domain 0 -- this notifier is used to detect when we find our proper + * IP address, so we can poke down proper rules and remove the wildcards. 
+ */ +static int inetdev_notify(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct list_head *ent; + struct net_private *np; + int idx = -1; + network_op_t op; + + list_for_each ( ent, &dev_list ) + { + np = list_entry(dev_list.next, struct net_private, list); + if ( np->dev == dev ) + idx = np->idx; + } + + if ( idx == -1 ) + goto out; + + memset(&op, 0, sizeof(op)); + op.u.net_rule.proto = NETWORK_PROTO_ANY; + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + + if ( event == NETDEV_UP ) + op.cmd = NETWORK_OP_ADDRULE; + else if ( event == NETDEV_DOWN ) + op.cmd = NETWORK_OP_DELETERULE; + else + goto out; + + op.u.net_rule.src_vif = idx; + op.u.net_rule.dst_vif = VIF_PHYSICAL_INTERFACE; + op.u.net_rule.src_addr = ntohl(ifa->ifa_address); + op.u.net_rule.src_addr_mask = ~0UL; + op.u.net_rule.dst_addr = 0; + op.u.net_rule.dst_addr_mask = 0; + (void)HYPERVISOR_network_op(&op); + + op.u.net_rule.src_vif = VIF_ANY_INTERFACE; + op.u.net_rule.dst_vif = idx; + op.u.net_rule.src_addr = 0; + op.u.net_rule.src_addr_mask = 0; + op.u.net_rule.dst_addr = ntohl(ifa->ifa_address); + op.u.net_rule.dst_addr_mask = ~0UL; + (void)HYPERVISOR_network_op(&op); + + out: + return NOTIFY_DONE; +} + +static struct notifier_block notifier_inetdev = { + .notifier_call = inetdev_notify, + .next = NULL, + .priority = 0 +}; + + +int __init init_module(void) +{ + int i, fixmap_idx=-1, err; + struct net_device *dev; + struct net_private *np; + netop_t netop; + + INIT_LIST_HEAD(&dev_list); + + /* + * Domain 0 must poke its own network rules as it discovers its IP + * addresses. All other domains have a privileged "parent" to do this for + * them at start of day. 
+ */ + if ( start_info.dom_id == 0 ) + (void)register_inetaddr_notifier(¬ifier_inetdev); + + err = request_irq(NET_IRQ, network_interrupt, + SA_SAMPLE_RANDOM, "network", NULL); + if ( err ) + { + printk(KERN_WARNING "Could not allocate network interrupt\n"); + goto fail; + } + + err = request_irq(_EVENT_DEBUG, dbg_network_int, 0, "debug", NULL); + if ( err ) + printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); + + for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) + { + /* If the VIF is invalid then the query hypercall will fail. */ + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = i; + if ( HYPERVISOR_net_io_op(&netop) != 0 ) + continue; + + /* We actually only support up to 4 vifs right now. */ + if ( ++fixmap_idx == 4 ) + break; + + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->state = STATE_CLOSED; + np->net_ring_fixmap_idx = fixmap_idx; + np->idx = i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } + + if ( start_info.dom_id == 0 ) + (void)unregister_inetaddr_notifier(¬ifier_inetdev); +} + + +module_init(init_module); +module_exit(cleanup_module); |