diff options
Diffstat (limited to 'xenolinux-2.4.16-sparse/arch')
9 files changed, 1073 insertions, 562 deletions
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/defconfig b/xenolinux-2.4.16-sparse/arch/xeno/defconfig index b278dec50f..c83d96d1b0 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/defconfig +++ b/xenolinux-2.4.16-sparse/arch/xeno/defconfig @@ -114,6 +114,7 @@ CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=4096 CONFIG_BLK_DEV_INITRD=y +CONFIG_XENOLINUX_BLOCK=y # # Multi-device support (RAID and LVM) @@ -379,13 +380,13 @@ CONFIG_MSDOS_PARTITION=y # # Kernel hacking # -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_HIGHMEM=y -CONFIG_DEBUG_SLAB=y -CONFIG_DEBUG_IOVIRT=y +# CONFIG_DEBUG_KERNEL is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set # CONFIG_MAGIC_SYSRQ is not set -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_KDB is not set CONFIG_KALLSYMS=y -CONFIG_FRAME_POINTER=y +# CONFIG_FRAME_POINTER is not set diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile index 9361a01ec7..74a0c6c565 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile @@ -1,3 +1,3 @@ O_TARGET := blk.o -obj-y := block.o +obj-y := xl_block.o xl_block_test.o include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c deleted file mode 100644 index bf7d416dff..0000000000 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c +++ /dev/null @@ -1,392 +0,0 @@ -/****************************************************************************** - * block.c - * - * Virtual block driver for XenoLinux. 
- * - * adapted from network.c - */ - -#include <linux/config.h> -#include <linux/module.h> - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/errno.h> - -#include <asm/hypervisor-ifs/block.h> - -#ifdef UNDEFINED - -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/init.h> - -#include <net/sock.h> - -#define BLK_TX_IRQ _EVENT_BLK_TX -#define BLK_RX_IRQ _EVENT_BLK_RX - -#define TX_MAX_ENTRIES (TX_RING_SIZE - 2) -#define RX_MAX_ENTRIES (RX_RING_SIZE - 2) - -#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) -#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) - -#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! */ - - - -int network_probe(struct net_device *dev); -static int network_open(struct net_device *dev); -static int network_start_xmit(struct sk_buff *skb, struct net_device *dev); -static int network_close(struct net_device *dev); -static struct net_device_stats *network_get_stats(struct net_device *dev); -static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs); -static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs); -static void network_tx_buf_gc(struct net_device *dev); -static void network_alloc_rx_buffers(struct net_device *dev); -static void network_free_rx_buffers(struct net_device *dev); - -static struct net_device dev_net_xeno; - -/* - * RX RING: RX_IDX <= rx_cons <= rx_prod - * TX RING: TX_IDX <= tx_cons <= tx_prod - * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor) - */ -struct net_private -{ - struct net_device_stats stats; - struct sk_buff **tx_skb_ring; - struct sk_buff **rx_skb_ring; - atomic_t tx_entries; - unsigned int rx_idx, tx_idx, tx_full; - net_ring_t *net_ring; - spinlock_t tx_lock; -}; 
- - -int __init network_probe(struct net_device *dev) -{ - SET_MODULE_OWNER(dev); - - memcpy(dev->dev_addr, "\xFE\xFD\x00\x00\x00\x00", 6); - - dev->open = network_open; - dev->hard_start_xmit = network_start_xmit; - dev->stop = network_close; - dev->get_stats = network_get_stats; - - ether_setup(dev); - - return 0; -} - - -static int network_open(struct net_device *dev) -{ - struct net_private *np; - int error; - - np = kmalloc(sizeof(struct net_private), GFP_KERNEL); - if ( np == NULL ) - { - printk(KERN_WARNING "%s: No memory for private data\n", dev->name); - return -ENOMEM; - } - memset(np, 0, sizeof(struct net_private)); - dev->priv = np; - - spin_lock_init(&np->tx_lock); - - atomic_set(&np->tx_entries, 0); - - np->net_ring = start_info.net_rings; - np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0; - np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0; - np->net_ring->tx_ring = NULL; - np->net_ring->rx_ring = NULL; - - np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *), - GFP_KERNEL); - np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *), - GFP_KERNEL); - np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), - GFP_KERNEL); - np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), - GFP_KERNEL); - if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) || - (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) ) - { - printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name); - error = -ENOBUFS; - goto fail; - } - - network_alloc_rx_buffers(dev); - - error = request_irq(NET_RX_IRQ, network_rx_int, 0, "net-rx", dev); - if ( error ) - { - printk(KERN_WARNING "%s: Could not allocate receive interrupt\n", - dev->name); - goto fail; - } - - error = request_irq(NET_TX_IRQ, network_tx_int, 0, "net-tx", dev); - if ( error ) - { - printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n", - dev->name); - free_irq(NET_RX_IRQ, dev); 
- goto fail; - } - - printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name); - - netif_start_queue(dev); - - MOD_INC_USE_COUNT; - - return 0; - - fail: - if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring); - if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring); - if ( np->rx_skb_ring ) kfree(np->rx_skb_ring); - if ( np->tx_skb_ring ) kfree(np->tx_skb_ring); - kfree(np); - return error; -} - - -static void network_tx_buf_gc(struct net_device *dev) -{ - unsigned int i; - struct net_private *np = dev->priv; - struct sk_buff *skb; - unsigned long flags; - - spin_lock_irqsave(&np->tx_lock, flags); - - for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) ) - { - skb = np->tx_skb_ring[i]; - dev_kfree_skb_any(skb); - atomic_dec(&np->tx_entries); - } - - np->tx_idx = i; - - if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) ) - { - np->tx_full = 0; - netif_wake_queue(dev); - } - - spin_unlock_irqrestore(&np->tx_lock, flags); -} - - -static void network_alloc_rx_buffers(struct net_device *dev) -{ - unsigned int i; - struct net_private *np = dev->priv; - struct sk_buff *skb; - unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES); - - for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) ) - { - skb = dev_alloc_skb(RX_BUF_SIZE); - if ( skb == NULL ) break; - skb->dev = dev; - skb_reserve(skb, 2); /* word align the IP header */ - np->rx_skb_ring[i] = skb; - np->net_ring->rx_ring[i].addr = (unsigned long)skb->data; - np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */ - } - - np->net_ring->rx_prod = i; - - np->net_ring->rx_event = RX_RING_INC(np->rx_idx); - - HYPERVISOR_net_update(); -} - - -static void network_free_rx_buffers(struct net_device *dev) -{ - unsigned int i; - struct net_private *np = dev->priv; - struct sk_buff *skb; - - for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) ) - { - skb = np->rx_skb_ring[i]; - dev_kfree_skb(skb); - } -} - - -static int 
network_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - unsigned int i; - struct net_private *np = (struct net_private *)dev->priv; - - if ( np->tx_full ) - { - printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name); - netif_stop_queue(dev); - return -ENOBUFS; - } - - i = np->net_ring->tx_prod; - np->tx_skb_ring[i] = skb; - np->net_ring->tx_ring[i].addr = (unsigned long)skb->data; - np->net_ring->tx_ring[i].size = skb->len; - np->net_ring->tx_prod = TX_RING_INC(i); - atomic_inc(&np->tx_entries); - - np->stats.tx_bytes += skb->len; - np->stats.tx_packets++; - - spin_lock_irq(&np->tx_lock); - if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES ) - { - np->tx_full = 1; - netif_stop_queue(dev); - np->net_ring->tx_event = TX_RING_ADD(np->tx_idx, - atomic_read(&np->tx_entries) >> 1); - } - else - { - /* Avoid unnecessary tx interrupts. */ - np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod); - } - spin_unlock_irq(&np->tx_lock); - - /* Must do this after setting tx_event: race with updates of tx_cons. */ - network_tx_buf_gc(dev); - - HYPERVISOR_net_update(); - - return 0; -} - - -static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - unsigned int i; - struct net_device *dev = (struct net_device *)dev_id; - struct net_private *np = dev->priv; - struct sk_buff *skb; - - again: - for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) ) - { - skb = np->rx_skb_ring[i]; - skb_put(skb, np->net_ring->rx_ring[i].size); - skb->protocol = eth_type_trans(skb, dev); - np->stats.rx_packets++; - np->stats.rx_bytes += np->net_ring->rx_ring[i].size; - netif_rx(skb); - dev->last_rx = jiffies; - } - - np->rx_idx = i; - - network_alloc_rx_buffers(dev); - - /* Deal with hypervisor racing our resetting of rx_event. 
*/ - smp_mb(); - if ( np->net_ring->rx_cons != i ) goto again; -} - - -static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - struct net_device *dev = (struct net_device *)dev_id; - network_tx_buf_gc(dev); -} - - -static int network_close(struct net_device *dev) -{ - struct net_private *np = dev->priv; - - netif_stop_queue(dev); - free_irq(NET_RX_IRQ, dev); - free_irq(NET_TX_IRQ, dev); - network_free_rx_buffers(dev); - kfree(np->net_ring->rx_ring); - kfree(np->net_ring->tx_ring); - kfree(np->rx_skb_ring); - kfree(np->tx_skb_ring); - kfree(np); - MOD_DEC_USE_COUNT; - return 0; -} - - -static struct net_device_stats *network_get_stats(struct net_device *dev) -{ - struct net_private *np = (struct net_private *)dev->priv; - return &np->stats; -} - - -static int __init init_module(void) -{ - memset(&dev_net_xeno, 0, sizeof(dev_net_xeno)); - strcpy(dev_net_xeno.name, "eth%d"); - dev_net_xeno.init = network_probe; - return (register_netdev(&dev_net_xeno) != 0) ? -EIO : 0; -} - - -static void __exit cleanup_module(void) -{ - unregister_netdev(&dev_net_xeno); -} - -#endif /* UNDEFINED */ - - -static void block_initialize(void) -{ - blk_ring_t *blk_ring = start_info.blk_ring; - - if ( blk_ring == NULL ) return; - - blk_ring->tx_prod = blk_ring->tx_cons = blk_ring->tx_event = 0; - blk_ring->rx_prod = blk_ring->rx_cons = blk_ring->rx_event = 0; - blk_ring->tx_ring = NULL; - blk_ring->rx_ring = NULL; -} - - -/* - * block_setup initialized the xeno block device driver - */ - -static int __init init_module(void) -{ - block_initialize(); - printk("XenoLinux Virtual Block Device Driver installed\n"); - return 0; -} - -static void __exit cleanup_module(void) -{ - printk("XenoLinux Virtual Block Device Driver uninstalled\n"); -} - -module_init(init_module); -module_exit(cleanup_module); - diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c new file mode 100644 index 
0000000000..f7bd088ff4 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c @@ -0,0 +1,595 @@ +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/fs.h> +#include <linux/hdreg.h> /* HDIO_GETGEO, et al */ +#include <linux/blkdev.h> +#include <linux/major.h> + +/* NOTE: this is drive independent, so no inclusion of ide.h */ + +#include <asm/hypervisor-ifs/block.h> +#include <asm/hypervisor-ifs/hypervisor-if.h> +#include <asm/io.h> +#include <asm/uaccess.h> /* put_user() */ + +#define MAJOR_NR XLBLK_MAJOR /* force defns in blk.h, must preceed include */ +static int xlblk_major = XLBLK_MAJOR; + +#include <linux/blk.h> /* must come after definition of MAJOR_NR!! */ + +/* instead of including linux/ide.h to pick up the definitiong of byte + * (and consequently screwing up blk.h, we'll just copy the definition */ +typedef unsigned char byte; + +void xlblk_ide_register_disk(int, unsigned long); + +#define XLBLK_MAX 2 /* very arbitrary */ +#define XLBLK_MAJOR_NAME "xhd" +#define IDE_PARTN_BITS 6 /* from ide.h::PARTN_BITS */ +#define IDE_PARTN_MASK ((1<<IDE_PARTN_BITS)-1) /* from ide.h::PARTN_MASK */ +static int xlblk_blk_size[XLBLK_MAX]; +static int xlblk_blksize_size[XLBLK_MAX]; +static int xlblk_read_ahead; +static int xlblk_hardsect_size[XLBLK_MAX]; +static int xlblk_max_sectors[XLBLK_MAX]; + +#define XLBLK_RX_IRQ _EVENT_BLK_RX +#define XLBLK_TX_IRQ _EVENT_BLK_TX + +#define DEBUG_IRQ _EVENT_DEBUG + +typedef struct xlblk_device +{ + struct buffer_head *bh; + unsigned int tx_count; /* number of used slots in tx ring */ +} xlblk_device_t; + +xlblk_device_t xlblk_device; + +#define XLBLK_DEBUG 0 +#define XLBLK_DEBUG_IOCTL 0 + +/* + * disk management + */ + +xen_disk_info_t xen_disk_info; + +/* some declarations */ +void hypervisor_request(void * id, + int operation, + char * buffer, + unsigned long 
block_number, + unsigned short block_size, + kdev_t device, + int mode); + + +/* ------------------------------------------------------------------------ + */ + +static int xenolinux_block_open(struct inode *inode, struct file *filep) +{ + if (XLBLK_DEBUG) + printk (KERN_ALERT "xenolinux_block_open\n"); + + return 0; +} + +static int xenolinux_block_release(struct inode *inode, struct file *filep) +{ + if (XLBLK_DEBUG) + printk (KERN_ALERT "xenolinux_block_release\n"); + + return 0; +} + +static int xenolinux_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) +{ + int minor_dev; + struct hd_geometry *geo = (struct hd_geometry *)argument; + + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT "xenolinux_block_ioctl\n"); + + /* check permissions */ + if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!inode) return -EINVAL; + minor_dev = MINOR(inode->i_rdev); + if (minor_dev >= XLBLK_MAX) return -ENODEV; + + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT " command: 0x%x, argument: 0x%lx, minor: 0x%x\n", + command, (long) argument, minor_dev); + + switch (command) { + + case BLKGETSIZE: + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT + " BLKGETSIZE: %x %lx\n", BLKGETSIZE, + (long) xen_disk_info.disks[0].capacity); + return put_user(xen_disk_info.disks[0].capacity, + (unsigned long *) argument); + + case BLKRRPART: + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT " BLKRRPART: %x\n", BLKRRPART); + break; + + case BLKSSZGET: + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT " BLKSSZGET: %x 0x%x\n", BLKSSZGET, + xlblk_hardsect_size[minor_dev]); + return xlblk_hardsect_size[minor_dev]; + + case HDIO_GETGEO: + + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT " HDIO_GETGEO: %x\n", HDIO_GETGEO); + + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short 
*)&geo->cylinders)) return -EFAULT; + return 0; + + case HDIO_GETGEO_BIG: + + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT " HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); + + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; + + return 0; + + default: + if (XLBLK_DEBUG_IOCTL) + printk (KERN_ALERT " eh? unknown ioctl\n"); + break; + } + + return 0; +} + +static int xenolinux_block_check(kdev_t dev) +{ + if (XLBLK_DEBUG) + printk (KERN_ALERT "xenolinux_block_check\n"); + return 0; +} + +static int xenolinux_block_revalidate(kdev_t dev) +{ + if (XLBLK_DEBUG) + printk (KERN_ALERT "xenolinux_block_revalidate\n"); + return 0; +} + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_READ, XEN_BLOCK_WRITE or XEN_BLOCK_PROBE + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + * block_number: block to read + * block_size: size of each block + * device: ide/hda is 768 or 0x300 + * mode: XEN_BLOCK_SYNC or XEN_BLOCK_ASYNC. async requests + * will queue until a sync request is issued. 
+ */ + +void hypervisor_request(void * id, + int operation, + char * buffer, + unsigned long block_number, + unsigned short block_size, + kdev_t device, + int mode) +{ + blk_ring_t *blk_ring = start_info.blk_ring; + int position; + void *buffer_pa, *buffer_ma; + kdev_t phys_device = (kdev_t) 0; + unsigned long sector_number = 0; + struct gendisk *gd; + + + buffer_pa = (void *)virt_to_phys(buffer); + buffer_ma = (void *)phys_to_machine((unsigned long)buffer_pa); + + if (operation == XEN_BLOCK_PROBE) { + phys_device = (kdev_t) 0; + sector_number = 0; + + } else if (operation == XEN_BLOCK_READ || operation == XEN_BLOCK_WRITE) { + + /* + * map logial major device to the physical device number + * + * XLBLK_MAJOR -> IDE0_MAJOR (123 -> 3) + */ + if (MAJOR(device) == XLBLK_MAJOR) + phys_device = MKDEV(IDE0_MAJOR, 0); + else { + printk (KERN_ALERT "error: xl_block::hypervisor_request: " + "unknown device [0x%x]\n", device); + BUG(); + } + + /* + * compute real buffer location on disk + * (from ll_rw_block.c::submit_bh) + */ + + + sector_number = block_number /* * block_size >> 9 */; + + if((gd = (struct gendisk *)xen_disk_info.disks[0].gendisk) != NULL) + sector_number += gd->part[MINOR(device)&IDE_PARTN_MASK].start_sect; + } + + + if (BLK_TX_RING_INC(blk_ring->btx_prod) == blk_ring->btx_cons) { + printk (KERN_ALERT "hypervisor_request: btx_cons: %d, btx_prod:%d", + blk_ring->btx_cons, blk_ring->btx_prod); + BUG(); + } + + /* Fill out a communications ring structure & trap to the hypervisor */ + position = blk_ring->btx_prod; + blk_ring->btx_ring[position].id = id; + blk_ring->btx_ring[position].priority = mode; + blk_ring->btx_ring[position].operation = operation; + blk_ring->btx_ring[position].buffer = buffer_ma; + blk_ring->btx_ring[position].block_number = block_number; + blk_ring->btx_ring[position].block_size = block_size; + blk_ring->btx_ring[position].device = phys_device; + blk_ring->btx_ring[position].sector_number = sector_number; + + blk_ring->btx_prod = 
BLK_TX_RING_INC(blk_ring->btx_prod); + + switch(mode) { + + case XEN_BLOCK_SYNC: + /* trap into hypervisor */ + HYPERVISOR_block_io_op(); + break; + + case XEN_BLOCK_ASYNC: + /* for now, do nothing. the request will go in the ring and + the next sync request will trigger the hypervisor to act */ + printk("Oh dear-- ASYNC xen block of doom!\n"); + break; + + default: + /* ummm, unknown mode. */ + printk("xl_block thingy: unknown mode %d\n", mode); + BUG(); + } + + return; +} + + +/* + * do_xlblk_request + * + * read a block; request is in a request queue + * + * TO DO: should probably release the io_request_lock and then re-acquire + * (see LDD p. 338) + */ +static void do_xlblk_request (request_queue_t *rq) +{ + struct request *req; + + if (XLBLK_DEBUG) + printk (KERN_ALERT "xlblk.c::do_xlblk_request for '%s'\n", + DEVICE_NAME); + + while (!QUEUE_EMPTY) + { + struct buffer_head *bh; + unsigned long offset; + unsigned long length; + int rw; + + if(rq->plugged) + return ; + + req = CURRENT; + + if (XLBLK_DEBUG) + printk (KERN_ALERT + "do_xlblk_request %p: cmd %i, sec %lx, (%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->bh); + + /* is there space in the tx ring for this request? + * if the ring is full, then leave the request in the queue + * + * THIS IS A BIT BOGUS SINCE XEN COULD BE UPDATING BTX_CONS + * AT THE SAME TIME + */ + { + blk_ring_t *blk_ring = start_info.blk_ring; + + if (BLK_RX_RING_INC(blk_ring->btx_prod) == blk_ring->btx_cons) + { + printk (KERN_ALERT "OOPS, TX LOOKS FULL cons: %d prod: %d\n", + blk_ring->btx_cons, blk_ring->btx_prod); + BUG(); + break; + } + } + + req->errors = 0; + blkdev_dequeue_request(req); + + bh = req->bh; + + while (bh) + { + offset = bh->b_rsector << 9; + length = bh->b_size; + + rw = req->cmd; + if (rw == READA) rw= READ; + if ((rw != READ) && (rw != WRITE)) { + printk (KERN_ALERT + "XenoLinux Virtual Block Device: bad cmd: %d\n", rw); + BUG(); + } + + hypervisor_request (req, rw == READ ? 
+ XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size, + bh->b_dev, XEN_BLOCK_SYNC); + bh = bh->b_reqnext; + } + + blkdev_dequeue_request(req); + + } + + return; +} + + +static struct block_device_operations xenolinux_block_fops = +{ + open: xenolinux_block_open, + release: xenolinux_block_release, + ioctl: xenolinux_block_ioctl, + check_media_change: xenolinux_block_check, + revalidate: xenolinux_block_revalidate, +}; + +static void xlblk_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + blk_ring_t *blk_ring = start_info.blk_ring; + struct request *req; + int loop; + u_long flags; + + for (loop = blk_ring->brx_cons; + loop != blk_ring->brx_prod; + loop = BLK_RX_RING_INC(loop)) { + + blk_ring_entry_t *bret = &blk_ring->brx_ring[loop]; + + if(bret->operation == XEN_BLOCK_PROBE) + continue; + + spin_lock_irqsave(&io_request_lock, flags); + req = (struct request *)bret->id; + + if (!end_that_request_first(req, 1, "XenBlk")) + end_that_request_last(req); + spin_unlock_irqrestore(&io_request_lock, flags); + + } + + blk_ring->brx_cons = loop; +} + +static void xlblk_tx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + if (XLBLK_DEBUG) + printk (KERN_ALERT "--- xlblock::xlblk_tx_int\n"); +} + +int __init xlblk_init(void) +{ + blk_ring_t *blk_ring = start_info.blk_ring; + int loop, error, result; + + /* initialize memory rings to communicate with hypervisor */ + if ( blk_ring == NULL ) return -ENOMEM; + + blk_ring->btx_prod = blk_ring->btx_cons = 0; + blk_ring->brx_prod = blk_ring->brx_cons = 0; + blk_ring->btx_ring = NULL; + blk_ring->brx_ring = NULL; + + blk_ring->btx_ring = kmalloc(BLK_TX_RING_SIZE * sizeof(blk_ring_entry_t), + GFP_KERNEL); + blk_ring->brx_ring = kmalloc(BLK_RX_RING_SIZE * sizeof(blk_ring_entry_t), + GFP_KERNEL); + + if ((blk_ring->btx_ring == NULL) || (blk_ring->brx_ring == NULL)) { + printk (KERN_ALERT "could not alloc ring memory for block device\n"); + error = -ENOBUFS; + goto fail; + } + + error = 
request_irq(XLBLK_RX_IRQ, xlblk_rx_int, 0, + "xlblk-rx", &xlblk_device); + if (error) { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + error = request_irq(XLBLK_TX_IRQ, xlblk_tx_int, 0, + "xlblk-tx", &xlblk_device); + if (error) { + printk(KERN_ALERT "Could not allocate transmit interrupt\n"); + free_irq(XLBLK_RX_IRQ, &xlblk_device); + goto fail; + } + + memset (&xen_disk_info, 0, sizeof(xen_disk_info)); + xen_disk_info.count = 0; + + hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info, + 0, 0, (kdev_t) 0, XEN_BLOCK_SYNC); + for (loop = 0; loop < xen_disk_info.count; loop++) + printk (KERN_ALERT " %2d: type: %d, capacity: %ld\n", + loop, xen_disk_info.disks[loop].type, + xen_disk_info.disks[loop].capacity); + + + SET_MODULE_OWNER(&xenolinux_block_fops); + result = register_blkdev(xlblk_major, "block", &xenolinux_block_fops); + if (result < 0) { + printk (KERN_ALERT "xenolinux block: can't get major %d\n", + xlblk_major); + return result; + } + + /* initialize global arrays in drivers/block/ll_rw_block.c */ + for (loop = 0; loop < XLBLK_MAX; loop++) { + xlblk_blk_size[loop] = xen_disk_info.disks[0].capacity; + xlblk_blksize_size[loop] = 512; + xlblk_hardsect_size[loop] = 512; + xlblk_max_sectors[loop] = 128; + } + xlblk_read_ahead = 8; + + blk_size[xlblk_major] = xlblk_blk_size; + blksize_size[xlblk_major] = xlblk_blksize_size; + hardsect_size[xlblk_major] = xlblk_hardsect_size; + read_ahead[xlblk_major] = xlblk_read_ahead; + max_sectors[xlblk_major] = xlblk_max_sectors; + + blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request); + /* + ** XXX SMH: we don't leave req on queue => are happy for evelator + ** to reorder things including it. (main reason for this decision + ** is that it works while 'standard' case doesn't. Ho hum). 
+ */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0); + + xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity); + + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver installed [device: %d]\n", + xlblk_major); + return 0; + + fail: + if (blk_ring->btx_ring) kfree(blk_ring->btx_ring); + if (blk_ring->brx_ring) kfree(blk_ring->brx_ring); + return error; +} + +void xlblk_ide_register_disk(int idx, unsigned long capacity) +{ + int units; + int minors; + struct gendisk *gd; + + /* plagarized from ide-probe.c::init_gendisk */ + + units = 2; /* from ide.h::MAX_DRIVES */ + + minors = units * (1<<IDE_PARTN_BITS); + gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL); + gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + + gd->major = xlblk_major; + gd->major_name = XLBLK_MAJOR_NAME; + gd->minor_shift = IDE_PARTN_BITS; + gd->max_p = 1<<IDE_PARTN_BITS; + gd->nr_real = units; + gd->real_devices = NULL; + gd->next = NULL; + gd->fops = &xenolinux_block_fops; + gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL); + gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL); + + if (gd->de_arr) + memset (gd->de_arr, 0, sizeof *gd->de_arr * units); + + if (gd->flags) + memset (gd->flags, 0, sizeof *gd->flags * units); + + add_gendisk(gd); + + xen_disk_info.disks[idx].gendisk = gd; + + /* default disk size is just a big number. 
in the future, we + need a message to probe the devices to determine the actual size */ + register_disk(gd, MKDEV(xlblk_major, 0), 1<<IDE_PARTN_BITS, + &xenolinux_block_fops, capacity); + + return; +} + + + +static void __exit xlblk_cleanup(void) +{ + /* CHANGE FOR MULTIQUEUE */ + blk_cleanup_queue(BLK_DEFAULT_QUEUE(xlblk_major)); + + /* clean up global arrays */ + read_ahead[xlblk_major] = 0; + + if (blk_size[xlblk_major]) + kfree(blk_size[xlblk_major]); + blk_size[xlblk_major] = NULL; + + if (blksize_size[xlblk_major]) + kfree(blksize_size[xlblk_major]); + blksize_size[xlblk_major] = NULL; + + if (hardsect_size[xlblk_major]) + kfree(hardsect_size[xlblk_major]); + hardsect_size[xlblk_major] = NULL; + + /* XXX: free each gendisk */ + if (unregister_blkdev(xlblk_major, "block")) + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver uninstalled w/ errs\n"); + else + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver uninstalled\n"); + + return; +} + + +#ifdef MODULE +module_init(xlblk_init); +module_exit(xlblk_cleanup); +#endif diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c new file mode 100644 index 0000000000..cab6d9a330 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c @@ -0,0 +1,233 @@ +/****************************************************************************** + * xenolinux_block_test.c + * + */ +#define EXPORT_SYMTAB + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <asm/hypervisor-ifs/block.h> +#include <asm/hypervisor-ifs/hypervisor-if.h> + +/******************************************************************/ + +static struct proc_dir_entry *bdt; +static blk_ring_entry_t meta; +static 
char * data; + +static int proc_read_bdt(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + switch (meta.operation) + { + case XEN_BLOCK_READ : + case XEN_BLOCK_WRITE : + { + return proc_dump_block(page, start, off, count, eof, data); + } + case XEN_BLOCK_DEBUG : + { + return proc_dump_debug(page, start, off, count, eof, data); + } + default : + { + printk(KERN_ALERT + "block device test error: unknown operation [%c]\n", + meta.operation); + return -EINVAL; + } + } +} + +int proc_dump_debug(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + + sprintf (header, "Block Device Test: Debug Dump\n\n"); + + sprintf (dump, "%s\n", meta.buffer); + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_dump_block(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + char temp[100]; + int loop; + + sprintf (header, "Block Device Test\n\n%s blk num: %ld 0x%lx; size: %d 0x%x; device: 0x%x\n", + meta.operation == XEN_BLOCK_WRITE ? 
"write" : "read", + meta.block_number, meta.block_number, + meta.block_size, meta.block_size, + meta.device); + + sprintf (dump, "%s", header); + + if (meta.buffer) + { + for (loop = 0; loop < 100; loop++) + { + int i = meta.buffer[loop]; + + if (loop % 8 == 0) + { + sprintf (temp, "[%2d] ", loop); + strcat(dump, temp); + } + else if (loop % 2 == 0) + { + strcat(dump, " "); + } + + sprintf (temp, " 0x%02x", i & 255); + strcat(dump, temp); + if ((loop + 1) % 8 == 0) + { + strcat(dump, "\n"); + } + } + strcat(dump, "\n\n"); + } + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_write_bdt(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char *local = kmalloc((count + 1) * sizeof(char), GFP_KERNEL); + char opcode; + int block_number = 0; + int block_size = 0; + int device = 0; + int mode; + + if (copy_from_user(local, buffer, count)) + { + return -EFAULT; + } + local[count] = '\0'; + + sscanf(local, "%c %i %i %i", + &opcode, &block_number, &block_size, &device); + + if (opcode == 'r' || opcode == 'R') + { + meta.operation = XEN_BLOCK_READ; + } + else if (opcode == 'w' || opcode == 'W') + { + meta.operation = XEN_BLOCK_WRITE; + } + else if (opcode == 'd' || opcode == 'D') + { + meta.operation = XEN_BLOCK_DEBUG; + block_size = 10000; + } + else + { + printk(KERN_ALERT + "block device test error: unknown opcode [%c]\n", opcode); + return -EINVAL; + } + + if (opcode == 'r' || opcode == 'w' || + opcode == 'd' || opcode == 'D') + { + mode = XEN_BLOCK_SYNC; + } + else /* (opcode == 'R' || opcode == 'W') */ + { + mode = XEN_BLOCK_ASYNC; + } + + if (data) + { + kfree(data); + } + data = kmalloc(block_size * sizeof(char), GFP_KERNEL); + if (data == NULL) + { + kfree(local); + return -ENOMEM; + } + + meta.block_number = block_number; + meta.block_size = block_size; + meta.device = device; + meta.buffer = data; + + /* submit request */ + hypervisor_request(0, meta.operation, meta.buffer, + 
meta.block_number, meta.block_size, + meta.device, mode); + + kfree(local); + return count; +} + + +static int __init init_module(void) +{ + int return_value = 0; + + /* create proc entry */ + bdt = create_proc_entry("bdt", 0644, NULL); + if (bdt == NULL) + { + return_value = -ENOMEM; + goto error; + } + bdt->data = NULL; + bdt->read_proc = proc_read_bdt; + bdt->write_proc = proc_write_bdt; + bdt->owner = THIS_MODULE; + + memset(&meta, 0, sizeof(meta)); + + /* success */ + printk(KERN_ALERT "XenoLinux Block Device Test installed\n"); + return 0; + + error: + return return_value; +} + +static void __exit cleanup_module(void) +{ + if (data) + { + kfree(data); + } + printk(KERN_ALERT "XenoLinux Block Device Test uninstalled\n"); +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c new file mode 100644 index 0000000000..97d4a65b78 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c @@ -0,0 +1,27 @@ +/* + * domain 0 block driver interface + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +static int __init init_module(void) +{ + request_module("xl_block"); + printk("Successfully installed domain 0 block interface\n"); + + + return 0; +} + +static void __exit cleanup_module(void) +{ + printk("Successfully de-installed domain-0 block interface\n"); + return 0; +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c index 12db77164b..a35ef1cc8a 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c @@ -42,7 +42,7 @@ extern struct drive_info_struct drive_info; EXPORT_SYMBOL(drive_info); #endif -extern unsigned long get_cmos_time(void); 
+//extern unsigned long get_cmos_time(void); /* platform dependent support */ EXPORT_SYMBOL(boot_cpu_data); @@ -58,7 +58,7 @@ EXPORT_SYMBOL(probe_irq_mask); EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(pm_idle); EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(get_cmos_time); +//EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(apm_info); #ifdef CONFIG_DEBUG_IOVIRT diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c index 1c7f27176d..87c52056f6 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c @@ -86,7 +86,7 @@ void cpu_idle (void) while (1) { while (!current->need_resched) - HYPERVISOR_yield(); + HYPERVISOR_do_sched_op(NULL); schedule(); check_pgt_cache(); } diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c index c728eb15e6..4999af6642 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c @@ -1,3 +1,25 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: arch.xeno/time.c + * Author: Rolf Neugebauer + * Changes: + * + * Date: Nov 2002 + * + * Environment: XenoLinux + * Description: Interface with Hypervisor to get correct notion of time + * Currently supports Systemtime and WallClock time. + * + * (This has hardly any resemblence with the Linux code but left the + * copyright notice anyway. Ignore the comments in the copyright notice.) 
+ **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + /* * linux/arch/i386/kernel/time.c * @@ -30,19 +52,6 @@ * serialize accesses to xtime/lost_ticks). */ -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/param.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/time.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/smp.h> - -#include <asm/io.h> #include <asm/smp.h> #include <asm/irq.h> #include <asm/msr.h> @@ -51,115 +60,103 @@ #include <asm/uaccess.h> #include <asm/processor.h> -#include <linux/mc146818rtc.h> -#include <linux/timex.h> -#include <linux/config.h> - +#include <asm/div64.h> #include <asm/hypervisor.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/smp.h> #include <linux/irq.h> - -unsigned long cpu_khz; /* Detected as we calibrate the TSC */ - -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. - */ -unsigned long fast_gettimeoffset_quotient; - -extern rwlock_t xtime_lock; -extern unsigned long wall_jiffies; +#undef XENO_TIME_DEBUG /* adds sanity checks and periodic printouts */ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +extern rwlock_t xtime_lock; -static inline unsigned long ticks_to_secs(unsigned long long ticks) -{ - unsigned long lo, hi; - unsigned long little_ticks; - - little_ticks = ticks /* XXX URK! 
XXX / 1000000ULL */; - - __asm__ __volatile__ ( - "mull %2" - : "=a" (lo), "=d" (hi) - : "rm" (fast_gettimeoffset_quotient), "0" (little_ticks) ); +unsigned long cpu_khz; /* get this from Xen, used elsewhere */ +static spinlock_t hyp_stime_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t hyp_wctime_lock = SPIN_LOCK_UNLOCKED; - return(hi); -} +static u32 st_scale_f; +static u32 st_scale_i; +static u32 shadow_st_pcc; +static s64 shadow_st; -/* NB. Only 32 bits of ticks are considered here. */ -static inline unsigned long ticks_to_us(unsigned long ticks) +/* + * System time. + * Although the rest of the Linux kernel doesn't know about this, we + * use it to extrapolate passage of wallclock time. + * We need to read the values from the shared info page "atomically" + * and use the cycle counter value as the "version" number. Clashes + * should be very rare. + */ +static inline long long get_s_time(void) { - unsigned long lo, hi; + unsigned long flags; + u32 delta_tsc, low, pcc; + u64 delta; + s64 now; - __asm__ __volatile__ ( - "mull %2" - : "=a" (lo), "=d" (hi) - : "rm" (fast_gettimeoffset_quotient), "0" (ticks) ); + spin_lock_irqsave(&hyp_stime_lock, flags); - return(hi); -} + while ((pcc = HYPERVISOR_shared_info->st_timestamp) != shadow_st_pcc) + { + barrier(); + shadow_st_pcc = pcc; + shadow_st = HYPERVISOR_shared_info->system_time; + barrier(); + } -static inline unsigned long do_gettimeoffset(void) -{ -#if 0 - register unsigned long eax, edx; + now = shadow_st; + /* only use bottom 32bits of TSC. This should be sufficient */ + rdtscl(low); + delta_tsc = low - pcc; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); - /* Read the Time Stamp Counter */ + spin_unlock_irqrestore(&hyp_time_lock, flags); - rdtsc(eax,edx); + return now + delta; - /* .. 
relative to previous jiffy (32 bits is enough) */ - eax -= last_tsc_low; /* tsc_low delta */ - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. - */ - - edx = ticks_to_us(eax); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + edx; -#else - /* - * We should keep a 'last_tsc_low' thing which incorporates - * delay_at_last_interrupt, adjusted in timer_interrupt after - * do_timer_interrupt. It would look at change in xtime, and - * make appropriate adjustment to a last_tsc variable. - * - * We'd be affected by rounding error in ticks_per_usec, and by - * processor clock drift (which should be no more than in an - * external interrupt source anyhow). - * - * Perhaps a bit rough and ready, but never mind! - */ - return 0; -#endif } +#define NOW() ((long long)get_s_time()) /* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. + * Wallclock time. + * Based on what the hypervisor tells us, extrapolated using system time. + * Again need to read a number of values from the shared page "atomically". + * this time using a version number. 
*/ +static u32 shadow_wc_version=0; +static long shadow_tv_sec; +static long shadow_tv_usec; +static long long shadow_wc_timestamp; void do_gettimeofday(struct timeval *tv) { - unsigned long flags; - unsigned long usec, sec, lost; - - read_lock_irqsave(&xtime_lock, flags); - usec = do_gettimeoffset(); - lost = jiffies - wall_jiffies; - if ( lost != 0 ) usec += lost * (1000000 / HZ); - sec = xtime.tv_sec; - usec += xtime.tv_usec; - read_unlock_irqrestore(&xtime_lock, flags); + unsigned long flags; + long usec, sec; + u32 version; + u64 now; + + spin_lock_irqsave(&hyp_wctime_lock, flags); + + while ((version = HYPERVISOR_shared_info->wc_version)!= shadow_wc_version) + { + barrier(); + shadow_wc_version = version; + shadow_tv_sec = HYPERVISOR_shared_info->tv_sec; + shadow_tv_usec = HYPERVISOR_shared_info->tv_usec; + shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp; + barrier(); + } + + now = NOW(); + usec = ((unsigned long)(now-shadow_wc_timestamp))/1000; + sec = shadow_tv_sec; + usec += shadow_tv_usec; while ( usec >= 1000000 ) { @@ -169,10 +166,40 @@ void do_gettimeofday(struct timeval *tv) tv->tv_sec = sec; tv->tv_usec = usec; + + spin_unlock_irqrestore(&hyp_time_lock, flags); + +#ifdef XENO_TIME_DEBUG + { + static long long old_now=0; + static long long wct=0, old_wct=0; + + /* This debug code checks if time increase over two subsequent calls */ + wct=(((long long)sec) * 1000000) + usec; + /* wall clock time going backwards */ + if ((wct < old_wct) ) { + printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n", + (long)(wct-old_wct), usec, usec); + printk(" st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + now-old_now, now, old_now); + } + + /* system time going backwards */ + if (now<=old_now) { + printk("Urgh2: st diff=%lld cur st=0x%016llX old st=0x%016llX\n", + now-old_now, now, old_now); + } + old_wct = wct; + old_now = now; + } +#endif + } void do_settimeofday(struct timeval *tv) { +/* XXX RN: should do something special here for dom0 */ +#if 0 
write_lock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. However, the @@ -195,29 +222,73 @@ void do_settimeofday(struct timeval *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_unlock_irq(&xtime_lock); +#endif } /* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * Timer ISR. + * Unlike normal Linux these don't come in at a fixed rate of HZ. + * In here we work out how often it should have been called and then call + * the architecture independent part (do_timer()) the appropriate number of + * times. A bit of a nasty hack, to keep the "other" notion of wallclock time + * happy. */ -static inline void do_timer_interrupt( - int irq, void *dev_id, struct pt_regs *regs) +static long long us_per_tick=1000000/HZ; +static long long last_irq; +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { - do_timer(regs); + struct timeval tv; + long long time, delta; + +#ifdef XENO_TIME_DEBUG + static u32 foo_count = 0; + foo_count++; + if (foo_count>= 10000) { + s64 n = NOW(); + struct timeval tv; + do_gettimeofday(&tv); + printk("0x%08X%08X %ld:%ld\n", + (u32)(n>>32), (u32)n, tv.tv_sec, tv.tv_usec); + foo_count = 0; + } +#endif + + /* + * The next bit really sucks: + * Linux not only uses do_gettimeofday() to keep a notion of + * wallclock time, but also maintains the xtime struct and jiffies. + * (Even worse some userland code accesses this via the sys_time() + * system call) + * Unfortunately, xtime is maintained in the architecture independent + * part of the timer ISR (./kernel/timer.c sic!). So, although we have + * perfectly valid notion of wallclock time from the hypervisor we here + * fake missed timer interrupts so that the arch independent part of + * the Timer ISR updates jiffies for us *and* once the bh gets run + * updates xtime accordingly. Yuck! 
+ */ + + /* work out the number of jiffies past and update them */ + do_gettimeofday(&tv); + time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec; + delta = time - last_irq; + if (delta <= 0) { + printk ("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + while (delta >= us_per_tick) { + do_timer(regs); + delta -= us_per_tick; + last_irq += us_per_tick; + } + #if 0 if (!user_mode(regs)) x86_do_profile(regs->eip); #endif } - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { write_lock(&xtime_lock); @@ -234,56 +305,31 @@ static struct irqaction irq_timer = { NULL }; - -unsigned long get_cmos_time(void) -{ - unsigned long secs = HYPERVISOR_shared_info->rtc_time; - unsigned long diff; - - rdtscl(diff); - diff -= (unsigned long)HYPERVISOR_shared_info->rtc_timestamp; - - secs += ticks_to_us(diff); - - return(secs + ticks_to_secs(diff)); -} - - -/* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). */ -static unsigned long __init calibrate_tsc(void) +void __init time_init(void) { - unsigned long quo, rem; + unsigned long long alarm; + u64 cpu_freq = HYPERVISOR_shared_info->cpu_freq; + u64 scale; - /* quotient == (1000 * 2^32) / ticks_per ms */ - __asm__ __volatile__ ( - "divl %2" - : "=a" (quo), "=d" (rem) - : "r" (HYPERVISOR_shared_info->ticks_per_ms), "0" (0), "1" (1000) ); + do_get_fast_time = do_gettimeofday; - return(quo); -} + cpu_khz = (u32)cpu_freq/1000; + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); -void __init time_init(void) -{ - unsigned long long alarm; - - fast_gettimeoffset_quotient = calibrate_tsc(); - do_get_fast_time = do_gettimeofday; + /* + * calculate systemtime scaling factor + * XXX RN: have to cast cpu_freq to u32 limits it to 4.29 GHz. 
+ * Get a better do_div! + */ + scale = 1000000000LL << 32; + do_div(scale,(u32)cpu_freq); + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + printk("System Time scale: %X %X\n",st_scale_i, st_scale_f); - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { - unsigned long eax=0, edx=1000; - __asm__ __volatile__ - ("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (fast_gettimeoffset_quotient), - "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } + do_gettimeofday(&xtime); + last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec; setup_irq(TIMER_IRQ, &irq_timer); @@ -292,13 +338,14 @@ void __init time_init(void) * 'domain' time. This means that clock sshould run at the correct * rate. For things like scheduling, it's not clear whether it * matters which sort of time we use. + * XXX RN: unimplemented. */ + rdtscll(alarm); +#if 0 alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms; HYPERVISOR_shared_info->wall_timeout = alarm; HYPERVISOR_shared_info->domain_timeout = ~0ULL; +#endif clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); - - xtime.tv_sec = get_cmos_time(); - xtime.tv_usec = 0; } |