aboutsummaryrefslogtreecommitdiffstats
path: root/xenolinux-2.4.16-sparse/arch
diff options
context:
space:
mode:
Diffstat (limited to 'xenolinux-2.4.16-sparse/arch')
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/defconfig15
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile2
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c392
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c595
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c233
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c27
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c4
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c2
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c365
9 files changed, 1073 insertions, 562 deletions
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/defconfig b/xenolinux-2.4.16-sparse/arch/xeno/defconfig
index b278dec50f..c83d96d1b0 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/defconfig
+++ b/xenolinux-2.4.16-sparse/arch/xeno/defconfig
@@ -114,6 +114,7 @@ CONFIG_BLK_DEV_NBD=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_INITRD=y
+CONFIG_XENOLINUX_BLOCK=y
#
# Multi-device support (RAID and LVM)
@@ -379,13 +380,13 @@ CONFIG_MSDOS_PARTITION=y
#
# Kernel hacking
#
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_HIGHMEM=y
-CONFIG_DEBUG_SLAB=y
-CONFIG_DEBUG_IOVIRT=y
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_IOVIRT is not set
# CONFIG_MAGIC_SYSRQ is not set
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_KDB is not set
CONFIG_KALLSYMS=y
-CONFIG_FRAME_POINTER=y
+# CONFIG_FRAME_POINTER is not set
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
index 9361a01ec7..74a0c6c565 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
@@ -1,3 +1,3 @@
O_TARGET := blk.o
-obj-y := block.o
+obj-y := xl_block.o xl_block_test.o
include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c
deleted file mode 100644
index bf7d416dff..0000000000
--- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c
+++ /dev/null
@@ -1,392 +0,0 @@
-/******************************************************************************
- * block.c
- *
- * Virtual block driver for XenoLinux.
- *
- * adapted from network.c
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-
-#include <asm/hypervisor-ifs/block.h>
-
-#ifdef UNDEFINED
-
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-
-#include <net/sock.h>
-
-#define BLK_TX_IRQ _EVENT_BLK_TX
-#define BLK_RX_IRQ _EVENT_BLK_RX
-
-#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
-#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)
-
-#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1))
-#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1))
-#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
-#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
-
-#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! */
-
-
-
-int network_probe(struct net_device *dev);
-static int network_open(struct net_device *dev);
-static int network_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static int network_close(struct net_device *dev);
-static struct net_device_stats *network_get_stats(struct net_device *dev);
-static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs);
-static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs);
-static void network_tx_buf_gc(struct net_device *dev);
-static void network_alloc_rx_buffers(struct net_device *dev);
-static void network_free_rx_buffers(struct net_device *dev);
-
-static struct net_device dev_net_xeno;
-
-/*
- * RX RING: RX_IDX <= rx_cons <= rx_prod
- * TX RING: TX_IDX <= tx_cons <= tx_prod
- * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor)
- */
-struct net_private
-{
- struct net_device_stats stats;
- struct sk_buff **tx_skb_ring;
- struct sk_buff **rx_skb_ring;
- atomic_t tx_entries;
- unsigned int rx_idx, tx_idx, tx_full;
- net_ring_t *net_ring;
- spinlock_t tx_lock;
-};
-
-
-int __init network_probe(struct net_device *dev)
-{
- SET_MODULE_OWNER(dev);
-
- memcpy(dev->dev_addr, "\xFE\xFD\x00\x00\x00\x00", 6);
-
- dev->open = network_open;
- dev->hard_start_xmit = network_start_xmit;
- dev->stop = network_close;
- dev->get_stats = network_get_stats;
-
- ether_setup(dev);
-
- return 0;
-}
-
-
-static int network_open(struct net_device *dev)
-{
- struct net_private *np;
- int error;
-
- np = kmalloc(sizeof(struct net_private), GFP_KERNEL);
- if ( np == NULL )
- {
- printk(KERN_WARNING "%s: No memory for private data\n", dev->name);
- return -ENOMEM;
- }
- memset(np, 0, sizeof(struct net_private));
- dev->priv = np;
-
- spin_lock_init(&np->tx_lock);
-
- atomic_set(&np->tx_entries, 0);
-
- np->net_ring = start_info.net_rings;
- np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0;
- np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0;
- np->net_ring->tx_ring = NULL;
- np->net_ring->rx_ring = NULL;
-
- np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *),
- GFP_KERNEL);
- np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *),
- GFP_KERNEL);
- np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t),
- GFP_KERNEL);
- np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t),
- GFP_KERNEL);
- if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) ||
- (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
- {
- printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name);
- error = -ENOBUFS;
- goto fail;
- }
-
- network_alloc_rx_buffers(dev);
-
- error = request_irq(NET_RX_IRQ, network_rx_int, 0, "net-rx", dev);
- if ( error )
- {
- printk(KERN_WARNING "%s: Could not allocate receive interrupt\n",
- dev->name);
- goto fail;
- }
-
- error = request_irq(NET_TX_IRQ, network_tx_int, 0, "net-tx", dev);
- if ( error )
- {
- printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n",
- dev->name);
- free_irq(NET_RX_IRQ, dev);
- goto fail;
- }
-
- printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name);
-
- netif_start_queue(dev);
-
- MOD_INC_USE_COUNT;
-
- return 0;
-
- fail:
- if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
- if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
- if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
- if ( np->tx_skb_ring ) kfree(np->tx_skb_ring);
- kfree(np);
- return error;
-}
-
-
-static void network_tx_buf_gc(struct net_device *dev)
-{
- unsigned int i;
- struct net_private *np = dev->priv;
- struct sk_buff *skb;
- unsigned long flags;
-
- spin_lock_irqsave(&np->tx_lock, flags);
-
- for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) )
- {
- skb = np->tx_skb_ring[i];
- dev_kfree_skb_any(skb);
- atomic_dec(&np->tx_entries);
- }
-
- np->tx_idx = i;
-
- if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
- {
- np->tx_full = 0;
- netif_wake_queue(dev);
- }
-
- spin_unlock_irqrestore(&np->tx_lock, flags);
-}
-
-
-static void network_alloc_rx_buffers(struct net_device *dev)
-{
- unsigned int i;
- struct net_private *np = dev->priv;
- struct sk_buff *skb;
- unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES);
-
- for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) )
- {
- skb = dev_alloc_skb(RX_BUF_SIZE);
- if ( skb == NULL ) break;
- skb->dev = dev;
- skb_reserve(skb, 2); /* word align the IP header */
- np->rx_skb_ring[i] = skb;
- np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
- np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
- }
-
- np->net_ring->rx_prod = i;
-
- np->net_ring->rx_event = RX_RING_INC(np->rx_idx);
-
- HYPERVISOR_net_update();
-}
-
-
-static void network_free_rx_buffers(struct net_device *dev)
-{
- unsigned int i;
- struct net_private *np = dev->priv;
- struct sk_buff *skb;
-
- for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) )
- {
- skb = np->rx_skb_ring[i];
- dev_kfree_skb(skb);
- }
-}
-
-
-static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- unsigned int i;
- struct net_private *np = (struct net_private *)dev->priv;
-
- if ( np->tx_full )
- {
- printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name);
- netif_stop_queue(dev);
- return -ENOBUFS;
- }
-
- i = np->net_ring->tx_prod;
- np->tx_skb_ring[i] = skb;
- np->net_ring->tx_ring[i].addr = (unsigned long)skb->data;
- np->net_ring->tx_ring[i].size = skb->len;
- np->net_ring->tx_prod = TX_RING_INC(i);
- atomic_inc(&np->tx_entries);
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
-
- spin_lock_irq(&np->tx_lock);
- if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES )
- {
- np->tx_full = 1;
- netif_stop_queue(dev);
- np->net_ring->tx_event = TX_RING_ADD(np->tx_idx,
- atomic_read(&np->tx_entries) >> 1);
- }
- else
- {
- /* Avoid unnecessary tx interrupts. */
- np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod);
- }
- spin_unlock_irq(&np->tx_lock);
-
- /* Must do this after setting tx_event: race with updates of tx_cons. */
- network_tx_buf_gc(dev);
-
- HYPERVISOR_net_update();
-
- return 0;
-}
-
-
-static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- unsigned int i;
- struct net_device *dev = (struct net_device *)dev_id;
- struct net_private *np = dev->priv;
- struct sk_buff *skb;
-
- again:
- for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
- {
- skb = np->rx_skb_ring[i];
- skb_put(skb, np->net_ring->rx_ring[i].size);
- skb->protocol = eth_type_trans(skb, dev);
- np->stats.rx_packets++;
- np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
- netif_rx(skb);
- dev->last_rx = jiffies;
- }
-
- np->rx_idx = i;
-
- network_alloc_rx_buffers(dev);
-
- /* Deal with hypervisor racing our resetting of rx_event. */
- smp_mb();
- if ( np->net_ring->rx_cons != i ) goto again;
-}
-
-
-static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- struct net_device *dev = (struct net_device *)dev_id;
- network_tx_buf_gc(dev);
-}
-
-
-static int network_close(struct net_device *dev)
-{
- struct net_private *np = dev->priv;
-
- netif_stop_queue(dev);
- free_irq(NET_RX_IRQ, dev);
- free_irq(NET_TX_IRQ, dev);
- network_free_rx_buffers(dev);
- kfree(np->net_ring->rx_ring);
- kfree(np->net_ring->tx_ring);
- kfree(np->rx_skb_ring);
- kfree(np->tx_skb_ring);
- kfree(np);
- MOD_DEC_USE_COUNT;
- return 0;
-}
-
-
-static struct net_device_stats *network_get_stats(struct net_device *dev)
-{
- struct net_private *np = (struct net_private *)dev->priv;
- return &np->stats;
-}
-
-
-static int __init init_module(void)
-{
- memset(&dev_net_xeno, 0, sizeof(dev_net_xeno));
- strcpy(dev_net_xeno.name, "eth%d");
- dev_net_xeno.init = network_probe;
- return (register_netdev(&dev_net_xeno) != 0) ? -EIO : 0;
-}
-
-
-static void __exit cleanup_module(void)
-{
- unregister_netdev(&dev_net_xeno);
-}
-
-#endif /* UNDEFINED */
-
-
-static void block_initialize(void)
-{
- blk_ring_t *blk_ring = start_info.blk_ring;
-
- if ( blk_ring == NULL ) return;
-
- blk_ring->tx_prod = blk_ring->tx_cons = blk_ring->tx_event = 0;
- blk_ring->rx_prod = blk_ring->rx_cons = blk_ring->rx_event = 0;
- blk_ring->tx_ring = NULL;
- blk_ring->rx_ring = NULL;
-}
-
-
-/*
- * block_setup initialized the xeno block device driver
- */
-
-static int __init init_module(void)
-{
- block_initialize();
- printk("XenoLinux Virtual Block Device Driver installed\n");
- return 0;
-}
-
-static void __exit cleanup_module(void)
-{
- printk("XenoLinux Virtual Block Device Driver uninstalled\n");
-}
-
-module_init(init_module);
-module_exit(cleanup_module);
-
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
new file mode 100644
index 0000000000..f7bd088ff4
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
@@ -0,0 +1,595 @@
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/fs.h>
+#include <linux/hdreg.h> /* HDIO_GETGEO, et al */
+#include <linux/blkdev.h>
+#include <linux/major.h>
+
+/* NOTE: this is drive independent, so no inclusion of ide.h */
+
+#include <asm/hypervisor-ifs/block.h>
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+#include <asm/io.h>
+#include <asm/uaccess.h> /* put_user() */
+
+#define MAJOR_NR XLBLK_MAJOR /* force defns in blk.h, must precede include */
+/* major number used to register the device and to index the block-layer tables */
+static int xlblk_major = XLBLK_MAJOR;
+
+#include <linux/blk.h> /* must come after definition of MAJOR_NR!! */
+
+/* instead of including linux/ide.h to pick up the definition of byte
+ * (and consequently screwing up blk.h), we'll just copy the definition */
+typedef unsigned char byte;
+
+void xlblk_ide_register_disk(int, unsigned long);
+
+#define XLBLK_MAX 2 /* very arbitrary */
+#define XLBLK_MAJOR_NAME "xhd"
+#define IDE_PARTN_BITS 6 /* from ide.h::PARTN_BITS */
+#define IDE_PARTN_MASK ((1<<IDE_PARTN_BITS)-1) /* from ide.h::PARTN_MASK */
+/* Tables of XLBLK_MAX entries each (plus one read-ahead value).  xlblk_init
+ * fills them in and installs them in the block layer's blk_size[],
+ * blksize_size[], read_ahead[], hardsect_size[] and max_sectors[] arrays
+ * under xlblk_major.  They are static storage, not kmalloc()ed memory. */
+static int xlblk_blk_size[XLBLK_MAX];
+static int xlblk_blksize_size[XLBLK_MAX];
+static int xlblk_read_ahead;
+static int xlblk_hardsect_size[XLBLK_MAX];
+static int xlblk_max_sectors[XLBLK_MAX];
+
+#define XLBLK_RX_IRQ _EVENT_BLK_RX
+#define XLBLK_TX_IRQ _EVENT_BLK_TX
+
+#define DEBUG_IRQ _EVENT_DEBUG
+
+/* Driver-wide state.  In the code visible in this file the single instance
+ * below is only used through its address, as the dev_id cookie for the
+ * block RX/TX event IRQs. */
+typedef struct xlblk_device
+{
+ struct buffer_head *bh; /* NOTE(review): not referenced by the functions in this file -- confirm it is still needed */
+ unsigned int tx_count; /* number of used slots in tx ring */
+} xlblk_device_t;
+
+xlblk_device_t xlblk_device;
+
+#define XLBLK_DEBUG 0
+#define XLBLK_DEBUG_IOCTL 0
+
+/*
+ * disk management
+ */
+
+xen_disk_info_t xen_disk_info;
+
+/* some declarations */
+void hypervisor_request(void * id,
+ int operation,
+ char * buffer,
+ unsigned long block_number,
+ unsigned short block_size,
+ kdev_t device,
+ int mode);
+
+
+/* ------------------------------------------------------------------------
+ */
+
+/*
+ * open hook: nothing to set up per open.  The call is traced only when
+ * XLBLK_DEBUG is non-zero.  Always returns 0.
+ */
+static int xenolinux_block_open(struct inode *inode, struct file *filep)
+{
+    if (!XLBLK_DEBUG)
+        return 0;
+
+    printk (KERN_ALERT "xenolinux_block_open\n");
+    return 0;
+}
+
+/*
+ * release hook: nothing to tear down per close.  The call is traced only
+ * when XLBLK_DEBUG is non-zero.  Always returns 0.
+ */
+static int xenolinux_block_release(struct inode *inode, struct file *filep)
+{
+    if (!XLBLK_DEBUG)
+        return 0;
+
+    printk (KERN_ALERT "xenolinux_block_release\n");
+    return 0;
+}
+
+/*
+ * ioctl handler for the virtual block device.
+ *
+ * Every command requires CAP_SYS_ADMIN.  Commands handled:
+ *   BLKGETSIZE      - stores the capacity of disk 0 through 'argument'
+ *   BLKRRPART       - accepted, no action taken
+ *   BLKSSZGET       - stores the hard sector size of this minor through
+ *                     'argument' (an int in user space)
+ *   HDIO_GETGEO     - fills a struct hd_geometry with a fixed fake geometry
+ *   HDIO_GETGEO_BIG - fills a struct hd_big_geometry with the same geometry
+ * Any other command is ignored and reports success.
+ *
+ * Returns 0 on success, or -EPERM, -EINVAL, -ENODEV or -EFAULT.
+ */
+static int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
+                                 unsigned command, unsigned long argument)
+{
+    int minor_dev;
+    struct hd_geometry *geo = (struct hd_geometry *)argument;
+    struct hd_big_geometry *big_geo = (struct hd_big_geometry *)argument;
+
+    if (XLBLK_DEBUG_IOCTL)
+        printk (KERN_ALERT "xenolinux_block_ioctl\n");
+
+    /* check permissions */
+    if (!capable(CAP_SYS_ADMIN)) return -EPERM;
+    if (!inode) return -EINVAL;
+    minor_dev = MINOR(inode->i_rdev);
+    /* the per-minor tables only have XLBLK_MAX entries */
+    if (minor_dev >= XLBLK_MAX) return -ENODEV;
+
+    if (XLBLK_DEBUG_IOCTL)
+        printk (KERN_ALERT " command: 0x%x, argument: 0x%lx, minor: 0x%x\n",
+                command, (long) argument, minor_dev);
+
+    switch (command) {
+
+    case BLKGETSIZE:
+        if (XLBLK_DEBUG_IOCTL)
+            printk (KERN_ALERT
+                    " BLKGETSIZE: %x %lx\n", BLKGETSIZE,
+                    (long) xen_disk_info.disks[0].capacity);
+        return put_user(xen_disk_info.disks[0].capacity,
+                        (unsigned long *) argument);
+
+    case BLKRRPART:
+        if (XLBLK_DEBUG_IOCTL)
+            printk (KERN_ALERT " BLKRRPART: %x\n", BLKRRPART);
+        break;
+
+    case BLKSSZGET:
+        if (XLBLK_DEBUG_IOCTL)
+            printk (KERN_ALERT " BLKSSZGET: %x 0x%x\n", BLKSSZGET,
+                    xlblk_hardsect_size[minor_dev]);
+        /* the result goes to user memory; the ioctl return value is only
+         * the status (returning the size itself looks like an odd
+         * success code to the caller and leaves its variable unset) */
+        return put_user(xlblk_hardsect_size[minor_dev], (int *) argument);
+
+    case HDIO_GETGEO:
+
+        if (XLBLK_DEBUG_IOCTL)
+            printk (KERN_ALERT " HDIO_GETGEO: %x\n", HDIO_GETGEO);
+
+        if (!argument) return -EINVAL;
+        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+        if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
+        if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
+        return 0;
+
+    case HDIO_GETGEO_BIG:
+
+        if (XLBLK_DEBUG_IOCTL)
+            printk (KERN_ALERT " HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
+
+        if (!argument) return -EINVAL;
+        /* the "big" variant has its own layout with a 32-bit cylinder
+         * count; writing 32 bits through struct hd_geometry would run
+         * over its 16-bit cylinders field into 'start' */
+        if (put_user(0x00, (unsigned long *) &big_geo->start)) return -EFAULT;
+        if (put_user(0xff, (byte *)&big_geo->heads)) return -EFAULT;
+        if (put_user(0x3f, (byte *)&big_geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned int *) &big_geo->cylinders)) return -EFAULT;
+
+        return 0;
+
+    default:
+        if (XLBLK_DEBUG_IOCTL)
+            printk (KERN_ALERT " eh? unknown ioctl\n");
+        break;
+    }
+
+    return 0;
+}
+
+/*
+ * check_media_change hook: always reports "no change" (0).  The call is
+ * traced only when XLBLK_DEBUG is non-zero.
+ */
+static int xenolinux_block_check(kdev_t dev)
+{
+    if (XLBLK_DEBUG) {
+        printk (KERN_ALERT "xenolinux_block_check\n");
+    }
+
+    return 0;
+}
+
+/*
+ * revalidate hook: nothing to re-read; always returns 0.  The call is
+ * traced only when XLBLK_DEBUG is non-zero.
+ */
+static int xenolinux_block_revalidate(kdev_t dev)
+{
+    if (XLBLK_DEBUG) {
+        printk (KERN_ALERT "xenolinux_block_revalidate\n");
+    }
+
+    return 0;
+}
+
+/*
+ * hypervisor_request
+ *
+ * Place one block I/O request on the transmit ring shared with Xen.
+ *
+ * id:           opaque cookie copied into the ring entry (guest use only)
+ * operation:    XEN_BLOCK_READ, XEN_BLOCK_WRITE or XEN_BLOCK_PROBE
+ * buffer:       guest virtual address of the data buffer; it is converted
+ *               to a machine address before it is stored in the ring
+ * block_number: block to transfer
+ * block_size:   size of each block
+ * device:       guest device number; reads and writes must name XLBLK_MAJOR
+ * mode:         XEN_BLOCK_SYNC traps into the hypervisor straight away,
+ *               XEN_BLOCK_ASYNC only leaves the entry queued in the ring
+ *
+ * BUG()s when the ring is full, when a read/write names another major,
+ * or when mode is neither of the two values above.
+ */
+
+void hypervisor_request(void * id,
+                        int operation,
+                        char * buffer,
+                        unsigned long block_number,
+                        unsigned short block_size,
+                        kdev_t device,
+                        int mode)
+{
+    blk_ring_t *ring = start_info.blk_ring;
+    struct gendisk *gd;
+    /* values used for XEN_BLOCK_PROBE (and any non read/write operation) */
+    kdev_t phys_device = (kdev_t) 0;
+    unsigned long sector_number = 0;
+    void *buffer_ma;
+    int slot;
+
+    /* virtual -> pseudo-physical -> machine address */
+    buffer_ma = (void *)phys_to_machine((unsigned long)virt_to_phys(buffer));
+
+    if (operation == XEN_BLOCK_READ || operation == XEN_BLOCK_WRITE)
+    {
+        /*
+         * map logical major device to the physical device number
+         *
+         * XLBLK_MAJOR -> IDE0_MAJOR (123 -> 3)
+         */
+        if (MAJOR(device) != XLBLK_MAJOR) {
+            printk (KERN_ALERT "error: xl_block::hypervisor_request: "
+                    "unknown device [0x%x]\n", device);
+            BUG();
+        } else {
+            phys_device = MKDEV(IDE0_MAJOR, 0);
+        }
+
+        /*
+         * compute real buffer location on disk
+         * (from ll_rw_block.c::submit_bh)
+         */
+        sector_number = block_number /* * block_size >> 9 */;
+
+        /* offset by the start of the partition named by the minor */
+        gd = (struct gendisk *)xen_disk_info.disks[0].gendisk;
+        if (gd != NULL)
+            sector_number += gd->part[MINOR(device)&IDE_PARTN_MASK].start_sect;
+    }
+
+    /* one slot is always kept free, so "next == consumer" means full */
+    if (BLK_TX_RING_INC(ring->btx_prod) == ring->btx_cons) {
+        printk (KERN_ALERT "hypervisor_request: btx_cons: %d, btx_prod:%d",
+                ring->btx_cons, ring->btx_prod);
+        BUG();
+    }
+
+    /* Fill out a communications ring structure & trap to the hypervisor */
+    slot = ring->btx_prod;
+    ring->btx_ring[slot].id = id;
+    ring->btx_ring[slot].priority = mode;
+    ring->btx_ring[slot].operation = operation;
+    ring->btx_ring[slot].buffer = buffer_ma;
+    ring->btx_ring[slot].block_number = block_number;
+    ring->btx_ring[slot].block_size = block_size;
+    ring->btx_ring[slot].device = phys_device;
+    ring->btx_ring[slot].sector_number = sector_number;
+
+    /* publish the entry only after it is completely filled in */
+    ring->btx_prod = BLK_TX_RING_INC(ring->btx_prod);
+
+    if (mode == XEN_BLOCK_SYNC)
+    {
+        /* trap into hypervisor */
+        HYPERVISOR_block_io_op();
+    }
+    else if (mode == XEN_BLOCK_ASYNC)
+    {
+        /* for now, do nothing. the request will go in the ring and
+           the next sync request will trigger the hypervisor to act */
+        printk("Oh dear-- ASYNC xen block of doom!\n");
+    }
+    else
+    {
+        /* ummm, unknown mode. */
+        printk("xl_block thingy: unknown mode %d\n", mode);
+        BUG();
+    }
+
+    return;
+}
+
+
+/*
+ * do_xlblk_request
+ *
+ * Request function for the queue: takes each pending request off the
+ * queue and submits every buffer head of it to the hypervisor as a
+ * separate synchronous ring entry.  The request pointer is used as the
+ * ring id so that xlblk_rx_int can complete it.
+ *
+ * Stops early if the queue is plugged.
+ *
+ * TO DO: should probably release the io_request_lock and then re-acquire
+ * (see LDD p. 338)
+ */
+static void do_xlblk_request (request_queue_t *rq)
+{
+    struct request *req;
+    struct buffer_head *bh;
+    blk_ring_t *blk_ring;
+    int rw;
+
+    if (XLBLK_DEBUG)
+        printk (KERN_ALERT "xlblk.c::do_xlblk_request for '%s'\n",
+                DEVICE_NAME);
+
+    while (!QUEUE_EMPTY)
+    {
+        if(rq->plugged)
+            return ;
+
+        req = CURRENT;
+
+        if (XLBLK_DEBUG)
+            printk (KERN_ALERT
+                    "do_xlblk_request %p: cmd %i, sec %lx, (%li) bh:%p\n",
+                    req, req->cmd, req->sector,
+                    req->current_nr_sectors, req->bh);
+
+        /* is there space in the tx ring for this request?
+         * if the ring is full, then leave the request in the queue
+         *
+         * THIS IS A BIT BOGUS SINCE XEN COULD BE UPDATING BTX_CONS
+         * AT THE SAME TIME
+         *
+         * NOTE: only one free slot is checked here although each buffer
+         * head of the request takes a slot of its own below.
+         */
+        blk_ring = start_info.blk_ring;
+        if (BLK_TX_RING_INC(blk_ring->btx_prod) == blk_ring->btx_cons)
+        {
+            printk (KERN_ALERT "OOPS, TX LOOKS FULL cons: %d prod: %d\n",
+                    blk_ring->btx_cons, blk_ring->btx_prod);
+            BUG();
+            break;
+        }
+
+        req->errors = 0;
+        /* unlink exactly once: the request is off the queue from here on
+         * and is finished from the rx interrupt handler */
+        blkdev_dequeue_request(req);
+
+        for (bh = req->bh; bh != NULL; bh = bh->b_reqnext)
+        {
+            /* read-ahead is serviced as an ordinary read */
+            rw = req->cmd;
+            if (rw == READA) rw= READ;
+            if ((rw != READ) && (rw != WRITE)) {
+                printk (KERN_ALERT
+                        "XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
+                BUG();
+            }
+
+            hypervisor_request (req, rw == READ ?
+                                XEN_BLOCK_READ : XEN_BLOCK_WRITE,
+                                bh->b_data, bh->b_rsector, bh->b_size,
+                                bh->b_dev, XEN_BLOCK_SYNC);
+        }
+    }
+
+    return;
+}
+
+
+/*
+ * Operations table for the virtual block device.  It is handed to
+ * register_blkdev() and register_disk() and stored in the gendisk's fops.
+ */
+static struct block_device_operations xenolinux_block_fops =
+{
+ open: xenolinux_block_open,
+ release: xenolinux_block_release,
+ ioctl: xenolinux_block_ioctl,
+ check_media_change: xenolinux_block_check,
+ revalidate: xenolinux_block_revalidate,
+};
+
+/*
+ * Response interrupt: walks the receive ring from brx_cons up to brx_prod.
+ * Probe responses are skipped; for every other entry the id field is taken
+ * as the struct request that was submitted and one step of it is completed
+ * under io_request_lock.  brx_cons is advanced once, after the walk.
+ */
+static void xlblk_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    blk_ring_t *ring = start_info.blk_ring;
+    int idx = ring->brx_cons;
+    u_long flags;
+
+    while (idx != ring->brx_prod)
+    {
+        blk_ring_entry_t *resp = &ring->brx_ring[idx];
+
+        if (resp->operation != XEN_BLOCK_PROBE)
+        {
+            struct request *req;
+
+            spin_lock_irqsave(&io_request_lock, flags);
+            req = (struct request *)resp->id;
+
+            /* finish the request once its last buffer has completed */
+            if (!end_that_request_first(req, 1, "XenBlk"))
+                end_that_request_last(req);
+            spin_unlock_irqrestore(&io_request_lock, flags);
+        }
+
+        idx = BLK_RX_RING_INC(idx);
+    }
+
+    ring->brx_cons = idx;
+}
+
+/*
+ * Transmit interrupt: no work is done here; the event is only traced
+ * when XLBLK_DEBUG is non-zero.
+ */
+static void xlblk_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    if (!XLBLK_DEBUG)
+        return;
+
+    printk (KERN_ALERT "--- xlblock::xlblk_tx_int\n");
+}
+
+/*
+ * Driver initialisation.
+ *
+ * Allocates the TX/RX rings shared with the hypervisor, installs the two
+ * event handlers, probes Xen for the disk list, registers the block major,
+ * installs the per-device tables and the request queue, and finally
+ * registers disk 0.
+ *
+ * Returns 0 on success or a negative errno.  On any failure everything
+ * acquired so far (IRQs, ring memory) is released again.
+ */
+int __init xlblk_init(void)
+{
+    blk_ring_t *blk_ring = start_info.blk_ring;
+    int loop, error;
+
+    /* initialize memory rings to communicate with hypervisor */
+    if ( blk_ring == NULL ) return -ENOMEM;
+
+    blk_ring->btx_prod = blk_ring->btx_cons = 0;
+    blk_ring->brx_prod = blk_ring->brx_cons = 0;
+
+    blk_ring->btx_ring = kmalloc(BLK_TX_RING_SIZE * sizeof(blk_ring_entry_t),
+                                 GFP_KERNEL);
+    blk_ring->brx_ring = kmalloc(BLK_RX_RING_SIZE * sizeof(blk_ring_entry_t),
+                                 GFP_KERNEL);
+
+    if ((blk_ring->btx_ring == NULL) || (blk_ring->brx_ring == NULL)) {
+        printk (KERN_ALERT "could not alloc ring memory for block device\n");
+        error = -ENOBUFS;
+        goto fail_rings;
+    }
+
+    error = request_irq(XLBLK_RX_IRQ, xlblk_rx_int, 0,
+                        "xlblk-rx", &xlblk_device);
+    if (error) {
+        printk(KERN_ALERT "Could not allocate receive interrupt\n");
+        goto fail_rings;
+    }
+
+    error = request_irq(XLBLK_TX_IRQ, xlblk_tx_int, 0,
+                        "xlblk-tx", &xlblk_device);
+    if (error) {
+        printk(KERN_ALERT "Could not allocate transmit interrupt\n");
+        goto fail_rx_irq;
+    }
+
+    /* ask the hypervisor which disks exist; the reply lands in xen_disk_info */
+    memset (&xen_disk_info, 0, sizeof(xen_disk_info));
+    xen_disk_info.count = 0;
+
+    hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info,
+                       0, 0, (kdev_t) 0, XEN_BLOCK_SYNC);
+    for (loop = 0; loop < xen_disk_info.count; loop++)
+        printk (KERN_ALERT " %2d: type: %d, capacity: %ld\n",
+                loop, xen_disk_info.disks[loop].type,
+                xen_disk_info.disks[loop].capacity);
+
+
+    SET_MODULE_OWNER(&xenolinux_block_fops);
+    error = register_blkdev(xlblk_major, "block", &xenolinux_block_fops);
+    if (error < 0) {
+        printk (KERN_ALERT "xenolinux block: can't get major %d\n",
+                xlblk_major);
+        /* both handlers and the rings are live at this point */
+        goto fail_tx_irq;
+    }
+
+    /* initialize global arrays in drivers/block/ll_rw_block.c */
+    for (loop = 0; loop < XLBLK_MAX; loop++) {
+        xlblk_blk_size[loop] = xen_disk_info.disks[0].capacity;
+        xlblk_blksize_size[loop] = 512;
+        xlblk_hardsect_size[loop] = 512;
+        xlblk_max_sectors[loop] = 128;
+    }
+    xlblk_read_ahead = 8;
+
+    blk_size[xlblk_major] = xlblk_blk_size;
+    blksize_size[xlblk_major] = xlblk_blksize_size;
+    hardsect_size[xlblk_major] = xlblk_hardsect_size;
+    read_ahead[xlblk_major] = xlblk_read_ahead;
+    max_sectors[xlblk_major] = xlblk_max_sectors;
+
+    blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request);
+    /*
+    ** XXX SMH: we don't leave req on queue => are happy for elevator
+    ** to reorder things including it. (main reason for this decision
+    ** is that it works while 'standard' case doesn't. Ho hum).
+    */
+    blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0);
+
+    xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity);
+
+    printk(KERN_ALERT
+           "XenoLinux Virtual Block Device Driver installed [device: %d]\n",
+           xlblk_major);
+    return 0;
+
+    /* unwind in reverse order of acquisition */
+ fail_tx_irq:
+    free_irq(XLBLK_TX_IRQ, &xlblk_device);
+ fail_rx_irq:
+    free_irq(XLBLK_RX_IRQ, &xlblk_device);
+ fail_rings:
+    if (blk_ring->btx_ring) kfree(blk_ring->btx_ring);
+    if (blk_ring->brx_ring) kfree(blk_ring->brx_ring);
+    /* the ring structure is shared, so do not leave stale pointers in it */
+    blk_ring->btx_ring = NULL;
+    blk_ring->brx_ring = NULL;
+    return error;
+}
+
+/*
+ * Build a gendisk for the virtual disk, add it to the gendisk list, record
+ * it in xen_disk_info.disks[idx] and register the disk so that its
+ * partitions are scanned.
+ *
+ * idx:      index into xen_disk_info.disks
+ * capacity: disk size handed to register_disk()
+ *
+ * If the gendisk or its size/partition tables cannot be allocated, a
+ * message is logged and nothing is registered.
+ */
+void xlblk_ide_register_disk(int idx, unsigned long capacity)
+{
+    int units;
+    int minors;
+    struct gendisk *gd;
+
+    /* plagiarized from ide-probe.c::init_gendisk */
+
+    units = 2; /* from ide.h::MAX_DRIVES */
+
+    minors = units * (1<<IDE_PARTN_BITS);
+
+    gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL);
+    if (gd == NULL)
+        goto fail;
+    /* start from all-NULL members so the failure path can free blindly */
+    memset(gd, 0, sizeof(struct gendisk));
+
+    gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL);
+    gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL);
+    if ((gd->sizes == NULL) || (gd->part == NULL))
+        goto fail;
+    memset(gd->sizes, 0, minors * sizeof(int));
+    memset(gd->part, 0, minors * sizeof(struct hd_struct));
+
+    gd->major = xlblk_major;
+    gd->major_name = XLBLK_MAJOR_NAME;
+    gd->minor_shift = IDE_PARTN_BITS;
+    gd->max_p = 1<<IDE_PARTN_BITS;
+    gd->nr_real = units;
+    gd->real_devices = NULL;
+    gd->next = NULL;
+    gd->fops = &xenolinux_block_fops;
+
+    /* as in the original code, these two are allowed to be missing */
+    gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL);
+    gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL);
+
+    if (gd->de_arr)
+        memset (gd->de_arr, 0, sizeof *gd->de_arr * units);
+
+    if (gd->flags)
+        memset (gd->flags, 0, sizeof *gd->flags * units);
+
+    add_gendisk(gd);
+
+    xen_disk_info.disks[idx].gendisk = gd;
+
+    /* default disk size is just a big number. in the future, we
+       need a message to probe the devices to determine the actual size */
+    register_disk(gd, MKDEV(xlblk_major, 0), 1<<IDE_PARTN_BITS,
+                  &xenolinux_block_fops, capacity);
+
+    return;
+
+ fail:
+    printk (KERN_ALERT "xenolinux block: no memory for gendisk\n");
+    if (gd != NULL) {
+        if (gd->sizes) kfree(gd->sizes);
+        if (gd->part) kfree(gd->part);
+        kfree(gd);
+    }
+    return;
+}
+
+
+
+/*
+ * Driver teardown: releases the event handlers, removes the request queue,
+ * detaches the per-device tables from the block layer and unregisters the
+ * major number.
+ */
+static void __exit xlblk_cleanup(void)
+{
+    /* stop completions arriving before the queue goes away */
+    free_irq(XLBLK_RX_IRQ, &xlblk_device);
+    free_irq(XLBLK_TX_IRQ, &xlblk_device);
+
+    /* CHANGE FOR MULTIQUEUE */
+    blk_cleanup_queue(BLK_DEFAULT_QUEUE(xlblk_major));
+
+    /* clean up global arrays.  The tables installed by xlblk_init are
+     * static arrays in this file, so they are only detached here and
+     * must never be passed to kfree(). */
+    read_ahead[xlblk_major] = 0;
+    blk_size[xlblk_major] = NULL;
+    blksize_size[xlblk_major] = NULL;
+    hardsect_size[xlblk_major] = NULL;
+    max_sectors[xlblk_major] = NULL;
+
+    /* XXX: free each gendisk */
+    if (unregister_blkdev(xlblk_major, "block"))
+        printk(KERN_ALERT
+               "XenoLinux Virtual Block Device Driver uninstalled w/ errs\n");
+    else
+        printk(KERN_ALERT
+               "XenoLinux Virtual Block Device Driver uninstalled\n");
+
+    return;
+}
+
+
+#ifdef MODULE
+module_init(xlblk_init);
+module_exit(xlblk_cleanup);
+#endif
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c
new file mode 100644
index 0000000000..cab6d9a330
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c
@@ -0,0 +1,233 @@
+/******************************************************************************
+ * xenolinux_block_test.c
+ *
+ */
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <asm/hypervisor-ifs/block.h>
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+
+/******************************************************************/
+
+/* /proc entry created by init_module ("bdt") */
+static struct proc_dir_entry *bdt;
+/* parameters of the most recent request written to the proc entry */
+static blk_ring_entry_t meta;
+/* NOTE(review): the proc callbacks in this file take a parameter that is
+ * also called 'data', which hides this variable inside them -- confirm
+ * which of the two each use is meant to refer to */
+static char * data;
+
+/* both are defined further down in this file but called from here first */
+int proc_dump_debug(char *page, char **start, off_t off,
+                    int count, int *eof, void *data);
+int proc_dump_block(char *page, char **start, off_t off,
+                    int count, int *eof, void *data);
+
+/*
+ * read_proc handler for /proc/bdt: formats the result of the last request
+ * according to the operation recorded in 'meta'.
+ *
+ * Returns the number of bytes placed in 'page', or -EINVAL when the
+ * recorded operation is not a read, write or debug request.
+ */
+static int proc_read_bdt(char *page, char **start, off_t off,
+                         int count, int *eof, void *data)
+{
+    switch (meta.operation)
+    {
+    case XEN_BLOCK_READ :
+    case XEN_BLOCK_WRITE :
+    {
+        return proc_dump_block(page, start, off, count, eof, data);
+    }
+    case XEN_BLOCK_DEBUG :
+    {
+        return proc_dump_debug(page, start, off, count, eof, data);
+    }
+    default :
+    {
+        /* the operation is a numeric code, not the opcode character */
+        printk(KERN_ALERT
+               "block device test error: unknown operation [%d]\n",
+               meta.operation);
+        return -EINVAL;
+    }
+    }
+}
+
+/*
+ * Copy the text returned by a debug request into 'page', followed by a
+ * newline.
+ *
+ * At most count-1 bytes are written and the result is always
+ * NUL-terminated.  The source is read for at most meta.block_size bytes,
+ * the size the buffer was allocated with.  The buffer stays owned by the
+ * write path; nothing is freed here.
+ *
+ * Returns the number of bytes placed in 'page'.
+ */
+int proc_dump_debug(char *page, char **start, off_t off,
+                    int count, int *eof, void *data)
+{
+    if (count <= 0)
+        return 0;
+
+    /* no request has been issued yet */
+    if (meta.buffer == NULL)
+    {
+        page[0] = '\0';
+        return 0;
+    }
+
+    /* format straight into the caller's page: the debug buffer is far
+     * larger than anything that may be put on the kernel stack */
+    snprintf (page, count, "%.*s\n", (int) meta.block_size, meta.buffer);
+
+    return strlen(page);
+}
+
+/*
+ * Append 'text' to the string of length 'len' held in 'page' without ever
+ * writing more than 'count' bytes in total (terminator included; count
+ * must be at least 1).  Returns the new length.
+ */
+static int bdt_append(char *page, int len, int count, const char *text)
+{
+    while (*text != '\0' && len < count - 1)
+        page[len++] = *text++;
+    page[len] = '\0';
+    return len;
+}
+
+/*
+ * Format the last read/write request into 'page': a header line describing
+ * the request followed by a hex dump of the start of the data buffer,
+ * eight bytes per line.
+ *
+ * The dump covers at most 100 bytes and never more than meta.block_size,
+ * the size the buffer was allocated with.  Output is truncated to count-1
+ * bytes and always NUL-terminated.  The buffer stays owned by the write
+ * path; nothing is freed here.
+ *
+ * Returns the number of bytes placed in 'page'.
+ */
+int proc_dump_block(char *page, char **start, off_t off,
+                    int count, int *eof, void *data)
+{
+    char temp[128];
+    int len = 0;
+    int limit;
+    int loop;
+
+    if (count <= 0)
+        return 0;
+    page[0] = '\0';
+
+    snprintf (temp, sizeof(temp), "Block Device Test\n\n%s blk num: %ld 0x%lx; size: %d 0x%x; device: 0x%x\n",
+              meta.operation == XEN_BLOCK_WRITE ? "write" : "read",
+              meta.block_number, meta.block_number,
+              meta.block_size, meta.block_size,
+              meta.device);
+    len = bdt_append(page, len, count, temp);
+
+    if (meta.buffer)
+    {
+        /* never read past the end of the allocation */
+        limit = (int) meta.block_size;
+        if (limit > 100)
+            limit = 100;
+
+        for (loop = 0; loop < limit; loop++)
+        {
+            int i = meta.buffer[loop];
+
+            if (loop % 8 == 0)
+            {
+                /* offset label at the start of each row */
+                sprintf (temp, "[%2d] ", loop);
+                len = bdt_append(page, len, count, temp);
+            }
+            else if (loop % 2 == 0)
+            {
+                len = bdt_append(page, len, count, " ");
+            }
+
+            sprintf (temp, " 0x%02x", i & 255);
+            len = bdt_append(page, len, count, temp);
+            if ((loop + 1) % 8 == 0)
+            {
+                len = bdt_append(page, len, count, "\n");
+            }
+        }
+        len = bdt_append(page, len, count, "\n\n");
+    }
+
+    return len;
+}
+
+/*
+ * write_proc handler for /proc/bdt.
+ *
+ * Input format: "<opcode> <block_number> <block_size> <device>", where the
+ * opcode is one of
+ *   r / w  - synchronous read / write
+ *   R / W  - asynchronous read / write
+ *   d / D  - synchronous debug request (block_size is forced to 10000)
+ *
+ * A data buffer of block_size bytes is allocated, recorded in 'meta'
+ * (replacing and freeing the buffer of the previous request) and the
+ * request is handed to hypervisor_request().
+ *
+ * 'data' is the proc entry's private pointer and is not used.
+ *
+ * Returns 'count' on success, or -ENOMEM, -EFAULT or -EINVAL.
+ */
+int proc_write_bdt(struct file *file, const char *buffer,
+                   unsigned long count, void *data)
+{
+    char *local;
+    char *new_buffer;
+    char opcode = '\0';
+    int operation;
+    int block_number = 0;
+    int block_size = 0;
+    int device = 0;
+    int mode;
+    int result;
+
+    local = kmalloc((count + 1) * sizeof(char), GFP_KERNEL);
+    if (local == NULL)
+    {
+        return -ENOMEM;
+    }
+
+    if (copy_from_user(local, buffer, count))
+    {
+        result = -EFAULT;
+        goto out;
+    }
+    local[count] = '\0';
+
+    /* on empty input the opcode stays '\0' and is rejected below */
+    sscanf(local, "%c %i %i %i",
+           &opcode, &block_number, &block_size, &device);
+
+    if (opcode == 'r' || opcode == 'R')
+    {
+        operation = XEN_BLOCK_READ;
+    }
+    else if (opcode == 'w' || opcode == 'W')
+    {
+        operation = XEN_BLOCK_WRITE;
+    }
+    else if (opcode == 'd' || opcode == 'D')
+    {
+        operation = XEN_BLOCK_DEBUG;
+        block_size = 10000;
+    }
+    else
+    {
+        printk(KERN_ALERT
+               "block device test error: unknown opcode [%c]\n", opcode);
+        result = -EINVAL;
+        goto out;
+    }
+
+    if (opcode == 'r' || opcode == 'w' ||
+        opcode == 'd' || opcode == 'D')
+    {
+        mode = XEN_BLOCK_SYNC;
+    }
+    else /* (opcode == 'R' || opcode == 'W') */
+    {
+        mode = XEN_BLOCK_ASYNC;
+    }
+
+    /* the size comes from user space and is used as an allocation size */
+    if (block_size <= 0)
+    {
+        result = -EINVAL;
+        goto out;
+    }
+
+    new_buffer = kmalloc(block_size * sizeof(char), GFP_KERNEL);
+    if (new_buffer == NULL)
+    {
+        result = -ENOMEM;
+        goto out;
+    }
+
+    /* 'meta' owns the data buffer: drop the one from the previous request */
+    if (meta.buffer)
+    {
+        kfree(meta.buffer);
+    }
+
+    meta.operation = operation;
+    meta.block_number = block_number;
+    meta.block_size = block_size;
+    meta.device = device;
+    meta.buffer = new_buffer;
+
+    /* submit request */
+    hypervisor_request(0, meta.operation, meta.buffer,
+                       meta.block_number, meta.block_size,
+                       meta.device, mode);
+
+    result = count;
+
+ out:
+    kfree(local);
+    return result;
+}
+
+
+/*
+ * Create /proc/bdt (mode 0644), attach the read and write handlers and
+ * clear the record of the last request.
+ *
+ * Returns 0 on success or -ENOMEM if the proc entry cannot be created.
+ */
+static int __init init_module(void)
+{
+    /* create proc entry */
+    bdt = create_proc_entry("bdt", 0644, NULL);
+    if (bdt == NULL)
+        return -ENOMEM;
+
+    bdt->owner = THIS_MODULE;
+    bdt->data = NULL;
+    bdt->read_proc = proc_read_bdt;
+    bdt->write_proc = proc_write_bdt;
+
+    memset(&meta, 0, sizeof(meta));
+
+    /* success */
+    printk(KERN_ALERT "XenoLinux Block Device Test installed\n");
+    return 0;
+}
+
+/*
+ * Remove /proc/bdt and release the data buffer of the last request.
+ */
+static void __exit cleanup_module(void)
+{
+    /* the entry's handlers live in this module, so it must go first */
+    if (bdt)
+    {
+        remove_proc_entry("bdt", NULL);
+        bdt = NULL;
+    }
+
+    /* the live request buffer is the one recorded in 'meta' */
+    if (meta.buffer && meta.buffer != data)
+    {
+        kfree(meta.buffer);
+    }
+    meta.buffer = NULL;
+
+    if (data)
+    {
+        kfree(data);
+    }
+    data = NULL;
+
+    printk(KERN_ALERT "XenoLinux Block Device Test uninstalled\n");
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c
new file mode 100644
index 0000000000..97d4a65b78
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c
@@ -0,0 +1,27 @@
+/*
+ * domain 0 block driver interface
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+static int __init init_module(void)
+{
+ request_module("xl_block"); /* asks for the xl_block module; result ignored */
+ printk("Successfully installed domain 0 block interface\n");
+ /* Always reports success: the message above is printed and 0 is returned
+ * regardless of what request_module() returned. */
+ return 0;
+}
+
+static void __exit cleanup_module(void)
+{
+ /* Unload hook: nothing to release; a void function returns no value. */
+ printk("Successfully de-installed domain-0 block interface\n");
+}
+
+module_init(init_module); /* init_module() is the load-time entry point */
+module_exit(cleanup_module); /* cleanup_module() is the unload-time entry point */
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
index 12db77164b..a35ef1cc8a 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
+++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
@@ -42,7 +42,7 @@ extern struct drive_info_struct drive_info;
EXPORT_SYMBOL(drive_info);
#endif
-extern unsigned long get_cmos_time(void);
+//extern unsigned long get_cmos_time(void);
/* platform dependent support */
EXPORT_SYMBOL(boot_cpu_data);
@@ -58,7 +58,7 @@ EXPORT_SYMBOL(probe_irq_mask);
EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(pm_idle);
EXPORT_SYMBOL(pm_power_off);
-EXPORT_SYMBOL(get_cmos_time);
+//EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(apm_info);
#ifdef CONFIG_DEBUG_IOVIRT
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
index 1c7f27176d..87c52056f6 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
+++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
@@ -86,7 +86,7 @@ void cpu_idle (void)
while (1) {
while (!current->need_resched)
- HYPERVISOR_yield();
+ HYPERVISOR_do_sched_op(NULL);
schedule();
check_pgt_cache();
}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
index c728eb15e6..4999af6642 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
+++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
@@ -1,3 +1,25 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: arch.xeno/time.c
+ * Author: Rolf Neugebauer
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: XenoLinux
+ * Description: Interface with Hypervisor to get correct notion of time
+ * Currently supports Systemtime and WallClock time.
+ *
+ * (This has hardly any resemblance to the Linux code but left the
+ * copyright notice anyway. Ignore the comments in the copyright notice.)
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
/*
* linux/arch/i386/kernel/time.c
*
@@ -30,19 +52,6 @@
* serialize accesses to xtime/lost_ticks).
*/
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/io.h>
#include <asm/smp.h>
#include <asm/irq.h>
#include <asm/msr.h>
@@ -51,115 +60,103 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
-#include <linux/mc146818rtc.h>
-#include <linux/timex.h>
-#include <linux/config.h>
-
+#include <asm/div64.h>
#include <asm/hypervisor.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/smp.h>
#include <linux/irq.h>
-
-unsigned long cpu_khz; /* Detected as we calibrate the TSC */
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-unsigned long fast_gettimeoffset_quotient;
-
-extern rwlock_t xtime_lock;
-extern unsigned long wall_jiffies;
+#undef XENO_TIME_DEBUG /* adds sanity checks and periodic printouts */
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+extern rwlock_t xtime_lock;
-static inline unsigned long ticks_to_secs(unsigned long long ticks)
-{
- unsigned long lo, hi;
- unsigned long little_ticks;
-
- little_ticks = ticks /* XXX URK! XXX / 1000000ULL */;
-
- __asm__ __volatile__ (
- "mull %2"
- : "=a" (lo), "=d" (hi)
- : "rm" (fast_gettimeoffset_quotient), "0" (little_ticks) );
+unsigned long cpu_khz; /* get this from Xen, used elsewhere */
+static spinlock_t hyp_stime_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t hyp_wctime_lock = SPIN_LOCK_UNLOCKED;
- return(hi);
-}
+static u32 st_scale_f;
+static u32 st_scale_i;
+static u32 shadow_st_pcc;
+static s64 shadow_st;
-/* NB. Only 32 bits of ticks are considered here. */
-static inline unsigned long ticks_to_us(unsigned long ticks)
+/*
+ * System time.
+ * Although the rest of the Linux kernel doesn't know about this, we
+ * use it to extrapolate passage of wallclock time.
+ * We need to read the values from the shared info page "atomically"
+ * and use the cycle counter value as the "version" number. Clashes
+ * should be very rare.
+ */
+static inline long long get_s_time(void)
{
- unsigned long lo, hi;
+ unsigned long flags;
+ u32 delta_tsc, low, pcc;
+ u64 delta;
+ s64 now;
- __asm__ __volatile__ (
- "mull %2"
- : "=a" (lo), "=d" (hi)
- : "rm" (fast_gettimeoffset_quotient), "0" (ticks) );
+ spin_lock_irqsave(&hyp_stime_lock, flags); /* held while the shadow copies are refreshed and read */
- return(hi);
-}
+ while ((pcc = HYPERVISOR_shared_info->st_timestamp) != shadow_st_pcc)
+ {
+ barrier();
+ shadow_st_pcc = pcc; /* timestamp this snapshot belongs to */
+ shadow_st = HYPERVISOR_shared_info->system_time;
+ barrier();
+ }
-static inline unsigned long do_gettimeoffset(void)
-{
-#if 0
- register unsigned long eax, edx;
+ now = shadow_st;
+ /* only use bottom 32bits of TSC. This should be sufficient */
+ rdtscl(low);
+ delta_tsc = low - pcc; /* TSC cycles since the snapshot (32-bit arithmetic) */
+ delta = ((u64)delta_tsc * st_scale_f); /* st_scale_f: low 32 bits of the scale from time_init() */
+ delta >>= 32;
+ delta += ((u64)delta_tsc * st_scale_i); /* st_scale_i: high 32 bits of that scale */
- /* Read the Time Stamp Counter */
+ spin_unlock_irqrestore(&hyp_stime_lock, flags); /* must be the lock taken above */
- rdtsc(eax,edx);
+ return now + delta; /* snapshot system time extrapolated to the current TSC */
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= last_tsc_low; /* tsc_low delta */
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- edx = ticks_to_us(eax);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + edx;
-#else
- /*
- * We should keep a 'last_tsc_low' thing which incorporates
- * delay_at_last_interrupt, adjusted in timer_interrupt after
- * do_timer_interrupt. It would look at change in xtime, and
- * make appropriate adjustment to a last_tsc variable.
- *
- * We'd be affected by rounding error in ticks_per_usec, and by
- * processor clock drift (which should be no more than in an
- * external interrupt source anyhow).
- *
- * Perhaps a bit rough and ready, but never mind!
- */
- return 0;
-#endif
}
+#define NOW() ((long long)get_s_time())
/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
+ * Wallclock time.
+ * Based on what the hypervisor tells us, extrapolated using system time.
+ * Again need to read a number of values from the shared page "atomically".
+ * this time using a version number.
*/
+static u32 shadow_wc_version=0;
+static long shadow_tv_sec;
+static long shadow_tv_usec;
+static long long shadow_wc_timestamp;
void do_gettimeofday(struct timeval *tv)
{
- unsigned long flags;
- unsigned long usec, sec, lost;
-
- read_lock_irqsave(&xtime_lock, flags);
- usec = do_gettimeoffset();
- lost = jiffies - wall_jiffies;
- if ( lost != 0 ) usec += lost * (1000000 / HZ);
- sec = xtime.tv_sec;
- usec += xtime.tv_usec;
- read_unlock_irqrestore(&xtime_lock, flags);
+ unsigned long flags;
+ long usec, sec;
+ u32 version;
+ u64 now;
+ /* Fills *tv with the hypervisor wallclock advanced by elapsed system time. */
+ spin_lock_irqsave(&hyp_wctime_lock, flags);
+ /* Re-copy the shared wallclock values whenever wc_version has changed. */
+ while ((version = HYPERVISOR_shared_info->wc_version)!= shadow_wc_version)
+ {
+ barrier();
+ shadow_wc_version = version;
+ shadow_tv_sec = HYPERVISOR_shared_info->tv_sec;
+ shadow_tv_usec = HYPERVISOR_shared_info->tv_usec;
+ shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp;
+ barrier();
+ }
+ /* Add the system time elapsed since wc_timestamp, scaled down by 1000. */
+ now = NOW();
+ usec = ((unsigned long)(now-shadow_wc_timestamp))/1000; /* NOTE(review): cast truncates the 64-bit difference where long is 32-bit */
+ sec = shadow_tv_sec;
+ usec += shadow_tv_usec;
while ( usec >= 1000000 )
{
@@ -169,10 +166,40 @@ void do_gettimeofday(struct timeval *tv)
tv->tv_sec = sec;
tv->tv_usec = usec;
+ /* Release the lock taken on entry; it must be hyp_wctime_lock. */
+ spin_unlock_irqrestore(&hyp_wctime_lock, flags);
+
+#ifdef XENO_TIME_DEBUG
+ {
+ static long long old_now=0;
+ static long long wct=0, old_wct=0;
+
+ /* This debug code checks that time increases over two subsequent calls */
+ wct=(((long long)sec) * 1000000) + usec;
+ /* wall clock time going backwards */
+ if ((wct < old_wct) ) {
+ printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n",
+ (long)(wct-old_wct), usec, usec);
+ printk(" st diff=%lld cur st=0x%016llX old st=0x%016llX\n",
+ now-old_now, now, old_now);
+ }
+
+ /* system time going backwards */
+ if (now<=old_now) {
+ printk("Urgh2: st diff=%lld cur st=0x%016llX old st=0x%016llX\n",
+ now-old_now, now, old_now);
+ }
+ old_wct = wct;
+ old_now = now;
+ }
+#endif
+
}
void do_settimeofday(struct timeval *tv)
{
+/* XXX RN: should do something special here for dom0 */
+#if 0
write_lock_irq(&xtime_lock);
/*
* This is revolting. We need to set "xtime" correctly. However, the
@@ -195,29 +222,73 @@ void do_settimeofday(struct timeval *tv)
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
write_unlock_irq(&xtime_lock);
+#endif
}
/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * Timer ISR.
+ * Unlike normal Linux these don't come in at a fixed rate of HZ.
+ * In here we work out how often it should have been called and then call
+ * the architecture independent part (do_timer()) the appropriate number of
+ * times. A bit of a nasty hack, to keep the "other" notion of wallclock time
+ * happy.
*/
-static inline void do_timer_interrupt(
- int irq, void *dev_id, struct pt_regs *regs)
+static long long us_per_tick=1000000/HZ;
+static long long last_irq;
+static inline void do_timer_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
{
- do_timer(regs);
+ struct timeval tv;
+ long long time, delta;
+ /* Debug aid: every 10000th call, print system time and wallclock. */
+#ifdef XENO_TIME_DEBUG
+ static u32 foo_count = 0;
+ foo_count++;
+ if (foo_count>= 10000) {
+ s64 n = NOW();
+ struct timeval tv;
+ do_gettimeofday(&tv);
+ printk("0x%08X%08X %ld:%ld\n",
+ (u32)(n>>32), (u32)n, tv.tv_sec, tv.tv_usec);
+ foo_count = 0;
+ }
+#endif
+
+ /*
+ * The next bit really sucks:
+ * Linux not only uses do_gettimeofday() to keep a notion of
+ * wallclock time, but also maintains the xtime struct and jiffies.
+ * (Even worse some userland code accesses this via the sys_time()
+ * system call)
+ * Unfortunately, xtime is maintained in the architecture independent
+ * part of the timer ISR (./kernel/timer.c sic!). So, although we have
+ * perfectly valid notion of wallclock time from the hypervisor we here
+ * fake missed timer interrupts so that the arch independent part of
+ * the Timer ISR updates jiffies for us *and* once the bh gets run
+ * updates xtime accordingly. Yuck!
+ */
+
+ /* work out the number of jiffies passed and update them */
+ do_gettimeofday(&tv);
+ time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec; /* wallclock in microseconds */
+ delta = time - last_irq; /* microseconds since the last tick accounted for */
+ if (delta <= 0) {
+ printk ("Timer ISR: Time went backwards: %lld\n", delta);
+ return; /* note: a zero delta is reported and skipped here as well */
+ }
+ while (delta >= us_per_tick) {
+ do_timer(regs);
+ delta -= us_per_tick;
+ last_irq += us_per_tick; /* whole ticks only; the remainder carries to the next call */
+ }
+
#if 0
if (!user_mode(regs))
x86_do_profile(regs->eip);
#endif
}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
write_lock(&xtime_lock);
@@ -234,56 +305,31 @@ static struct irqaction irq_timer = {
NULL
};
-
-unsigned long get_cmos_time(void)
-{
- unsigned long secs = HYPERVISOR_shared_info->rtc_time;
- unsigned long diff;
-
- rdtscl(diff);
- diff -= (unsigned long)HYPERVISOR_shared_info->rtc_timestamp;
-
- secs += ticks_to_us(diff);
-
- return(secs + ticks_to_secs(diff));
-}
-
-
-/* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). */
-static unsigned long __init calibrate_tsc(void)
+void __init time_init(void)
{
- unsigned long quo, rem;
+ unsigned long long alarm;
+ u64 cpu_freq = HYPERVISOR_shared_info->cpu_freq;
+ u64 scale;
- /* quotient == (1000 * 2^32) / ticks_per ms */
- __asm__ __volatile__ (
- "divl %2"
- : "=a" (quo), "=d" (rem)
- : "r" (HYPERVISOR_shared_info->ticks_per_ms), "0" (0), "1" (1000) );
+ do_get_fast_time = do_gettimeofday;
- return(quo);
-}
+ cpu_khz = (u32)cpu_freq/1000;
+ printk("Xen reported: %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
-void __init time_init(void)
-{
- unsigned long long alarm;
-
- fast_gettimeoffset_quotient = calibrate_tsc();
- do_get_fast_time = do_gettimeofday;
+ /*
+ * calculate systemtime scaling factor
+ * XXX RN: have to cast cpu_freq to u32 limits it to 4.29 GHz.
+ * Get a better do_div!
+ */
+ scale = 1000000000LL << 32;
+ do_div(scale,(u32)cpu_freq);
+ st_scale_f = scale & 0xffffffff;
+ st_scale_i = scale >> 32;
+ printk("System Time scale: %X %X\n",st_scale_i, st_scale_f);
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- {
- unsigned long eax=0, edx=1000;
- __asm__ __volatile__
- ("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (fast_gettimeoffset_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %lu.%03lu MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
+ do_gettimeofday(&xtime);
+ last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec;
setup_irq(TIMER_IRQ, &irq_timer);
@@ -292,13 +338,14 @@ void __init time_init(void)
* 'domain' time. This means that clock sshould run at the correct
* rate. For things like scheduling, it's not clear whether it
* matters which sort of time we use.
+ * XXX RN: unimplemented.
*/
+
rdtscll(alarm);
+#if 0
alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms;
HYPERVISOR_shared_info->wall_timeout = alarm;
HYPERVISOR_shared_info->domain_timeout = ~0ULL;
+#endif
clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events);
-
- xtime.tv_sec = get_cmos_time();
- xtime.tv_usec = 0;
}