aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c10
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c25
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c10
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/features.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/gnttab.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile1
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c5
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c258
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h77
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c271
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c6
-rw-r--r--linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h4
-rw-r--r--linux-2.6-xen-sparse/include/xen/xenbus.h1
-rw-r--r--tools/blktap/drivers/blktapctrl.c92
-rw-r--r--tools/blktap/drivers/tapdisk.c52
-rw-r--r--tools/blktap/drivers/tapdisk.h8
-rw-r--r--tools/firmware/acpi/acpi_fadt.h9
-rw-r--r--tools/ioemu/vl.c10
-rw-r--r--tools/ioemu/vnc.c2
-rw-r--r--tools/libxc/Makefile2
-rw-r--r--tools/libxc/ia64/xc_ia64_hvm_build.c10
-rw-r--r--tools/libxc/xc_linux_build.c2
-rw-r--r--tools/libxc/xc_linux_restore.c2
-rw-r--r--tools/libxc/xc_linux_save.c2
-rw-r--r--tools/libxc/xc_private.c18
-rw-r--r--tools/python/xen/xend/image.py11
-rw-r--r--tools/python/xen/xend/server/SrvDaemon.py7
-rw-r--r--tools/xenstat/xentop/xentop.13
-rw-r--r--tools/xenstat/xentop/xentop.c6
-rw-r--r--tools/xm-test/lib/XmTestLib/arch.py3
-rw-r--r--unmodified_drivers/linux-2.6/blkfront/Makefile3
-rw-r--r--unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h14
-rw-r--r--unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h14
-rw-r--r--unmodified_drivers/linux-2.6/compat-include/linux/io.h10
-rw-r--r--unmodified_drivers/linux-2.6/compat-include/linux/mutex.h31
-rw-r--r--unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h52
-rw-r--r--unmodified_drivers/linux-2.6/netfront/Makefile3
-rw-r--r--unmodified_drivers/linux-2.6/overrides.mk2
-rw-r--r--unmodified_drivers/linux-2.6/platform-pci/Kbuild2
-rw-r--r--unmodified_drivers/linux-2.6/platform-pci/Makefile3
-rw-r--r--unmodified_drivers/linux-2.6/platform-pci/evtchn.c4
-rw-r--r--unmodified_drivers/linux-2.6/platform-pci/platform-compat.c116
-rw-r--r--unmodified_drivers/linux-2.6/platform-pci/platform-pci.c5
-rw-r--r--unmodified_drivers/linux-2.6/platform-pci/xen_support.c4
-rw-r--r--unmodified_drivers/linux-2.6/xenbus/Makefile3
-rw-r--r--xen/arch/ia64/vmx/mmio.c70
-rw-r--r--xen/arch/ia64/vmx/vmx_init.c14
-rw-r--r--xen/arch/x86/Makefile2
-rw-r--r--xen/arch/x86/hvm/Makefile1
-rw-r--r--xen/arch/x86/hvm/hvm.c3
-rw-r--r--xen/arch/x86/hvm/i8254.c2
-rw-r--r--xen/arch/x86/hvm/io.c2
-rw-r--r--xen/arch/x86/hvm/pmtimer.c63
-rw-r--r--xen/arch/x86/hvm/rtc.c2
-rw-r--r--xen/arch/x86/hvm/svm/svm.c3
-rw-r--r--xen/arch/x86/hvm/vmx/vmx.c3
-rw-r--r--xen/arch/x86/numa.c308
-rw-r--r--xen/arch/x86/setup.c30
-rw-r--r--xen/arch/x86/smpboot.c3
-rw-r--r--xen/arch/x86/srat.c315
-rw-r--r--xen/common/memory.c21
-rw-r--r--xen/common/page_alloc.c193
-rw-r--r--xen/drivers/acpi/Makefile1
-rw-r--r--xen/drivers/acpi/numa.c216
-rw-r--r--xen/include/asm-ia64/vmx_platform.h2
-rw-r--r--xen/include/asm-x86/acpi.h4
-rw-r--r--xen/include/asm-x86/config.h5
-rw-r--r--xen/include/asm-x86/hvm/domain.h2
-rw-r--r--xen/include/asm-x86/hvm/vpt.h (renamed from xen/include/asm-x86/hvm/vpit.h)24
-rw-r--r--xen/include/asm-x86/mach-generic/mach_apic.h6
-rw-r--r--xen/include/asm-x86/numa.h78
-rw-r--r--xen/include/public/arch-ia64.h3
-rw-r--r--xen/include/public/hvm/ioreq.h4
-rw-r--r--xen/include/xen/config.h2
-rw-r--r--xen/include/xen/mm.h7
-rw-r--r--xen/include/xen/nodemask.h338
-rw-r--r--xen/include/xen/numa.h13
80 files changed, 2505 insertions, 428 deletions
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
index 37b33cbe4d..e9a7e7d070 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
@@ -392,10 +392,15 @@ static void dispatch_rw_block_io(blkif_t *blkif,
for (i = 0; i < nseg; i++) {
if (unlikely(map[i].status != 0)) {
DPRINTK("invalid buffer -- could not remap it\n");
- goto fail_flush;
+ map[i].handle = BLKBACK_INVALID_HANDLE;
+ ret |= 1;
}
pending_handle(pending_req, i) = map[i].handle;
+
+ if (ret)
+ continue;
+
set_phys_to_machine(__pa(vaddr(
pending_req, i)) >> PAGE_SHIFT,
FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
@@ -403,6 +408,9 @@ static void dispatch_rw_block_io(blkif_t *blkif,
(req->seg[i].first_sect << 9);
}
+ if (ret)
+ goto fail_flush;
+
if (vbd_translate(&preq, blkif, operation) != 0) {
DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
operation == READ ? "read" : "write",
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
index e79b653a97..63ebf8ed93 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -48,6 +48,10 @@
#include <asm/hypervisor.h>
#include <asm/maddr.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED 1
#define BLKIF_STATE_SUSPENDED 2
@@ -468,6 +472,27 @@ int blkif_ioctl(struct inode *inode, struct file *filep,
command, (long)argument, inode->i_rdev);
switch (command) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+ case HDIO_GETGEO: {
+ struct block_device *bd = inode->i_bdev;
+ struct hd_geometry geo;
+ int ret;
+
+ if (!argument)
+ return -EINVAL;
+
+ geo.start = get_start_sect(bd);
+ ret = blkif_getgeo(bd, &geo);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((struct hd_geometry __user *)argument, &geo,
+ sizeof(geo)))
+ return -EFAULT;
+
+ return 0;
+ }
+#endif
case CDROMMULTISESSION:
DPRINTK("FIXME: support multisession CDs later\n");
for (i = 0; i < sizeof(struct cdrom_multisession); i++)
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
index 8aa453d3a0..0c8b508c9a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
@@ -36,6 +36,10 @@
#include <linux/blkdev.h>
#include <linux/list.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)
@@ -91,7 +95,9 @@ static struct block_device_operations xlvbd_block_fops =
.open = blkif_open,
.release = blkif_release,
.ioctl = blkif_ioctl,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
.getgeo = blkif_getgeo
+#endif
};
DEFINE_SPINLOCK(blkif_io_lock);
@@ -186,7 +192,11 @@ xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
if (rq == NULL)
return -1;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
elevator_init(rq, "noop");
+#else
+ elevator_init(rq, &elevator_noop);
+#endif
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_hardsect_size(rq, sector_size);
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/features.c b/linux-2.6-xen-sparse/drivers/xen/core/features.c
index 4d50caf50b..a76f58c04d 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/features.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/features.c
@@ -11,6 +11,10 @@
#include <asm/hypervisor.h>
#include <xen/features.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
EXPORT_SYMBOL(xen_features);
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
index 3195279a87..c5132c13bb 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
@@ -44,6 +44,10 @@
#include <asm/io.h>
#include <xen/interface/memory.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
diff --git a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
index 26e0610d15..e03e44a05a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
@@ -64,6 +64,10 @@
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
/*
* Mutually-exclusive module options to select receive data path:
* rx_copy : Packets are copied by network backend into local memory
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
index d7c7d05172..ce5acc2457 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
@@ -9,4 +9,5 @@ xenbus-objs += xenbus_client.o
xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_xs.o
xenbus-objs += xenbus_probe.o
+obj-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
index 9b389ec06b..fd8355f6dd 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
@@ -35,6 +35,10 @@
#include <xen/xenbus.h>
#include <xen/driver_util.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
/* xenbus_probe.c */
extern char *kasprintf(const char *fmt, ...);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
index 38da320b67..ea8f3c283e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
@@ -39,6 +39,10 @@
#include <xen/xenbus.h>
#include "xenbus_comms.h"
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
static int xenbus_irq;
extern void xenbus_probe(void *);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
index bbe4a8c5a8..ba37e61856 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
@@ -40,6 +40,7 @@
#include <linux/wait.h>
#include <linux/fs.h>
#include <linux/poll.h>
+#include <linux/mutex.h>
#include "xenbus_comms.h"
@@ -49,6 +50,10 @@
#include <xen/xen_proc.h>
#include <asm/hypervisor.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
struct xenbus_dev_transaction {
struct list_head list;
struct xenbus_transaction handle;
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
index 87c9e6ed90..13e9f2105d 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
@@ -42,6 +42,7 @@
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
+#include <linux/mutex.h>
#include <asm/io.h>
#include <asm/page.h>
@@ -55,6 +56,11 @@
#include <xen/hvm.h>
#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
int xen_store_evtchn;
struct xenstore_domain_interface *xen_store_interface;
@@ -67,12 +73,7 @@ static struct notifier_block *xenstore_chain;
static void wait_for_devices(struct xenbus_driver *xendrv);
static int xenbus_probe_frontend(const char *type, const char *name);
-static int xenbus_uevent_backend(struct device *dev, char **envp,
- int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
-static int xenbus_dev_probe(struct device *_dev);
-static int xenbus_dev_remove(struct device *_dev);
static void xenbus_dev_shutdown(struct device *_dev);
/* If something in array of ids matches this device, return it. */
@@ -86,7 +87,7 @@ match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
return NULL;
}
-static int xenbus_match(struct device *_dev, struct device_driver *_drv)
+int xenbus_match(struct device *_dev, struct device_driver *_drv)
{
struct xenbus_driver *drv = to_xenbus_driver(_drv);
@@ -96,17 +97,6 @@ static int xenbus_match(struct device *_dev, struct device_driver *_drv)
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
-struct xen_bus_type
-{
- char *root;
- unsigned int levels;
- int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
- int (*probe)(const char *type, const char *dir);
- struct bus_type bus;
- struct device dev;
-};
-
-
/* device/<type>/<id> => <type>-<id> */
static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
{
@@ -143,7 +133,7 @@ static void free_otherend_watch(struct xenbus_device *dev)
}
-static int read_otherend_details(struct xenbus_device *xendev,
+int read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node)
{
int err = xenbus_gather(XBT_NIL, xendev->nodename,
@@ -176,12 +166,6 @@ static int read_backend_details(struct xenbus_device *xendev)
}
-static int read_frontend_details(struct xenbus_device *xendev)
-{
- return read_otherend_details(xendev, "frontend-id", "frontend");
-}
-
-
/* Bus type for frontend drivers. */
static struct xen_bus_type xenbus_frontend = {
.root = "device",
@@ -191,115 +175,17 @@ static struct xen_bus_type xenbus_frontend = {
.bus = {
.name = "xen",
.match = xenbus_match,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
+#endif
},
.dev = {
.bus_id = "xen",
},
};
-/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
-static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
-{
- int domid, err;
- const char *devid, *type, *frontend;
- unsigned int typelen;
-
- type = strchr(nodename, '/');
- if (!type)
- return -EINVAL;
- type++;
- typelen = strcspn(type, "/");
- if (!typelen || type[typelen] != '/')
- return -EINVAL;
-
- devid = strrchr(nodename, '/') + 1;
-
- err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
- "frontend", NULL, &frontend,
- NULL);
- if (err)
- return err;
- if (strlen(frontend) == 0)
- err = -ERANGE;
- if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
- err = -ENOENT;
-
- kfree(frontend);
-
- if (err)
- return err;
-
- if (snprintf(bus_id, BUS_ID_SIZE,
- "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
- return -ENOSPC;
- return 0;
-}
-
-static struct xen_bus_type xenbus_backend = {
- .root = "backend",
- .levels = 3, /* backend/type/<frontend>/<id> */
- .get_bus_id = backend_bus_id,
- .probe = xenbus_probe_backend,
- .bus = {
- .name = "xen-backend",
- .match = xenbus_match,
- .probe = xenbus_dev_probe,
- .remove = xenbus_dev_remove,
-// .shutdown = xenbus_dev_shutdown,
- .uevent = xenbus_uevent_backend,
- },
- .dev = {
- .bus_id = "xen-backend",
- },
-};
-
-static int xenbus_uevent_backend(struct device *dev, char **envp,
- int num_envp, char *buffer, int buffer_size)
-{
- struct xenbus_device *xdev;
- struct xenbus_driver *drv;
- int i = 0;
- int length = 0;
-
- DPRINTK("");
-
- if (dev == NULL)
- return -ENODEV;
-
- xdev = to_xenbus_device(dev);
- if (xdev == NULL)
- return -ENODEV;
-
- /* stuff we want to pass to /sbin/hotplug */
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_TYPE=%s", xdev->devicetype);
-
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_PATH=%s", xdev->nodename);
-
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_BASE_PATH=%s", xenbus_backend.root);
-
- /* terminate, set to next free slot, shrink available space */
- envp[i] = NULL;
- envp = &envp[i];
- num_envp -= i;
- buffer = &buffer[length];
- buffer_size -= length;
-
- if (dev->driver) {
- drv = to_xenbus_driver(dev->driver);
- if (drv && drv->uevent)
- return drv->uevent(xdev, envp, num_envp, buffer,
- buffer_size);
- }
-
- return 0;
-}
-
static void otherend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
@@ -359,7 +245,7 @@ static int watch_otherend(struct xenbus_device *dev)
}
-static int xenbus_dev_probe(struct device *_dev)
+int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -406,7 +292,7 @@ fail:
return -ENODEV;
}
-static int xenbus_dev_remove(struct device *_dev)
+int xenbus_dev_remove(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -444,14 +330,21 @@ static void xenbus_dev_shutdown(struct device *_dev)
put_device(&dev->dev);
}
-static int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus)
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus)
{
int ret;
drv->driver.name = drv->name;
drv->driver.bus = &bus->bus;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
drv->driver.owner = drv->owner;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+ drv->driver.probe = xenbus_dev_probe;
+ drv->driver.remove = xenbus_dev_remove;
+ drv->driver.shutdown = xenbus_dev_shutdown;
+#endif
mutex_lock(&xenwatch_mutex);
ret = driver_register(&drv->driver);
@@ -476,14 +369,6 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
}
EXPORT_SYMBOL_GPL(xenbus_register_frontend);
-int xenbus_register_backend(struct xenbus_driver *drv)
-{
- drv->read_otherend_details = read_frontend_details;
-
- return xenbus_register_driver_common(drv, &xenbus_backend);
-}
-EXPORT_SYMBOL_GPL(xenbus_register_backend);
-
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
driver_unregister(&drv->driver);
@@ -581,23 +466,29 @@ char *kasprintf(const char *fmt, ...)
}
static ssize_t xendev_show_nodename(struct device *dev,
- struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+ struct device_attribute *attr,
+#endif
+ char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
}
DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
static ssize_t xendev_show_devtype(struct device *dev,
- struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+ struct device_attribute *attr,
+#endif
+ char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
}
DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
-static int xenbus_probe_node(struct xen_bus_type *bus,
- const char *type,
- const char *nodename)
+int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename)
{
int err;
struct xenbus_device *xendev;
@@ -667,55 +558,6 @@ static int xenbus_probe_frontend(const char *type, const char *name)
return err;
}
-/* backend/<typename>/<frontend-uuid>/<name> */
-static int xenbus_probe_backend_unit(const char *dir,
- const char *type,
- const char *name)
-{
- char *nodename;
- int err;
-
- nodename = kasprintf("%s/%s", dir, name);
- if (!nodename)
- return -ENOMEM;
-
- DPRINTK("%s\n", nodename);
-
- err = xenbus_probe_node(&xenbus_backend, type, nodename);
- kfree(nodename);
- return err;
-}
-
-/* backend/<typename>/<frontend-domid> */
-static int xenbus_probe_backend(const char *type, const char *domid)
-{
- char *nodename;
- int err = 0;
- char **dir;
- unsigned int i, dir_n = 0;
-
- DPRINTK("");
-
- nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
- if (!nodename)
- return -ENOMEM;
-
- dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
- if (IS_ERR(dir)) {
- kfree(nodename);
- return PTR_ERR(dir);
- }
-
- for (i = 0; i < dir_n; i++) {
- err = xenbus_probe_backend_unit(nodename, type, dir[i]);
- if (err)
- break;
- }
- kfree(dir);
- kfree(nodename);
- return err;
-}
-
static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
{
int err = 0;
@@ -736,7 +578,7 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
return err;
}
-static int xenbus_probe_devices(struct xen_bus_type *bus)
+int xenbus_probe_devices(struct xen_bus_type *bus)
{
int err = 0;
char **dir;
@@ -778,7 +620,7 @@ static int strsep_len(const char *str, char c, unsigned int len)
return (len == 0) ? i : -ERANGE;
}
-static void dev_changed(const char *node, struct xen_bus_type *bus)
+void dev_changed(const char *node, struct xen_bus_type *bus)
{
int exists, rootlen;
struct xenbus_device *dev;
@@ -823,25 +665,12 @@ static void frontend_changed(struct xenbus_watch *watch,
dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
}
-static void backend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
-{
- DPRINTK("");
-
- dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
-}
-
/* We watch for devices appearing and vanishing. */
static struct xenbus_watch fe_watch = {
.node = "device",
.callback = frontend_changed,
};
-static struct xenbus_watch be_watch = {
- .node = "backend",
- .callback = backend_changed,
-};
-
static int suspend_dev(struct device *dev, void *data)
{
int err = 0;
@@ -912,7 +741,7 @@ void xenbus_suspend(void)
DPRINTK("");
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
- bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+ xenbus_backend_suspend(suspend_dev);
xs_suspend();
}
EXPORT_SYMBOL_GPL(xenbus_suspend);
@@ -922,7 +751,7 @@ void xenbus_resume(void)
xb_init_comms();
xs_resume();
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
- bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
+ xenbus_backend_resume(resume_dev);
}
EXPORT_SYMBOL_GPL(xenbus_resume);
@@ -955,20 +784,17 @@ void xenbus_probe(void *unused)
{
BUG_ON((xenstored_ready <= 0));
- /* Enumerate devices in xenstore. */
+ /* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
- xenbus_probe_devices(&xenbus_backend);
-
- /* Watch for changes. */
register_xenbus_watch(&fe_watch);
- register_xenbus_watch(&be_watch);
+ xenbus_backend_probe_and_watch();
/* Notify others that xenstore is up */
notifier_call_chain(&xenstore_chain, 0, NULL);
}
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
static struct file_operations xsd_kva_fops;
static struct proc_dir_entry *xsd_kva_intf;
static struct proc_dir_entry *xsd_port_intf;
@@ -1020,7 +846,7 @@ static int __init xenbus_probe_init(void)
/* Register ourselves with the kernel bus subsystem */
bus_register(&xenbus_frontend.bus);
- bus_register(&xenbus_backend.bus);
+ xenbus_backend_bus_register();
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
@@ -1049,7 +875,7 @@ static int __init xenbus_probe_init(void)
xen_store_evtchn = xen_start_info->store_evtchn =
alloc_unbound.port;
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
/* And finally publish the above info in /proc/xen */
xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
if (xsd_kva_intf) {
@@ -1091,7 +917,7 @@ static int __init xenbus_probe_init(void)
/* Register ourselves with the kernel device subsystem */
device_register(&xenbus_frontend.dev);
- device_register(&xenbus_backend.dev);
+ xenbus_backend_device_register();
if (!is_initial_xendomain())
xenbus_probe(NULL);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
new file mode 100644
index 0000000000..1f61c6cca6
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
@@ -0,0 +1,77 @@
+/******************************************************************************
+ * xenbus_probe.h
+ *
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_PROBE_H
+#define _XENBUS_PROBE_H
+
+#ifdef CONFIG_XEN_BACKEND
+extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
+extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
+extern void xenbus_backend_probe_and_watch(void);
+extern void xenbus_backend_bus_register(void);
+extern void xenbus_backend_device_register(void);
+#else
+static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_probe_and_watch(void) {}
+static inline void xenbus_backend_bus_register(void) {}
+static inline void xenbus_backend_device_register(void) {}
+#endif
+
+struct xen_bus_type
+{
+ char *root;
+ unsigned int levels;
+ int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+ int (*probe)(const char *type, const char *dir);
+ struct bus_type bus;
+ struct device dev;
+};
+
+extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
+extern int xenbus_dev_probe(struct device *_dev);
+extern int xenbus_dev_remove(struct device *_dev);
+extern int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus);
+extern int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename);
+extern int xenbus_probe_devices(struct xen_bus_type *bus);
+
+extern void dev_changed(const char *node, struct xen_bus_type *bus);
+
+/* Simplified asprintf. Probably belongs in lib */
+extern char *kasprintf(const char *fmt, ...);
+
+#endif
+
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
new file mode 100644
index 0000000000..7f0dedd577
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -0,0 +1,271 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have (backend half).
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define DPRINTK(fmt, args...) \
+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
+ __FUNCTION__, __LINE__, ##args)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/maddr.h>
+#include <asm/pgtable.h>
+#include <asm/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/xen_proc.h>
+#include <xen/evtchn.h>
+#include <xen/features.h>
+#include <xen/hvm.h>
+
+#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+extern int read_otherend_details(struct xenbus_device *xendev,
+ char *id_node, char *path_node);
+
+static int read_frontend_details(struct xenbus_device *xendev)
+{
+ return read_otherend_details(xendev, "frontend-id", "frontend");
+}
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+ int domid, err;
+ const char *devid, *type, *frontend;
+ unsigned int typelen;
+
+ type = strchr(nodename, '/');
+ if (!type)
+ return -EINVAL;
+ type++;
+ typelen = strcspn(type, "/");
+ if (!typelen || type[typelen] != '/')
+ return -EINVAL;
+
+ devid = strrchr(nodename, '/') + 1;
+
+ err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
+ "frontend", NULL, &frontend,
+ NULL);
+ if (err)
+ return err;
+ if (strlen(frontend) == 0)
+ err = -ERANGE;
+ if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
+ err = -ENOENT;
+ kfree(frontend);
+
+ if (err)
+ return err;
+
+ if (snprintf(bus_id, BUS_ID_SIZE,
+ "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+ return -ENOSPC;
+ return 0;
+}
+
+static struct xen_bus_type xenbus_backend = {
+ .root = "backend",
+ .levels = 3, /* backend/type/<frontend>/<id> */
+ .get_bus_id = backend_bus_id,
+ .probe = xenbus_probe_backend,
+ .bus = {
+ .name = "xen-backend",
+ .match = xenbus_match,
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+// .shutdown = xenbus_dev_shutdown,
+ .uevent = xenbus_uevent_backend,
+ },
+ .dev = {
+ .bus_id = "xen-backend",
+ },
+};
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size)
+{
+ struct xenbus_device *xdev;
+ struct xenbus_driver *drv;
+ int i = 0;
+ int length = 0;
+
+ DPRINTK("");
+
+ if (dev == NULL)
+ return -ENODEV;
+
+ xdev = to_xenbus_device(dev);
+ if (xdev == NULL)
+ return -ENODEV;
+
+ /* stuff we want to pass to /sbin/hotplug */
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_TYPE=%s", xdev->devicetype);
+
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_PATH=%s", xdev->nodename);
+
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_BASE_PATH=%s", xenbus_backend.root);
+
+ /* terminate, set to next free slot, shrink available space */
+ envp[i] = NULL;
+ envp = &envp[i];
+ num_envp -= i;
+ buffer = &buffer[length];
+ buffer_size -= length;
+
+ if (dev->driver) {
+ drv = to_xenbus_driver(dev->driver);
+ if (drv && drv->uevent)
+ return drv->uevent(xdev, envp, num_envp, buffer,
+ buffer_size);
+ }
+
+ return 0;
+}
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+ drv->read_otherend_details = read_frontend_details;
+
+ return xenbus_register_driver_common(drv, &xenbus_backend);
+}
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
+
+/* backend/<typename>/<frontend-uuid>/<name> */
+static int xenbus_probe_backend_unit(const char *dir,
+ const char *type,
+ const char *name)
+{
+ char *nodename;
+ int err;
+
+ nodename = kasprintf("%s/%s", dir, name);
+ if (!nodename)
+ return -ENOMEM;
+
+ DPRINTK("%s\n", nodename);
+
+ err = xenbus_probe_node(&xenbus_backend, type, nodename);
+ kfree(nodename);
+ return err;
+}
+
+/* backend/<typename>/<frontend-domid> */
+static int xenbus_probe_backend(const char *type, const char *domid)
+{
+ char *nodename;
+ int err = 0;
+ char **dir;
+ unsigned int i, dir_n = 0;
+
+ DPRINTK("");
+
+ nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
+ if (!nodename)
+ return -ENOMEM;
+
+ dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
+ if (IS_ERR(dir)) {
+ kfree(nodename);
+ return PTR_ERR(dir);
+ }
+
+ for (i = 0; i < dir_n; i++) {
+ err = xenbus_probe_backend_unit(nodename, type, dir[i]);
+ if (err)
+ break;
+ }
+ kfree(dir);
+ kfree(nodename);
+ return err;
+}
+
+static void backend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ DPRINTK("");
+
+ dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
+}
+
+static struct xenbus_watch be_watch = {
+ .node = "backend",
+ .callback = backend_changed,
+};
+
+void xenbus_backend_suspend(int (*fn)(struct device *, void *))
+{
+ DPRINTK("");
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_resume(int (*fn)(struct device *, void *))
+{
+ DPRINTK("");
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_probe_and_watch(void)
+{
+ xenbus_probe_devices(&xenbus_backend);
+ register_xenbus_watch(&be_watch);
+}
+
+void xenbus_backend_bus_register(void)
+{
+ bus_register(&xenbus_backend.bus);
+}
+
+void xenbus_backend_device_register(void)
+{
+ device_register(&xenbus_backend.dev);
+}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
index 190fa1e794..1c1fc576c0 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
@@ -42,9 +42,15 @@
#include <linux/fcntl.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
#include <xen/xenbus.h>
#include "xenbus_comms.h"
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
/* xenbus_probe.c */
extern char *kasprintf(const char *fmt, ...);
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
index 6a4e5e4508..807ca388c5 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
@@ -9,6 +9,10 @@
#include <linux/config.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define ADDR (*(volatile long *) addr)
static __inline__ void synch_set_bit(int nr, volatile void * addr)
diff --git a/linux-2.6-xen-sparse/include/xen/xenbus.h b/linux-2.6-xen-sparse/include/xen/xenbus.h
index 8e259ce777..c7cb7eaa3a 100644
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h
@@ -38,6 +38,7 @@
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/completion.h>
+#include <linux/init.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/xenbus.h>
diff --git a/tools/blktap/drivers/blktapctrl.c b/tools/blktap/drivers/blktapctrl.c
index 1c27e9eae7..0b00bc4bfd 100644
--- a/tools/blktap/drivers/blktapctrl.c
+++ b/tools/blktap/drivers/blktapctrl.c
@@ -204,81 +204,49 @@ static blkif_t *test_path(char *path, char **dev, int *type)
static void add_disktype(blkif_t *blkif, int type)
{
- driver_list_entry_t *entry, *ptr, *last;
+ driver_list_entry_t *entry, **pprev;
- if (type > MAX_DISK_TYPES) return;
+ if (type > MAX_DISK_TYPES)
+ return;
entry = malloc(sizeof(driver_list_entry_t));
entry->blkif = blkif;
- entry->next = NULL;
- ptr = active_disks[type];
+ entry->next = NULL;
- if (ptr == NULL) {
- active_disks[type] = entry;
- entry->prev = NULL;
- return;
- }
-
- while (ptr != NULL) {
- last = ptr;
- ptr = ptr->next;
- }
+ pprev = &active_disks[type];
+ while (*pprev != NULL)
+ pprev = &(*pprev)->next;
- /*We've found the end of the list*/
- last->next = entry;
- entry->prev = last;
-
- return;
+ *pprev = entry;
+ entry->pprev = pprev;
}
static int del_disktype(blkif_t *blkif)
{
- driver_list_entry_t *ptr, *cur, *last;
+ driver_list_entry_t *entry, **pprev;
int type = blkif->drivertype, count = 0, close = 0;
- if (type > MAX_DISK_TYPES) return 1;
-
- ptr = active_disks[type];
- last = NULL;
- while (ptr != NULL) {
- count++;
- if (blkif == ptr->blkif) {
- cur = ptr;
- if (ptr->next != NULL) {
- /*There's more later in the chain*/
- if (!last) {
- /*We're first in the list*/
- active_disks[type] = ptr->next;
- ptr = ptr->next;
- ptr->prev = NULL;
- }
- else {
- /*We're sandwiched*/
- last->next = ptr->next;
- ptr = ptr->next;
- ptr->prev = last;
- }
-
- } else if (last) {
- /*There's more earlier in the chain*/
- last->next = NULL;
- } else {
- /*We're the only entry*/
- active_disks[type] = NULL;
- if(dtypes[type]->single_handler == 1)
- close = 1;
- }
- DPRINTF("DEL_DISKTYPE: Freeing entry\n");
- free(cur);
- if (dtypes[type]->single_handler == 0) close = 1;
+ if (type > MAX_DISK_TYPES)
+ return 1;
- return close;
- }
- last = ptr;
- ptr = ptr->next;
+ pprev = &active_disks[type];
+ while ((*pprev != NULL) && ((*pprev)->blkif != blkif))
+ pprev = &(*pprev)->next;
+
+ if ((entry = *pprev) == NULL) {
+ DPRINTF("DEL_DISKTYPE: No match\n");
+ return 1;
}
- DPRINTF("DEL_DISKTYPE: No match\n");
- return 1;
+
+ *pprev = entry->next;
+ if (entry->next)
+ entry->next->pprev = pprev;
+
+ DPRINTF("DEL_DISKTYPE: Freeing entry\n");
+ free(entry);
+
+ /* Caller should close() if no single controller, or list is empty. */
+ return (!dtypes[type]->single_handler || (active_disks[type] == NULL));
}
static int write_msg(int fd, int msgtype, void *ptr, void *ptr2)
@@ -592,8 +560,8 @@ int unmap_blktapctrl(blkif_t *blkif)
if (del_disktype(blkif)) {
close(blkif->fds[WRITE]);
close(blkif->fds[READ]);
-
}
+
return 0;
}
diff --git a/tools/blktap/drivers/tapdisk.c b/tools/blktap/drivers/tapdisk.c
index 7c88027eb3..859687d8b3 100644
--- a/tools/blktap/drivers/tapdisk.c
+++ b/tools/blktap/drivers/tapdisk.c
@@ -79,31 +79,17 @@ static void unmap_disk(struct td_state *s)
{
tapdev_info_t *info = s->ring_info;
struct tap_disk *drv = s->drv;
- fd_list_entry_t *ptr, *prev;
+ fd_list_entry_t *entry;
drv->td_close(s);
if (info != NULL && info->mem > 0)
munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
- ptr = s->fd_entry;
- prev = ptr->prev;
-
- if (prev) {
- /*There are entries earlier in the list*/
- prev->next = ptr->next;
- if (ptr->next) {
- ptr = ptr->next;
- ptr->prev = prev;
- }
- } else {
- /*We are the first entry in list*/
- if (ptr->next) {
- ptr = ptr->next;
- fd_start = ptr;
- ptr->prev = NULL;
- } else fd_start = NULL;
- }
+ entry = s->fd_entry;
+ *entry->pprev = entry->next;
+ if (entry->next)
+ entry->next->pprev = entry->pprev;
close(info->fd);
@@ -144,35 +130,29 @@ static inline int LOCAL_FD_SET(fd_set *readfds)
return 0;
}
-static inline fd_list_entry_t *add_fd_entry(int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
+static inline fd_list_entry_t *add_fd_entry(
+ int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
{
- fd_list_entry_t *ptr, *last, *entry;
+ fd_list_entry_t **pprev, *entry;
int i;
+
DPRINTF("Adding fd_list_entry\n");
/*Add to linked list*/
s->fd_entry = entry = malloc(sizeof(fd_list_entry_t));
entry->tap_fd = tap_fd;
- for (i = 0; i < MAX_IOFD; i++) entry->io_fd[i] = io_fd[i];
+ for (i = 0; i < MAX_IOFD; i++)
+ entry->io_fd[i] = io_fd[i];
entry->s = s;
entry->next = NULL;
- ptr = fd_start;
- if (ptr == NULL) {
- /*We are the first entry*/
- fd_start = entry;
- entry->prev = NULL;
- goto finish;
- }
+ pprev = &fd_start;
+ while (*pprev != NULL)
+ pprev = &(*pprev)->next;
- while (ptr != NULL) {
- last = ptr;
- ptr = ptr->next;
- }
- last->next = entry;
- entry->prev = last;
+ *pprev = entry;
+ entry->pprev = pprev;
- finish:
return entry;
}
diff --git a/tools/blktap/drivers/tapdisk.h b/tools/blktap/drivers/tapdisk.h
index 1f03156456..238350016b 100644
--- a/tools/blktap/drivers/tapdisk.h
+++ b/tools/blktap/drivers/tapdisk.h
@@ -191,9 +191,8 @@ static disk_info_t *dtypes[] = {
};
typedef struct driver_list_entry {
- void *blkif;
- void *prev;
- void *next;
+ struct blkif *blkif;
+ struct driver_list_entry **pprev, *next;
} driver_list_entry_t;
typedef struct fd_list_entry {
@@ -201,8 +200,7 @@ typedef struct fd_list_entry {
int tap_fd;
int io_fd[MAX_IOFD];
struct td_state *s;
- void *prev;
- void *next;
+ struct fd_list_entry **pprev, *next;
} fd_list_entry_t;
int qcow_create(const char *filename, uint64_t total_size,
diff --git a/tools/firmware/acpi/acpi_fadt.h b/tools/firmware/acpi/acpi_fadt.h
index d1ecea5588..f30a1dac98 100644
--- a/tools/firmware/acpi/acpi_fadt.h
+++ b/tools/firmware/acpi/acpi_fadt.h
@@ -18,6 +18,8 @@
#ifndef _FADT_H_
#define _FADT_H_
+#include <xen/hvm/ioreq.h>
+
//
// FADT Definitions, see ACPI 2.0 specification for details.
//
@@ -51,7 +53,9 @@
//
// Fixed Feature Flags
//
-#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1|ACPI_SLP_BUTTON|ACPI_WBINVD|ACPI_PWR_BUTTON|ACPI_FIX_RTC)
+#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1 | ACPI_SLP_BUTTON | \
+ ACPI_WBINVD | ACPI_PWR_BUTTON | \
+ ACPI_FIX_RTC | ACPI_TMR_VAL_EXT)
//
// PM1A Event Register Block Generic Address Information
@@ -59,7 +63,6 @@
#define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO
#define ACPI_PM1A_EVT_BLK_BIT_WIDTH 0x20
#define ACPI_PM1A_EVT_BLK_BIT_OFFSET 0x00
-#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c010
//
// PM1B Event Register Block Generic Address Information
@@ -75,7 +78,6 @@
#define ACPI_PM1A_CNT_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO
#define ACPI_PM1A_CNT_BLK_BIT_WIDTH 0x10
#define ACPI_PM1A_CNT_BLK_BIT_OFFSET 0x00
-#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
//
// PM1B Control Register Block Generic Address Information
@@ -100,7 +102,6 @@
#define ACPI_PM_TMR_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO
#define ACPI_PM_TMR_BLK_BIT_WIDTH 0x20
#define ACPI_PM_TMR_BLK_BIT_OFFSET 0x00
-#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
//
// General Purpose Event 0 Register Block Generic Address
diff --git a/tools/ioemu/vl.c b/tools/ioemu/vl.c
index 185547743a..e331abd1ae 100644
--- a/tools/ioemu/vl.c
+++ b/tools/ioemu/vl.c
@@ -6448,7 +6448,6 @@ int main(int argc, char **argv)
fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
- /* not yet add for IA64 */
buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
PROT_READ|PROT_WRITE,
page_array[shared_page_nr - 2]);
@@ -6465,7 +6464,7 @@ int main(int argc, char **argv)
#elif defined(__ia64__)
if (xc_ia64_get_pfn_list(xc_handle, domid, page_array,
- IO_PAGE_START >> PAGE_SHIFT, 1) != 1) {
+ IO_PAGE_START >> PAGE_SHIFT, 3) != 3) {
fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
exit(-1);
}
@@ -6477,6 +6476,12 @@ int main(int argc, char **argv)
fprintf(logfile, "shared page at pfn:%lx, mfn: %016lx\n",
IO_PAGE_START >> PAGE_SHIFT, page_array[0]);
+ buffered_io_page =xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ page_array[2]);
+ fprintf(logfile, "Buffered IO page at pfn:%lx, mfn: %016lx\n",
+ BUFFER_IO_PAGE_START >> PAGE_SHIFT, page_array[2]);
+
if (xc_ia64_get_pfn_list(xc_handle, domid,
page_array, 0, nr_pages) != nr_pages) {
fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
@@ -6496,6 +6501,7 @@ int main(int argc, char **argv)
fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
exit(-1);
}
+ free(page_array);
#endif
#else /* !CONFIG_DM */
diff --git a/tools/ioemu/vnc.c b/tools/ioemu/vnc.c
index 9b8bcffa37..631754ca03 100644
--- a/tools/ioemu/vnc.c
+++ b/tools/ioemu/vnc.c
@@ -203,6 +203,8 @@ static void set_bits_in_row(VncState *vs, uint64_t *row,
mask = ~(0ULL);
h += y;
+ if (h > vs->ds->height)
+ h = vs->ds->height;
for (; y < h; y++)
row[y] |= mask;
}
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index b5e61af64d..129b867ff6 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -31,7 +31,7 @@ GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
-include $(XEN_TARGET_ARCH)/Makefile
-CFLAGS += -Werror
+CFLAGS += -Werror -Wmissing-prototypes
CFLAGS += -fno-strict-aliasing
CFLAGS += $(INCLUDES) -I.
diff --git a/tools/libxc/ia64/xc_ia64_hvm_build.c b/tools/libxc/ia64/xc_ia64_hvm_build.c
index 2c34b44a1d..0caaf343b3 100644
--- a/tools/libxc/ia64/xc_ia64_hvm_build.c
+++ b/tools/libxc/ia64/xc_ia64_hvm_build.c
@@ -551,8 +551,9 @@ setup_guest(int xc_handle, uint32_t dom, unsigned long memsize,
char *image, unsigned long image_size, uint32_t vcpus,
unsigned int store_evtchn, unsigned long *store_mfn)
{
- unsigned long page_array[2];
+ unsigned long page_array[3];
shared_iopage_t *sp;
+ void *ioreq_buffer_page;
unsigned long dom_memsize = (memsize << 20);
DECLARE_DOMCTL;
@@ -587,7 +588,7 @@ setup_guest(int xc_handle, uint32_t dom, unsigned long memsize,
/* Retrieve special pages like io, xenstore, etc. */
if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
- IO_PAGE_START>>PAGE_SHIFT, 2) != 2) {
+ IO_PAGE_START>>PAGE_SHIFT, 3) != 3) {
PERROR("Could not get the page frame list");
goto error_out;
}
@@ -604,7 +605,10 @@ setup_guest(int xc_handle, uint32_t dom, unsigned long memsize,
memset(sp, 0, PAGE_SIZE);
munmap(sp, PAGE_SIZE);
-
+ ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom,
+ PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[2]);
+ memset(ioreq_buffer_page,0,PAGE_SIZE);
+ munmap(ioreq_buffer_page, PAGE_SIZE);
return 0;
error_out:
diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c
index 822e55601b..e215d7e198 100644
--- a/tools/libxc/xc_linux_build.c
+++ b/tools/libxc/xc_linux_build.c
@@ -128,7 +128,7 @@ static int probeimageformat(const char *image,
return 0;
}
-int load_initrd(int xc_handle, domid_t dom,
+static int load_initrd(int xc_handle, domid_t dom,
struct initrd_info *initrd,
unsigned long physbase,
xen_pfn_t *phys_to_mach)
diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c
index 6e323340e3..e4bd09ed19 100644
--- a/tools/libxc/xc_linux_restore.c
+++ b/tools/libxc/xc_linux_restore.c
@@ -57,7 +57,7 @@ read_exact(int fd, void *buf, size_t count)
** This function inverts that operation, replacing the pfn values with
** the (now known) appropriate mfn values.
*/
-int uncanonicalize_pagetable(unsigned long type, void *page)
+static int uncanonicalize_pagetable(unsigned long type, void *page)
{
int i, pte_last;
unsigned long pfn;
diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c
index d955072726..7a5e4eaad6 100644
--- a/tools/libxc/xc_linux_save.c
+++ b/tools/libxc/xc_linux_save.c
@@ -413,7 +413,7 @@ static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
** which entries do not require canonicalization (in particular, those
** entries which map the virtual address reserved for the hypervisor).
*/
-int canonicalize_pagetable(unsigned long type, unsigned long pfn,
+static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
const void *spage, void *dpage)
{
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index aea9cd78d8..768cf5c5cf 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -6,6 +6,7 @@
#include <inttypes.h>
#include "xc_private.h"
+#include "xg_private.h"
int lock_pages(void *addr, size_t len)
{
@@ -35,23 +36,6 @@ int xc_get_pfn_type_batch(int xc_handle,
return do_domctl(xc_handle, &domctl);
}
-#define GETPFN_ERR (~0U)
-unsigned int get_pfn_type(int xc_handle,
- unsigned long mfn,
- uint32_t dom)
-{
- DECLARE_DOMCTL;
- domctl.cmd = XEN_DOMCTL_getpageframeinfo;
- domctl.u.getpageframeinfo.gmfn = mfn;
- domctl.domain = (domid_t)dom;
- if ( do_domctl(xc_handle, &domctl) < 0 )
- {
- PERROR("Unexpected failure when getting page frame info!");
- return GETPFN_ERR;
- }
- return domctl.u.getpageframeinfo.type;
-}
-
int xc_mmuext_op(
int xc_handle,
struct mmuext_op *op,
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
index 88a8a169bc..8a761d89cf 100644
--- a/tools/python/xen/xend/image.py
+++ b/tools/python/xen/xend/image.py
@@ -471,7 +471,7 @@ class IA64_HVM_ImageHandler(HVMImageHandler):
def getRequiredAvailableMemory(self, mem_kb):
page_kb = 16
# ROM size for guest firmware, ioreq page and xenstore page
- extra_pages = 1024 + 2
+ extra_pages = 1024 + 3
return mem_kb + extra_pages * page_kb
def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
@@ -500,9 +500,12 @@ class X86_HVM_ImageHandler(HVMImageHandler):
# overhead due to getRequiredInitialReservation.
maxmem_kb = self.getRequiredInitialReservation(maxmem_kb)
- # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than
- # the minimum that Xen would allocate if no value were given.
- return max(1024 * self.vm.getVCpuCount() + maxmem_kb / 256,
+ # 256 pages (1MB) per vcpu,
+ # plus 1 page per MiB of RAM for the P2M map,
+ # plus 1 page per MiB of RAM to shadow the resident processes.
+ # This is higher than the minimum that Xen would allocate if no value
+ # were given (but the Xen minimum is for safety, not performance).
+ return max(4 * (256 * self.vm.getVCpuCount() + 2 * (maxmem_kb / 1024)),
shadow_mem_kb)
diff --git a/tools/python/xen/xend/server/SrvDaemon.py b/tools/python/xen/xend/server/SrvDaemon.py
index f883e7da85..baba3c437d 100644
--- a/tools/python/xen/xend/server/SrvDaemon.py
+++ b/tools/python/xen/xend/server/SrvDaemon.py
@@ -9,6 +9,7 @@ import os
import signal
import sys
import threading
+import time
import linecache
import pwd
import re
@@ -106,12 +107,14 @@ class Daemon:
os.close(2)
if XEND_DEBUG:
os.open('/dev/null', os.O_RDONLY)
- os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
+ os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT|os.O_APPEND)
os.dup(1)
else:
os.open('/dev/null', os.O_RDWR)
os.dup(0)
- os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
+ os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT|os.O_APPEND)
+ print >>sys.stderr, ("Xend started at %s." %
+ time.asctime(time.localtime()))
def start(self, trace=0):
diff --git a/tools/xenstat/xentop/xentop.1 b/tools/xenstat/xentop/xentop.1
index c7a856bed1..b925a3795f 100644
--- a/tools/xenstat/xentop/xentop.1
+++ b/tools/xenstat/xentop/xentop.1
@@ -47,6 +47,9 @@ seconds between updates (default 3)
\fB\-n\fR, \fB\-\-networks\fR
output network information
.TP
+\fB\-x\fR, \fB\-\-vbds\fR
+output vbd block device data
+.TP
\fB\-r\fR, \fB\-\-repeat\-header\fR
repeat table header before each domain
.TP
diff --git a/tools/xenstat/xentop/xentop.c b/tools/xenstat/xentop/xentop.c
index 7d3ec59d2e..b772f951fb 100644
--- a/tools/xenstat/xentop/xentop.c
+++ b/tools/xenstat/xentop/xentop.c
@@ -204,7 +204,7 @@ static void usage(const char *program)
"-V, --version output version information and exit\n"
"-d, --delay=SECONDS seconds between updates (default 3)\n"
"-n, --networks output vif network data\n"
- "-b, --vbds output vbd block device data\n"
+ "-x, --vbds output vbd block device data\n"
"-r, --repeat-header repeat table header before each domain\n"
"-v, --vcpus output vcpu data\n"
"-b, --batch output in batch mode, no user input accepted\n"
@@ -976,7 +976,7 @@ int main(int argc, char **argv)
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'V' },
{ "networks", no_argument, NULL, 'n' },
- { "vbds", no_argument, NULL, 'x' },
+ { "vbds", no_argument, NULL, 'x' },
{ "repeat-header", no_argument, NULL, 'r' },
{ "vcpus", no_argument, NULL, 'v' },
{ "delay", required_argument, NULL, 'd' },
@@ -1065,7 +1065,7 @@ int main(int argc, char **argv)
break;
} while (1);
}
-
+
/* Cleanup occurs in cleanup(), so no work to do here. */
return 0;
diff --git a/tools/xm-test/lib/XmTestLib/arch.py b/tools/xm-test/lib/XmTestLib/arch.py
index d5a1aa55cb..5625a53546 100644
--- a/tools/xm-test/lib/XmTestLib/arch.py
+++ b/tools/xm-test/lib/XmTestLib/arch.py
@@ -124,6 +124,7 @@ _uname_to_arch_map = {
"i486" : "x86",
"i586" : "x86",
"i686" : "x86",
+ "x86_64": "x86_64",
"ia64" : "ia64",
"ppc" : "powerpc",
"ppc64" : "powerpc",
@@ -131,7 +132,7 @@ _uname_to_arch_map = {
# Lookup current platform.
_arch = _uname_to_arch_map.get(os.uname()[4], "Unknown")
-if _arch == "x86" or _arch == "ia64":
+if _arch == "x86" or _arch == "x86_64" or _arch == "ia64":
minSafeMem = ia_minSafeMem
getDefaultKernel = ia_getDefaultKernel
checkBuffer = ia_checkBuffer
diff --git a/unmodified_drivers/linux-2.6/blkfront/Makefile b/unmodified_drivers/linux-2.6/blkfront/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/blkfront/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h
new file mode 100644
index 0000000000..ebde567575
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h
@@ -0,0 +1,14 @@
+#ifndef _PGTABLE_NOPMD_H
+#define _PGTABLE_NOPMD_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
+#error "This version of Linux should not need compat pgtable-nopmd.h"
+#endif
+
+#define pud_t pgd_t
+#define pud_offset(d, va) d
+#define pud_none(pud) 0
+#define pud_present(pud) 1
+#define PTRS_PER_PUD 1
+
+#endif /* _PGTABLE_NOPMD_H */
diff --git a/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h
new file mode 100644
index 0000000000..8b23299dd0
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h
@@ -0,0 +1,14 @@
+#ifndef _PGTABLE_NOPUD_H
+#define _PGTABLE_NOPUD_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
+#error "This version of Linux should not need compat pgtable-nopud.h"
+#endif
+
+#define pud_t pgd_t
+#define pud_offset(d, va) d
+#define pud_none(pud) 0
+#define pud_present(pud) 1
+#define PTRS_PER_PUD 1
+
+#endif /* _PGTABLE_NOPUD_H */
diff --git a/unmodified_drivers/linux-2.6/compat-include/linux/io.h b/unmodified_drivers/linux-2.6/compat-include/linux/io.h
new file mode 100644
index 0000000000..10499023a5
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/linux/io.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_IO_H
+#define _LINUX_IO_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+#error "This version of Linux should not need compat linux/io.h"
+#endif
+
+#include <asm/io.h>
+
+#endif
diff --git a/unmodified_drivers/linux-2.6/compat-include/linux/mutex.h b/unmodified_drivers/linux-2.6/compat-include/linux/mutex.h
new file mode 100644
index 0000000000..fcb4a899c7
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/linux/mutex.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2006 Cisco Systems. All rights reserved.
+ *
+ * This file is released under the GPLv2.
+ */
+
+/* mutex compatibility for pre-2.6.16 kernels */
+
+#ifndef __LINUX_MUTEX_H
+#define __LINUX_MUTEX_H
+
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+#error "This version of Linux should not need compat mutex.h"
+#endif
+
+#include <linux/version.h>
+#include <asm/semaphore.h>
+
+#define mutex semaphore
+#define DEFINE_MUTEX(foo) DECLARE_MUTEX(foo)
+#define mutex_init(foo) init_MUTEX(foo)
+#define mutex_lock(foo) down(foo)
+#define mutex_lock_interruptible(foo) down_interruptible(foo)
+/* this function follows the spin_trylock() convention, so *
+ * it is negated to the down_trylock() return values! Be careful */
+#define mutex_trylock(foo) !down_trylock(foo)
+#define mutex_unlock(foo) up(foo)
+
+#endif /* __LINUX_MUTEX_H */
diff --git a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
new file mode 100644
index 0000000000..4978c63610
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
@@ -0,0 +1,52 @@
+#ifndef COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
+#define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
+
+#include <linux/version.h>
+
+#include <linux/spinlock.h>
+
+#if defined(__LINUX_COMPILER_H) && !defined(__always_inline)
+#define __always_inline inline
+#endif
+
+#if defined(__LINUX_SPINLOCK_H) && !defined(DEFINE_SPINLOCK)
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+#endif
+
+#if defined(_LINUX_INIT_H) && !defined(__init)
+#define __init
+#endif
+
+#if defined(__LINUX_CACHE_H) && !defined(__read_mostly)
+#define __read_mostly
+#endif
+
+#if defined(_LINUX_SKBUFF_H) && !defined(NET_IP_ALIGN)
+#define NET_IP_ALIGN 0
+#endif
+
+#if defined(_LINUX_FS_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+#define nonseekable_open(inode, filp) /* Nothing to do */
+#endif
+
+#if defined(_LINUX_MM_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
+unsigned long vmalloc_to_pfn(void *addr);
+#endif
+
+#if defined(__LINUX_COMPLETION_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
+unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout);
+#endif
+
+#if defined(_LINUX_SCHED_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+signed long schedule_timeout_interruptible(signed long timeout);
+#endif
+
+#if defined(_LINUX_SLAB_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+void *kzalloc(size_t size, int flags);
+#endif
+
+#if defined(_LINUX_BLKDEV_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+#define end_that_request_last(req, uptodate) end_that_request_last(req)
+#endif
+
+#endif
diff --git a/unmodified_drivers/linux-2.6/netfront/Makefile b/unmodified_drivers/linux-2.6/netfront/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/netfront/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/unmodified_drivers/linux-2.6/overrides.mk b/unmodified_drivers/linux-2.6/overrides.mk
index 74ef12c4c9..53a96d87a4 100644
--- a/unmodified_drivers/linux-2.6/overrides.mk
+++ b/unmodified_drivers/linux-2.6/overrides.mk
@@ -9,4 +9,4 @@ EXTRA_CFLAGS += -DCONFIG_XEN_SHADOW_MODE -DCONFIG_XEN_SHADOW_TRANSLATE
EXTRA_CFLAGS += -DCONFIG_XEN_BLKDEV_GRANT -DXEN_EVTCHN_MASK_OPS
EXTRA_CFLAGS += -DCONFIG_XEN_NETDEV_GRANT_RX -DCONFIG_XEN_NETDEV_GRANT_TX
EXTRA_CFLAGS += -D__XEN_INTERFACE_VERSION__=0x00030202
-EXTRA_CFLAGS += -I$(M)/include
+EXTRA_CFLAGS += -I$(M)/include -I$(M)/compat-include -DHAVE_XEN_PLATFORM_COMPAT_H
diff --git a/unmodified_drivers/linux-2.6/platform-pci/Kbuild b/unmodified_drivers/linux-2.6/platform-pci/Kbuild
index dda3d0e7cf..a44e50e94c 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/Kbuild
+++ b/unmodified_drivers/linux-2.6/platform-pci/Kbuild
@@ -4,7 +4,7 @@ obj-m := xen-platform-pci.o
EXTRA_CFLAGS += -I$(M)/platform-pci
-xen-platform-pci-objs := evtchn.o platform-pci.o gnttab.o xen_support.o features.o
+xen-platform-pci-objs := evtchn.o platform-pci.o gnttab.o xen_support.o features.o platform-compat.o
# Can we do better ?
ifeq ($(ARCH),ia64)
diff --git a/unmodified_drivers/linux-2.6/platform-pci/Makefile b/unmodified_drivers/linux-2.6/platform-pci/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/platform-pci/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
index a38c50c1c4..4bd9592754 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
@@ -36,6 +36,10 @@
#include <xen/features.h>
#include "platform-pci.h"
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
void *shared_info_area;
#define MAX_EVTCHN 256
diff --git a/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c
new file mode 100644
index 0000000000..f3cef11620
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c
@@ -0,0 +1,116 @@
+#include <linux/config.h>
+#include <linux/version.h>
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <xen/platform-compat.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)
+static int system_state = 1;
+EXPORT_SYMBOL(system_state);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
+size_t strcspn(const char *s, const char *reject)
+{
+ const char *p;
+ const char *r;
+ size_t count = 0;
+
+ for (p = s; *p != '\0'; ++p) {
+ for (r = reject; *r != '\0'; ++r) {
+ if (*p == *r)
+ return count;
+ }
+ ++count;
+ }
+
+ return count;
+}
+EXPORT_SYMBOL(strcspn);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
+/*
+ * Map a vmalloc()-space virtual address to the physical page frame number.
+ */
+unsigned long vmalloc_to_pfn(void * vmalloc_addr)
+{
+ return page_to_pfn(vmalloc_to_page(vmalloc_addr));
+}
+EXPORT_SYMBOL(vmalloc_to_pfn);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
+unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout)
+{
+ might_sleep();
+
+ spin_lock_irq(&x->wait.lock);
+ if (!x->done) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ wait.flags |= WQ_FLAG_EXCLUSIVE;
+ __add_wait_queue_tail(&x->wait, &wait);
+ do {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&x->wait.lock);
+ timeout = schedule_timeout(timeout);
+ spin_lock_irq(&x->wait.lock);
+ if (!timeout) {
+ __remove_wait_queue(&x->wait, &wait);
+ goto out;
+ }
+ } while (!x->done);
+ __remove_wait_queue(&x->wait, &wait);
+ }
+ x->done--;
+out:
+ spin_unlock_irq(&x->wait.lock);
+ return timeout;
+}
+EXPORT_SYMBOL(wait_for_completion_timeout);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12)
+/*
+ fake do_exit using complete_and_exit
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
+asmlinkage NORET_TYPE void do_exit(long code)
+#else
+fastcall NORET_TYPE void do_exit(long code)
+#endif
+{
+ complete_and_exit(NULL, code);
+}
+EXPORT_SYMBOL_GPL(do_exit);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+signed long schedule_timeout_interruptible(signed long timeout)
+{
+ __set_current_state(TASK_INTERRUPTIBLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_interruptible);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+/**
+ * kzalloc - allocate memory. The memory is set to zero.
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ */
+void *kzalloc(size_t size, int flags)
+{
+ void *ret = kmalloc(size, flags);
+ if (ret)
+ memset(ret, 0, size);
+ return ret;
+}
+EXPORT_SYMBOL(kzalloc);
+#endif
diff --git a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
index cb9e8dd7e5..5ff6ba83f7 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -33,6 +33,7 @@
#include <asm/irq.h>
#include <asm/uaccess.h>
#include <asm/hypervisor.h>
+#include <asm/pgtable.h>
#include <xen/interface/memory.h>
#include <xen/features.h>
#ifdef __ia64__
@@ -41,6 +42,10 @@
#include "platform-pci.h"
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define DRV_NAME "xen-platform-pci"
#define DRV_VERSION "0.10"
#define DRV_RELDATE "03/03/2005"
diff --git a/unmodified_drivers/linux-2.6/platform-pci/xen_support.c b/unmodified_drivers/linux-2.6/platform-pci/xen_support.c
index b1a903b1c7..423d2f2e24 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/xen_support.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/xen_support.c
@@ -26,6 +26,10 @@
#include <asm/hypervisor.h>
#include "platform-pci.h"
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
BUG();
diff --git a/unmodified_drivers/linux-2.6/xenbus/Makefile b/unmodified_drivers/linux-2.6/xenbus/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/xenbus/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/xen/arch/ia64/vmx/mmio.c b/xen/arch/ia64/vmx/mmio.c
index 579785b563..d605e828f0 100644
--- a/xen/arch/ia64/vmx/mmio.c
+++ b/xen/arch/ia64/vmx/mmio.c
@@ -52,6 +52,70 @@ struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base)
#define PIB_OFST_INTA 0x1E0000
#define PIB_OFST_XTP 0x1E0008
+#define HVM_BUFFERED_IO_RANGE_NR 1
+
+struct hvm_buffered_io_range {
+ unsigned long start_addr;
+ unsigned long length;
+};
+
+static struct hvm_buffered_io_range buffered_stdvga_range = {0xA0000, 0x20000};
+static struct hvm_buffered_io_range
+*hvm_buffered_io_ranges[HVM_BUFFERED_IO_RANGE_NR] =
+{
+ &buffered_stdvga_range
+};
+
+int hvm_buffered_io_intercept(ioreq_t *p)
+{
+ struct vcpu *v = current;
+ spinlock_t *buffered_io_lock;
+ buffered_iopage_t *buffered_iopage =
+ (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
+ unsigned long tmp_write_pointer = 0;
+ int i;
+
+ /* ignore READ ioreq_t! */
+ if ( p->dir == IOREQ_READ )
+ return 0;
+
+ for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
+ if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
+ p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
+ hvm_buffered_io_ranges[i]->length )
+ break;
+ }
+
+ if ( i == HVM_BUFFERED_IO_RANGE_NR )
+ return 0;
+
+ buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
+ spin_lock(buffered_io_lock);
+
+ if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer ==
+ (unsigned long)IOREQ_BUFFER_SLOT_NUM ) {
+ /* the queue is full.
+ * send the iopacket through the normal path.
+ * NOTE: The arithimetic operation could handle the situation for
+ * write_pointer overflow.
+ */
+ spin_unlock(buffered_io_lock);
+ return 0;
+ }
+
+ tmp_write_pointer = buffered_iopage->write_pointer % IOREQ_BUFFER_SLOT_NUM;
+
+ memcpy(&buffered_iopage->ioreq[tmp_write_pointer], p, sizeof(ioreq_t));
+
+ /*make the ioreq_t visible before write_pointer*/
+ wmb();
+ buffered_iopage->write_pointer++;
+
+ spin_unlock(buffered_io_lock);
+
+ return 1;
+}
+
static void write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value);
static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
@@ -156,7 +220,11 @@ static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
p->df = 0;
p->io_count++;
-
+ if(hvm_buffered_io_intercept(p)){
+ p->state = STATE_IORESP_READY;
+ vmx_io_assist(v);
+ return ;
+ }else
vmx_send_assist_req(v);
if(dir==IOREQ_READ){ //read
*val=p->u.data;
diff --git a/xen/arch/ia64/vmx/vmx_init.c b/xen/arch/ia64/vmx/vmx_init.c
index 2694149d5b..9d8fbe8ec8 100644
--- a/xen/arch/ia64/vmx/vmx_init.c
+++ b/xen/arch/ia64/vmx/vmx_init.c
@@ -362,8 +362,8 @@ static const io_range_t io_ranges[] = {
{PIB_START, PIB_SIZE, GPFN_PIB},
};
-/* Reseve 1 page for shared I/O and 1 page for xenstore. */
-#define VMX_SYS_PAGES (2 + (GFW_SIZE >> PAGE_SHIFT))
+/* Reseve 1 page for shared I/O ,1 page for xenstore and 1 page for buffer I/O. */
+#define VMX_SYS_PAGES (3 + (GFW_SIZE >> PAGE_SHIFT))
#define VMX_CONFIG_PAGES(d) ((d)->max_pages - VMX_SYS_PAGES)
static void vmx_build_physmap_table(struct domain *d)
@@ -425,8 +425,12 @@ static void vmx_build_physmap_table(struct domain *d)
mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
assign_domain_page(d, STORE_PAGE_START, mfn << PAGE_SHIFT);
list_ent = mfn_to_page(mfn)->list.next;
+ ASSERT(list_ent != &d->page_list);
+
+ mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
+ assign_domain_page(d, BUFFER_IO_PAGE_START, mfn << PAGE_SHIFT);
+ list_ent = mfn_to_page(mfn)->list.next;
ASSERT(list_ent == &d->page_list);
-
}
void vmx_setup_platform(struct domain *d)
@@ -437,6 +441,10 @@ void vmx_setup_platform(struct domain *d)
d->arch.vmx_platform.shared_page_va =
(unsigned long)__va(__gpa_to_mpa(d, IO_PAGE_START));
+ //For buffered IO requests.
+ spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
+ d->arch.hvm_domain.buffered_io_va =
+ (unsigned long)__va(__gpa_to_mpa(d, BUFFER_IO_PAGE_START));
/* TEMP */
d->arch.vmx_platform.pib_base = 0xfee00000UL;
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 31f2793fb9..89cc508d02 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -28,12 +28,14 @@ obj-y += microcode.o
obj-y += mm.o
obj-y += mpparse.o
obj-y += nmi.o
+obj-y += numa.o
obj-y += physdev.o
obj-y += rwlock.o
obj-y += setup.o
obj-y += shutdown.o
obj-y += smp.o
obj-y += smpboot.o
+obj-y += srat.o
obj-y += string.o
obj-y += sysctl.o
obj-y += time.o
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index 37623ff5eb..843a9232bf 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -5,6 +5,7 @@ obj-y += hvm.o
obj-y += i8254.o
obj-y += i8259.o
obj-y += rtc.o
+obj-y += pmtimer.o
obj-y += instrlen.o
obj-y += intercept.o
obj-y += io.o
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 47d7ca46c4..f950d05295 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -43,7 +43,7 @@
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
@@ -285,6 +285,7 @@ void hvm_setup_platform(struct domain* d)
pt_timer_fn, v, v->processor);
pit_init(v, cpu_khz);
rtc_init(v, RTC_PORT(0), RTC_IRQ);
+ pmtimer_init(v, ACPI_PM_TMR_BLK_ADDRESS);
}
void pic_irq_request(void *data, int level)
diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c
index 5f27ee25b2..464ddee8f9 100644
--- a/xen/arch/x86/hvm/i8254.c
+++ b/xen/arch/x86/hvm/i8254.c
@@ -38,7 +38,7 @@
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
#include <asm/current.h>
/* Enable DEBUG_PIT may cause guest calibration inaccuracy */
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 8993572e10..a1ce8ddf33 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -35,7 +35,7 @@
#include <asm/shadow.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
diff --git a/xen/arch/x86/hvm/pmtimer.c b/xen/arch/x86/hvm/pmtimer.c
new file mode 100644
index 0000000000..e0c93536ea
--- /dev/null
+++ b/xen/arch/x86/hvm/pmtimer.c
@@ -0,0 +1,63 @@
+#include <asm/hvm/vpt.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+
+#define TMR_STS (1 << 0)
+static void pmt_update_status(void *opaque)
+{
+ PMTState *s = opaque;
+ s->pm1_status |= TMR_STS;
+
+ /* TODO: When TMR_EN == 1, generate a SCI event */
+
+ set_timer(&s->timer, NOW() + (1000000000ULL << 31) / FREQUENCE_PMTIMER);
+}
+
+static int handle_pmt_io(ioreq_t *p)
+{
+ struct vcpu *v = current;
+ PMTState *s = &v->domain->arch.hvm_domain.pl_time.vpmt;
+ uint64_t curr_gtime;
+
+ if (p->size != 4 ||
+ p->pdata_valid ||
+ p->type != IOREQ_TYPE_PIO){
+ printk("HVM_PMT: wrong PM timer IO\n");
+ return 1;
+ }
+
+ if (p->dir == 0) { /* write */
+ /* PM_TMR_BLK is read-only */
+ return 1;
+ } else if (p->dir == 1) { /* read */
+ curr_gtime = hvm_get_guest_time(s->vcpu);
+ s->pm1_timer += ((curr_gtime - s->last_gtime) * s->scale) >> 32;
+ p->u.data = s->pm1_timer;
+ s->last_gtime = curr_gtime;
+ return 1;
+ }
+ return 0;
+}
+
+void pmtimer_init(struct vcpu *v, int base)
+{
+ PMTState *s = &v->domain->arch.hvm_domain.pl_time.vpmt;
+
+ s->pm1_timer = 0;
+ s->pm1_status = 0;
+ s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / ticks_per_sec(v);
+ s->vcpu = v;
+
+ init_timer(&s->timer, pmt_update_status, s, v->processor);
+ /* ACPI supports a 32-bit power management timer */
+ set_timer(&s->timer, NOW() + (1000000000ULL << 31) / FREQUENCE_PMTIMER);
+
+ register_portio_handler(base, 4, handle_pmt_io);
+}
+
+void pmtimer_deinit(struct domain *d)
+{
+ PMTState *s = &d->arch.hvm_domain.pl_time.vpmt;
+
+ kill_timer(&s->timer);
+}
diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c
index 210168c26b..0f5e11986e 100644
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -23,7 +23,7 @@
*/
#include <asm/mc146818rtc.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/current.h>
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 92167d74fb..88c0802425 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -922,6 +922,7 @@ static void svm_relinquish_guest_resources(struct domain *d)
kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
rtc_deinit(d);
+ pmtimer_deinit(d);
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -937,6 +938,7 @@ static void svm_migrate_timers(struct vcpu *v)
struct periodic_time *pt =
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
+ struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt;
if ( pt->enabled )
{
@@ -947,6 +949,7 @@ static void svm_migrate_timers(struct vcpu *v)
migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
migrate_timer(&vrtc->second_timer, v->processor);
migrate_timer(&vrtc->second_timer2, v->processor);
+ migrate_timer(&vpmt->timer, v->processor);
}
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 476f8beae9..ac1be73556 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -147,6 +147,7 @@ static void vmx_relinquish_guest_resources(struct domain *d)
kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
rtc_deinit(d);
+ pmtimer_deinit(d);
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -489,6 +490,7 @@ void vmx_migrate_timers(struct vcpu *v)
{
struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
+ struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt;
if ( pt->enabled )
{
@@ -499,6 +501,7 @@ void vmx_migrate_timers(struct vcpu *v)
migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
migrate_timer(&vrtc->second_timer, v->processor);
migrate_timer(&vrtc->second_timer2, v->processor);
+ migrate_timer(&vpmt->timer, v->processor);
}
static void vmx_store_cpu_guest_regs(
diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
new file mode 100644
index 0000000000..d332320af6
--- /dev/null
+++ b/xen/arch/x86/numa.c
@@ -0,0 +1,308 @@
+/*
+ * Generic VM initialization for x86-64 NUMA setups.
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
+ */
+
+#include <xen/mm.h>
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/ctype.h>
+#include <xen/nodemask.h>
+#include <xen/numa.h>
+#include <xen/keyhandler.h>
+#include <xen/time.h>
+#include <xen/smp.h>
+#include <asm/acpi.h>
+
+static int numa_setup(char *s);
+custom_param("numa", numa_setup);
+
+#ifndef Dprintk
+#define Dprintk(x...)
+#endif
+
+/* from proto.h */
+#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
+
+struct node_data node_data[MAX_NUMNODES];
+
+int memnode_shift;
+u8 memnodemap[NODEMAPSIZE];
+
+unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = NUMA_NO_NODE
+};
+unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+
+nodemask_t node_online_map = { { [0] = 1UL } };
+
+/* Default NUMA to off for now. acpi=on required to enable it. */
+int numa_off __initdata = 1;
+
+int acpi_numa __initdata;
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns :
+ * 1 if OK
+ * 0 if memnodmap[] too small (of shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init
+populate_memnodemap(const struct node *nodes, int numnodes, int shift)
+{
+ int i;
+ int res = -1;
+ unsigned long addr, end;
+
+ if (shift >= 64)
+ return -1;
+ memset(memnodemap, 0xff, sizeof(memnodemap));
+ for (i = 0; i < numnodes; i++) {
+ addr = nodes[i].start;
+ end = nodes[i].end;
+ if (addr >= end)
+ continue;
+ if ((end >> shift) >= NODEMAPSIZE)
+ return 0;
+ do {
+ if (memnodemap[addr >> shift] != 0xff)
+ return -1;
+ memnodemap[addr >> shift] = i;
+ addr += (1UL << shift);
+ } while (addr < end);
+ res = 1;
+ }
+ return res;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes)
+{
+ int shift = 20;
+
+ while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
+ shift++;
+
+ printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
+ shift);
+
+ if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+ printk(KERN_INFO
+ "Your memory is not aligned you need to rebuild your kernel "
+ "with a bigger NODEMAPSIZE shift=%d\n",
+ shift);
+ return -1;
+ }
+ return shift;
+}
+
+/* initialize NODE_DATA given nodeid and start/end */
+void __init setup_node_bootmem(int nodeid, u64 start, u64 end)
+{
+ unsigned long start_pfn, end_pfn;
+
+ start_pfn = start >> PAGE_SHIFT;
+ end_pfn = end >> PAGE_SHIFT;
+
+ NODE_DATA(nodeid)->node_id = nodeid;
+ NODE_DATA(nodeid)->node_start_pfn = start_pfn;
+ NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+
+ node_set_online(nodeid);
+}
+
+void __init numa_init_array(void)
+{
+ int rr, i;
+ /* There are unfortunately some poorly designed mainboards around
+ that only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ mapping. To avoid this fill in the mapping for all possible
+ CPUs, as the number of CPUs is not known yet.
+ We round robin the existing nodes. */
+ rr = first_node(node_online_map);
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_to_node[i] != NUMA_NO_NODE)
+ continue;
+ numa_set_node(i, rr);
+ rr = next_node(rr, node_online_map);
+ if (rr == MAX_NUMNODES)
+ rr = first_node(node_online_map);
+ }
+
+}
+
+#ifdef CONFIG_NUMA_EMU
+static int numa_fake __initdata = 0;
+
+/* Numa emulation */
+static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+{
+ int i;
+ struct node nodes[MAX_NUMNODES];
+ unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
+
+ /* Kludge needed for the hash function */
+ if (hweight64(sz) > 1) {
+ unsigned long x = 1;
+ while ((x << 1) < sz)
+ x <<= 1;
+ if (x < sz/2)
+ printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
+ sz = x;
+ }
+
+ memset(&nodes,0,sizeof(nodes));
+ for (i = 0; i < numa_fake; i++) {
+ nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
+ if (i == numa_fake-1)
+ sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
+ nodes[i].end = nodes[i].start + sz;
+ printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
+ i,
+ nodes[i].start, nodes[i].end,
+ (nodes[i].end - nodes[i].start) >> 20);
+ node_set_online(i);
+ }
+ memnode_shift = compute_hash_shift(nodes, numa_fake);
+ if (memnode_shift < 0) {
+ memnode_shift = 0;
+ printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
+ return -1;
+ }
+ for_each_online_node(i)
+ setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ numa_init_array();
+ return 0;
+}
+#endif
+
+void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+ int i;
+
+#ifdef CONFIG_NUMA_EMU
+ if (numa_fake && !numa_emulation(start_pfn, end_pfn))
+ return;
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+ if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
+ end_pfn << PAGE_SHIFT))
+ return;
+#endif
+
+ printk(KERN_INFO "%s\n",
+ numa_off ? "NUMA turned off" : "No NUMA configuration found");
+
+ printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
+ start_pfn << PAGE_SHIFT,
+ end_pfn << PAGE_SHIFT);
+ /* setup dummy node covering all memory */
+ memnode_shift = 63;
+ memnodemap[0] = 0;
+ nodes_clear(node_online_map);
+ node_set_online(0);
+ for (i = 0; i < NR_CPUS; i++)
+ numa_set_node(i, 0);
+ node_to_cpumask[0] = cpumask_of_cpu(0);
+ setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
+}
+
+__cpuinit void numa_add_cpu(int cpu)
+{
+ set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ cpu_to_node[cpu] = node;
+}
+
+/* [numa=off] */
+static __init int numa_setup(char *opt)
+{
+ if (!strncmp(opt,"off",3))
+ numa_off = 1;
+ if (!strncmp(opt,"on",2))
+ numa_off = 0;
+#ifdef CONFIG_NUMA_EMU
+ if(!strncmp(opt, "fake=", 5)) {
+ numa_off = 0;
+ numa_fake = simple_strtoul(opt+5,NULL,0); ;
+ if (numa_fake >= MAX_NUMNODES)
+ numa_fake = MAX_NUMNODES;
+ }
+#endif
+#ifdef CONFIG_ACPI_NUMA
+ if (!strncmp(opt,"noacpi",6)) {
+ numa_off = 0;
+ acpi_numa = -1;
+ }
+#endif
+ return 1;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ */
+void __init init_cpu_to_node(void)
+{
+ int i;
+ for (i = 0; i < NR_CPUS; i++) {
+ u8 apicid = x86_cpu_to_apicid[i];
+ if (apicid == BAD_APICID)
+ continue;
+ if (apicid_to_node[apicid] == NUMA_NO_NODE)
+ continue;
+ numa_set_node(i,apicid_to_node[apicid]);
+ }
+}
+
+EXPORT_SYMBOL(cpu_to_node);
+EXPORT_SYMBOL(node_to_cpumask);
+EXPORT_SYMBOL(memnode_shift);
+EXPORT_SYMBOL(memnodemap);
+EXPORT_SYMBOL(node_data);
+
+static void dump_numa(unsigned char key)
+{
+ s_time_t now = NOW();
+ int i;
+
+ printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
+ (u32)(now>>32), (u32)now);
+
+ for_each_online_node(i) {
+ unsigned long pa = (NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT;
+ printk("idx%d -> NODE%d start->%lu size->%lu\n",
+ i, NODE_DATA(i)->node_id,
+ NODE_DATA(i)->node_start_pfn,
+ NODE_DATA(i)->node_spanned_pages);
+ /* sanity check phys_to_nid() */
+ printk("phys_to_nid(%lx) -> %d should be %d\n", pa, phys_to_nid(pa),
+ NODE_DATA(i)->node_id);
+ }
+ for_each_online_cpu(i)
+ printk("CPU%d -> NODE%d\n", i, cpu_to_node[i]);
+}
+
+static __init int register_numa_trigger(void)
+{
+ register_keyhandler('u', dump_numa, "dump numa info");
+ return 0;
+}
+__initcall(register_numa_trigger);
+
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 2c8b638944..15c42b133c 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -16,6 +16,7 @@
#include <xen/percpu.h>
#include <xen/hypercall.h>
#include <xen/keyhandler.h>
+#include <xen/numa.h>
#include <public/version.h>
#include <asm/bitops.h>
#include <asm/smp.h>
@@ -29,6 +30,7 @@
extern void dmi_scan_machine(void);
extern void generic_apic_probe(void);
+extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
/*
* opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
@@ -257,6 +259,20 @@ static void __init init_idle_domain(void)
setup_idle_pagetable();
}
+static void srat_detect_node(int cpu)
+{
+ unsigned node;
+ u8 apicid = x86_cpu_to_apicid[cpu];
+
+ node = apicid_to_node[apicid];
+ if ( node == NUMA_NO_NODE )
+ node = 0;
+ numa_set_node(cpu, node);
+
+ if ( acpi_numa > 0 )
+ printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
+}
+
void __init __start_xen(multiboot_info_t *mbi)
{
char __cmdline[] = "", *cmdline = __cmdline;
@@ -485,6 +501,12 @@ void __init __start_xen(multiboot_info_t *mbi)
init_frametable();
+ acpi_boot_table_init();
+
+ acpi_numa_init();
+
+ numa_initmem_init(0, max_page);
+
end_boot_allocator();
/* Initialise the Xen heap, skipping RAM holes. */
@@ -536,9 +558,10 @@ void __init __start_xen(multiboot_info_t *mbi)
generic_apic_probe();
- acpi_boot_table_init();
acpi_boot_init();
+ init_cpu_to_node();
+
if ( smp_found_config )
get_smp_config();
@@ -589,6 +612,11 @@ void __init __start_xen(multiboot_info_t *mbi)
break;
if ( !cpu_online(i) )
__cpu_up(i);
+
+ /* Set up cpu_to_node[]. */
+ srat_detect_node(i);
+ /* Set up node_to_cpumask based on cpu_to_node[]. */
+ numa_add_cpu(i);
}
printk("Brought up %ld CPUs\n", (long)num_online_cpus());
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index eb2d21111c..f7d8712563 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -43,6 +43,7 @@
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/serial.h>
+#include <xen/numa.h>
#include <asm/current.h>
#include <asm/mc146818rtc.h>
#include <asm/desc.h>
@@ -628,7 +629,7 @@ u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICI
static void map_cpu_to_logical_apicid(void)
{
int cpu = smp_processor_id();
- int apicid = logical_smp_processor_id();
+ int apicid = hard_smp_processor_id();
cpu_2_logical_apicid[cpu] = apicid;
map_cpu_to_node(cpu, apicid_to_node(apicid));
diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
new file mode 100644
index 0000000000..ea462e222b
--- /dev/null
+++ b/xen/arch/x86/srat.c
@@ -0,0 +1,315 @@
+/*
+ * ACPI 3.0 based NUMA setup
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ *
+ * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
+ */
+
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/inttypes.h>
+#include <xen/nodemask.h>
+#include <xen/acpi.h>
+#include <xen/numa.h>
+#include <asm/page.h>
+
+static struct acpi_table_slit *acpi_slit;
+
+static nodemask_t nodes_parsed __initdata;
+static nodemask_t nodes_found __initdata;
+static struct node nodes[MAX_NUMNODES] __initdata;
+static u8 pxm2node[256] = { [0 ... 255] = 0xff };
+
+/* Too small nodes confuse the VM badly. Usually they result
+ from BIOS bugs. */
+#define NODE_MIN_SIZE (4*1024*1024)
+
+static int node_to_pxm(int n);
+
+int pxm_to_node(int pxm)
+{
+ if ((unsigned)pxm >= 256)
+ return -1;
+ /* Extend 0xff to (int)-1 */
+ return (signed char)pxm2node[pxm];
+}
+
+static __init int setup_node(int pxm)
+{
+ unsigned node = pxm2node[pxm];
+ if (node == 0xff) {
+ if (nodes_weight(nodes_found) >= MAX_NUMNODES)
+ return -1;
+ node = first_unset_node(nodes_found);
+ node_set(node, nodes_found);
+ pxm2node[pxm] = node;
+ }
+ return pxm2node[pxm];
+}
+
+static __init int conflicting_nodes(u64 start, u64 end)
+{
+ int i;
+ for_each_node_mask(i, nodes_parsed) {
+ struct node *nd = &nodes[i];
+ if (nd->start == nd->end)
+ continue;
+ if (nd->end > start && nd->start < end)
+ return i;
+ if (nd->end == end && nd->start == start)
+ return i;
+ }
+ return -1;
+}
+
+static __init void cutoff_node(int i, u64 start, u64 end)
+{
+ struct node *nd = &nodes[i];
+ if (nd->start < start) {
+ nd->start = start;
+ if (nd->end < nd->start)
+ nd->start = nd->end;
+ }
+ if (nd->end > end) {
+ nd->end = end;
+ if (nd->start > nd->end)
+ nd->start = nd->end;
+ }
+}
+
+static __init void bad_srat(void)
+{
+ int i;
+ printk(KERN_ERR "SRAT: SRAT not used.\n");
+ acpi_numa = -1;
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ apicid_to_node[i] = NUMA_NO_NODE;
+}
+
+static __init inline int srat_disabled(void)
+{
+ return numa_off || acpi_numa < 0;
+}
+
+/*
+ * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
+ * up the NUMA heuristics which wants the local node to have a smaller
+ * distance than the others.
+ * Do some quick checks here and only use the SLIT if it passes.
+ */
+static __init int slit_valid(struct acpi_table_slit *slit)
+{
+ int i, j;
+ int d = slit->localities;
+ for (i = 0; i < d; i++) {
+ for (j = 0; j < d; j++) {
+ u8 val = slit->entry[d*i + j];
+ if (i == j) {
+ if (val != 10)
+ return 0;
+ } else if (val <= 10)
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* Callback for SLIT parsing */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+ if (!slit_valid(slit)) {
+ printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
+ return;
+ }
+ acpi_slit = slit;
+}
+
+/* Callback for Proximity Domain -> LAPIC mapping */
+void __init
+acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
+{
+ int pxm, node;
+ if (srat_disabled())
+ return;
+ if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { bad_srat();
+ return;
+ }
+ if (pa->flags.enabled == 0)
+ return;
+ pxm = pa->proximity_domain;
+ node = setup_node(pxm);
+ if (node < 0) {
+ printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
+ bad_srat();
+ return;
+ }
+ apicid_to_node[pa->apic_id] = node;
+ acpi_numa = 1;
+ printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+ pxm, pa->apic_id, node);
+}
+
+/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
+void __init
+acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
+{
+ struct node *nd;
+ u64 start, end;
+ int node, pxm;
+ int i;
+
+ if (srat_disabled())
+ return;
+ if (ma->header.length != sizeof(struct acpi_table_memory_affinity)) {
+ bad_srat();
+ return;
+ }
+ if (ma->flags.enabled == 0)
+ return;
+ start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
+ end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
+ pxm = ma->proximity_domain;
+ node = setup_node(pxm);
+ if (node < 0) {
+ printk(KERN_ERR "SRAT: Too many proximity domains.\n");
+ bad_srat();
+ return;
+ }
+ /* It is fine to add this area to the nodes data it will be used later*/
+ if (ma->flags.hot_pluggable == 1)
+ printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
+ start, end);
+ i = conflicting_nodes(start, end);
+ if (i == node) {
+ printk(KERN_WARNING
+ "SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%"
+ PRIx64"-%"PRIx64")\n", pxm, start, end, nodes[i].start, nodes[i].end);
+ } else if (i >= 0) {
+ printk(KERN_ERR
+ "SRAT: PXM %d (%"PRIx64"-%"PRIx64") overlaps with PXM %d (%"
+ PRIx64"-%"PRIx64")\n", pxm, start, end, node_to_pxm(i),
+ nodes[i].start, nodes[i].end);
+ bad_srat();
+ return;
+ }
+ nd = &nodes[node];
+ if (!node_test_and_set(node, nodes_parsed)) {
+ nd->start = start;
+ nd->end = end;
+ } else {
+ if (start < nd->start)
+ nd->start = start;
+ if (nd->end < end)
+ nd->end = end;
+ }
+ printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
+ nd->start, nd->end);
+}
+
+/* Sanity check to catch more bad SRATs (they are amazingly common).
+ Make sure the PXMs cover all memory. */
+static int nodes_cover_memory(void)
+{
+ int i;
+ u64 pxmram, e820ram;
+
+ pxmram = 0;
+ for_each_node_mask(i, nodes_parsed) {
+ u64 s = nodes[i].start >> PAGE_SHIFT;
+ u64 e = nodes[i].end >> PAGE_SHIFT;
+ pxmram += e - s;
+ }
+
+ e820ram = max_page;
+ /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
+ if ((long)(e820ram - pxmram) >= 1*1024*1024) {
+ printk(KERN_ERR "SRAT: PXMs only cover %"PRIu64"MB of your %"
+ PRIu64"MB e820 RAM. Not used.\n",
+ (pxmram << PAGE_SHIFT) >> 20,
+ (e820ram << PAGE_SHIFT) >> 20);
+ return 0;
+ }
+ return 1;
+}
+
+static void unparse_node(int node)
+{
+ int i;
+ node_clear(node, nodes_parsed);
+ for (i = 0; i < MAX_LOCAL_APIC; i++) {
+ if (apicid_to_node[i] == node)
+ apicid_to_node[i] = NUMA_NO_NODE;
+ }
+}
+
+void __init acpi_numa_arch_fixup(void) {}
+
+/* Use the information discovered above to actually set up the nodes. */
+int __init acpi_scan_nodes(u64 start, u64 end)
+{
+ int i;
+
+ /* First clean up the node list */
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ cutoff_node(i, start, end);
+ if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
+ unparse_node(i);
+ }
+
+ if (acpi_numa <= 0)
+ return -1;
+
+ if (!nodes_cover_memory()) {
+ bad_srat();
+ return -1;
+ }
+
+ memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
+ if (memnode_shift < 0) {
+ printk(KERN_ERR
+ "SRAT: No NUMA node hash function found. Contact maintainer\n");
+ bad_srat();
+ return -1;
+ }
+
+ /* Finally register nodes */
+ for_each_node_mask(i, nodes_parsed)
+ setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_to_node[i] == NUMA_NO_NODE)
+ continue;
+ if (!node_isset(cpu_to_node[i], nodes_parsed))
+ numa_set_node(i, NUMA_NO_NODE);
+ }
+ numa_init_array();
+ return 0;
+}
+
+static int node_to_pxm(int n)
+{
+ int i;
+ if (pxm2node[n] == n)
+ return n;
+ for (i = 0; i < 256; i++)
+ if (pxm2node[i] == n)
+ return i;
+ return 0;
+}
+
+int __node_distance(int a, int b)
+{
+ int index;
+
+ if (!acpi_slit)
+ return a == b ? 10 : 20;
+ index = acpi_slit->localities * node_to_pxm(a);
+ return acpi_slit->entry[index + node_to_pxm(b)];
+}
+
+EXPORT_SYMBOL(__node_distance);
diff --git a/xen/common/memory.c b/xen/common/memory.c
index c2827fa59f..9ab62a48ec 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -41,6 +41,8 @@ increase_reservation(
struct page_info *page;
unsigned long i;
xen_pfn_t mfn;
+ /* use domain's first processor for locality parameter */
+ unsigned int cpu = d->vcpu[0]->processor;
if ( !guest_handle_is_null(extent_list) &&
!guest_handle_okay(extent_list, nr_extents) )
@@ -58,8 +60,8 @@ increase_reservation(
return i;
}
- if ( unlikely((page = alloc_domheap_pages(
- d, extent_order, memflags)) == NULL) )
+ if ( unlikely((page = __alloc_domheap_pages( d, cpu,
+ extent_order, memflags )) == NULL) )
{
DPRINTK("Could not allocate order=%d extent: "
"id=%d memflags=%x (%ld of %d)\n",
@@ -92,6 +94,8 @@ populate_physmap(
unsigned long i, j;
xen_pfn_t gpfn;
xen_pfn_t mfn;
+ /* use domain's first processor for locality parameter */
+ unsigned int cpu = d->vcpu[0]->processor;
if ( !guest_handle_okay(extent_list, nr_extents) )
return 0;
@@ -111,8 +115,8 @@ populate_physmap(
if ( unlikely(__copy_from_guest_offset(&gpfn, extent_list, i, 1)) )
goto out;
- if ( unlikely((page = alloc_domheap_pages(
- d, extent_order, memflags)) == NULL) )
+ if ( unlikely((page = __alloc_domheap_pages( d, cpu,
+ extent_order, memflags )) == NULL) )
{
DPRINTK("Could not allocate order=%d extent: "
"id=%d memflags=%x (%ld of %d)\n",
@@ -294,7 +298,7 @@ memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
unsigned long in_chunk_order, out_chunk_order;
xen_pfn_t gpfn, gmfn, mfn;
unsigned long i, j, k;
- unsigned int memflags = 0;
+ unsigned int memflags = 0, cpu;
long rc = 0;
struct domain *d;
struct page_info *page;
@@ -368,6 +372,9 @@ memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
}
d = current->domain;
+ /* use domain's first processor for locality parameter */
+ cpu = d->vcpu[0]->processor;
+
for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
{
if ( hypercall_preempt_check() )
@@ -413,8 +420,8 @@ memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
/* Allocate a chunk's worth of anonymous output pages. */
for ( j = 0; j < (1UL << out_chunk_order); j++ )
{
- page = alloc_domheap_pages(
- NULL, exch.out.extent_order, memflags);
+ page = __alloc_domheap_pages( NULL, cpu,
+ exch.out.extent_order, memflags);
if ( unlikely(page == NULL) )
{
rc = -ENOMEM;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index fbbe837780..f4a1adc274 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -4,6 +4,7 @@
* Simple buddy heap allocator for Xen.
*
* Copyright (c) 2002-2004 K A Fraser
+ * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -33,6 +34,8 @@
#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#include <xen/perfc.h>
+#include <xen/numa.h>
+#include <xen/nodemask.h>
#include <asm/page.h>
/*
@@ -247,22 +250,23 @@ unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
#define pfn_dom_zone_type(_pfn) \
(((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
-static struct list_head heap[NR_ZONES][MAX_ORDER+1];
+static struct list_head heap[NR_ZONES][MAX_NUMNODES][MAX_ORDER+1];
-static unsigned long avail[NR_ZONES];
+static unsigned long avail[NR_ZONES][MAX_NUMNODES];
static DEFINE_SPINLOCK(heap_lock);
void end_boot_allocator(void)
{
- unsigned long i, j;
+ unsigned long i, j, k;
int curr_free = 0, next_free = 0;
memset(avail, 0, sizeof(avail));
for ( i = 0; i < NR_ZONES; i++ )
- for ( j = 0; j <= MAX_ORDER; j++ )
- INIT_LIST_HEAD(&heap[i][j]);
+ for ( j = 0; j < MAX_NUMNODES; j++ )
+ for ( k = 0; k <= MAX_ORDER; k++ )
+ INIT_LIST_HEAD(&heap[i][j][k]);
/* Pages that are free now go to the domain sub-allocator. */
for ( i = 0; i < max_page; i++ )
@@ -272,29 +276,59 @@ void end_boot_allocator(void)
if ( next_free )
map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
if ( curr_free )
- free_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 0);
+ init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
}
}
-/* Hand the specified arbitrary page range to the specified heap zone. */
+/*
+ * Hand the specified arbitrary page range to the specified heap zone
+ * checking the node_id of the previous page. If they differ and the
+ * latter is not on a MAX_ORDER boundary, then we reserve the page by
+ * not freeing it to the buddy allocator.
+ */
+#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
void init_heap_pages(
unsigned int zone, struct page_info *pg, unsigned long nr_pages)
{
+ unsigned int nid_curr,nid_prev;
unsigned long i;
ASSERT(zone < NR_ZONES);
+ if ( likely(page_to_mfn(pg) != 0) )
+ nid_prev = phys_to_nid(page_to_maddr(pg-1));
+ else
+ nid_prev = phys_to_nid(page_to_maddr(pg));
+
for ( i = 0; i < nr_pages; i++ )
- free_heap_pages(zone, pg+i, 0);
+ {
+ nid_curr = phys_to_nid(page_to_maddr(pg+i));
+
+ /*
+ * free pages of the same node, or if they differ, but are on a
+ * MAX_ORDER alignement boundary (which already get reserved)
+ */
+ if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
+ MAX_ORDER_ALIGNED) )
+ free_heap_pages(zone, pg+i, 0);
+ else
+ printk("Reserving non-aligned node boundary @ mfn %lu\n",
+ page_to_mfn(pg+i));
+
+ nid_prev = nid_curr;
+ }
}
-
/* Allocate 2^@order contiguous pages. */
-struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order)
+struct page_info *alloc_heap_pages(unsigned int zone, unsigned int cpu,
+ unsigned int order)
{
- int i;
+ unsigned int i,j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
+ unsigned int request = (1UL << order);
struct page_info *pg;
+ ASSERT(node >= 0);
+ ASSERT(node < num_nodes);
ASSERT(zone < NR_ZONES);
if ( unlikely(order > MAX_ORDER) )
@@ -302,29 +336,46 @@ struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order)
spin_lock(&heap_lock);
- /* Find smallest order which can satisfy the request. */
- for ( i = order; i <= MAX_ORDER; i++ )
- if ( !list_empty(&heap[zone][i]) )
- goto found;
+ /* start with requested node, but exhaust all node memory
+ * in requested zone before failing, only calc new node
+ * value if we fail to find memory in target node, this avoids
+ * needless computation on fast-path */
+ for ( i = 0; i < num_nodes; i++ )
+ {
+ /* check if target node can support the allocation */
+ if ( avail[zone][node] >= request )
+ {
+ /* Find smallest order which can satisfy the request. */
+ for ( j = order; j <= MAX_ORDER; j++ )
+ {
+ if ( !list_empty(&heap[zone][node][j]) )
+ goto found;
+ }
+ }
+ /* pick next node, wrapping around if needed */
+ if ( ++node == num_nodes )
+ node = 0;
+ }
/* No suitable memory blocks. Fail the request. */
spin_unlock(&heap_lock);
return NULL;
found:
- pg = list_entry(heap[zone][i].next, struct page_info, list);
+ pg = list_entry(heap[zone][node][j].next, struct page_info, list);
list_del(&pg->list);
/* We may have to halve the chunk a number of times. */
- while ( i != order )
+ while ( j != order )
{
- PFN_ORDER(pg) = --i;
- list_add_tail(&pg->list, &heap[zone][i]);
- pg += 1 << i;
+ PFN_ORDER(pg) = --j;
+ list_add_tail(&pg->list, &heap[zone][node][j]);
+ pg += 1 << j;
}
- map_alloc(page_to_mfn(pg), 1 << order);
- avail[zone] -= 1 << order;
+ map_alloc(page_to_mfn(pg), request);
+ ASSERT(avail[zone][node] >= request);
+ avail[zone][node] -= request;
spin_unlock(&heap_lock);
@@ -337,14 +388,17 @@ void free_heap_pages(
unsigned int zone, struct page_info *pg, unsigned int order)
{
unsigned long mask;
+ int node = phys_to_nid(page_to_maddr(pg));
ASSERT(zone < NR_ZONES);
ASSERT(order <= MAX_ORDER);
+ ASSERT(node >= 0);
+ ASSERT(node < num_online_nodes());
spin_lock(&heap_lock);
map_free(page_to_mfn(pg), 1 << order);
- avail[zone] += 1 << order;
+ avail[zone][node] += 1 << order;
/* Merge chunks as far as possible. */
while ( order < MAX_ORDER )
@@ -370,10 +424,13 @@ void free_heap_pages(
}
order++;
+
+ /* after merging, pg should be in the same node */
+ ASSERT(phys_to_nid(page_to_maddr(pg)) == node );
}
PFN_ORDER(pg) = order;
- list_add_tail(&pg->list, &heap[zone][order]);
+ list_add_tail(&pg->list, &heap[zone][node][order]);
spin_unlock(&heap_lock);
}
@@ -466,7 +523,7 @@ void *alloc_xenheap_pages(unsigned int order)
int i;
local_irq_save(flags);
- pg = alloc_heap_pages(MEMZONE_XEN, order);
+ pg = alloc_heap_pages(MEMZONE_XEN, smp_processor_id(), order);
local_irq_restore(flags);
if ( unlikely(pg == NULL) )
@@ -580,8 +637,9 @@ int assign_pages(
}
-struct page_info *alloc_domheap_pages(
- struct domain *d, unsigned int order, unsigned int memflags)
+struct page_info *__alloc_domheap_pages(
+ struct domain *d, unsigned int cpu, unsigned int order,
+ unsigned int memflags)
{
struct page_info *pg = NULL;
cpumask_t mask;
@@ -591,17 +649,17 @@ struct page_info *alloc_domheap_pages(
if ( !(memflags & MEMF_dma) )
{
- pg = alloc_heap_pages(MEMZONE_DOM, order);
+ pg = alloc_heap_pages(MEMZONE_DOM, cpu, order);
/* Failure? Then check if we can fall back to the DMA pool. */
if ( unlikely(pg == NULL) &&
((order > MAX_ORDER) ||
- (avail[MEMZONE_DMADOM] <
+ (avail_heap_pages(MEMZONE_DMADOM,-1) <
(lowmem_emergency_pool_pages + (1UL << order)))) )
return NULL;
}
if ( pg == NULL )
- if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL )
+ if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, cpu, order)) == NULL )
return NULL;
mask = pg->u.free.cpumask;
@@ -640,6 +698,11 @@ struct page_info *alloc_domheap_pages(
return pg;
}
+inline struct page_info *alloc_domheap_pages(
+ struct domain *d, unsigned int order, unsigned int flags)
+{
+ return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
+}
void free_domheap_pages(struct page_info *pg, unsigned int order)
{
@@ -714,13 +777,27 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
}
+unsigned long avail_heap_pages(int zone, int node)
+{
+ int i,j, num_nodes = num_online_nodes();
+ unsigned long free_pages = 0;
+
+ for (i=0; i<NR_ZONES; i++)
+ if ( (zone == -1) || (zone == i) )
+ for (j=0; j < num_nodes; j++)
+ if ( (node == -1) || (node == j) )
+ free_pages += avail[i][j];
+
+ return free_pages;
+}
+
unsigned long avail_domheap_pages(void)
{
unsigned long avail_nrm, avail_dma;
+
+ avail_nrm = avail_heap_pages(MEMZONE_DOM,-1);
- avail_nrm = avail[MEMZONE_DOM];
-
- avail_dma = avail[MEMZONE_DMADOM];
+ avail_dma = avail_heap_pages(MEMZONE_DMADOM,-1);
if ( avail_dma > lowmem_emergency_pool_pages )
avail_dma -= lowmem_emergency_pool_pages;
else
@@ -729,6 +806,10 @@ unsigned long avail_domheap_pages(void)
return avail_nrm + avail_dma;
}
+unsigned long avail_nodeheap_pages(int node)
+{
+ return avail_heap_pages(-1, node);
+}
static void pagealloc_keyhandler(unsigned char key)
{
@@ -736,9 +817,9 @@ static void pagealloc_keyhandler(unsigned char key)
printk(" Xen heap: %lukB free\n"
" DMA heap: %lukB free\n"
" Dom heap: %lukB free\n",
- avail[MEMZONE_XEN]<<(PAGE_SHIFT-10),
- avail[MEMZONE_DMADOM]<<(PAGE_SHIFT-10),
- avail[MEMZONE_DOM]<<(PAGE_SHIFT-10));
+ avail_heap_pages(MEMZONE_XEN, -1) << (PAGE_SHIFT-10),
+ avail_heap_pages(MEMZONE_DMADOM, -1) <<(PAGE_SHIFT-10),
+ avail_heap_pages(MEMZONE_DOM, -1) <<(PAGE_SHIFT-10));
}
@@ -806,6 +887,46 @@ unsigned long avail_scrub_pages(void)
return scrub_pages;
}
+static unsigned long count_bucket(struct list_head* l, int order)
+{
+ unsigned long total_pages = 0;
+ int pages = 1 << order;
+ struct page_info *pg;
+
+ list_for_each_entry(pg, l, list)
+ total_pages += pages;
+
+ return total_pages;
+}
+
+static void dump_heap(unsigned char key)
+{
+ s_time_t now = NOW();
+ int i,j,k;
+ unsigned long total;
+
+ printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
+ (u32)(now>>32), (u32)now);
+
+ for (i=0; i<NR_ZONES; i++ )
+ for (j=0;j<MAX_NUMNODES;j++)
+ for (k=0;k<=MAX_ORDER;k++)
+ if ( !list_empty(&heap[i][j][k]) )
+ {
+ total = count_bucket(&heap[i][j][k], k);
+ printk("heap[%d][%d][%d]-> %lu pages\n",
+ i, j, k, total);
+ }
+}
+
+static __init int register_heap_trigger(void)
+{
+ register_keyhandler('H', dump_heap, "dump heap info");
+ return 0;
+}
+__initcall(register_heap_trigger);
+
+
static __init int page_scrub_init(void)
{
open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
diff --git a/xen/drivers/acpi/Makefile b/xen/drivers/acpi/Makefile
index 68dafe3a52..08844a529d 100644
--- a/xen/drivers/acpi/Makefile
+++ b/xen/drivers/acpi/Makefile
@@ -1 +1,2 @@
obj-y += tables.o
+obj-y += numa.o
diff --git a/xen/drivers/acpi/numa.c b/xen/drivers/acpi/numa.c
new file mode 100644
index 0000000000..ecf426ece4
--- /dev/null
+++ b/xen/drivers/acpi/numa.c
@@ -0,0 +1,216 @@
+/*
+ * acpi_numa.c - ACPI NUMA support
+ *
+ * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ */
+#if 0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#endif
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/acpi.h>
+#include <xen/numa.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acmacros.h>
+#include <asm/page.h> /* __va() */
+
+#define ACPI_NUMA 0x80000000
+#define _COMPONENT ACPI_NUMA
+ACPI_MODULE_NAME("numa")
+
+extern int __init acpi_table_parse_madt_family(enum acpi_table_id id,
+ unsigned long madt_size,
+ int entry_id,
+ acpi_madt_entry_handler handler,
+ unsigned int max_entries);
+
+void __init acpi_table_print_srat_entry(acpi_table_entry_header * header)
+{
+
+ ACPI_FUNCTION_NAME("acpi_table_print_srat_entry");
+
+ if (!header)
+ return;
+
+ switch (header->type) {
+
+ case ACPI_SRAT_PROCESSOR_AFFINITY:
+#ifdef ACPI_DEBUG_OUTPUT
+ {
+ struct acpi_table_processor_affinity *p =
+ (struct acpi_table_processor_affinity *)header;
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
+ p->apic_id, p->lsapic_eid,
+ p->proximity_domain,
+ p->flags.
+ enabled ? "enabled" : "disabled"));
+ }
+#endif /* ACPI_DEBUG_OUTPUT */
+ break;
+
+ case ACPI_SRAT_MEMORY_AFFINITY:
+#ifdef ACPI_DEBUG_OUTPUT
+ {
+ struct acpi_table_memory_affinity *p =
+ (struct acpi_table_memory_affinity *)header;
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "SRAT Memory (0x%08x%08x length 0x%08x%08x type 0x%x) in proximity domain %d %s%s\n",
+ p->base_addr_hi, p->base_addr_lo,
+ p->length_hi, p->length_lo,
+ p->memory_type, p->proximity_domain,
+ p->flags.
+ enabled ? "enabled" : "disabled",
+ p->flags.
+ hot_pluggable ? " hot-pluggable" :
+ ""));
+ }
+#endif /* ACPI_DEBUG_OUTPUT */
+ break;
+
+ default:
+ printk(KERN_WARNING PREFIX
+ "Found unsupported SRAT entry (type = 0x%x)\n",
+ header->type);
+ break;
+ }
+}
+
+static int __init acpi_parse_slit(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_slit *slit;
+ u32 localities;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ slit = (struct acpi_table_slit *)__va(phys_addr);
+
+ /* downcast just for %llu vs %lu for i386/ia64 */
+ localities = (u32) slit->localities;
+
+ acpi_numa_slit_init(slit);
+
+ return 0;
+}
+
+static int __init
+acpi_parse_processor_affinity(acpi_table_entry_header * header,
+ const unsigned long end)
+{
+ struct acpi_table_processor_affinity *processor_affinity;
+
+ processor_affinity = (struct acpi_table_processor_affinity *)header;
+ if (!processor_affinity)
+ return -EINVAL;
+
+ acpi_table_print_srat_entry(header);
+
+ /* let architecture-dependent part to do it */
+ acpi_numa_processor_affinity_init(processor_affinity);
+
+ return 0;
+}
+
+static int __init
+acpi_parse_memory_affinity(acpi_table_entry_header * header,
+ const unsigned long end)
+{
+ struct acpi_table_memory_affinity *memory_affinity;
+
+ memory_affinity = (struct acpi_table_memory_affinity *)header;
+ if (!memory_affinity)
+ return -EINVAL;
+
+ acpi_table_print_srat_entry(header);
+
+ /* let architecture-dependent part to do it */
+ acpi_numa_memory_affinity_init(memory_affinity);
+
+ return 0;
+}
+
+static int __init acpi_parse_srat(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_srat *srat;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ srat = (struct acpi_table_srat *)__va(phys_addr);
+
+ return 0;
+}
+
+int __init
+acpi_table_parse_srat(enum acpi_srat_entry_id id,
+ acpi_madt_entry_handler handler, unsigned int max_entries)
+{
+ return acpi_table_parse_madt_family(ACPI_SRAT,
+ sizeof(struct acpi_table_srat), id,
+ handler, max_entries);
+}
+
+int __init acpi_numa_init(void)
+{
+ int result;
+
+ /* SRAT: Static Resource Affinity Table */
+ result = acpi_table_parse(ACPI_SRAT, acpi_parse_srat);
+
+ if (result > 0) {
+ result = acpi_table_parse_srat(ACPI_SRAT_PROCESSOR_AFFINITY,
+ acpi_parse_processor_affinity,
+ NR_CPUS);
+ result = acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS); // IA64 specific
+ }
+
+ /* SLIT: System Locality Information Table */
+ result = acpi_table_parse(ACPI_SLIT, acpi_parse_slit);
+
+ acpi_numa_arch_fixup();
+ return 0;
+}
+
+#if 0
+int acpi_get_pxm(acpi_handle h)
+{
+ unsigned long pxm;
+ acpi_status status;
+ acpi_handle handle;
+ acpi_handle phandle = h;
+
+ do {
+ handle = phandle;
+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+ if (ACPI_SUCCESS(status))
+ return (int)pxm;
+ status = acpi_get_parent(handle, &phandle);
+ } while (ACPI_SUCCESS(status));
+ return -1;
+}
+
+EXPORT_SYMBOL(acpi_get_pxm);
+#endif
diff --git a/xen/include/asm-ia64/vmx_platform.h b/xen/include/asm-ia64/vmx_platform.h
index 33c4003cf3..07d05a68c6 100644
--- a/xen/include/asm-ia64/vmx_platform.h
+++ b/xen/include/asm-ia64/vmx_platform.h
@@ -24,6 +24,8 @@
#include <asm/hvm/vioapic.h>
struct mmio_list;
typedef struct virtual_platform_def {
+ unsigned long buffered_io_va;
+ spinlock_t buffered_io_lock;
unsigned long shared_page_va;
unsigned long pib_base;
unsigned char xtp;
diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h
index 51c4b8e293..227f76325c 100644
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -157,6 +157,9 @@ static inline void check_acpi_pci(void) { }
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
static inline int acpi_irq_balance_set(char *str) { return 0; }
+extern int acpi_scan_nodes(u64 start, u64 end);
+extern int acpi_numa;
+#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
#ifdef CONFIG_ACPI_SLEEP
@@ -173,5 +176,6 @@ extern void acpi_reserve_bootmem(void);
#endif /*CONFIG_ACPI_SLEEP*/
extern u8 x86_acpiid_to_apicid[];
+#define MAX_LOCAL_APIC 256
#endif /*_ASM_ACPI_H*/
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index e2ef90700c..879bdbf80b 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -24,6 +24,11 @@
#define CONFIG_X86_IO_APIC 1
#define CONFIG_HPET_TIMER 1
#define CONFIG_X86_MCE_P4THERMAL 1
+#define CONFIG_ACPI_NUMA 1
+#define CONFIG_NUMA 1
+#define CONFIG_ACPI_SRAT 1
+#define CONFIG_DISCONTIGMEM 1
+#define CONFIG_NUMA_EMU 1
/* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
#define CONFIG_X86_L1_CACHE_SHIFT 7
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 6561519cb1..0ebec779c1 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -23,7 +23,7 @@
#define __ASM_X86_HVM_DOMAIN_H__
#include <asm/hvm/vpic.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vioapic.h>
#include <public/hvm/params.h>
diff --git a/xen/include/asm-x86/hvm/vpit.h b/xen/include/asm-x86/hvm/vpt.h
index 83b1af2622..ada8936af7 100644
--- a/xen/include/asm-x86/hvm/vpit.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -1,5 +1,5 @@
/*
- * vpit.h: Virtual PIT definitions
+ * vpt.h: Virtual Platform Timer definitions
*
* Copyright (c) 2004, Intel Corporation.
*
@@ -17,8 +17,8 @@
* Place - Suite 330, Boston, MA 02111-1307 USA.
*/
-#ifndef __ASM_X86_HVM_VPIT_H__
-#define __ASM_X86_HVM_VPIT_H__
+#ifndef __ASM_X86_HVM_VPT_H__
+#define __ASM_X86_HVM_VPT_H__
#include <xen/config.h>
#include <xen/init.h>
@@ -70,7 +70,17 @@ typedef struct RTCState {
struct vcpu *vcpu;
struct periodic_time *pt;
} RTCState;
-
+
+#define FREQUENCE_PMTIMER 3579545
+typedef struct PMTState {
+ uint32_t pm1_timer;
+ uint32_t pm1_status;
+ uint64_t last_gtime;
+ struct timer timer;
+ uint64_t scale;
+ struct vcpu *vcpu;
+} PMTState;
+
/*
* Abstract layer of periodic time, one short time.
*/
@@ -95,7 +105,7 @@ struct pl_time { /* platform time */
struct periodic_time periodic_tm;
struct PITState vpit;
struct RTCState vrtc;
- /* TODO: ACPI time */
+ struct PMTState vpmt;
};
static __inline__ s_time_t get_scheduled(
@@ -132,8 +142,10 @@ extern void destroy_periodic_time(struct periodic_time *pt);
void pit_init(struct vcpu *v, unsigned long cpu_khz);
void rtc_init(struct vcpu *v, int base, int irq);
void rtc_deinit(struct domain *d);
+void pmtimer_init(struct vcpu *v, int base);
+void pmtimer_deinit(struct domain *d);
int is_rtc_periodic_irq(void *opaque);
void pt_timer_fn(void *data);
void pit_time_fired(struct vcpu *v, void *priv);
-#endif /* __ASM_X86_HVM_VPIT_H__ */
+#endif /* __ASM_X86_HVM_VPT_H__ */
diff --git a/xen/include/asm-x86/mach-generic/mach_apic.h b/xen/include/asm-x86/mach-generic/mach_apic.h
index 1d3ed4dc67..1e0a6019d6 100644
--- a/xen/include/asm-x86/mach-generic/mach_apic.h
+++ b/xen/include/asm-x86/mach-generic/mach_apic.h
@@ -22,11 +22,7 @@ static inline void enable_apic_mode(void)
return;
}
-/* No sane NUMA support right now. We should parse ACPI SRAT. */
-static inline int apicid_to_node(int logical_apicid)
-{
- return 0;
-}
+#define apicid_to_node(apicid) ((int)apicid_to_node[(u8)apicid])
extern u8 bios_cpu_apicid[];
static inline int cpu_present_to_apicid(int mps_cpu)
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
new file mode 100644
index 0000000000..caa6491c96
--- /dev/null
+++ b/xen/include/asm-x86/numa.h
@@ -0,0 +1,78 @@
+#ifndef _ASM_X8664_NUMA_H
+#define _ASM_X8664_NUMA_H 1
+
+#include <xen/cpumask.h>
+
+#define NODES_SHIFT 6
+
+extern unsigned char cpu_to_node[];
+extern cpumask_t node_to_cpumask[];
+
+#define cpu_to_node(cpu) (cpu_to_node[cpu])
+#define parent_node(node) (node)
+#define node_to_first_cpu(node) (__ffs(node_to_cpumask[node]))
+#define node_to_cpumask(node) (node_to_cpumask[node])
+
+struct node {
+ u64 start,end;
+};
+
+extern int compute_hash_shift(struct node *nodes, int numnodes);
+extern int pxm_to_node(int nid);
+
+#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
+#define VIRTUAL_BUG_ON(x)
+#define NODEMAPSIZE 0xfff
+
+extern void numa_add_cpu(int cpu);
+extern void numa_init_array(void);
+extern int numa_off;
+
+extern void numa_set_node(int cpu, int node);
+
+extern void setup_node_bootmem(int nodeid, u64 start, u64 end);
+extern unsigned char apicid_to_node[256];
+#ifdef CONFIG_NUMA
+extern void __init init_cpu_to_node(void);
+
+static inline void clear_node_cpumask(int cpu)
+{
+ clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+}
+
+/* Simple perfect hash to map physical addresses to node numbers */
+extern int memnode_shift;
+extern u8 memnodemap[NODEMAPSIZE];
+
+struct node_data {
+ unsigned long node_start_pfn;
+ unsigned long node_spanned_pages;
+ unsigned int node_id;
+};
+
+extern struct node_data node_data[];
+
+static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
+{
+ unsigned nid;
+ VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
+ nid = memnodemap[addr >> memnode_shift];
+ VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
+ return nid;
+}
+
+#define NODE_DATA(nid) (&(node_data[nid]))
+
+#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
+ NODE_DATA(nid)->node_spanned_pages)
+
+
+#else
+#define init_cpu_to_node() do {} while (0)
+#define clear_node_cpumask(cpu) do {} while (0)
+#endif
+
+#define NUMA_NO_NODE 0xff
+
+#endif
diff --git a/xen/include/public/arch-ia64.h b/xen/include/public/arch-ia64.h
index fd05ff9233..d7b35b4524 100644
--- a/xen/include/public/arch-ia64.h
+++ b/xen/include/public/arch-ia64.h
@@ -68,6 +68,9 @@ typedef unsigned long xen_ulong_t;
#define STORE_PAGE_START (IO_PAGE_START + IO_PAGE_SIZE)
#define STORE_PAGE_SIZE PAGE_SIZE
+#define BUFFER_IO_PAGE_START (STORE_PAGE_START+PAGE_SIZE)
+#define BUFFER_IO_PAGE_SIZE PAGE_SIZE
+
#define IO_SAPIC_START 0xfec00000UL
#define IO_SAPIC_SIZE 0x100000
diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h
index 8e92b004b1..992505e1c5 100644
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -86,6 +86,10 @@ struct buffered_iopage {
}; /* sizeof this structure must be in one page */
typedef struct buffered_iopage buffered_iopage_t;
+#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c010
+#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
+
#endif /* _IOREQ_H_ */
/*
diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h
index e3f94d5843..f79472da77 100644
--- a/xen/include/xen/config.h
+++ b/xen/include/xen/config.h
@@ -50,5 +50,7 @@
#endif /* !__ASSEMBLY__ */
#define fastcall
+#define __cpuinitdata
+#define __cpuinit
#endif /* __XEN_CONFIG_H__ */
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 8c9713971b..4d05f6917f 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -45,7 +45,8 @@ void end_boot_allocator(void);
/* Generic allocator. These functions are *not* interrupt-safe. */
void init_heap_pages(
unsigned int zone, struct page_info *pg, unsigned long nr_pages);
-struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order);
+struct page_info *alloc_heap_pages(
+ unsigned int zone, unsigned int cpu, unsigned int order);
void free_heap_pages(
unsigned int zone, struct page_info *pg, unsigned int order);
void scrub_heap_pages(void);
@@ -61,8 +62,12 @@ void free_xenheap_pages(void *v, unsigned int order);
void init_domheap_pages(paddr_t ps, paddr_t pe);
struct page_info *alloc_domheap_pages(
struct domain *d, unsigned int order, unsigned int memflags);
+struct page_info *__alloc_domheap_pages(
+ struct domain *d, unsigned int cpu, unsigned int order,
+ unsigned int memflags);
void free_domheap_pages(struct page_info *pg, unsigned int order);
unsigned long avail_domheap_pages(void);
+unsigned long avail_heap_pages(int zone, int node);
#define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
#define free_domheap_page(p) (free_domheap_pages(p,0))
diff --git a/xen/include/xen/nodemask.h b/xen/include/xen/nodemask.h
new file mode 100644
index 0000000000..30ed6f4524
--- /dev/null
+++ b/xen/include/xen/nodemask.h
@@ -0,0 +1,338 @@
+#ifndef __LINUX_NODEMASK_H
+#define __LINUX_NODEMASK_H
+
+/*
+ * Nodemasks provide a bitmap suitable for representing the
+ * set of Node's in a system, one bit position per Node number.
+ *
+ * See detailed comments in the file linux/bitmap.h describing the
+ * data type on which these nodemasks are based.
+ *
+ * For details of nodemask_scnprintf() and nodemask_parse(),
+ * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ *
+ * The available nodemask operations are:
+ *
+ * void node_set(node, mask) turn on bit 'node' in mask
+ * void node_clear(node, mask) turn off bit 'node' in mask
+ * void nodes_setall(mask) set all bits
+ * void nodes_clear(mask) clear all bits
+ * int node_isset(node, mask) true iff bit 'node' set in mask
+ * int node_test_and_set(node, mask) test and set bit 'node' in mask
+ *
+ * void nodes_and(dst, src1, src2) dst = src1 & src2 [intersection]
+ * void nodes_or(dst, src1, src2) dst = src1 | src2 [union]
+ * void nodes_xor(dst, src1, src2) dst = src1 ^ src2
+ * void nodes_andnot(dst, src1, src2) dst = src1 & ~src2
+ * void nodes_complement(dst, src) dst = ~src
+ *
+ * int nodes_equal(mask1, mask2) Does mask1 == mask2?
+ * int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect?
+ * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2?
+ * int nodes_empty(mask) Is mask empty (no bits sets)?
+ * int nodes_full(mask) Is mask full (all bits sets)?
+ * int nodes_weight(mask) Hamming weight - number of set bits
+ *
+ * void nodes_shift_right(dst, src, n) Shift right
+ * void nodes_shift_left(dst, src, n) Shift left
+ *
+ * int first_node(mask) Number lowest set bit, or MAX_NUMNODES
+ * int next_node(node, mask) Next node past 'node', or MAX_NUMNODES
+ * int first_unset_node(mask) First node not set in mask, or
+ * MAX_NUMNODES.
+ *
+ * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set
+ * NODE_MASK_ALL Initializer - all bits set
+ * NODE_MASK_NONE Initializer - no bits set
+ * unsigned long *nodes_addr(mask) Array of unsigned long's in mask
+ *
+ * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing
+ * int nodemask_parse(ubuf, ulen, mask) Parse ascii string as nodemask
+ *
+ * for_each_node_mask(node, mask) for-loop node over mask
+ *
+ * int num_online_nodes() Number of online Nodes
+ * int num_possible_nodes() Number of all possible Nodes
+ *
+ * int node_online(node) Is some node online?
+ * int node_possible(node) Is some node possible?
+ *
+ * int any_online_node(mask) First online node in mask
+ *
+ * node_set_online(node) set bit 'node' in node_online_map
+ * node_set_offline(node) clear bit 'node' in node_online_map
+ *
+ * for_each_node(node) for-loop node over node_possible_map
+ * for_each_online_node(node) for-loop node over node_online_map
+ *
+ * Subtlety:
+ * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway)
+ * to generate slightly worse code. So use a simple one-line #define
+ * for node_isset(), instead of wrapping an inline inside a macro, the
+ * way we do the other calls.
+ */
+
+#include <xen/kernel.h>
+#include <xen/bitmap.h>
+#include <xen/numa.h>
+
+typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
+extern nodemask_t _unused_nodemask_arg_;
+
+#define node_set(node, dst) __node_set((node), &(dst))
+static inline void __node_set(int node, volatile nodemask_t *dstp)
+{
+ set_bit(node, dstp->bits);
+}
+
+#define node_clear(node, dst) __node_clear((node), &(dst))
+static inline void __node_clear(int node, volatile nodemask_t *dstp)
+{
+ clear_bit(node, dstp->bits);
+}
+
+#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
+static inline void __nodes_setall(nodemask_t *dstp, int nbits)
+{
+ bitmap_fill(dstp->bits, nbits);
+}
+
+#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
+static inline void __nodes_clear(nodemask_t *dstp, int nbits)
+{
+ bitmap_zero(dstp->bits, nbits);
+}
+
+/* No static inline type checking - see Subtlety (1) above. */
+#define node_isset(node, nodemask) test_bit((node), (nodemask).bits)
+
+#define node_test_and_set(node, nodemask) \
+ __node_test_and_set((node), &(nodemask))
+static inline int __node_test_and_set(int node, nodemask_t *addr)
+{
+ return test_and_set_bit(node, addr->bits);
+}
+
+#define nodes_and(dst, src1, src2) \
+ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
+ const nodemask_t *src2p, int nbits)
+{
+ bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_or(dst, src1, src2) \
+ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
+ const nodemask_t *src2p, int nbits)
+{
+ bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_xor(dst, src1, src2) \
+ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
+ const nodemask_t *src2p, int nbits)
+{
+ bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_andnot(dst, src1, src2) \
+ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
+ const nodemask_t *src2p, int nbits)
+{
+ bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_complement(dst, src) \
+ __nodes_complement(&(dst), &(src), MAX_NUMNODES)
+static inline void __nodes_complement(nodemask_t *dstp,
+ const nodemask_t *srcp, int nbits)
+{
+ bitmap_complement(dstp->bits, srcp->bits, nbits);
+}
+
+#define nodes_equal(src1, src2) \
+ __nodes_equal(&(src1), &(src2), MAX_NUMNODES)
+static inline int __nodes_equal(const nodemask_t *src1p,
+ const nodemask_t *src2p, int nbits)
+{
+ return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_intersects(src1, src2) \
+ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
+static inline int __nodes_intersects(const nodemask_t *src1p,
+ const nodemask_t *src2p, int nbits)
+{
+ return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_subset(src1, src2) \
+ __nodes_subset(&(src1), &(src2), MAX_NUMNODES)
+static inline int __nodes_subset(const nodemask_t *src1p,
+ const nodemask_t *src2p, int nbits)
+{
+ return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
+static inline int __nodes_empty(const nodemask_t *srcp, int nbits)
+{
+ return bitmap_empty(srcp->bits, nbits);
+}
+
+#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
+static inline int __nodes_full(const nodemask_t *srcp, int nbits)
+{
+ return bitmap_full(srcp->bits, nbits);
+}
+
+#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
+static inline int __nodes_weight(const nodemask_t *srcp, int nbits)
+{
+ return bitmap_weight(srcp->bits, nbits);
+}
+
+#define nodes_shift_right(dst, src, n) \
+ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
+static inline void __nodes_shift_right(nodemask_t *dstp,
+ const nodemask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define nodes_shift_left(dst, src, n) \
+ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
+static inline void __nodes_shift_left(nodemask_t *dstp,
+ const nodemask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+
+/* FIXME: better would be to fix all architectures to never return
+ > MAX_NUMNODES, then the silly min_ts could be dropped. */
+
+#define first_node(src) __first_node(&(src))
+static inline int __first_node(const nodemask_t *srcp)
+{
+ return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
+}
+
+#define next_node(n, src) __next_node((n), &(src))
+static inline int __next_node(int n, const nodemask_t *srcp)
+{
+ return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
+}
+
+#define nodemask_of_node(node) \
+({ \
+ typeof(_unused_nodemask_arg_) m; \
+ if (sizeof(m) == sizeof(unsigned long)) { \
+ m.bits[0] = 1UL<<(node); \
+ } else { \
+ nodes_clear(m); \
+ node_set((node), m); \
+ } \
+ m; \
+})
+
+#define first_unset_node(mask) __first_unset_node(&(mask))
+static inline int __first_unset_node(const nodemask_t *maskp)
+{
+ return min_t(int,MAX_NUMNODES,
+ find_first_zero_bit(maskp->bits, MAX_NUMNODES));
+}
+
+#define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
+
+#if MAX_NUMNODES <= BITS_PER_LONG
+
+#define NODE_MASK_ALL \
+((nodemask_t) { { \
+ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \
+} })
+
+#else
+
+#define NODE_MASK_ALL \
+((nodemask_t) { { \
+ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \
+ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \
+} })
+
+#endif
+
+#define NODE_MASK_NONE \
+((nodemask_t) { { \
+ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \
+} })
+
+#define nodes_addr(src) ((src).bits)
+
+#if 0
+#define nodemask_scnprintf(buf, len, src) \
+ __nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES)
+static inline int __nodemask_scnprintf(char *buf, int len,
+ const nodemask_t *srcp, int nbits)
+{
+ return bitmap_scnprintf(buf, len, srcp->bits, nbits);
+}
+
+#define nodemask_parse(ubuf, ulen, dst) \
+ __nodemask_parse((ubuf), (ulen), &(dst), MAX_NUMNODES)
+static inline int __nodemask_parse(const char __user *buf, int len,
+ nodemask_t *dstp, int nbits)
+{
+ return bitmap_parse(buf, len, dstp->bits, nbits);
+}
+#endif
+
+#if MAX_NUMNODES > 1
+#define for_each_node_mask(node, mask) \
+ for ((node) = first_node(mask); \
+ (node) < MAX_NUMNODES; \
+ (node) = next_node((node), (mask)))
+#else /* MAX_NUMNODES == 1 */
+#define for_each_node_mask(node, mask) \
+ if (!nodes_empty(mask)) \
+ for ((node) = 0; (node) < 1; (node)++)
+#endif /* MAX_NUMNODES */
+
+/*
+ * The following particular system nodemasks and operations
+ * on them manage all possible and online nodes.
+ */
+
+extern nodemask_t node_online_map;
+extern nodemask_t node_possible_map;
+
+#if MAX_NUMNODES > 1
+#define num_online_nodes() nodes_weight(node_online_map)
+#define num_possible_nodes() nodes_weight(node_possible_map)
+#define node_online(node) node_isset((node), node_online_map)
+#define node_possible(node) node_isset((node), node_possible_map)
+#else
+#define num_online_nodes() 1
+#define num_possible_nodes() 1
+#define node_online(node) ((node) == 0)
+#define node_possible(node) ((node) == 0)
+#endif
+
+#define any_online_node(mask) \
+({ \
+ int node; \
+ for_each_node_mask(node, (mask)) \
+ if (node_online(node)) \
+ break; \
+ node; \
+})
+
+#define node_set_online(node) set_bit((node), node_online_map.bits)
+#define node_set_offline(node) clear_bit((node), node_online_map.bits)
+
+#define for_each_node(node) for_each_node_mask((node), node_possible_map)
+#define for_each_online_node(node) for_each_node_mask((node), node_online_map)
+
+#endif /* __LINUX_NODEMASK_H */
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
new file mode 100644
index 0000000000..9585fc9c48
--- /dev/null
+++ b/xen/include/xen/numa.h
@@ -0,0 +1,13 @@
+#ifndef _XEN_NUMA_H
+#define _XEN_NUMA_H
+
+#include <xen/config.h>
+#include <asm/numa.h>
+
+#ifndef NODES_SHIFT
+#define NODES_SHIFT 0
+#endif
+
+#define MAX_NUMNODES (1 << NODES_SHIFT)
+
+#endif /* _XEN_NUMA_H */